151 lines
6.2 KiB
Python
151 lines
6.2 KiB
Python
import argparse
|
|
import config
|
|
import sys
|
|
from enum import IntEnum
|
|
from scrap_jobs import *
|
|
from login import *
|
|
from time import sleep
|
|
from db import *
|
|
|
|
# Master switch for the debug output produced by log().
DEBUG = True


def log(*s):
    """Print the given values to stdout when ``DEBUG`` is true.

    The values are forwarded to ``print`` as separate arguments, so they
    are written space-separated like a normal ``print`` call instead of
    as the repr of the packed argument tuple.

    Args:
        *s: Any number of objects to print.
    """
    if DEBUG:
        print(*s)
|
|
|
|
def choose_scraper(entry, session):
    """Run the scraper that belongs to the domain of ``entry.scrapurl``.

    Args:
        entry: Object whose ``scrapurl`` attribute holds the start URL; it
            is passed on to ``runner`` unchanged.
        session: Session to scrape with. A falsy value is replaced by a
            new ``requests.Session()``.

    Domains without a matching branch are ignored; nothing is returned.
    """
    session = session or requests.Session()
    site = extractDomain(entry.scrapurl)

    if site == 'https://www.jobs.ch':
        runner(entry, session, scrap_jobs, next_url_jobs_ch)
    elif site in ('https://software-job.ch', 'https://www.jobagent.ch'):
        # Both domains are handled by the same jobagent scraper pair.
        runner(entry, session, scrap_jobagent, next_url_jobagent)
    elif site == 'https://ch.indeed.com':
        runner(entry, session, scrap_indeed_com, next_url_indeed_com)
|
|
|
|
def parse(**kwargs):
    """Entry point: run the actions requested on the command line and/or by a caller.

    Two independent modes are handled one after the other:

    * Command line: when ``sys.argv`` carries arguments they are parsed and
      every selected action is executed.
    * Module call: when keyword arguments are given, the config named by
      ``config`` is processed through ``login_loop`` in GUI mode and
      ``worker.finished`` is emitted afterwards.

    Args:
        **kwargs: ``config`` (config file to process) and ``worker``
            (object exposing a ``finished`` signal; it is also handed to
            ``login_loop``).
    """
    if len(sys.argv) > 1:
        _run_cli(_build_parser().parse_args())

    if kwargs:
        print("no sysargs fiven, running as a module")
        vconfig = kwargs.get('config')
        worker = kwargs.get('worker')
        print("config:", vconfig)
        if vconfig:
            login_loop(vconfig, True, worker)
        # NOTE(review): the indentation of this file was lost; the two
        # statements below are assumed to run whether or not a config was
        # given - confirm against the original layout.
        worker.finished.emit()
        print("finished sync job")


def _build_parser():
    """Create the argument parser with every option of the command-line interface."""
    parser = argparse.ArgumentParser()
    parser.add_argument("-c", "--config", help="Specific a config file to use,from where to scrap the jobs")
    parser.add_argument("-t", "--test", help="only for test purposes while developing", action="store_true")
    parser.add_argument("--importregiondb", help="Import a database used for querring by Regions or Cantons", action="store_true")
    parser.add_argument("--initdb", help="Initialice a new db from scratch without entrys", action="store_true")
    parser.add_argument("--rmdb", help="!!reove existing db!!DATALOSS!!", action="store_true")
    parser.add_argument("--login", nargs=3, help="login by specifing login and passwor by a given url", metavar=('USERNAME', 'PASSWORD', 'URL'))
    parser.add_argument("--createnwview", help="Create a VIEW for the Region Nordwest Schweiz", action="store_true")
    parser.add_argument("-VC", "--ValidationCheck", help="Check if links are still valid, if not remove them", action="store_true")
    parser.add_argument("--initFilters", help="insert filters table", action="store_true")
    return parser


def _run_cli(args):
    """Execute every action selected in the parsed arguments, in a fixed order."""
    db_path = "../db/sqlite3.db"

    if args.test:
        addFineFilter(db_path, "filters", "testfilterentry")
    if args.importregiondb:
        importdb(db_path, "../db/Cantons.db", "Cantons")
    if args.initdb:
        initdb(db_path)
        createFilterTable(db_path)
    if args.rmdb:
        rmdb(db_path, "jobs")
    if args.login:
        user, pw, url = args.login
        # NOTE(review): login() is called with four arguments here but with
        # a single entry object in login_loop(), and choose_scraper() reads
        # ``entry.scrapurl`` while a plain URL string is passed here - this
        # path looks stale; verify against the login/config modules.
        session = login(user, pw, url, url)
        choose_scraper(url, session)
    if args.config:
        # The command-line mode has no GUI worker; 0 is passed in its place.
        login_loop(args.config, False, 0)
    if args.createnwview:
        createnwview(db_path)
    if args.ValidationCheck:
        isStillValid(db_path)
    if args.initFilters:
        createFilterTable(db_path)
|
|
|
|
|
|
def login_loop(config_file, gui, worker):
    """Process config entries one at a time: log in where required, then scrape.

    ``config.readConfig`` is called once per pass and the loop ends as soon
    as it returns 0. An entry whose ``loginurl`` is not ``'NONE'`` is logged
    in first, retrying while ``login`` returns -1; the entry is then handed
    to ``choose_scraper``.

    Args:
        config_file: Passed through to ``config.readConfig``.
        gui: Truthy for GUI mode, where ``worker.dialog_rejected`` can stop
            the login retries and skip scraping the entry.
        worker: Object carrying the ``dialog_rejected`` flag in GUI mode;
            also passed on to ``readConfig`` and ``input_pw``.

    NOTE(review): the nesting below was reconstructed from a copy of the
    file that had lost its indentation - confirm against the original.
    """
    entry = -1  # anything but 0, so the loop body runs at least once
    login_result = 0
    session = 0

    while entry != 0:
        if gui:
            worker.dialog_rejected = False
            entry = config.readConfig(config_file, gui, worker)
            print(entry)
            if entry != 0 and login_result != 1:
                if entry.loginurl != 'NONE':
                    session = -1
                    log("[pre while] worker.dialog_rejected = ", worker.dialog_rejected)
                    worker.dialog_rejected = False
                    # Retry until login works or the dialog is rejected.
                    # NOTE(review): unlike the non-GUI branch, the password
                    # prompt runs on every pass, even after a successful
                    # login - confirm this is intended.
                    while session == -1 and worker.dialog_rejected == False:
                        log("worker.dialog_rejected = ", worker.dialog_rejected)
                        session = login(entry)
                        login_result = entry.input_pw(gui, entry.user, worker)
                if worker.dialog_rejected == False:
                    choose_scraper(entry, session)
        else:
            entry = config.readConfig(config_file, gui, worker)
            if entry != 0 and login_result != 1:
                if entry.loginurl != 'NONE':
                    session = -1
                    # Retry until login works, prompting only after a failure.
                    while session == -1:
                        session = login(entry)
                        if session == -1:
                            login_result = entry.input_pw(gui, entry.user, worker)
                log("[login_loop] session:", session)
                choose_scraper(entry, session)
|
|
|
|
def runner(entry, session, scrap_func, next_url_func, max_pages=50, delay=0.5):
    """Scrape a paginated result list page by page and store the jobs found.

    Starting at ``entry.scrapurl``, each page is scraped with *scrap_func*,
    any jobs found are written with ``writedb``, and *next_url_func*
    supplies the URL of the following page. The loop ends when there is no
    next URL, when *max_pages* pages have been visited, or when a URL
    belongs to a domain this function does not know.

    Args:
        entry: Object with a ``scrapurl`` attribute; passed to *scrap_func*.
        session: Session handed to *scrap_func* and *next_url_func*.
        scrap_func: Called as ``scrap_func(url, entry, session)``; a truthy
            result is written to the database.
        next_url_func: Called as ``next_url_func(url, session, base)``;
            a falsy result (the original code tested for 0) ends the loop.
        max_pages: Upper bound on the number of pages visited.
        delay: Seconds to sleep before each page is requested.
    """
    # For these domains the next-page helper receives 0 as its third
    # argument; for all other supported domains it receives the domain.
    no_base_domains = ('https://www.jobagent.ch', 'https://software-job.ch')
    supported_domains = no_base_domains + ('https://www.jobs.ch', 'https://ch.indeed.com')

    page = 0
    b_url = entry.scrapurl
    while b_url and page < max_pages:
        sleep(delay)
        domain = extractDomain(b_url)
        print(domain)

        if domain not in supported_domains:
            # Without a matching scraper the URL would never advance, so
            # stop here instead of sleeping through the remaining passes.
            print("unsupported domain, stopping:", domain)
            break

        jobs = scrap_func(b_url, entry, session)
        if jobs:
            writedb(jobs)
        else:
            print("nothing found on this page")

        base = 0 if domain in no_base_domains else domain
        b_url = next_url_func(b_url, session, base)

        if b_url:
            print(f"main:{b_url}")
        else:
            print("End of listed items, or did not find any other Nächste Seite Buttons")

        page += 1
        # NOTE(review): the indentation of this file was lost; the counter
        # is assumed to be printed once per page - confirm.
        print(page)
|