"""Entry points for running the job scrapers from the CLI or from a caller.

``parse`` handles command-line flags (or keyword arguments when it is called
as a module function), ``login_loop`` iterates over the entries returned by
``config.readConfig``, and ``runner`` follows a site's result pages one by
one, handing every non-empty result to ``writedb``.
"""

import argparse
import config
import sys
from enum import IntEnum
# NOTE(review): requests, extractDomain, the scrap_*/next_url_* functions,
# login and the db helpers are not imported by name; presumably they come
# from the star imports below -- verify against those modules.
from scrap_jobs import *
from login import *
from time import sleep
from db import *

DEBUG = True

# File locations and site roots that are referenced in several places.
_DB_FILE = "../db/sqlite3.db"
_REGION_DB_FILE = "../db/Cantons.db"
_JOBS_CH = 'https://www.jobs.ch'
_INDEED_CH = 'https://ch.indeed.com'
_JOBAGENT_DOMAINS = ('https://www.jobagent.ch', 'https://software-job.ch')


def log(*s):
    """Print the given values, space separated, when ``DEBUG`` is enabled."""
    if DEBUG:
        # Unpack the values; printing the tuple itself would show
        # "('msg', value)" instead of "msg value".
        print(*s)


def choose_scraper(entry, session):
    """Run the scraper that matches the domain of ``entry.scrapurl``.

    Args:
        entry: Object exposing a ``scrapurl`` attribute; it is forwarded
            unchanged to ``runner``.
        session: Session used for the requests. Any falsy value (the callers
            use 0 for "no login happened") is replaced by a fresh
            ``requests.Session()``.
    """
    if not session:
        session = requests.Session()
    domain = extractDomain(entry.scrapurl)
    if domain == _JOBS_CH:
        runner(entry, session, scrap_jobs, next_url_jobs_ch)
    elif domain in _JOBAGENT_DOMAINS:
        runner(entry, session, scrap_jobagent, next_url_jobagent)
    elif domain == _INDEED_CH:
        runner(entry, session, scrap_indeed_com, next_url_indeed_com)
    else:
        # Previously an unknown domain was skipped without any feedback.
        print("no scraper available for domain:", domain)


def _build_arg_parser():
    """Create the argument parser with every supported command-line flag."""
    # argparse adds -h/--help on its own, so no explicit help flag is needed.
    parser = argparse.ArgumentParser()
    parser.add_argument("-c", "--config", help="Specific a config file to use,from where to scrap the jobs")
    parser.add_argument("-t", "--test", help="only for test purposes while developing", action="store_true")
    parser.add_argument("--importregiondb", help="Import a database used for querring by Regions or Cantons", action="store_true")
    parser.add_argument("--initdb", help="Initialice a new db from scratch without entrys", action="store_true")
    parser.add_argument("--rmdb", help="!!reove existing db!!DATALOSS!!", action="store_true")
    parser.add_argument("--login", nargs=3, help="login by specifing login and passwor by a given url", metavar=('USERNAME', 'PASSWORD', 'URL'))
    parser.add_argument("--createnwview", help="Create a VIEW for the Region Nordwest Schweiz", action="store_true")
    parser.add_argument("-VC", "--ValidationCheck", help="Check if links are still valid, if not remove them", action="store_true")
    parser.add_argument("--initFilters", help="insert filters table", action="store_true")
    return parser


def parse(**kwargs):
    """Dispatch on command-line flags and/or on keyword arguments.

    When ``sys.argv`` carries arguments they are parsed and every selected
    action is executed in a fixed order. When keyword arguments are given the
    function runs "as a module": it reads ``config`` and ``worker`` from
    ``kwargs`` and starts ``login_loop`` in GUI mode.

    Keyword Args:
        config: Config file forwarded to ``login_loop``; nothing is scraped
            when it is missing or falsy.
        worker: Object whose ``finished`` attribute has an ``emit()`` method;
            it is also forwarded to ``login_loop``.
    """
    if len(sys.argv) > 1:
        worker = 0  # CLI mode has no GUI worker; 0 is passed downstream as before
        args = _build_arg_parser().parse_args()
        if args.test:
            addFineFilter(_DB_FILE, "filters", "testfilterentry")
        if args.importregiondb:
            importdb(_DB_FILE, _REGION_DB_FILE, "Cantons")
        if args.initdb:
            initdb(_DB_FILE)
            createFilterTable(_DB_FILE)
        if args.rmdb:
            rmdb(_DB_FILE, "jobs")
        if args.login:
            user, pw, url = args.login
            # NOTE(review): login_loop calls login() with a single entry
            # argument, here it gets four -- confirm the signature.
            session = login(user, pw, url, url)
            # NOTE(review): choose_scraper reads entry.scrapurl, so passing
            # the plain URL string raises AttributeError. This needs an entry
            # object as produced by config.readConfig -- confirm and fix.
            choose_scraper(url, session)
        if args.config:
            login_loop(args.config, False, worker)
        if args.createnwview:
            createnwview(_DB_FILE)
        if args.ValidationCheck:
            isStillValid(_DB_FILE)
        if args.initFilters:
            createFilterTable(_DB_FILE)

    if kwargs:
        print("no sysargs fiven, running as a module")
        vconfig = kwargs.get('config')
        worker = kwargs.get('worker')
        print("config:", vconfig)
        if vconfig:
            login_loop(vconfig, True, worker)
        # NOTE(review): the original nesting of the next two statements was
        # lost. Signalling completion unconditionally keeps a waiting caller
        # from hanging when no config was supplied -- confirm this is wanted.
        if worker is not None:
            worker.finished.emit()
        print("finished sync job")


def _login_until_done(entry, gui, worker, ret_login):
    """Log in for ``entry``, asking for the password again as needed.

    A session value of -1 is treated as a failed login and triggers another
    attempt. In GUI mode the attempts also stop once
    ``worker.dialog_rejected`` becomes true.

    Args:
        entry: Config entry handed to ``login``; provides ``input_pw`` and
            ``user``.
        gui: Truthy when running with a GUI worker.
        worker: Forwarded to ``entry.input_pw``; in GUI mode its
            ``dialog_rejected`` flag is reset and polled.
        ret_login: Previous ``input_pw`` result, returned unchanged when no
            prompt was necessary.

    Returns:
        Tuple ``(session, ret_login)``.
    """
    session = -1
    if gui:
        log("[pre while] worker.dialog_rejected = ", worker.dialog_rejected)
        worker.dialog_rejected = False
        while session == -1 and not worker.dialog_rejected:
            log("worker.dialog_rejected = ", worker.dialog_rejected)
            session = login(entry)
            # NOTE(review): unlike the console branch below, the prompt is
            # shown after every attempt, even a successful one -- confirm
            # whether this should be guarded by `session == -1` as well.
            ret_login = entry.input_pw(gui, entry.user, worker)
    else:
        while session == -1:
            session = login(entry)
            if session == -1:
                ret_login = entry.input_pw(gui, entry.user, worker)
        log("[login_loop] session:", session)
    return session, ret_login


def login_loop(config_file, gui, worker):
    """Scrape every entry that ``config.readConfig`` yields for a config file.

    ``config.readConfig`` is called repeatedly until it returns 0. For each
    entry a login is performed first when ``entry.loginurl`` is not
    ``'NONE'``; the entry is then handed to ``choose_scraper``. Once
    ``input_pw`` has returned 1 the remaining entries are still read but no
    longer scraped.

    Args:
        config_file: Config file forwarded to ``config.readConfig``.
        gui: Truthy when driven by a GUI worker. The worker's
            ``dialog_rejected`` flag is then reset before each read and an
            entry is skipped when the flag is set after the login attempt.
        worker: GUI worker object (the CLI path passes 0).
    """
    ret_login = 0
    session = 0  # falsy => choose_scraper creates a fresh session
    entry = -1
    while entry != 0:
        if gui:
            worker.dialog_rejected = False
        entry = config.readConfig(config_file, gui, worker)
        if gui:
            print(entry)
        if entry != 0 and ret_login != 1:
            if entry.loginurl != 'NONE':
                session, ret_login = _login_until_done(entry, gui, worker, ret_login)
            if not gui or not worker.dialog_rejected:
                choose_scraper(entry, session)


def runner(entry, session, scrap_func, next_url_func, *, max_pages=50, delay=0.5):
    """Scrape a result list page by page, starting at ``entry.scrapurl``.

    For each page ``scrap_func(url, entry, session)`` is called and a truthy
    result is passed to ``writedb``; ``next_url_func`` then supplies the URL
    of the following page. The walk stops when no further URL is returned,
    when ``max_pages`` pages were processed, or when the page's domain is not
    one of the supported sites.

    Args:
        entry: Config entry providing ``scrapurl``; forwarded to
            ``scrap_func``.
        session: Session forwarded to ``scrap_func`` and ``next_url_func``.
        scrap_func: Callable ``(url, entry, session)`` returning the jobs
            found on a page (falsy when there are none).
        next_url_func: Callable ``(url, session, base)`` returning the next
            page URL, or 0 when there is none.
        max_pages: Upper bound on the number of pages visited.
        delay: Seconds to sleep before each page request.
    """
    page_url = entry.scrapurl
    pages_done = 0
    # A falsy URL (0, None, "") ends the walk; before, only 0 did and other
    # falsy values made the loop idle until the page limit was reached.
    while page_url and pages_done < max_pages:
        sleep(delay)
        domain = extractDomain(page_url)
        print(domain)

        if domain in _JOBAGENT_DOMAINS:
            next_base = 0  # the jobagent pager is called without a base URL
        elif domain in (_JOBS_CH, _INDEED_CH):
            next_base = domain
        else:
            # Nothing can advance the URL for an unknown site, so stop
            # instead of sleeping through the remaining iterations.
            print("unsupported domain, stopping:", domain)
            break

        jobs = scrap_func(page_url, entry, session)
        if jobs:
            writedb(jobs)
        else:
            print("nothing found on this page")

        page_url = next_url_func(page_url, session, next_base)
        if page_url:
            print("main:" + page_url)
        else:
            print("End of listed items, or did not find any other Nächste Seite Buttons")
        pages_done += 1
        print(pages_done)