"""SQLite helpers for storing, filtering and pruning scraped job postings."""
import contextlib
import sqlite3
import sys

import mmh3
#import requests
import httplib2

DEBUG = True


def log(*s):
    """Print the given arguments (as one tuple) when DEBUG is enabled."""
    if DEBUG:
        print(s)


@contextlib.contextmanager
def _connect(file, **kwargs):
    """Open *file* with sqlite3, commit/rollback on exit, then close it.

    ``with sqlite3.connect(...)`` on its own only manages the transaction
    and leaves the connection open; this wrapper also closes it.
    """
    connection = sqlite3.connect(file, **kwargs)
    try:
        with connection:
            yield connection
    finally:
        connection.close()


def initdb(file):
    """Create the ``jobs`` table in *file* and terminate the process.

    Raises:
        sqlite3.OperationalError: if the table already exists.
        SystemExit: always, once the table has been created.
    """
    with _connect(file) as connection:
        print("db connection", connection.total_changes)
        cursor = connection.cursor()
        cursor.execute("CREATE TABLE jobs (star TEXT,tag INT ,title TEXT, location TEXT, company TEXT,link TEXT,pubdate TEXT,hash INT)")
    sys.exit()


def rmdb(file, table):
    """Drop *table* from *file* after interactive confirmation, then exit.

    The table is only dropped when the user answers exactly ``Y``.
    NOTE(review): *table* is interpolated into the SQL text (identifiers
    cannot be bound as parameters), so it must come from a trusted caller.

    Raises:
        SystemExit: always, whether or not the table was dropped.
    """
    with _connect(file) as connection:
        question = input("Do you really wont to empty the db(press Y)?")
        if question == "Y":
            cursor = connection.cursor()
            drop_cmd = f"""DROP TABLE {table}"""
            cursor.execute(drop_cmd)
        else:
            print("abroting removing table")
    sys.exit()


def importdb(file, importdb, table):
    """Copy *table* from the database *importdb* into *file* and add a view.

    Attaches *importdb* under the schema name ``regions``, creates *table*
    from ``regions.<table>`` if it does not exist yet, and creates the view
    ``Canoton_Filter`` selecting jobs whose location matches a ``GDENAME``
    of *table* with ``GDEKT = 'ZH'``.

    NOTE(review): *importdb* and *table* are interpolated into the SQL
    text, so they must come from a trusted caller.

    Raises:
        sqlite3.OperationalError: e.g. if the view already exists.
    """
    with _connect(file) as connection:
        print("db connection", connection.total_changes)
        cmd = f"""ATTACH "{importdb}" AS regions"""
        cmd2 = f"""CREATE TABLE IF NOT EXISTS {table} AS SELECT * from regions.{table}"""
        cmd_view = (
            " CREATE VIEW Canoton_Filter AS SELECT * FROM jobs as b WHERE EXISTS "
            f"(SELECT GDENAME FROM {table} as w where w.GDEKT = 'ZH' "
            "AND b.location LIKE GDENAME);"
        )
        cursor = connection.cursor()
        cursor.execute(cmd)
        print(cmd, cmd2)
        cursor.execute(cmd2)
        cursor.execute(cmd_view)
        print("db connection", connection.total_changes)


def createnwview(file):
    """Create the ``Nordwest-SCHWEIZ`` view and the ``filters`` table.

    The view selects jobs whose location matches a ``GDENAME`` in the
    ``Cantons`` table for ``GDEKT`` in ZH, AG or SO.
    """
    with _connect(file) as connection:
        cmd_create_nw_table = (
            'CREATE VIEW IF NOT EXISTS "Nordwest-SCHWEIZ" AS SELECT * FROM jobs as b '
            "WHERE EXISTS (SELECT GDENAME FROM Cantons as w where w.GDEKT = 'ZH' AND b.location LIKE GDENAME) "
            "OR EXISTS (SELECT GDENAME FROM Cantons as w where w.GDEKT = 'AG' AND b.location LIKE GDENAME) "
            "OR EXISTS (SELECT GDENAME FROM Cantons as w where w.GDEKT = 'SO' AND b.location LIKE GDENAME)"
        )
        cursor = connection.cursor()
        cursor.execute(cmd_create_nw_table)
        print("db connection", connection.total_changes)
    # Called after the first connection is released so the two connections
    # never compete for the database file.
    createFilterTable(file)


def createFilterTable(file):
    """Create the single-column ``filters(cmd TEXT)`` table if missing."""
    with _connect(file, timeout=10) as connection:
        cmd_create_filter_table = "CREATE TABLE IF NOT EXISTS \nfilters(cmd TEXT);"
        cursor = connection.cursor()
        cursor.execute(cmd_create_filter_table)
        print("db connection:", connection.total_changes)


def addFineFilter(file, table, filterstr):
    """Insert *filterstr* into ``table.cmd`` unless an equal row exists.

    NOTE(review): *table* is interpolated into the SQL text, so it must
    come from a trusted caller; *filterstr* is bound as a parameter.
    """
    with _connect(file, timeout=10) as connection:
        cmd_createFineFilter = f"""INSERT INTO {table}(cmd) VALUES(?);"""
        cmd_checkIfExists = f"""SELECT * FROM {table} WHERE cmd = ?"""
        cursor = connection.cursor()
        if cursor.execute(cmd_checkIfExists, (filterstr,)).fetchone() is None:
            cursor.execute(cmd_createFineFilter, (filterstr,))


def writedb(jobs, file="../db/sqlite3.db"):
    """Insert every job from *jobs* that is not yet stored in ``jobs``.

    A job is identified by the MurmurHash3 of its title, company, location
    and date concatenated; rows whose hash is already present are skipped.

    Args:
        jobs: iterable of objects exposing ``starred``, ``tag``, ``title``,
            ``company``, ``location``, ``link`` and ``date`` attributes.
        file: path of the SQLite database; defaults to the path that was
            previously hard-coded.
    """
    with _connect(file, timeout=10) as connection:
        connection.execute("pragma journal_mode=wal")
        print("db connection", connection.total_changes)
        cursor = connection.cursor()
        for job in jobs:
            hash1 = mmh3.hash(job.title + job.company + job.location + job.date)
            log(hash1)
            known = cursor.execute("SELECT * FROM jobs WHERE hash = ?", (hash1,)).fetchone()
            if known is not None:
                log("Hash already exist")
                continue
            print("NEW_ENTRY")
            cursor.execute(
                "INSERT INTO jobs (star,tag,title,company,location,link,pubdate,hash) VALUES (?,?,?,?,?,?,?,?)",
                (job.starred, job.tag, job.title, job.company, job.location, job.link, job.date, hash1),
            )


def _link_is_reachable(http, link):
    """Return True when a HEAD request to *link* answers with status < 400.

    An IOError or a RelativeURIError from the request counts as
    unreachable; any other exception propagates to the caller.
    """
    try:
        resp, _content = http.request(link, 'HEAD')
    except IOError as e:
        print("link is no valid URL so remove item")
        print("error: ", e)
        return False
    except httplib2.error.RelativeURIError:
        print("RelativeURIError: Not a valid link")
        return False
    return resp.status < 400


def isStillValid(file, skiprows):
    """Delete every row of ``jobs`` whose link no longer answers.

    Links are read in chunks of 256 rows and probed with an HTTP HEAD
    request. A row is deleted when the request fails or the response
    status is 400 or above. Each link is judged on its own request only:
    no state from a previously checked link is carried over.

    Args:
        file: path of the SQLite database.
        skiprows: currently unused; kept for interface compatibility.

    Returns:
        Always 0.
    """
    with _connect(file, timeout=10) as connection:
        connection.execute("pragma journal_mode=wal")
        cursor = connection.cursor()
        cursor.execute("SELECT link from jobs;")
        while True:
            rows = cursor.fetchmany(256)
            if not rows:
                break
            h = httplib2.Http()
            for row in rows:
                link = row[0]
                print("row: ", link)
                if _link_is_reachable(h, link):
                    continue
                print("link is no more valid, remove item")
                # A separate cursor keeps the pending SELECT on `cursor` intact.
                rm_cursor = connection.cursor()
                rm_itm = rm_cursor.execute("DELETE from jobs WHERE link = ?;", (link,))
                print("Deletion resultet in: ", rm_itm)
        print("result of commit: ", connection.commit())
    return 0