NOTE(review): line breaks and indentation were reconstructed from a collapsed
copy of this patch. The indentation of unchanged context lines is a best guess
and must be confirmed against lib/db.py and lib/sysparse.py before applying.
The blob id on the lib/db.py index line predates the isStillValid rework.

diff --git a/lib/db.py b/lib/db.py
index 4aef0b5..f739385 100644
--- a/lib/db.py
+++ b/lib/db.py
@@ -1,6 +1,7 @@
 import sqlite3
 import mmh3
 import sys
+import requests
 
 DEBUG = False
 def log(*s):
@@ -91,3 +92,43 @@ def writedb(jobs):
             else:
                 print("NEW_ENTRY")
                 cursor.execute("INSERT INTO jobs (star,tag,title,company,location,link,pubdate,hash) VALUES (?,?,?,?,?,?,?,?)",(job.starred,job.tag,job.title,job.company,job.location,job.link,job.date,hash1))
+
+def isStillValid(file, timeout=10):
+    """Remove every job whose link can no longer be fetched.
+
+    Each distinct ``link`` stored in the ``jobs`` table is requested once via
+    HTTP GET. If the request raises or the response is not ``ok``, all rows
+    carrying that link are deleted.
+
+    Args:
+        file: Path of the SQLite database, passed to ``sqlite3.connect``.
+        timeout: Seconds to wait for each request before the link is treated
+            as unreachable (and therefore removed).
+    """
+    connection = sqlite3.connect(file)
+    try:
+        # Read all links up front so the SELECT is finished before any DELETE
+        # touches the same table.
+        links = [row[0] for row in connection.execute("SELECT DISTINCT link from jobs;")]
+        with requests.Session() as session:
+            for link in links:
+                print("row: ",link)
+                # Validity is decided per link; a failure must never leak into
+                # the verdict for the following links.
+                try:
+                    page = session.get(link, timeout=timeout)
+                except (requests.RequestException, ValueError):
+                    print("link is no valid URL so remove item")
+                    still_valid = False
+                else:
+                    still_valid = page.ok
+                if still_valid:
+                    continue
+                print("link is no more valid, remove item")
+                # Commit each deletion on its own so no write transaction stays
+                # open while the remaining links are being requested.
+                with connection:
+                    rm_cursor = connection.execute("DELETE from jobs WHERE link = ?;",(link,))
+                print ("Deletion resultet in: ", rm_cursor.rowcount)
+    finally:
+        connection.close()
diff --git a/lib/sysparse.py b/lib/sysparse.py
index 02f2b5e..b067237 100644
--- a/lib/sysparse.py
+++ b/lib/sysparse.py
@@ -37,6 +37,7 @@ def parse(**kwargs):
     # parser.add_argument("--help",help = "print help")
     parser.add_argument("--login",nargs=3,help = "login by specifing login and passwor by a given url",metavar=('USERNAME','PASSWORD','URL'))
     parser.add_argument("--createnwview",help = "Create a VIEW for the Region Nordwest Schweiz",action="store_true")
+    parser.add_argument("-VC","--ValidationCheck",help = "Check if links are still valid, if not remove them",action="store_true")
     args = parser.parse_args()
 
     if args.test:
@@ -55,7 +56,8 @@ def parse(**kwargs):
         login_loop(args.config,False,worker)
     if args.createnwview:
         createnwview("../db/sqlite3.db")
-
+    if args.ValidationCheck:
+        isStillValid("../db/sqlite3.db")
     if len(kwargs)>0:
         print("no sysargs fiven, running as a module")
         vconfig = kwargs.get('config')