add link validation checker, which removes invalid entries

This commit is contained in:
ccppi 2024-07-22 11:50:52 +02:00
parent 0870255544
commit dd60c722d8
2 changed files with 31 additions and 1 deletions

View File

@ -1,6 +1,7 @@
import sqlite3 import sqlite3
import mmh3 import mmh3
import sys import sys
import requests
DEBUG = False DEBUG = False
def log(*s): def log(*s):
@ -91,3 +92,30 @@ def writedb(jobs):
else: else:
print("NEW_ENTRY") print("NEW_ENTRY")
cursor.execute("INSERT INTO jobs (star,tag,title,company,location,link,pubdate,hash) VALUES (?,?,?,?,?,?,?,?)",(job.starred,job.tag,job.title,job.company,job.location,job.link,job.date,hash1)) cursor.execute("INSERT INTO jobs (star,tag,title,company,location,link,pubdate,hash) VALUES (?,?,?,?,?,?,?,?)",(job.starred,job.tag,job.title,job.company,job.location,job.link,job.date,hash1))
def isStillValid(file, timeout=10):
    """Remove rows from the ``jobs`` table whose ``link`` no longer resolves.

    Every distinct non-NULL ``link`` in ``jobs`` is requested with HTTP GET.
    A row is deleted when the link is not a usable URL (missing or unsupported
    scheme, malformed URL) or when the response status is not OK
    (``Response.ok`` is false, i.e. status >= 400). Links that could not be
    checked because of a network problem (connection error, timeout, ...) are
    kept, so running the check while offline does not empty the table.

    Args:
        file: Path of the SQLite database file.
        timeout: Seconds to wait for each request before giving up.

    Returns:
        The number of rows deleted from ``jobs``.
    """
    connection = sqlite3.connect(file)
    try:
        # Read all links up front: rows are deleted below, and modifying a
        # table while a SELECT on it is still being stepped is not reliable.
        links = [
            row[0]
            for row in connection.execute(
                "SELECT DISTINCT link FROM jobs WHERE link IS NOT NULL;"
            )
        ]
        if not links:
            return 0

        malformed_url_errors = (
            requests.exceptions.MissingSchema,
            requests.exceptions.InvalidSchema,
            requests.exceptions.InvalidURL,
        )
        removed = 0
        # One session for the whole run so connections are reused.
        with requests.Session() as session:
            for link in links:
                print("row: ", link)
                try:
                    page = session.get(link, timeout=timeout)
                except malformed_url_errors:
                    print("link is no valid URL so remove item")
                except requests.exceptions.RequestException as err:
                    # Cannot tell a dead link from a transient network
                    # failure, so keep the entry rather than risk data loss.
                    print("link could not be checked, keeping item: ", err)
                    continue
                else:
                    if page.ok:
                        continue
                    print("link is no more valid, remove item")

                # Commit each deletion on its own so the database is not kept
                # locked while the remaining links are being requested.
                with connection:
                    rm_cursor = connection.execute(
                        "DELETE from jobs WHERE link = ?;", (link,)
                    )
                print("Deletion resultet in: ", rm_cursor.rowcount)
                removed += rm_cursor.rowcount
        return removed
    finally:
        # `with connection` only commits/rolls back; close explicitly.
        connection.close()

View File

@ -37,6 +37,7 @@ def parse(**kwargs):
# parser.add_argument("--help",help = "print help") # parser.add_argument("--help",help = "print help")
parser.add_argument("--login",nargs=3,help = "login by specifing login and passwor by a given url",metavar=('USERNAME','PASSWORD','URL')) parser.add_argument("--login",nargs=3,help = "login by specifing login and passwor by a given url",metavar=('USERNAME','PASSWORD','URL'))
parser.add_argument("--createnwview",help = "Create a VIEW for the Region Nordwest Schweiz",action="store_true") parser.add_argument("--createnwview",help = "Create a VIEW for the Region Nordwest Schweiz",action="store_true")
parser.add_argument("-VC","--ValidationCheck",help = "Check if links are still valid, if not remove them",action="store_true")
args = parser.parse_args() args = parser.parse_args()
if args.test: if args.test:
@ -55,7 +56,8 @@ def parse(**kwargs):
login_loop(args.config,False,worker) login_loop(args.config,False,worker)
if args.createnwview: if args.createnwview:
createnwview("../db/sqlite3.db") createnwview("../db/sqlite3.db")
if args.ValidationCheck:
isStillValid("../db/sqlite3.db")
if len(kwargs)>0: if len(kwargs)>0:
print("no sysargs fiven, running as a module") print("no sysargs fiven, running as a module")
vconfig = kwargs.get('config') vconfig = kwargs.get('config')