add link validation checker, which removes invalid entries
This commit is contained in:
parent
0870255544
commit
dd60c722d8
28
lib/db.py
28
lib/db.py
@ -1,6 +1,7 @@
|
|||||||
import sqlite3
|
import sqlite3
|
||||||
import mmh3
|
import mmh3
|
||||||
import sys
|
import sys
|
||||||
|
import requests
|
||||||
DEBUG = False
|
DEBUG = False
|
||||||
|
|
||||||
def log(*s):
|
def log(*s):
|
||||||
@ -91,3 +92,30 @@ def writedb(jobs):
|
|||||||
else:
|
else:
|
||||||
print("NEW_ENTRY")
|
print("NEW_ENTRY")
|
||||||
cursor.execute("INSERT INTO jobs (star,tag,title,company,location,link,pubdate,hash) VALUES (?,?,?,?,?,?,?,?)",(job.starred,job.tag,job.title,job.company,job.location,job.link,job.date,hash1))
|
cursor.execute("INSERT INTO jobs (star,tag,title,company,location,link,pubdate,hash) VALUES (?,?,?,?,?,?,?,?)",(job.starred,job.tag,job.title,job.company,job.location,job.link,job.date,hash1))
|
||||||
|
|
||||||
|
def isStillValid(file, timeout=10):
    """Remove every job whose link can no longer be fetched.

    Reads all ``link`` values from the ``jobs`` table of the SQLite
    database at *file*, issues an HTTP GET for each one and deletes the
    matching rows when the request fails or the response is not OK
    (``Response.ok`` is false).

    Args:
        file: Path of the SQLite database containing the ``jobs`` table.
        timeout: Seconds to wait for each HTTP request before the link is
            treated as unreachable.

    Returns:
        None. Progress is reported on stdout.
    """
    connection = sqlite3.connect(file)
    try:
        # Read every link up front so rows are never deleted while a SELECT
        # cursor on the same connection is still being iterated.
        links = [row[0] for row in connection.execute("SELECT link from jobs;")]
        if not links:
            return

        # One session for all requests so connections can be reused.
        with requests.Session() as session:
            for link in links:
                print("row: ", link)
                try:
                    # Validity is decided per link; nothing is carried over
                    # from a previous iteration.
                    still_valid = session.get(link, timeout=timeout).ok
                except requests.RequestException:
                    # Any request failure (malformed URL, connection error,
                    # timeout, ...) counts as an invalid link.
                    print("link is no valid URL so remove item")
                    still_valid = False

                if still_valid:
                    continue

                print("link is no more valid, remove item")
                # Commit each deletion on its own so the database is not
                # kept write-locked while later links are fetched.
                with connection:
                    removed = connection.execute(
                        "DELETE from jobs WHERE link = ?;", (link,)
                    )
                print("Deletion resultet in: ", removed.rowcount)
    finally:
        # The connection's context manager only commits or rolls back; it
        # has to be closed explicitly.
        connection.close()
|
||||||
|
@ -37,6 +37,7 @@ def parse(**kwargs):
|
|||||||
# parser.add_argument("--help",help = "print help")
|
# parser.add_argument("--help",help = "print help")
|
||||||
parser.add_argument("--login",nargs=3,help = "login by specifing login and passwor by a given url",metavar=('USERNAME','PASSWORD','URL'))
|
parser.add_argument("--login",nargs=3,help = "login by specifing login and passwor by a given url",metavar=('USERNAME','PASSWORD','URL'))
|
||||||
parser.add_argument("--createnwview",help = "Create a VIEW for the Region Nordwest Schweiz",action="store_true")
|
parser.add_argument("--createnwview",help = "Create a VIEW for the Region Nordwest Schweiz",action="store_true")
|
||||||
|
parser.add_argument("-VC","--ValidationCheck",help = "Check if links are still valid, if not remove them",action="store_true")
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
if args.test:
|
if args.test:
|
||||||
@ -55,7 +56,8 @@ def parse(**kwargs):
|
|||||||
login_loop(args.config,False,worker)
|
login_loop(args.config,False,worker)
|
||||||
if args.createnwview:
|
if args.createnwview:
|
||||||
createnwview("../db/sqlite3.db")
|
createnwview("../db/sqlite3.db")
|
||||||
|
if args.ValidationCheck:
|
||||||
|
isStillValid("../db/sqlite3.db")
|
||||||
if len(kwargs)>0:
|
if len(kwargs)>0:
|
||||||
print("no sysargs fiven, running as a module")
|
print("no sysargs fiven, running as a module")
|
||||||
vconfig = kwargs.get('config')
|
vconfig = kwargs.get('config')
|
||||||
|
Loading…
Reference in New Issue
Block a user