Change checking to httplib2
because we can then make a HEAD request and avoid using bandwidth
This commit is contained in:
parent
dd60c722d8
commit
a96d4b59de
33
lib/db.py
33
lib/db.py
@ -1,7 +1,8 @@
|
|||||||
import sqlite3
|
import sqlite3
|
||||||
import mmh3
|
import mmh3
|
||||||
import sys
|
import sys
|
||||||
import requests
|
#import requests
|
||||||
|
import httplib2
|
||||||
DEBUG = False
|
DEBUG = False
|
||||||
|
|
||||||
def log(*s):
|
def log(*s):
|
||||||
@ -104,18 +105,32 @@ def isStillValid(file):
|
|||||||
isLink = True
|
isLink = True
|
||||||
rows = []
|
rows = []
|
||||||
rows = cursor.fetchmany(256)
|
rows = cursor.fetchmany(256)
|
||||||
|
h = httplib2.Http()
|
||||||
for row in rows:
|
for row in rows:
|
||||||
with requests.Session() as session:
|
#with requests.Session() as session:
|
||||||
print("row: ",row[0])
|
print("row: ",row[0])
|
||||||
try:
|
try:
|
||||||
page = session.get(row[0])
|
(resp,content) = h.request(row[0], 'HEAD')
|
||||||
except:
|
#page = session.get(row[0])
|
||||||
|
except IOError as e:
|
||||||
print("link is no valid URL so remove item")
|
print("link is no valid URL so remove item")
|
||||||
|
print("error: ",e)
|
||||||
isLink = False
|
isLink = False
|
||||||
|
except RelativeURIError:
|
||||||
finally:
|
isLink = False
|
||||||
if page.ok == False or isLink == False:
|
print("Not a valid link")
|
||||||
print("link is no more valid, remove item")
|
|
||||||
rm_cursor = connection.cursor()
|
rm_cursor = connection.cursor()
|
||||||
rm_itm = rm_cursor.execute(f"""DELETE from jobs WHERE link = ?;""",(row[0],))
|
rm_itm = rm_cursor.execute(f"""DELETE from jobs WHERE link = ?;""",(row[0],))
|
||||||
print ("Deletion resultet in: ", rm_itm)
|
finally:
|
||||||
|
try:
|
||||||
|
resp
|
||||||
|
except NameError:
|
||||||
|
print("Not a valid link")
|
||||||
|
rm_cursor = connection.cursor()
|
||||||
|
rm_itm = rm_cursor.execute(f"""DELETE from jobs WHERE link = ?;""",(row[0],))
|
||||||
|
else:
|
||||||
|
if resp.status >= 400 or isLink == False:
|
||||||
|
print("link is no more valid, remove item")
|
||||||
|
rm_cursor = connection.cursor()
|
||||||
|
rm_itm = rm_cursor.execute(f"""DELETE from jobs WHERE link = ?;""",(row[0],))
|
||||||
|
print ("Deletion resultet in: ", rm_itm)
|
||||||
|
Loading…
Reference in New Issue
Block a user