Change link checking to httplib2,
because we can then make a HEAD request and do not waste bandwidth
This commit is contained in:
parent
dd60c722d8
commit
a96d4b59de
27
lib/db.py
27
lib/db.py
@ -1,7 +1,8 @@
|
||||
import sqlite3
|
||||
import mmh3
|
||||
import sys
|
||||
import requests
|
||||
#import requests
|
||||
import httplib2
|
||||
DEBUG = False
|
||||
|
||||
def log(*s):
|
||||
@ -104,17 +105,31 @@ def isStillValid(file):
|
||||
isLink = True
|
||||
rows = []
|
||||
rows = cursor.fetchmany(256)
|
||||
h = httplib2.Http()
|
||||
for row in rows:
|
||||
with requests.Session() as session:
|
||||
#with requests.Session() as session:
|
||||
print("row: ",row[0])
|
||||
try:
|
||||
page = session.get(row[0])
|
||||
except:
|
||||
(resp,content) = h.request(row[0], 'HEAD')
|
||||
#page = session.get(row[0])
|
||||
except IOError as e:
|
||||
print("link is no valid URL so remove item")
|
||||
print("error: ",e)
|
||||
isLink = False
|
||||
|
||||
except RelativeURIError:
|
||||
isLink = False
|
||||
print("Not a valid link")
|
||||
rm_cursor = connection.cursor()
|
||||
rm_itm = rm_cursor.execute(f"""DELETE from jobs WHERE link = ?;""",(row[0],))
|
||||
finally:
|
||||
if page.ok == False or isLink == False:
|
||||
try:
|
||||
resp
|
||||
except NameError:
|
||||
print("Not a valid link")
|
||||
rm_cursor = connection.cursor()
|
||||
rm_itm = rm_cursor.execute(f"""DELETE from jobs WHERE link = ?;""",(row[0],))
|
||||
else:
|
||||
if resp.status >= 400 or isLink == False:
|
||||
print("link is no more valid, remove item")
|
||||
rm_cursor = connection.cursor()
|
||||
rm_itm = rm_cursor.execute(f"""DELETE from jobs WHERE link = ?;""",(row[0],))
|
||||
|
Loading…
Reference in New Issue
Block a user