2024-06-13 09:14:04 +00:00
import sqlite3
import mmh3
import sys
2024-07-22 10:23:17 +00:00
#import requests
import httplib2
2024-08-23 11:01:20 +00:00
# Debug switch: log() prints its arguments only while this is truthy.
DEBUG = False
2024-06-13 09:14:04 +00:00
def log(*s):
    """Print the positional arguments, as one tuple, when DEBUG is truthy."""
    if not DEBUG:
        return
    print(s)
def initdb(file):
    """Create the ``jobs`` table in the SQLite database *file*, then exit.

    The function never returns: it always ends the process through
    ``sys.exit()`` once the table has been created.
    """
    create_jobs = " CREATE TABLE jobs (star INT,tag INT ,title TEXT, location TEXT, company TEXT,link TEXT,pubdate TEXT,hash INT,viewed INT) "
    with sqlite3.connect(file) as db:
        print(" db connection ", db.total_changes)
        db.cursor().execute(create_jobs)
    sys.exit()
def rmdb(file, table):
    """Drop *table* from the SQLite database *file* after confirmation, then exit.

    The user is asked on stdin; only the answer ``Y`` (surrounding whitespace
    ignored) drops the table, any other answer aborts. In both cases the
    process is ended through ``sys.exit()``, so the function never returns.

    Args:
        file: Path of the SQLite database.
        table: Name of the table to drop. It is interpolated into the SQL
            text because identifiers cannot be bound as parameters, so it
            must come from a trusted source.
    """
    connection = sqlite3.connect(file)
    try:
        with connection:
            question = input(" Do you really wont to empty the db(press Y)? ")
            # The prompt asks for a bare "Y"; tolerate padding around the answer.
            if question.strip() == "Y":
                connection.execute(f""" DROP TABLE { table } """)
            else:
                print(" abroting removing table ")
    finally:
        # The connection's context manager only commits/rolls back; close explicitly.
        connection.close()
    sys.exit()
def importdb(file, importdb, table):
    """Copy *table* from another database into *file* and add the canton view.

    Attaches the SQLite file *importdb* under the schema name ``regions``,
    copies ``regions.<table>`` into *file* unless a table of that name already
    exists there, and creates the ``Canoton_Filter`` view if it is missing.
    The view selects the rows of ``jobs`` whose ``location`` matches (SQL
    ``LIKE``) the ``GDENAME`` of a *table* row having ``GDEKT = 'ZH'``.

    The function can be run repeatedly against the same target database.

    Args:
        file: Path of the target database.
        importdb: Path of the database to copy *table* from.
        table: Name of the table to copy. It is interpolated into the SQL
            text because identifiers cannot be bound as parameters, so it
            must come from a trusted source.
    """
    # The file name is bound as a parameter so quotes or spaces in the path
    # cannot break (or alter) the statement.
    cmd = " ATTACH DATABASE ? AS regions "
    cmd2 = f""" CREATE TABLE IF NOT EXISTS { table } AS SELECT * from regions. { table } """
    cmd_view = f"""
        CREATE VIEW IF NOT EXISTS Canoton_Filter
        AS
        SELECT * FROM jobs AS b
        WHERE EXISTS
            ( SELECT GDENAME FROM { table } AS w
              WHERE w.GDEKT = 'ZH' AND
                    b.location LIKE GDENAME ) ; """
    connection = sqlite3.connect(file)
    try:
        with connection:
            print(" db connection ", connection.total_changes)
            cursor = connection.cursor()
            cursor.execute(cmd, (importdb,))
            print(cmd, cmd2)
            cursor.execute(cmd2)
            cursor.execute(cmd_view)
            print(" db connection ", connection.total_changes)
    finally:
        # The connection's context manager only commits/rolls back; close explicitly.
        connection.close()
def createnwview(file):
    """Create the ``Nordwest-SCHWEIZ`` view and make sure ``filters`` exists.

    The view selects every row of ``jobs`` whose ``location`` matches (SQL
    ``LIKE``) the ``GDENAME`` of a ``Cantons`` row whose ``GDEKT`` is ``ZH``,
    ``AG`` or ``SO``. Nothing is changed when the view already exists.
    Afterwards ``createFilterTable(file)`` is called on the same database.

    Args:
        file: Path of the SQLite database.
    """
    # One EXISTS with IN (...) replaces three otherwise identical sub-queries.
    cmd_create_nw_table = """
        CREATE VIEW IF NOT EXISTS "Nordwest-SCHWEIZ" AS
        SELECT * FROM jobs AS b
        WHERE EXISTS
            ( SELECT GDENAME FROM Cantons AS w
              WHERE w.GDEKT IN ('ZH', 'AG', 'SO') AND
                    b.location LIKE GDENAME ) """
    connection = sqlite3.connect(file)
    try:
        with connection:
            cursor = connection.cursor()
            cursor.execute(cmd_create_nw_table)
            print(" db connection ", connection.total_changes)
    finally:
        # The connection's context manager only commits/rolls back; close explicitly.
        connection.close()
    createFilterTable(file)
def createFilterTable(file):
    """Ensure the single-column ``filters`` table exists in database *file*."""
    ddl = " CREATE TABLE IF NOT EXISTS filters(cmd TEXT); "
    with sqlite3.connect(file, timeout=10) as db:
        db.cursor().execute(ddl)
        print(" db connection: ", db.total_changes)
def addFineFilter(file, table, filterstr):
    """Store *filterstr* in the ``cmd`` column of *table* unless it is already there.

    *table* is interpolated into the SQL text; *filterstr* is bound as a
    parameter.
    """
    lookup_sql = f""" SELECT * FROM {table} WHERE cmd = ? """
    insert_sql = f""" INSERT INTO {table} (cmd) VALUES(?); """
    with sqlite3.connect(file, timeout=10) as db:
        cur = db.cursor()
        already_stored = cur.execute(lookup_sql, (filterstr,)).fetchone()
        if already_stored is None:
            cur.execute(insert_sql, (filterstr,))
2024-06-13 09:14:04 +00:00
def writedb(jobs, file="../db/sqlite3.db"):
    """Insert the not-yet-stored entries of *jobs* into the ``jobs`` table.

    For every job a MurmurHash3 value is computed over
    ``title + company + location + date``. The job is skipped when a row with
    that hash or with the same ``link`` already exists; otherwise it is
    inserted with ``viewed`` set to 0. All inserts are committed together.

    Args:
        jobs: Iterable of objects exposing the attributes ``title``,
            ``company``, ``location``, ``date``, ``link``, ``starred`` and
            ``tag``.
        file: Path of the SQLite database. Defaults to the location that was
            previously hard-coded.
    """
    connection = sqlite3.connect(file, timeout=10)
    try:
        with connection:
            connection.execute(" pragma journal_mode=wal ")
            print(" db connection ", connection.total_changes)
            cursor = connection.cursor()
            for job in jobs:
                hash1 = mmh3.hash(job.title + job.company + job.location + job.date)
                log(hash1)
                if cursor.execute(" SELECT * FROM jobs WHERE hash = ? ", (hash1,)).fetchone() is not None:
                    log(" Hash already exist ")
                elif cursor.execute(" SELECT * FROM jobs where link = ? ", (job.link,)).fetchone() is not None:
                    log(" link already exist ")
                else:
                    log(" NEW_ENTRY ")
                    cursor.execute(
                        " INSERT INTO jobs (star,tag,title,company,location,link,pubdate,hash,viewed) VALUES (?,?,?,?,?,?,?,?,?) ",
                        (job.starred, job.tag, job.title, job.company, job.location, job.link, job.date, hash1, 0),
                    )
    finally:
        # The connection's context manager only commits/rolls back; close explicitly.
        connection.close()
2024-07-22 09:50:52 +00:00
2024-08-06 11:42:58 +00:00
def viewedEntry(hash1, file="../db/sqlite3.db"):
    """Queue *hash1* and mark the queued jobs as viewed once five are pending.

    Hashes are collected in ``viewedEntry.list`` (an attribute on the function
    object, so it persists between calls). When the queue holds at least five
    hashes, ``viewed`` is set to 1 for every matching ``jobs`` row, the change
    is committed and the queue is emptied.

    Args:
        hash1: Value of the ``hash`` column of the job that was viewed.
        file: Path of the SQLite database. Defaults to the location that was
            previously hard-coded.
    """
    # Create the queue only once; re-creating it on every call would keep it
    # at a single element and the database would never be updated.
    if not hasattr(viewedEntry, "list"):
        viewedEntry.list = []
    viewedEntry.list.append(hash1)
    print(" viewedEntry.list: ", viewedEntry.list)
    if len(viewedEntry.list) < 5:
        return
    modified = 0
    connection = sqlite3.connect(file, timeout=10)
    try:
        with connection:
            cursor = connection.cursor()
            for x in viewedEntry.list:
                print(" hash: ", x)
                cursor.execute(" UPDATE jobs SET viewed = 1 WHERE hash = ? ", (x,))
                # rowcount only covers the last statement, so add it up per hash.
                modified += cursor.rowcount
    finally:
        # The connection's context manager only commits/rolls back; close explicitly.
        connection.close()
    # Empty the queue only after the updates were committed successfully.
    viewedEntry.list = []
    print(" modified rows: ", modified)
2024-07-26 10:46:36 +00:00
def _link_is_alive(http, link):
    """Return True when a HEAD request to *link* answers with a status below 400."""
    try:
        resp, _content = http.request(link, "HEAD")
    except IOError as e:
        print(" link is no valid URL so remove item ")
        print(" error: ", e)
        return False
    except httplib2.error.RelativeURIError:
        print(" RelativeURIError: Not a valid link ")
        return False
    return resp.status < 400


def isStillValid(file, skiprows):
    """Delete non-starred jobs whose link no longer answers a HEAD request.

    The ``link`` column of ``jobs`` is read in chunks of 256 rows. Each link
    is probed with an HTTP ``HEAD`` request; when the request raises
    ``IOError`` or ``RelativeURIError``, or the response status is 400 or
    higher, the rows with that link and ``star != 1`` are deleted and the
    deletion is committed right away. Every link is judged on its own
    request only, independent of the outcome of earlier rows. Other
    exceptions raised by the request are not caught here.

    Args:
        file: Path of the SQLite database.
        skiprows: Currently unused; kept so existing callers keep working.

    Returns:
        Always 0.
    """
    connection = sqlite3.connect(file, timeout=10)
    try:
        with connection:
            connection.execute(" pragma journal_mode=wal ")
            cursor = connection.cursor()
            cursor.execute(" SELECT link from jobs; ")
            while True:
                rows = cursor.fetchmany(256)
                if not rows:
                    break
                # One HTTP client per chunk, as before.
                h = httplib2.Http()
                for row in rows:
                    link = row[0]
                    print(" row: ", link)
                    if _link_is_alive(h, link):
                        continue
                    print(" link is no more valid, remove item ")
                    rm_itm = connection.cursor().execute(
                        " DELETE from jobs WHERE link = ? AND star != 1; ", (link,)
                    )
                    print(" Deletion resultet in: ", rm_itm.rowcount)
                    # Commit per deletion so the write lock is not held while
                    # the remaining (slow) HTTP requests are running.
                    print(" result of commit: ", connection.commit())
    finally:
        # The connection's context manager only commits/rolls back; close explicitly.
        connection.close()
    return 0