restructuring

This commit is contained in:
ccppi 2024-08-23 12:13:15 +02:00
parent a2b97e9f21
commit 2d9b7afa9d
4 changed files with 120 additions and 80 deletions

View File

@ -9,15 +9,13 @@ from datetime import datetime
import os
import sqlite3
import webbrowser
from time import sleep
import mozilla
# Toggle for the module's log() helper.
DEBUG = True
# Decimal digit characters.
number = ['0','1','2','3','4','5','6','7','8','9']
homePath = os.path.expanduser('~')
# Firefox cookie store of the (hard-coded) default profile.
cookiePath = homePath + "/.mozilla/firefox/imibizoh.default/cookies.sqlite"
# Scratch copy of the cookie store; the live file is locked while Firefox runs.
tmpPath = "/tmp/cookies.sqlite"
DBFILE = "../db/sqlite3.db"
# Raw string: in a normal literal "\f" is a form feed and "\P"/"\M" are
# invalid escape sequences, which corrupted the executable path.
winFirefoxPath = r"C:\Program Files\Mozilla Firefox\firefox.exe"
def log(*s):
    """Print the positional arguments (as one tuple) when DEBUG is truthy."""
    if not DEBUG:
        return
    print(s)
@ -226,77 +224,4 @@ def indeedExtractDays(datestr):
#print("int:",cleannumint,"today:",today,"cleandate:",datetime.fromtimestamp(cleandate).strftime('%Y-%m-%d'))
return datetime.fromtimestamp(cleandate).strftime('%Y-%m-%d')
return "NOTFound"
def getCookiesFromBrowser(url):
    """Return the Firefox cookies stored for the domain of ``url``.

    The cookie database is copied to ``tmpPath`` first because the live
    file is locked while Firefox is running. If no cookie is found, the
    URL is opened in Firefox (so the site can set its cookies) and the
    lookup is retried once.

    Returns:
        A string of ``name=value;`` pairs, or ``''`` when nothing was found
        after two attempts.
    """
    cmd_read_cookies = "SELECT name,value FROM moz_cookies WHERE host like ?;"
    cookie = ''
    tries = 0
    while cookie == '' and tries < 2:
        tries += 1
        shutil.copyfile(cookiePath,tmpPath)
        # "with connection" only manages the transaction; close explicitly
        # so the copied database file is released.
        connection = sqlite3.connect(tmpPath)
        try:
            print(cmd_read_cookies)
            cursor = connection.cursor()
            cursor.execute(cmd_read_cookies,(urlToDomain(url),))
            # Iterate the cursor afresh on every attempt; the old code kept
            # an exhausted row buffer from the first try, so the retry never
            # read anything.
            parts = []
            for row in cursor:
                print("row:",row)
                parts.append(f"{row[0]}={row[1]};")
            cookie = ''.join(parts)
            print("Cookies:",cookie)
        finally:
            connection.close()
        if cookie == '':
            # Let Firefox visit the page so the cookies get created.
            if os.name == 'posix':
                browserCommand = "firefox"
            elif os.name == 'nt':
                browserCommand = winFirefoxPath
            else:
                browserCommand = None
            if browserCommand is not None:
                webbrowser.register("firefox",None,webbrowser.BackgroundBrowser(browserCommand))
                webbrowser.get('firefox').open(url)
            sleep(1)
    return cookie
#access cookies from firefox:
#copy (because locked): cp .mozilla/firefox/imibizoh.default/cookies.sqlite cookies.sqlite
#Select value from moz_cookies where host like '%indeed%'
def urlToDomain(url):
    """Reduce a URL to the dot-prefixed domain suffix of its host.

    The scheme (everything up to and including the first ``://``) and
    everything from the first ``/`` after the host are discarded. The host
    is then cut so that it starts at its first dot, e.g.
    ``https://www.indeed.com/jobs`` -> ``.indeed.com``.

    Compared with the earlier character-by-character scan this also handles
    URLs without a path (the last character used to be chopped off), URLs
    with another or no scheme, and empty input (used to raise). A host
    without any dot is returned unchanged.
    """
    # partition() leaves the separator empty when "://" does not occur.
    _scheme, separator, remainder = url.partition("://")
    if not separator:
        remainder = url
    # Keep only the host, i.e. everything before the first "/".
    host = remainder.partition("/")[0]
    # Drop the left-most label but keep the leading dot.
    dotIndex = host.find(".")
    urlCut = host[dotIndex:] if dotIndex != -1 else host
    print("url after cut dot:",urlCut)
    return urlCut
def patternSearch(url,pattern):
    """Return the index of the last character of the first ``pattern`` match.

    If ``pattern`` does not occur in ``url`` (or is empty), the index of the
    last character of ``url`` is returned, i.e. ``len(url) - 1``; this is the
    fallback value callers already received. For an empty ``url`` that value
    is ``-1`` instead of an UnboundLocalError.

    The search uses ``str.find``. The former hand-written matcher did not
    re-test the current character after a partial match failed, so it missed
    inputs such as ``"hhttps://"``.
    """
    start = url.find(pattern) if pattern else -1
    if start == -1:
        return len(url) - 1
    end = start + len(pattern) - 1
    print("FULL PATTERN FOUND at pos :",end)
    return end
def skipAfterChar(aString,char):
    """Return the index of the first character of ``aString`` equal to ``char``.

    When no character matches, the index of the last character
    (``len(aString) - 1``) is returned, which is the fallback callers already
    rely on. For an empty string this yields ``-1`` rather than raising
    UnboundLocalError as the previous loop did.
    """
    return next(
        (index for index, current in enumerate(aString) if current == char),
        len(aString) - 1,
    )
def dropBeforePos(aString,pos):
    """Return the characters of ``aString`` that come after index ``pos``.

    An empty string is returned when ``pos + 1`` is not below the length of
    ``aString``.
    """
    start = pos + 1
    if start >= len(aString):
        return ''
    # Index-based on purpose: keeps the exact behaviour for negative starts.
    return ''.join(aString[index] for index in range(start, len(aString)))
def dropAfterPos(aString,pos):
    """Return the first ``pos`` characters of ``aString``.

    An empty string is returned when ``pos`` is not below the length of
    ``aString`` or is not positive.
    """
    if pos >= len(aString):
        return ''
    return ''.join(aString[index] for index in range(pos))

42
lib/manipulateString.py Normal file
View File

@ -0,0 +1,42 @@
def urlToDomain(url):
    """Reduce a URL to the dot-prefixed domain suffix of its host.

    The scheme (everything up to and including the first ``://``) and
    everything from the first ``/`` after the host are discarded. The host
    is then cut so that it starts at its first dot, e.g.
    ``https://www.indeed.com/jobs`` -> ``.indeed.com``.

    Compared with the earlier character-by-character scan this also handles
    URLs without a path (the last character used to be chopped off), URLs
    with another or no scheme, and empty input (used to raise). A host
    without any dot is returned unchanged.
    """
    # partition() leaves the separator empty when "://" does not occur.
    _scheme, separator, remainder = url.partition("://")
    if not separator:
        remainder = url
    # Keep only the host, i.e. everything before the first "/".
    host = remainder.partition("/")[0]
    # Drop the left-most label but keep the leading dot.
    dotIndex = host.find(".")
    urlCut = host[dotIndex:] if dotIndex != -1 else host
    print("url after cut dot:",urlCut)
    return urlCut
def patternSearch(url,pattern):
    """Return the index of the last character of the first ``pattern`` match.

    If ``pattern`` does not occur in ``url`` (or is empty), the index of the
    last character of ``url`` is returned, i.e. ``len(url) - 1``; this is the
    fallback value callers already received. For an empty ``url`` that value
    is ``-1`` instead of an UnboundLocalError.

    The search uses ``str.find``. The former hand-written matcher did not
    re-test the current character after a partial match failed, so it missed
    inputs such as ``"hhttps://"``.
    """
    start = url.find(pattern) if pattern else -1
    if start == -1:
        return len(url) - 1
    end = start + len(pattern) - 1
    print("FULL PATTERN FOUND at pos :",end)
    return end
def skipAfterChar(aString,char):
    """Return the index of the first character of ``aString`` equal to ``char``.

    When no character matches, the index of the last character
    (``len(aString) - 1``) is returned, which is the fallback callers already
    rely on. For an empty string this yields ``-1`` rather than raising
    UnboundLocalError as the previous loop did.
    """
    return next(
        (index for index, current in enumerate(aString) if current == char),
        len(aString) - 1,
    )
def dropBeforePos(aString,pos):
    """Return the characters of ``aString`` that come after index ``pos``.

    An empty string is returned when ``pos + 1`` is not below the length of
    ``aString``.
    """
    start = pos + 1
    if start >= len(aString):
        return ''
    # Index-based on purpose: keeps the exact behaviour for negative starts.
    return ''.join(aString[index] for index in range(start, len(aString)))
def dropAfterPos(aString,pos):
    """Return the first ``pos`` characters of ``aString``.

    An empty string is returned when ``pos`` is not below the length of
    ``aString`` or is not positive.
    """
    if pos >= len(aString):
        return ''
    return ''.join(aString[index] for index in range(pos))

72
lib/mozilla.py Normal file
View File

@ -0,0 +1,72 @@
#access cookies from firefox:
#copy (because locked): cp .mozilla/firefox/imibizoh.default/cookies.sqlite cookies.sqlite
#Select value from moz_cookies where host like '%indeed%'
import webbrowser
import tempfile
import os
import sqlite3
import shutil
from time import sleep
import manipulateString as ms
DEBUG = True
def log(*s):
    """Print the positional arguments (as one tuple) when DEBUG is truthy."""
    if not DEBUG:
        return
    print(s)
def findDefaultProfile(path):
    """Return the first entry of ``path`` whose name ends in ``.default``.

    For every directory entry the text after its first dot is compared with
    ``"default"``; the first entry that matches is returned. ``-1`` is
    returned when no entry matches.
    """
    for entry in os.listdir(path):
        dotIndex = ms.skipAfterChar(entry,'.')
        suffix = ms.dropBeforePos(entry,dotIndex)
        log(suffix)
        if suffix == "default":
            return entry
    return -1
def getCookiesFromBrowser(url):
    """Return the Firefox cookies stored for the domain of ``url``.

    The default profile's ``cookies.sqlite`` is copied into the system temp
    directory first because the live file is locked while Firefox is
    running. If no cookie is found, the URL is opened in Firefox (so the
    site can set its cookies) and the lookup is retried once.

    Returns:
        A string of ``name=value;`` pairs, or ``''`` when nothing was found
        after two attempts.

    Raises:
        FileNotFoundError: if no ``*.default`` profile directory exists
            (previously this surfaced as an obscure TypeError).
    """
    # Platform-specific profile root and browser executable.
    if os.name == 'nt':
        profilesDir = os.path.join(os.getenv('APPDATA', ''),"Mozilla","Firefox","Profiles")
        browserCommand = "C:\\Program Files\\Mozilla Firefox\\firefox.exe"
    else:
        profilesDir = os.path.join(os.path.expanduser('~'),".mozilla","firefox")
        browserCommand = "firefox"

    profile = findDefaultProfile(profilesDir)
    if profile == -1:
        raise FileNotFoundError("no '*.default' Firefox profile found in " + profilesDir)

    cookiePath = os.path.join(profilesDir,profile,"cookies.sqlite")
    # One scratch file for every platform; the old code copied to one
    # location on Windows but always opened "/tmp/cookies.sqlite".
    tmpPath = os.path.join(tempfile.gettempdir(),"cookies.sqlite")
    cmd_read_cookies = "SELECT name,value FROM moz_cookies WHERE host like ?;"
    domain = ms.urlToDomain(url)

    cookie = ''
    tries = 0
    while cookie == '' and tries < 2:
        tries += 1
        shutil.copyfile(cookiePath,tmpPath)
        # "with connection" only manages the transaction; close explicitly
        # so the copied database file is released.
        connection = sqlite3.connect(tmpPath)
        try:
            print(cmd_read_cookies)
            cursor = connection.cursor()
            cursor.execute(cmd_read_cookies,(domain,))
            # Iterate the cursor afresh on every attempt; the old code kept
            # an exhausted row buffer from the first try, so the retry never
            # read anything.
            parts = []
            for row in cursor:
                print("row:",row)
                parts.append(f"{row[0]}={row[1]};")
            cookie = ''.join(parts)
            print("Cookies:",cookie)
        finally:
            connection.close()
        if cookie == '':
            # Let Firefox visit the page so the cookies get created.
            webbrowser.register("firefox",None,webbrowser.BackgroundBrowser(browserCommand))
            webbrowser.get('firefox').open(url)
            sleep(1)
    return cookie

View File

@ -1,5 +1,6 @@
from helpers import *
from login import solveCaptcha
import mozilla
DEBUG = True
def log(*s):
@ -7,7 +8,7 @@ def log(*s):
print(s)
def scrap_indeed_com(url,entry,session):
moz_cookies = getCookiesFromBrowser(url)
moz_cookies = mozilla.getCookiesFromBrowser(url)
print("[scrap]cookies:", moz_cookies)
session.headers = {
"User-Agent" : "Mozilla/5.0 (X11; Linux x86_64; rv:127.0) Gecko/20100101 Firefox/127.0",