From a2b97e9f21d56c053a963adc64958588b1e6d550 Mon Sep 17 00:00:00 2001 From: ccppi Date: Thu, 22 Aug 2024 11:59:40 +0200 Subject: [PATCH] helpers.py --- lib/helpers.py | 41 +++++++++++++++++++++++++++-------------- lib/scrap_jobs.py | 2 +- 2 files changed, 28 insertions(+), 15 deletions(-) diff --git a/lib/helpers.py b/lib/helpers.py index 272d9f4..8492f36 100644 --- a/lib/helpers.py +++ b/lib/helpers.py @@ -8,6 +8,8 @@ from dateconverter import * from datetime import datetime import os import sqlite3 +import webbrowser +from time import sleep DEBUG = True number = ['0','1','2','3','4','5','6','7','8','9'] @@ -15,6 +17,7 @@ homePath = os.path.expanduser('~') cookiePath = homePath + "/.mozilla/firefox/imibizoh.default/cookies.sqlite" tmpPath = "/tmp/cookies.sqlite" DBFILE = "../db/sqlite3.db" +winFirefoxPath = f"""C:\Program Files\Mozilla Firefox\firefox.exe""" def log(*s): if DEBUG: print(s) @@ -225,23 +228,33 @@ def indeedExtractDays(datestr): return "NOTFound" def getCookiesFromBrowser(url): #workaround for loked database - shutil.copyfile(cookiePath,tmpPath) + tries=0 cookie = '' rows = [0] + while(cookie == '' and tries < 2): + tries+=1; + shutil.copyfile(cookiePath,tmpPath) + with sqlite3.connect(tmpPath) as connection: + cmd_read_cookies = f"""SELECT name,value FROM moz_cookies WHERE host like ?;""" + print(cmd_read_cookies) + cursor = connection.cursor() + cursor.execute(cmd_read_cookies,(urlToDomain(url),)) + while len(rows)!=0: + rows = cursor.fetchmany(25) + for row in rows: + print("row:",row) + cookie = cookie + row[0] + '=' + row[1] + cookie += ";" - with sqlite3.connect(tmpPath) as connection: - cmd_read_cookies = f"""SELECT name,value FROM moz_cookies WHERE host like ?;""" - print(cmd_read_cookies) - cursor = connection.cursor() - cursor.execute(cmd_read_cookies,(urlToDomain(url),)) - while len(rows)!=0: - rows = cursor.fetchmany(25) - for row in rows: - print("row:",row) - cookie = cookie + row[0] + '=' + row[1] - cookie += ";" - - print("Cookies:",cookie) + print("Cookies:",cookie) + if cookie == '': + if os.name == 'posix': + webbrowser.register("firefox",None,webbrowser.BackgroundBrowser("firefox")) + webbrowser.get('firefox').open(url) + elif os.name == 'nt': + webbrowser.register("firefox",None,webbrowser.BackgroundBrowser(winFirefoxPath)) + webbrowser.get('firefox').open(url) + sleep(1) return cookie #access cookies from firefox: #copy (because locked): cp .mozilla/firefox/imibizoh.default/cookies.sqlite cookies.sqlite diff --git a/lib/scrap_jobs.py b/lib/scrap_jobs.py index 24cb3fe..451a7f5 100644 --- a/lib/scrap_jobs.py +++ b/lib/scrap_jobs.py @@ -25,7 +25,7 @@ def scrap_indeed_com(url,entry,session): log(page) solveCaptcha(session,page) soup = BeautifulSoup(page.content,"html.parser") - print(soup.prettify()) + #print(soup.prettify()) results = soup.find_all("li",class_= 'css-5lfssm eu4oa1w0') #top level list element