Add a forced retry to cookie stealing when the first attempt fails

This commit is contained in:
ccppi 2024-08-23 13:01:20 +02:00
parent c35f43ce8a
commit fa98c36096
5 changed files with 16 additions and 22 deletions

View File

@ -3,7 +3,7 @@ import mmh3
import sys import sys
#import requests #import requests
import httplib2 import httplib2
DEBUG = True DEBUG = False
def log(*s): def log(*s):
if DEBUG: if DEBUG:

View File

@ -11,7 +11,7 @@ import sqlite3
import webbrowser import webbrowser
import mozilla import mozilla
DEBUG = True DEBUG = False
number = ['0','1','2','3','4','5','6','7','8','9'] number = ['0','1','2','3','4','5','6','7','8','9']

View File

@ -43,23 +43,14 @@ def login(entry):
#somehow getting the cookie maybe? #somehow getting the cookie maybe?
def solveCaptcha(session,resp):
    """Report whether a response looks like a captcha / JavaScript challenge.

    Despite its name, this function does not solve anything: it only
    inspects the response body for the marker strings "captcha" and
    "Enable JavaScript".

    Args:
        session: Unused; kept so existing callers keep working.
        resp: The response to inspect. May be a ``str``, ``bytes``, or any
            object exposing the body as a ``.text`` attribute; anything
            else is inspected through ``str(resp)``.

    Returns:
        1 if one of the marker strings is present in the body, otherwise 0.
    """
    print("response:",resp)

    # Prefer the decoded body of a response-like object; fall back to the
    # value itself when a plain string/bytes body was passed in.
    body = getattr(resp, "text", resp)
    if isinstance(body, bytes):
        body = body.decode("utf-8", errors="replace")
    elif not isinstance(body, str):
        body = str(body)

    # Each marker needs its own membership test: the previous
    # `"captcha" or "Enable JavaScript" in resp` was always truthy because
    # the non-empty literal "captcha" short-circuits the `or`.
    if "captcha" in body or "Enable JavaScript" in body:
        print("captcha link!! found:")
        return 1
    return 0
def checkBlockers(session,resp): def checkBlockers(session,resp):
print("response from login attempt",resp) print("response from login attempt",resp)
if resp: if resp:

View File

@ -29,7 +29,7 @@ def findDefaultProfile(path):
else: else:
return target return target
def getCookiesFromBrowser(url): def getCookiesFromBrowser(url,force=False):
DBFILE = "../db/sqlite3.db" DBFILE = "../db/sqlite3.db"
if os.name == 'posix': if os.name == 'posix':
homePath = os.path.expanduser('~') homePath = os.path.expanduser('~')
@ -63,7 +63,7 @@ def getCookiesFromBrowser(url):
cookie += ";" cookie += ";"
print("Cookies:",cookie) print("Cookies:",cookie)
if cookie == '': if cookie == '' and force == False:
if os.name == 'posix': if os.name == 'posix':
webbrowser.register("firefox",None,webbrowser.BackgroundBrowser("firefox")) webbrowser.register("firefox",None,webbrowser.BackgroundBrowser("firefox"))
webbrowser.get('firefox').open(url) webbrowser.get('firefox').open(url)

View File

@ -13,7 +13,7 @@ def scrap_indeed_com(url,entry,session):
session.headers = { session.headers = {
"User-Agent" : "Mozilla/5.0 (X11; Linux x86_64; rv:127.0) Gecko/20100101 Firefox/127.0", "User-Agent" : "Mozilla/5.0 (X11; Linux x86_64; rv:127.0) Gecko/20100101 Firefox/127.0",
"Referer" : "https://ch.indeed.com/jobs?&from=searchOnHP", "Referer" : "https://ch.indeed.com/jobs?&from=searchOnHP",
"Cookie" : moz_cookies# """CTK=1i5q619l6jvkj800; indeed_rcc="LV:CTK:RQ"; CSRF=aEb4JWpfbYPy3j3g2rsUPDSixXqBe1Oe; INDEED_CSRF_TOKEN=4p83HqsTMan9QrVZun2Q0wrFeCoGm9mG; LV="LA=1724238852:LV=1724234376:CV=1724238852:TS=1724234376"; _cfuvid=27ptQm94DDaFeIjNGSNxW3g9GyDAJExtQz_RNr0jvE0-1724238843162-0.0.1.1-604800000; JSESSIONID=F196631331EF16D28C0E00AC7A43CB10; OptanonConsent=isGpcEnabled=1&datestamp=Wed+Aug+21+2024+13%3A14%3A47+GMT%2B0200+(Central+European+Summer+Time)&version=202210.1.0&isIABGlobal=false&hosts=&consentId=b0f6c692-930d-4929-9251-9a4f7bc72f61&interactionCount=1&landingPath=NotLandingPage&groups=C0001%3A1%2CC0002%3A0%2CC0003%3A0%2CC0004%3A0%2CC0007%3A0&AwaitingReconsent=false; _ga_LYNT3BTHPG=GS1.1.1724238849.2.1.1724238908.0.0.454081609; _ga=GA1.1.1356051481.1724234379; SURF=WCl9mMSuWXP2jp3GlLMyXzkQkAdKDg7W; FPID=FPID2.2.Dd22VS9g0Vfjh5dQoT9s%2Bws7zDmpmQlIzsYP9ZLW8kg%3D.1724234379; FPLC=Qmy8DxSR81EJxewKgZ7RfgP%2BdXEXWWU4RKVUs2Pn1vEIp%2Fu2Upaqz5%2Blgf05XLqfdY7S4qGRwWAbQqAbKQZb%2FBWQxZwpmvOzw%2Bhgpkfvj320PLIwamECv9iYH%2Bx%2FrQ%3D%3D; RQ="q=quereinsteiger&l=&ts=1724238933002&rbsalmin=0&rbsalmax=0:q=python+qt&l=&ts=1724234491003&rbsalmin=0&rbsalmax=0"; __cf_bm=X3BsfEnAGodB.ELxHVfYTAYd4K4n3TUbHVV7OyKMjBg-1724238843-1.0.1.1-4QMaUgbvnumBKmzwOcY2o0Taikgpvn72OoTXG_ZtU8q3qOCuf06riyYIJlXD.zsd7JxmZ_VdN1S9cCbGwXid6w; gonetap=closed; SHARED_INDEED_CSRF_TOKEN=4p83HqsTMan9QrVZun2Q0wrFeCoGm9mG""" "Cookie" : moz_cookies
} }
jobs = [] jobs = []
log("in scrap jobs,url",url) log("in scrap jobs,url",url)
@ -24,7 +24,10 @@ def scrap_indeed_com(url,entry,session):
else: else:
page = session.get(url) page = session.get(url)
log(page) log(page)
solveCaptcha(session,page) if solveCaptcha(session,page) == 1:
print("Cookie stealing unsuccesfull retry with force")
moz_cookies = mozilla.getCookiesFromBrowser(url,force=True)
soup = BeautifulSoup(page.content,"html.parser") soup = BeautifulSoup(page.content,"html.parser")
#print(soup.prettify()) #print(soup.prettify())