Add a forced retry of cookie stealing when the first attempt fails
This commit is contained in:
parent
c35f43ce8a
commit
fa98c36096
@ -3,7 +3,7 @@ import mmh3
|
|||||||
import sys
|
import sys
|
||||||
#import requests
|
#import requests
|
||||||
import httplib2
|
import httplib2
|
||||||
DEBUG = True
|
DEBUG = False
|
||||||
|
|
||||||
def log(*s):
|
def log(*s):
|
||||||
if DEBUG:
|
if DEBUG:
|
||||||
|
@ -11,7 +11,7 @@ import sqlite3
|
|||||||
import webbrowser
|
import webbrowser
|
||||||
import mozilla
|
import mozilla
|
||||||
|
|
||||||
DEBUG = True
|
DEBUG = False
|
||||||
number = ['0','1','2','3','4','5','6','7','8','9']
|
number = ['0','1','2','3','4','5','6','7','8','9']
|
||||||
|
|
||||||
|
|
||||||
|
23
lib/login.py
23
lib/login.py
@ -43,23 +43,14 @@ def login(entry):
|
|||||||
#somehow getting the cookie maype?
|
#somehow getting the cookie maype?
|
||||||
|
|
||||||
def solveCaptcha(session,resp):
|
def solveCaptcha(session,resp):
|
||||||
found = 0
|
|
||||||
if "captcha" or "Enable JavaScript" in resp :
|
|
||||||
#soup = BeautifulSoup(resp,"html.parser")
|
|
||||||
#result = soup.find("iframe")
|
|
||||||
#while found==0:
|
|
||||||
# if "captcha" in resp:#result:
|
|
||||||
print("captcha link!! found:")
|
|
||||||
found=1
|
|
||||||
#else:
|
|
||||||
# result.find_next()
|
|
||||||
print("exit loop")
|
|
||||||
print("response:",resp)
|
print("response:",resp)
|
||||||
#if found:
|
if "captcha" or "Enable JavaScript" in resp :
|
||||||
#print("captchaurl:", result["src"])
|
print("captcha link!! found:")
|
||||||
#x = input("continue")
|
return 1
|
||||||
#else:
|
else:
|
||||||
# print("did not recognise a captcha")
|
return 0
|
||||||
|
|
||||||
|
|
||||||
def checkBlockers(session,resp):
|
def checkBlockers(session,resp):
|
||||||
print("response from login attempt",resp)
|
print("response from login attempt",resp)
|
||||||
if resp:
|
if resp:
|
||||||
|
@ -29,7 +29,7 @@ def findDefaultProfile(path):
|
|||||||
else:
|
else:
|
||||||
return target
|
return target
|
||||||
|
|
||||||
def getCookiesFromBrowser(url):
|
def getCookiesFromBrowser(url,force=False):
|
||||||
DBFILE = "../db/sqlite3.db"
|
DBFILE = "../db/sqlite3.db"
|
||||||
if os.name == 'posix':
|
if os.name == 'posix':
|
||||||
homePath = os.path.expanduser('~')
|
homePath = os.path.expanduser('~')
|
||||||
@ -63,7 +63,7 @@ def getCookiesFromBrowser(url):
|
|||||||
cookie += ";"
|
cookie += ";"
|
||||||
|
|
||||||
print("Cookies:",cookie)
|
print("Cookies:",cookie)
|
||||||
if cookie == '':
|
if cookie == '' and force == False:
|
||||||
if os.name == 'posix':
|
if os.name == 'posix':
|
||||||
webbrowser.register("firefox",None,webbrowser.BackgroundBrowser("firefox"))
|
webbrowser.register("firefox",None,webbrowser.BackgroundBrowser("firefox"))
|
||||||
webbrowser.get('firefox').open(url)
|
webbrowser.get('firefox').open(url)
|
||||||
|
@ -13,7 +13,7 @@ def scrap_indeed_com(url,entry,session):
|
|||||||
session.headers = {
|
session.headers = {
|
||||||
"User-Agent" : "Mozilla/5.0 (X11; Linux x86_64; rv:127.0) Gecko/20100101 Firefox/127.0",
|
"User-Agent" : "Mozilla/5.0 (X11; Linux x86_64; rv:127.0) Gecko/20100101 Firefox/127.0",
|
||||||
"Referer" : "https://ch.indeed.com/jobs?&from=searchOnHP",
|
"Referer" : "https://ch.indeed.com/jobs?&from=searchOnHP",
|
||||||
"Cookie" : moz_cookies# """CTK=1i5q619l6jvkj800; indeed_rcc="LV:CTK:RQ"; CSRF=aEb4JWpfbYPy3j3g2rsUPDSixXqBe1Oe; INDEED_CSRF_TOKEN=4p83HqsTMan9QrVZun2Q0wrFeCoGm9mG; LV="LA=1724238852:LV=1724234376:CV=1724238852:TS=1724234376"; _cfuvid=27ptQm94DDaFeIjNGSNxW3g9GyDAJExtQz_RNr0jvE0-1724238843162-0.0.1.1-604800000; JSESSIONID=F196631331EF16D28C0E00AC7A43CB10; OptanonConsent=isGpcEnabled=1&datestamp=Wed+Aug+21+2024+13%3A14%3A47+GMT%2B0200+(Central+European+Summer+Time)&version=202210.1.0&isIABGlobal=false&hosts=&consentId=b0f6c692-930d-4929-9251-9a4f7bc72f61&interactionCount=1&landingPath=NotLandingPage&groups=C0001%3A1%2CC0002%3A0%2CC0003%3A0%2CC0004%3A0%2CC0007%3A0&AwaitingReconsent=false; _ga_LYNT3BTHPG=GS1.1.1724238849.2.1.1724238908.0.0.454081609; _ga=GA1.1.1356051481.1724234379; SURF=WCl9mMSuWXP2jp3GlLMyXzkQkAdKDg7W; FPID=FPID2.2.Dd22VS9g0Vfjh5dQoT9s%2Bws7zDmpmQlIzsYP9ZLW8kg%3D.1724234379; FPLC=Qmy8DxSR81EJxewKgZ7RfgP%2BdXEXWWU4RKVUs2Pn1vEIp%2Fu2Upaqz5%2Blgf05XLqfdY7S4qGRwWAbQqAbKQZb%2FBWQxZwpmvOzw%2Bhgpkfvj320PLIwamECv9iYH%2Bx%2FrQ%3D%3D; RQ="q=quereinsteiger&l=&ts=1724238933002&rbsalmin=0&rbsalmax=0:q=python+qt&l=&ts=1724234491003&rbsalmin=0&rbsalmax=0"; __cf_bm=X3BsfEnAGodB.ELxHVfYTAYd4K4n3TUbHVV7OyKMjBg-1724238843-1.0.1.1-4QMaUgbvnumBKmzwOcY2o0Taikgpvn72OoTXG_ZtU8q3qOCuf06riyYIJlXD.zsd7JxmZ_VdN1S9cCbGwXid6w; gonetap=closed; SHARED_INDEED_CSRF_TOKEN=4p83HqsTMan9QrVZun2Q0wrFeCoGm9mG"""
|
"Cookie" : moz_cookies
|
||||||
}
|
}
|
||||||
jobs = []
|
jobs = []
|
||||||
log("in scrap jobs,url",url)
|
log("in scrap jobs,url",url)
|
||||||
@ -24,7 +24,10 @@ def scrap_indeed_com(url,entry,session):
|
|||||||
else:
|
else:
|
||||||
page = session.get(url)
|
page = session.get(url)
|
||||||
log(page)
|
log(page)
|
||||||
solveCaptcha(session,page)
|
if solveCaptcha(session,page) == 1:
|
||||||
|
print("Cookie stealing unsuccesfull retry with force")
|
||||||
|
moz_cookies = mozilla.getCookiesFromBrowser(url,force=True)
|
||||||
|
|
||||||
soup = BeautifulSoup(page.content,"html.parser")
|
soup = BeautifulSoup(page.content,"html.parser")
|
||||||
#print(soup.prettify())
|
#print(soup.prettify())
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user