job-scrapper/lib/login.py

70 lines
3.2 KiB
Python
Raw Normal View History

2024-06-13 09:14:04 +00:00
import requests
2024-08-22 09:12:11 +00:00
from requests_html import HTMLSession
2024-06-13 09:14:04 +00:00
from helpers import *
def login(entry):
    """Log in to the job site and return the authenticated session.

    Args:
        entry: Object exposing the attributes ``user``, ``pw``, ``loginurl``
            and ``scrapurl``.

    Returns:
        The ``requests.Session`` used for the login. It is still open when
        returned; the caller owns it and should close it when done.

    Raises:
        requests.RequestException: If one of the HTTP requests fails. The
            session is closed before the exception propagates.
    """
    # Do not wrap the session in ``with``: the context manager would close
    # it on the way out, handing the caller an already-closed session.
    session = requests.Session()
    try:
        # update() keeps requests' case-insensitive header mapping instead of
        # replacing it with a plain dict. Content-Type and Content-Length are
        # deliberately not set here; requests derives them from the form data.
        session.headers.update({
            # NOTE(review): Host/Origin are hard-coded to jobagent.ch, so this
            # only works when entry.loginurl points at that site.
            "Host": "www.jobagent.ch",
            "User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:124.0) Gecko/20100101 Firefox/124.0",
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
            "Accept-Language": "en-US,en;q=0.5",
            # NOTE(review): advertising "br" presumably requires a brotli
            # decoder to be installed for the body to be readable - confirm.
            "Accept-Encoding": "gzip, deflate, br",
            "Origin": "https://www.jobagent.ch",
            "Connection": "keep-alive",
            "Referer": "https://jobagent.ch",
        })

        # Fetch the login page first so the session collects its cookies.
        session.get(entry.loginurl)

        payload = {"redirectUrl": "", "email": entry.user, "password": entry.pw}
        resp = session.post(entry.loginurl, data=payload)
        # Never echo the real password to stdout or logs.
        print({**payload, "password": "***"})

        # NOTE(review): checkBlockers returns -1 on a failed login; the
        # result is ignored here, as in the original flow.
        checkBlockers(session, resp)

        r = session.get(entry.scrapurl)
        print(session.headers)
        print("response:", r)
    except Exception:
        session.close()
        raise
    return session
2024-08-22 09:12:11 +00:00
# solveCaptcha should run when:
# the string "captcha" is in the response
# Plan: search for the <iframe> element,
# read its src attribute,
# open a web browser so the user can solve the captcha,
# and then somehow get the resulting cookie, maybe?
def solveCaptcha(session, resp):
    """Report whether a response looks like a captcha / bot-check page.

    Only prints diagnostics for now; no captcha is actually solved.

    Args:
        session: The HTTP session the response came from. Currently unused.
        resp: The response to inspect: either an object exposing the body
            as a ``text`` attribute, or the body itself as a ``str``.

    Returns:
        None.
    """
    if isinstance(resp, str):
        body = resp
    else:
        body = getattr(resp, "text", None)
    if not isinstance(body, str):
        body = ""

    # Each marker needs its own membership test, and it must run against the
    # body text: ``"captcha" or "Enable JavaScript" in resp`` is always true
    # because the non-empty literal "captcha" is truthy on its own.
    markers = ("captcha", "Enable JavaScript")
    if any(marker in body for marker in markers):
        print("captcha link!! found:")

    print("exit loop")
    print("response:", resp)
    # TODO: parse the body, locate the captcha <iframe>, print its src and
    # wait for the user to solve it before continuing.
def checkBlockers(session, resp):
    """Inspect the login response for things that block scraping.

    Args:
        session: The HTTP session the login was attempted with; passed on
            to ``solveCaptcha``.
        resp: The response returned by the login POST, or ``None``.

    Returns:
        ``-1`` when the response URL is the site's login-error page,
        otherwise ``None``.
    """
    print("response from login attempt", resp)
    # Test for None explicitly rather than relying on truthiness: a requests
    # Response is falsy for every 4xx/5xx status, which would skip the checks
    # for exactly the replies a bot-protection page tends to produce.
    if resp is not None and resp.url == 'https://www.jobagent.ch/user/login?error':
        print("Error on login")
        return -1
    solveCaptcha(session, resp)