change user agent
This commit is contained in:
parent
45237337df
commit
1bf8198c70
@ -150,7 +150,7 @@ def isStillValid(file,skiprows):
|
|||||||
if resp.status >= 400 or isLink == False:
|
if resp.status >= 400 or isLink == False:
|
||||||
print("link is no more valid, remove item")
|
print("link is no more valid, remove item")
|
||||||
rm_cursor = connection.cursor()
|
rm_cursor = connection.cursor()
|
||||||
rm_itm = rm_cursor.execute(f"""DELETE from jobs WHERE link = ? AND star != 1;""",(row[0],))
|
rm_itm = rm_cursor.execute("DELETE from jobs WHERE link = ? AND star != 1;",(row[0],))
|
||||||
print ("Deletion resultet in: ", rm_itm)
|
print ("Deletion resultet in: ", rm_itm)
|
||||||
print("result of commit: ", connection.commit())
|
print("result of commit: ", connection.commit())
|
||||||
return 0
|
return 0
|
||||||
|
19
lib/login.py
19
lib/login.py
@ -6,17 +6,18 @@ def login(entry):
|
|||||||
loginurl = entry.loginurl
|
loginurl = entry.loginurl
|
||||||
scrapurl = entry.scrapurl
|
scrapurl = entry.scrapurl
|
||||||
with requests.Session() as session:
|
with requests.Session() as session:
|
||||||
headers = {
|
session.headers = {
|
||||||
"Host": "www.jobagent.ch",
|
"Host": "www.jobagent.ch",
|
||||||
"User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:124.0) Gecko/20100101 Firefox/124.0",
|
"User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:124.0) Gecko/20100101 Firefox/124.0",
|
||||||
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
|
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
|
||||||
"Accept-Language": "en-US,en;q=0.5",
|
"Accept-Language": "en-US,en;q=0.5",
|
||||||
"Accept-Encoding": "gzip, deflate, br",
|
"Accept-Encoding": "gzip, deflate, br",
|
||||||
"Content-Type": "application/x-www-form-urlencoded",
|
# "Content-Type": "application/x-www-form-urlencoded",
|
||||||
"Content-Length": "58",
|
# "Content-Length": "58",
|
||||||
"Origin": "https://www.jobagent.ch",
|
"Origin": "https://www.jobagent.ch",
|
||||||
# "Connection": "keep-alive",
|
"Connection": "keep-alive",
|
||||||
"Referer": "https://www.jobagent.ch/user/login",
|
"Referer": "https://jobagent.ch",
|
||||||
|
"Cookie": "datadome=BbGio7V9YBqYELb~B2a7DqE9Zr5EWb315OypbcxGQOFKbhkJR48etFSooYwtnKF2sK5leCh7Q_0o6W5YMwl0qEH~Fw3BU0m~48MgrkuaxO3Z1s5MTqCnTZVW3PcQv7KM; _uc=ad_storage=granted:analytics_storage=granted; _gcl_au=1.1.1328234550.1724056973.1502078804.1724062620.1724062680; _ga=GA1.1.1840632117.1724056971; _ga_T0E2JNNRW2=GS1.1.1724062555.3.1.1724062683.0.1.2098134382; JSESSIONID=AB8CC398C2576A6A87C53A74CCD8F7F5; _pin_unauth=dWlkPU56Y3pabU14WW1JdFptTTFNeTAwTkdFMkxUbGlZV0V0TWprNVkyTXpZemd4WldNNA; _fbp=fb.1.1724056975123.543612652217076856; _clck=16bp9by%7C2%7Cfog%7C0%7C1692; _clsk=1o7y6b9%7C1724062683361%7C9%7C1%7Cu.clarity.ms%2Fcollect; _rm=ai53eXNzJTQwa29sYWJub3cuY2g6MTcyNjY1NDY4MTA0NDpTSEEyNTY6ZGRkMmZhYTRjZWY3MWZkZDU1M2VlMTI4ZjYzOGY1NmFiYmRkNjNiMmI3ZjE1NWRhNmU3YzcwZWU1NjQ2Mjc0Mw; _uetsid=0737af805e0711efbe7bdd027b00b063; _uetvid=0737b3005e0711efb7c7035382896421",
|
||||||
# "Upgrade-Insecure-Requests": "1",
|
# "Upgrade-Insecure-Requests": "1",
|
||||||
# "Sec-Fetch-Dest": "document",
|
# "Sec-Fetch-Dest": "document",
|
||||||
# "Sec-Fetch-Mode": "navigate",
|
# "Sec-Fetch-Mode": "navigate",
|
||||||
@ -24,15 +25,15 @@ def login(entry):
|
|||||||
# "DNT": "1",
|
# "DNT": "1",
|
||||||
# "Sec-GPC": "1"
|
# "Sec-GPC": "1"
|
||||||
}
|
}
|
||||||
|
|
||||||
r = session.get(loginurl)
|
r = session.get(loginurl)
|
||||||
payload = {"redirectUrl":"","email":user,"password":pw}
|
payload = {"redirectUrl":"","email":user,"password":pw}
|
||||||
resp = session.post(loginurl,data=payload,headers=headers)
|
resp = session.post(loginurl,data=payload)
|
||||||
print(payload)
|
print(payload)
|
||||||
print("response from login attempt",resp.url)
|
print("response from login attempt",resp)
|
||||||
if resp.url == 'https://www.jobagent.ch/user/login?error':
|
if resp.url == 'https://www.jobagent.ch/user/login?error':
|
||||||
print("Error on login")
|
print("Error on login")
|
||||||
return -1
|
return -1
|
||||||
r = session.get(scrapurl)
|
r = session.get(scrapurl)
|
||||||
|
print(session.headers)
|
||||||
|
print("response:",r)
|
||||||
return session
|
return session
|
||||||
|
@ -43,6 +43,9 @@ def scrap_indeed_com(url,entry,session):
|
|||||||
return arrayToClass(ar_title,ar_company,ar_location,ar_date,ar_link,tag)
|
return arrayToClass(ar_title,ar_company,ar_location,ar_date,ar_link,tag)
|
||||||
|
|
||||||
def scrap_jobs(url,entry,session):
|
def scrap_jobs(url,entry,session):
|
||||||
|
session.headers = {
|
||||||
|
"User-Agent" : "Mozilla/5.0 (X11; Linux x86_64; rv:127.0) Gecko/20100101 Firefox/127.0"
|
||||||
|
}
|
||||||
jobs = []
|
jobs = []
|
||||||
log("in scrap jobs,url",url)
|
log("in scrap jobs,url",url)
|
||||||
if(session == 0 or session == -1):
|
if(session == 0 or session == -1):
|
||||||
@ -157,17 +160,19 @@ def next_url_jobagent(base_url,session,c):#depreacted will be removed in the fut
|
|||||||
|
|
||||||
def scrap_jobagent(url,entry,session):
|
def scrap_jobagent(url,entry,session):
|
||||||
jobs = []
|
jobs = []
|
||||||
log("in scrap jobs,url",url)
|
log("[scrap_jobagent],url",url)
|
||||||
if(session == 0 or session == -1):
|
if(session == 0 or session == -1):
|
||||||
|
log("session not sucessful transmitted ",session)
|
||||||
with requests.Session() as session:
|
with requests.Session() as session:
|
||||||
page = session.get(url)
|
page = session.get(url)
|
||||||
log(page)
|
log(page)
|
||||||
else:
|
else:
|
||||||
page = session.get(url)
|
page = session.get(url)
|
||||||
log(page)
|
page = session.get(url)
|
||||||
|
log("[scrap_jobagent]page:",page)
|
||||||
soup = BeautifulSoup(page.content,"html.parser")
|
soup = BeautifulSoup(page.content,"html.parser")
|
||||||
#print(soup.prettify())
|
print(soup.prettify())
|
||||||
|
print(session.headers)
|
||||||
results = soup.find_all("li",class_="item")
|
results = soup.find_all("li",class_="item")
|
||||||
if not results:
|
if not results:
|
||||||
print("no li items found")
|
print("no li items found")
|
||||||
|
@ -78,20 +78,30 @@ def login_loop(config_file,gui,worker):
|
|||||||
while (ret != 0):
|
while (ret != 0):
|
||||||
if gui:
|
if gui:
|
||||||
worker.dialog_rejected = False
|
worker.dialog_rejected = False
|
||||||
ret = entry2 = config.readConfig(config_file,gui,worker)
|
ret = entry2 = config.readConfig(config_file,gui,worker)
|
||||||
print(entry2)
|
print(entry2)
|
||||||
if(ret != 0 and ret_login != 1):
|
if(ret != 0 and ret_login != 1):
|
||||||
if(entry2.loginurl != 'NONE'):
|
if(entry2.loginurl != 'NONE'):
|
||||||
session = -1
|
session = -1
|
||||||
log("[pre while] worker.dialog_rejected = ",worker.dialog_rejected)
|
log("[pre while] worker.dialog_rejected = ",worker.dialog_rejected)
|
||||||
while (session == -1 and worker.dialog_rejected == False):
|
worker.dialog_rejected = False
|
||||||
log("worker.dialog_rejected = ",worker.dialog_rejected)
|
while (session == -1 and worker.dialog_rejected == False):
|
||||||
session = login(entry2)
|
log("worker.dialog_rejected = ",worker.dialog_rejected)
|
||||||
ret_login = entry2.input_pw(gui,entry2.user,worker)
|
session = login(entry2)
|
||||||
if gui:
|
ret_login = entry2.input_pw(gui,entry2.user,worker)
|
||||||
if worker.dialog_rejected == False:
|
if worker.dialog_rejected == False:
|
||||||
choose_scraper(entry2,session)
|
choose_scraper(entry2,session)
|
||||||
if not gui:
|
if not gui:
|
||||||
|
ret = entry2 = config.readConfig(config_file,gui,worker)
|
||||||
|
#print(entry2)
|
||||||
|
if(ret != 0 and ret_login != 1):
|
||||||
|
if(entry2.loginurl != 'NONE'):
|
||||||
|
session = -1
|
||||||
|
while (session == -1):
|
||||||
|
session = login(entry2)
|
||||||
|
if session == -1:
|
||||||
|
ret_login = entry2.input_pw(gui,entry2.user,worker)
|
||||||
|
log("[login_loop] session:",session)
|
||||||
choose_scraper(entry2,session)
|
choose_scraper(entry2,session)
|
||||||
|
|
||||||
def runner(entry,session,scrap_func,next_url_func):
|
def runner(entry,session,scrap_func,next_url_func):
|
||||||
|
Loading…
Reference in New Issue
Block a user