From e7f634d19aa5c086593c095fe44f8321b09eff01 Mon Sep 17 00:00:00 2001 From: ccppi Date: Mon, 17 Jun 2024 10:22:28 +0200 Subject: [PATCH] logic error when pubdate not found --- lib/conf | 4 ++-- lib/helpers.py | 12 ++++++------ lib/scrap_jobs.py | 6 +++++- lib/sysparse.py | 11 +++++++++-- 4 files changed, 22 insertions(+), 11 deletions(-) diff --git a/lib/conf b/lib/conf index 1bcddef..f3928a3 100644 --- a/lib/conf +++ b/lib/conf @@ -1,6 +1,6 @@ [jobagent.ch] -USER = test@gmx.ch +USER = j.wyss@kolabnow.ch PW = ASK LOGINURL = https://www.jobagent.ch/user/login SCRAPURL = https://www.jobagent.ch/search?terms=Automatiker&lra=0&as=0 @@ -30,7 +30,7 @@ TAG = Informatiker,Linux [jobagent.ch-2] -USER = test@gmx.ch +USER = j.wyss@kolabnow.ch PW = ASK LOGINURL = https://www.jobagent.ch/user/login SCRAPURL = https://www.jobagent.ch/search?terms=Informatiker&lra=0&as=0 diff --git a/lib/helpers.py b/lib/helpers.py index 68014ee..30e4a25 100644 --- a/lib/helpers.py +++ b/lib/helpers.py @@ -5,7 +5,7 @@ from enum import Enum import re from dateconverter import * from datetime import datetime -DEBUG = False +DEBUG = True def log(*s): if DEBUG: @@ -51,7 +51,6 @@ class job(): self.link = link self.tag = tag self.starred = starred - def __str__(self): return "%s| %s|%s|%s|%s|%s|%s" % (self.title,self.profession,self.company,self.location,self.date,self.description,self.link) @@ -72,7 +71,7 @@ def finder(results,item,**modes): log(item.tag_content) else: result = entry.findAll(item.tag,class_=item.tag_content) - log("found:",len(result)) + log("found count count results:",len(result)) if result: log("theres a result") if i>(len(result)-1): @@ -99,7 +98,8 @@ def finder(results,item,**modes): if CLEANDATE or SWAPDATE: today = datetime.today().strftime('%Y-%M-%D') content.append(today) - content.append("NOTFound") + else: + content.append("NOTFound") return content @@ -114,10 +114,10 @@ def arrayToClass(titles,companys,locations,dates,links,tag): log("len:",len(titles)) for i, title in enumerate(titles): jobs.append(job(title,"test_prof",companys[i],locations[i],dates[i],"test_desc",links[i],tag,0)) - log(jobs[i]) + log("class job:",jobs[i]) return jobs else: - print("Something went wrong unequal length of data arrays") + print("Something went wrong unequal length of data arrays: ",len(titles),len(companys),len(locations),len(dates)) return 0 def jobs_ch_clean_date(date): newdate='' diff --git a/lib/scrap_jobs.py b/lib/scrap_jobs.py index 5ed9bac..b71b249 100644 --- a/lib/scrap_jobs.py +++ b/lib/scrap_jobs.py @@ -1,5 +1,5 @@ from helpers import * -DEBUG = False +DEBUG = True def log(*s): if DEBUG: @@ -135,6 +135,10 @@ def scrap_jobagent(url,entry,session): #print(soup.prettify()) results = soup.find_all("li",class_="item") + if not results: + print("no li items found") + print("page:",page) + input("Press key to continue") title = item("span","jobtitle",0) ar_title = finder(results,title) diff --git a/lib/sysparse.py b/lib/sysparse.py index 6535fbd..a46b821 100644 --- a/lib/sysparse.py +++ b/lib/sysparse.py @@ -89,11 +89,18 @@ def runner(entry,session,scrap_func,next_url_func): print(domain) if domain == 'https://www.jobagent.ch' or domain == 'https://software-job.ch': jobs = scrap_func(b_url,entry,session) - writedb(jobs) + print("jobs passing to db:",jobs) + if jobs: + writedb(jobs) + else: + print("nothing found on this page") b_url = next_url_func(b_url,session,0) elif domain == 'https://www.jobs.ch': jobs = scrap_func(b_url,entry,session) - writedb(jobs) + if jobs: + writedb(jobs) + else: + print("nothing found on this page") b_url = next_url_func(b_url,session,"https://www.jobs.ch") if b_url != 0: