Fix logic error when pubdate is not found

This commit is contained in:
ccppi 2024-06-17 10:22:28 +02:00
parent 61ce809d9f
commit e7f634d19a
4 changed files with 22 additions and 11 deletions

View File

@ -1,6 +1,6 @@
[jobagent.ch] [jobagent.ch]
USER = test@gmx.ch USER = j.wyss@kolabnow.ch
PW = ASK PW = ASK
LOGINURL = https://www.jobagent.ch/user/login LOGINURL = https://www.jobagent.ch/user/login
SCRAPURL = https://www.jobagent.ch/search?terms=Automatiker&lra=0&as=0 SCRAPURL = https://www.jobagent.ch/search?terms=Automatiker&lra=0&as=0
@ -30,7 +30,7 @@ TAG = Informatiker,Linux
[jobagent.ch-2] [jobagent.ch-2]
USER = test@gmx.ch USER = j.wyss@kolabnow.ch
PW = ASK PW = ASK
LOGINURL = https://www.jobagent.ch/user/login LOGINURL = https://www.jobagent.ch/user/login
SCRAPURL = https://www.jobagent.ch/search?terms=Informatiker&lra=0&as=0 SCRAPURL = https://www.jobagent.ch/search?terms=Informatiker&lra=0&as=0

View File

@ -5,7 +5,7 @@ from enum import Enum
import re import re
from dateconverter import * from dateconverter import *
from datetime import datetime from datetime import datetime
DEBUG = False DEBUG = True
def log(*s): def log(*s):
if DEBUG: if DEBUG:
@ -51,7 +51,6 @@ class job():
self.link = link self.link = link
self.tag = tag self.tag = tag
self.starred = starred self.starred = starred
def __str__(self): def __str__(self):
return "%s| %s|%s|%s|%s|%s|%s" % (self.title,self.profession,self.company,self.location,self.date,self.description,self.link) return "%s| %s|%s|%s|%s|%s|%s" % (self.title,self.profession,self.company,self.location,self.date,self.description,self.link)
@ -72,7 +71,7 @@ def finder(results,item,**modes):
log(item.tag_content) log(item.tag_content)
else: else:
result = entry.findAll(item.tag,class_=item.tag_content) result = entry.findAll(item.tag,class_=item.tag_content)
log("found:",len(result)) log("found count count results:",len(result))
if result: if result:
log("theres a result") log("theres a result")
if i>(len(result)-1): if i>(len(result)-1):
@ -99,7 +98,8 @@ def finder(results,item,**modes):
if CLEANDATE or SWAPDATE: if CLEANDATE or SWAPDATE:
today = datetime.today().strftime('%Y-%M-%D') today = datetime.today().strftime('%Y-%M-%D')
content.append(today) content.append(today)
content.append("NOTFound") else:
content.append("NOTFound")
return content return content
@ -114,10 +114,10 @@ def arrayToClass(titles,companys,locations,dates,links,tag):
log("len:",len(titles)) log("len:",len(titles))
for i, title in enumerate(titles): for i, title in enumerate(titles):
jobs.append(job(title,"test_prof",companys[i],locations[i],dates[i],"test_desc",links[i],tag,0)) jobs.append(job(title,"test_prof",companys[i],locations[i],dates[i],"test_desc",links[i],tag,0))
log(jobs[i]) log("class job:",jobs[i])
return jobs return jobs
else: else:
print("Something went wrong unequal length of data arrays") print("Something went wrong unequal length of data arrays: ",len(titles),len(companys),len(locations),len(dates))
return 0 return 0
def jobs_ch_clean_date(date): def jobs_ch_clean_date(date):
newdate='' newdate=''

View File

@ -1,5 +1,5 @@
from helpers import * from helpers import *
DEBUG = False DEBUG = True
def log(*s): def log(*s):
if DEBUG: if DEBUG:
@ -135,6 +135,10 @@ def scrap_jobagent(url,entry,session):
#print(soup.prettify()) #print(soup.prettify())
results = soup.find_all("li",class_="item") results = soup.find_all("li",class_="item")
if not results:
print("no li items found")
print("page:",page)
input("Press key to continue")
title = item("span","jobtitle",0) title = item("span","jobtitle",0)
ar_title = finder(results,title) ar_title = finder(results,title)

View File

@ -89,11 +89,18 @@ def runner(entry,session,scrap_func,next_url_func):
print(domain) print(domain)
if domain == 'https://www.jobagent.ch' or domain == 'https://software-job.ch': if domain == 'https://www.jobagent.ch' or domain == 'https://software-job.ch':
jobs = scrap_func(b_url,entry,session) jobs = scrap_func(b_url,entry,session)
writedb(jobs) print("jobs passing to db:",jobs)
if jobs:
writedb(jobs)
else:
print("nothing found on this page")
b_url = next_url_func(b_url,session,0) b_url = next_url_func(b_url,session,0)
elif domain == 'https://www.jobs.ch': elif domain == 'https://www.jobs.ch':
jobs = scrap_func(b_url,entry,session) jobs = scrap_func(b_url,entry,session)
writedb(jobs) if jobs:
writedb(jobs)
else:
print("nothing found on this page")
b_url = next_url_func(b_url,session,"https://www.jobs.ch") b_url = next_url_func(b_url,session,"https://www.jobs.ch")
if b_url != 0: if b_url != 0: