logic error when pubdate not found
This commit is contained in:
parent
61ce809d9f
commit
e7f634d19a
4
lib/conf
4
lib/conf
@ -1,6 +1,6 @@
|
|||||||
|
|
||||||
[jobagent.ch]
|
[jobagent.ch]
|
||||||
USER = test@gmx.ch
|
USER = j.wyss@kolabnow.ch
|
||||||
PW = ASK
|
PW = ASK
|
||||||
LOGINURL = https://www.jobagent.ch/user/login
|
LOGINURL = https://www.jobagent.ch/user/login
|
||||||
SCRAPURL = https://www.jobagent.ch/search?terms=Automatiker&lra=0&as=0
|
SCRAPURL = https://www.jobagent.ch/search?terms=Automatiker&lra=0&as=0
|
||||||
@ -30,7 +30,7 @@ TAG = Informatiker,Linux
|
|||||||
|
|
||||||
|
|
||||||
[jobagent.ch-2]
|
[jobagent.ch-2]
|
||||||
USER = test@gmx.ch
|
USER = j.wyss@kolabnow.ch
|
||||||
PW = ASK
|
PW = ASK
|
||||||
LOGINURL = https://www.jobagent.ch/user/login
|
LOGINURL = https://www.jobagent.ch/user/login
|
||||||
SCRAPURL = https://www.jobagent.ch/search?terms=Informatiker&lra=0&as=0
|
SCRAPURL = https://www.jobagent.ch/search?terms=Informatiker&lra=0&as=0
|
||||||
|
@ -5,7 +5,7 @@ from enum import Enum
|
|||||||
import re
|
import re
|
||||||
from dateconverter import *
|
from dateconverter import *
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
DEBUG = False
|
DEBUG = True
|
||||||
|
|
||||||
def log(*s):
|
def log(*s):
|
||||||
if DEBUG:
|
if DEBUG:
|
||||||
@ -51,7 +51,6 @@ class job():
|
|||||||
self.link = link
|
self.link = link
|
||||||
self.tag = tag
|
self.tag = tag
|
||||||
self.starred = starred
|
self.starred = starred
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
return "%s| %s|%s|%s|%s|%s|%s" % (self.title,self.profession,self.company,self.location,self.date,self.description,self.link)
|
return "%s| %s|%s|%s|%s|%s|%s" % (self.title,self.profession,self.company,self.location,self.date,self.description,self.link)
|
||||||
|
|
||||||
@ -72,7 +71,7 @@ def finder(results,item,**modes):
|
|||||||
log(item.tag_content)
|
log(item.tag_content)
|
||||||
else:
|
else:
|
||||||
result = entry.findAll(item.tag,class_=item.tag_content)
|
result = entry.findAll(item.tag,class_=item.tag_content)
|
||||||
log("found:",len(result))
|
log("found count count results:",len(result))
|
||||||
if result:
|
if result:
|
||||||
log("theres a result")
|
log("theres a result")
|
||||||
if i>(len(result)-1):
|
if i>(len(result)-1):
|
||||||
@ -99,6 +98,7 @@ def finder(results,item,**modes):
|
|||||||
if CLEANDATE or SWAPDATE:
|
if CLEANDATE or SWAPDATE:
|
||||||
today = datetime.today().strftime('%Y-%M-%D')
|
today = datetime.today().strftime('%Y-%M-%D')
|
||||||
content.append(today)
|
content.append(today)
|
||||||
|
else:
|
||||||
content.append("NOTFound")
|
content.append("NOTFound")
|
||||||
return content
|
return content
|
||||||
|
|
||||||
@ -114,10 +114,10 @@ def arrayToClass(titles,companys,locations,dates,links,tag):
|
|||||||
log("len:",len(titles))
|
log("len:",len(titles))
|
||||||
for i, title in enumerate(titles):
|
for i, title in enumerate(titles):
|
||||||
jobs.append(job(title,"test_prof",companys[i],locations[i],dates[i],"test_desc",links[i],tag,0))
|
jobs.append(job(title,"test_prof",companys[i],locations[i],dates[i],"test_desc",links[i],tag,0))
|
||||||
log(jobs[i])
|
log("class job:",jobs[i])
|
||||||
return jobs
|
return jobs
|
||||||
else:
|
else:
|
||||||
print("Something went wrong unequal length of data arrays")
|
print("Something went wrong unequal length of data arrays: ",len(titles),len(companys),len(locations),len(dates))
|
||||||
return 0
|
return 0
|
||||||
def jobs_ch_clean_date(date):
|
def jobs_ch_clean_date(date):
|
||||||
newdate=''
|
newdate=''
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
from helpers import *
|
from helpers import *
|
||||||
DEBUG = False
|
DEBUG = True
|
||||||
|
|
||||||
def log(*s):
|
def log(*s):
|
||||||
if DEBUG:
|
if DEBUG:
|
||||||
@ -135,6 +135,10 @@ def scrap_jobagent(url,entry,session):
|
|||||||
#print(soup.prettify())
|
#print(soup.prettify())
|
||||||
|
|
||||||
results = soup.find_all("li",class_="item")
|
results = soup.find_all("li",class_="item")
|
||||||
|
if not results:
|
||||||
|
print("no li items found")
|
||||||
|
print("page:",page)
|
||||||
|
input("Press key to continue")
|
||||||
|
|
||||||
title = item("span","jobtitle",0)
|
title = item("span","jobtitle",0)
|
||||||
ar_title = finder(results,title)
|
ar_title = finder(results,title)
|
||||||
|
@ -89,11 +89,18 @@ def runner(entry,session,scrap_func,next_url_func):
|
|||||||
print(domain)
|
print(domain)
|
||||||
if domain == 'https://www.jobagent.ch' or domain == 'https://software-job.ch':
|
if domain == 'https://www.jobagent.ch' or domain == 'https://software-job.ch':
|
||||||
jobs = scrap_func(b_url,entry,session)
|
jobs = scrap_func(b_url,entry,session)
|
||||||
|
print("jobs passing to db:",jobs)
|
||||||
|
if jobs:
|
||||||
writedb(jobs)
|
writedb(jobs)
|
||||||
|
else:
|
||||||
|
print("nothing found on this page")
|
||||||
b_url = next_url_func(b_url,session,0)
|
b_url = next_url_func(b_url,session,0)
|
||||||
elif domain == 'https://www.jobs.ch':
|
elif domain == 'https://www.jobs.ch':
|
||||||
jobs = scrap_func(b_url,entry,session)
|
jobs = scrap_func(b_url,entry,session)
|
||||||
|
if jobs:
|
||||||
writedb(jobs)
|
writedb(jobs)
|
||||||
|
else:
|
||||||
|
print("nothing found on this page")
|
||||||
b_url = next_url_func(b_url,session,"https://www.jobs.ch")
|
b_url = next_url_func(b_url,session,"https://www.jobs.ch")
|
||||||
|
|
||||||
if b_url != 0:
|
if b_url != 0:
|
||||||
|
Loading…
Reference in New Issue
Block a user