Logic error when pubdate is not found
This commit is contained in:
parent
c35c23f073
commit
ac8c7251e8
@ -5,7 +5,7 @@ from enum import Enum
|
|||||||
import re
|
import re
|
||||||
from dateconverter import *
|
from dateconverter import *
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
DEBUG = True
|
DEBUG = False
|
||||||
|
|
||||||
def log(*s):
|
def log(*s):
|
||||||
if DEBUG:
|
if DEBUG:
|
||||||
@ -117,7 +117,7 @@ def arrayToClass(titles,companys,locations,dates,links,tag):
|
|||||||
log("class job:",jobs[i])
|
log("class job:",jobs[i])
|
||||||
return jobs
|
return jobs
|
||||||
else:
|
else:
|
||||||
print("Something went wrong unequal length of data arrays: ",len(titles),len(companys),len(locations),len(dates))
|
log("Something went wrong unequal length of data arrays: ",len(titles),len(companys),len(locations),len(dates))
|
||||||
return 0
|
return 0
|
||||||
def jobs_ch_clean_date(date):
|
def jobs_ch_clean_date(date):
|
||||||
newdate=''
|
newdate=''
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
from helpers import *
|
from helpers import *
|
||||||
DEBUG = True
|
DEBUG = False
|
||||||
|
|
||||||
def log(*s):
|
def log(*s):
|
||||||
if DEBUG:
|
if DEBUG:
|
||||||
@ -9,10 +9,10 @@ def indeed_com(url,session):
|
|||||||
if(session == 0):
|
if(session == 0):
|
||||||
with requests.Session() as session:
|
with requests.Session() as session:
|
||||||
page = session.get(url)
|
page = session.get(url)
|
||||||
print(page)
|
log(page)
|
||||||
else:
|
else:
|
||||||
page = session.get(url)
|
page = session.get(url)
|
||||||
print(page)
|
log(page)
|
||||||
soup = BeautifulSoup(page.content,"html.parser")
|
soup = BeautifulSoup(page.content,"html.parser")
|
||||||
#print(soup.prettify())
|
#print(soup.prettify())
|
||||||
|
|
||||||
@ -109,7 +109,7 @@ def next_url_jobagent(base_url,session,c):#depreacted will be removed in the fut
|
|||||||
for i2 in next_url_names:
|
for i2 in next_url_names:
|
||||||
striped_string = i2.text.strip()
|
striped_string = i2.text.strip()
|
||||||
log(i2.text.strip(),"stripped:",striped_string)
|
log(i2.text.strip(),"stripped:",striped_string)
|
||||||
# print("Printable characters?",striped_string.isprintable())
|
log("Printable characters?",striped_string.isprintable())
|
||||||
if (striped_string) == "Nächste Seite":
|
if (striped_string) == "Nächste Seite":
|
||||||
log(i2)
|
log(i2)
|
||||||
next_url = i2.get("href")
|
next_url = i2.get("href")
|
||||||
@ -137,8 +137,7 @@ def scrap_jobagent(url,entry,session):
|
|||||||
results = soup.find_all("li",class_="item")
|
results = soup.find_all("li",class_="item")
|
||||||
if not results:
|
if not results:
|
||||||
print("no li items found")
|
print("no li items found")
|
||||||
print("page:",page)
|
log("page:",page)
|
||||||
input("Press key to continue")
|
|
||||||
|
|
||||||
title = item("span","jobtitle",0)
|
title = item("span","jobtitle",0)
|
||||||
ar_title = finder(results,title)
|
ar_title = finder(results,title)
|
||||||
|
@ -7,6 +7,11 @@ from login import *
|
|||||||
from time import sleep
|
from time import sleep
|
||||||
from db import *
|
from db import *
|
||||||
|
|
||||||
|
DEBUG = False
|
||||||
|
def log(*s):
|
||||||
|
if DEBUG:
|
||||||
|
print(s)
|
||||||
|
|
||||||
def choose_scraper(entry,session):
|
def choose_scraper(entry,session):
|
||||||
if not session:
|
if not session:
|
||||||
session = requests.Session()
|
session = requests.Session()
|
||||||
@ -89,7 +94,7 @@ def runner(entry,session,scrap_func,next_url_func):
|
|||||||
print(domain)
|
print(domain)
|
||||||
if domain == 'https://www.jobagent.ch' or domain == 'https://software-job.ch':
|
if domain == 'https://www.jobagent.ch' or domain == 'https://software-job.ch':
|
||||||
jobs = scrap_func(b_url,entry,session)
|
jobs = scrap_func(b_url,entry,session)
|
||||||
print("jobs passing to db:",jobs)
|
log("jobs passing to db:",jobs)
|
||||||
if jobs:
|
if jobs:
|
||||||
writedb(jobs)
|
writedb(jobs)
|
||||||
else:
|
else:
|
||||||
|
Loading…
Reference in New Issue
Block a user