logic error when pubdate not found
This commit is contained in:
parent
c35c23f073
commit
ac8c7251e8
@@ -5,7 +5,7 @@ from enum import Enum
|
||||
import re
|
||||
from dateconverter import *
|
||||
from datetime import datetime
|
||||
DEBUG = True
|
||||
DEBUG = False
|
||||
|
||||
def log(*s):
|
||||
if DEBUG:
|
||||
@@ -117,7 +117,7 @@ def arrayToClass(titles,companys,locations,dates,links,tag):
|
||||
log("class job:",jobs[i])
|
||||
return jobs
|
||||
else:
|
||||
print("Something went wrong unequal length of data arrays: ",len(titles),len(companys),len(locations),len(dates))
|
||||
log("Something went wrong unequal length of data arrays: ",len(titles),len(companys),len(locations),len(dates))
|
||||
return 0
|
||||
def jobs_ch_clean_date(date):
|
||||
newdate=''
|
||||
|
@@ -1,5 +1,5 @@
|
||||
from helpers import *
|
||||
DEBUG = True
|
||||
DEBUG = False
|
||||
|
||||
def log(*s):
|
||||
if DEBUG:
|
||||
@@ -9,10 +9,10 @@ def indeed_com(url,session):
|
||||
if(session == 0):
|
||||
with requests.Session() as session:
|
||||
page = session.get(url)
|
||||
print(page)
|
||||
log(page)
|
||||
else:
|
||||
page = session.get(url)
|
||||
print(page)
|
||||
log(page)
|
||||
soup = BeautifulSoup(page.content,"html.parser")
|
||||
#print(soup.prettify())
|
||||
|
||||
@@ -109,7 +109,7 @@ def next_url_jobagent(base_url,session,c):#depreacted will be removed in the fut
|
||||
for i2 in next_url_names:
|
||||
striped_string = i2.text.strip()
|
||||
log(i2.text.strip(),"stripped:",striped_string)
|
||||
# print("Printable characters?",striped_string.isprintable())
|
||||
log("Printable characters?",striped_string.isprintable())
|
||||
if (striped_string) == "Nächste Seite":
|
||||
log(i2)
|
||||
next_url = i2.get("href")
|
||||
@@ -137,8 +137,7 @@ def scrap_jobagent(url,entry,session):
|
||||
results = soup.find_all("li",class_="item")
|
||||
if not results:
|
||||
print("no li items found")
|
||||
print("page:",page)
|
||||
input("Press key to continue")
|
||||
log("page:",page)
|
||||
|
||||
title = item("span","jobtitle",0)
|
||||
ar_title = finder(results,title)
|
||||
|
@@ -7,6 +7,11 @@ from login import *
|
||||
from time import sleep
|
||||
from db import *
|
||||
|
||||
DEBUG = False
|
||||
def log(*s):
|
||||
if DEBUG:
|
||||
print(s)
|
||||
|
||||
def choose_scraper(entry,session):
|
||||
if not session:
|
||||
session = requests.Session()
|
||||
@@ -89,7 +94,7 @@ def runner(entry,session,scrap_func,next_url_func):
|
||||
print(domain)
|
||||
if domain == 'https://www.jobagent.ch' or domain == 'https://software-job.ch':
|
||||
jobs = scrap_func(b_url,entry,session)
|
||||
print("jobs passing to db:",jobs)
|
||||
log("jobs passing to db:",jobs)
|
||||
if jobs:
|
||||
writedb(jobs)
|
||||
else:
|
||||
|
Loading…
Reference in New Issue
Block a user