Fix logic error when pubdate is not found

This commit is contained in:
ccppi 2024-06-17 10:27:13 +02:00
parent c35c23f073
commit ac8c7251e8
3 changed files with 13 additions and 9 deletions

View File

@ -5,7 +5,7 @@ from enum import Enum
import re
from dateconverter import *
from datetime import datetime
DEBUG = True
DEBUG = False
def log(*s):
if DEBUG:
@ -117,7 +117,7 @@ def arrayToClass(titles,companys,locations,dates,links,tag):
log("class job:",jobs[i])
return jobs
else:
print("Something went wrong unequal length of data arrays: ",len(titles),len(companys),len(locations),len(dates))
log("Something went wrong unequal length of data arrays: ",len(titles),len(companys),len(locations),len(dates))
return 0
def jobs_ch_clean_date(date):
newdate=''

View File

@ -1,5 +1,5 @@
from helpers import *
DEBUG = True
DEBUG = False
def log(*s):
if DEBUG:
@ -9,10 +9,10 @@ def indeed_com(url,session):
if(session == 0):
with requests.Session() as session:
page = session.get(url)
print(page)
log(page)
else:
page = session.get(url)
print(page)
log(page)
soup = BeautifulSoup(page.content,"html.parser")
#print(soup.prettify())
@ -109,7 +109,7 @@ def next_url_jobagent(base_url,session,c):#depreacted will be removed in the fut
for i2 in next_url_names:
striped_string = i2.text.strip()
log(i2.text.strip(),"stripped:",striped_string)
# print("Printable characters?",striped_string.isprintable())
log("Printable characters?",striped_string.isprintable())
if (striped_string) == "Nächste Seite":
log(i2)
next_url = i2.get("href")
@ -137,8 +137,7 @@ def scrap_jobagent(url,entry,session):
results = soup.find_all("li",class_="item")
if not results:
print("no li items found")
print("page:",page)
input("Press key to continue")
log("page:",page)
title = item("span","jobtitle",0)
ar_title = finder(results,title)

View File

@ -7,6 +7,11 @@ from login import *
from time import sleep
from db import *
DEBUG = False
def log(*s):
if DEBUG:
print(s)
def choose_scraper(entry,session):
if not session:
session = requests.Session()
@ -89,7 +94,7 @@ def runner(entry,session,scrap_func,next_url_func):
print(domain)
if domain == 'https://www.jobagent.ch' or domain == 'https://software-job.ch':
jobs = scrap_func(b_url,entry,session)
print("jobs passing to db:",jobs)
log("jobs passing to db:",jobs)
if jobs:
writedb(jobs)
else: