Logic error when pubdate is not found

2024-06-17 10:27:13 +02:00 · 2024-06-17 10:27:13 +02:00 · ac8c7251e8
commit ac8c7251e8
parent c35c23f073
3 changed files with 13 additions and 9 deletions
--- a/lib/helpers.py
+++ b/lib/helpers.py
@ -5,7 +5,7 @@ from enum import Enum
 import re
 from dateconverter import *
 from datetime import datetime
-DEBUG = True
+DEBUG = False

 def log(*s):
    if DEBUG:
@ -117,7 +117,7 @@ def arrayToClass(titles,companys,locations,dates,links,tag):
            log("class job:",jobs[i])
        return jobs
    else:
-        print("Something went wrong unequal length of data arrays: ",len(titles),len(companys),len(locations),len(dates))
+        log("Something went wrong unequal length of data arrays: ",len(titles),len(companys),len(locations),len(dates))
        return 0
 def jobs_ch_clean_date(date):
    newdate=''
--- a/lib/scrap_jobs.py
+++ b/lib/scrap_jobs.py
@ -1,5 +1,5 @@
 from helpers import *
-DEBUG = True
+DEBUG = False

 def log(*s):
    if DEBUG:
@ -9,10 +9,10 @@ def indeed_com(url,session):
    if(session == 0):
        with requests.Session() as session:
            page = session.get(url)
-            print(page)
+            log(page)
    else:
        page = session.get(url)
-        print(page)
+        log(page)
    soup = BeautifulSoup(page.content,"html.parser")
    #print(soup.prettify())

@ -109,7 +109,7 @@ def next_url_jobagent(base_url,session,c):#depreacted will be removed in the fut
    for i2 in next_url_names:
        striped_string = i2.text.strip()
        log(i2.text.strip(),"stripped:",striped_string)
-       # print("Printable characters?",striped_string.isprintable())
+        log("Printable characters?",striped_string.isprintable())
        if (striped_string) == "Nächste Seite":
            log(i2)
            next_url = i2.get("href")
@ -137,8 +137,7 @@ def scrap_jobagent(url,entry,session):
    results = soup.find_all("li",class_="item")
    if not results:
        print("no li items found")
-        print("page:",page)
-        input("Press key to continue")
+        log("page:",page)
    
    title = item("span","jobtitle",0)
    ar_title = finder(results,title)
--- a/lib/sysparse.py
+++ b/lib/sysparse.py
@ -7,6 +7,11 @@ from login import *
 from time import sleep
 from db import *

+DEBUG = False
+def log(*s):
+    if DEBUG:
+        print(s)
+
 def choose_scraper(entry,session):
    if not session:
        session = requests.Session()
@ -89,7 +94,7 @@ def runner(entry,session,scrap_func,next_url_func):
            print(domain)
            if domain == 'https://www.jobagent.ch' or domain == 'https://software-job.ch':
                jobs = scrap_func(b_url,entry,session)
-                print("jobs passing to db:",jobs)
+                log("jobs passing to db:",jobs)
                if jobs:
                    writedb(jobs)
                else: