- change search classes

- change some lookups to use attributes instead of class names (see the sketch below)
- implement a better debugging solution in finder()
ccppi 2024-07-18 11:26:13 +02:00
parent 2794dc8ed3
commit 4ede40c37c
3 changed files with 26 additions and 9 deletions
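
The "attributes" bullet above refers to swapping class-name lookups for attribute lookups in the BeautifulSoup calls (the attrs={...} forms and the ATTRS=1 mode of finder() further down). Below is a minimal standalone sketch of the two lookup styles on made-up HTML; it assumes only that bs4 is installed and is an illustration, not code from this repository.

from bs4 import BeautifulSoup

html = """
<div data-feat="searched_jobs">
  <a data-cy="job-link" class="VacancyLink___StyledLink-sc-ufp08j-0" href="/en/vacancies/detail/123/">Job 123</a>
</div>
"""
soup = BeautifulSoup(html, "html.parser")

# Lookup by a generated CSS class name; this breaks whenever the styled-components hash changes.
by_class = soup.find_all("a", class_="VacancyLink___StyledLink-sc-ufp08j-0")

# Lookup by a stable data-* attribute; the style this commit moves towards.
by_attrs = soup.find_all("a", attrs={"data-cy": "job-link"})

print(len(by_class), len(by_attrs))  # both print 1 for this snippet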

@ -1,3 +1,9 @@
[jobs.ch_seilbahn]
USER = NONE
PW = NONE
LOGINURL = NONE
SCRAPURL = https://www.jobs.ch/en/vacancies/?term=seilbahn
TAG = Seilbahn
[jobagent.ch]
USER = j.wyss@kolabnow.ch

@ -5,7 +5,7 @@ from enum import Enum
import re
from dateconverter import *
from datetime import datetime
DEBUG = False
DEBUG = True
def log(*s):
    if DEBUG:
@ -35,10 +35,14 @@ months = [
('November','11'),
('December','12')]
class item():
    def __init__(self,tag,tag_content,index):
    def __init__(self,tag,tag_content,index,name=None):
        self.tag = tag
        self.tag_content = tag_content
        self.index = index
        if name is not None:
            self.name = name
        else:
            self.name = "not defined"
class job():
    def __init__(self,title,profession,company,location,date,description,link,tag,starred):
@ -63,7 +67,10 @@ def finder(results,item,**modes):
    BASEURL = modes.get('BASEURL','')
    content = []
    i = item.index
    log("name",item.name)
    log("Item tag: ",item.tag)
    log("Modes:",modes)
    log("tag_content: ",item.tag_content)
    for entry in results:
        if ATTRS==1:
@ -71,7 +78,11 @@ def finder(results,item,**modes):
            log(item.tag_content)
        else:
            result = entry.findAll(item.tag,class_=item.tag_content)
        log("found count count results:",len(result))
        log("found count results:",len(result))
        if item.name == "TITLE!!" and len(result) == 0 and DEBUG == True:
            for x in results:
                log(x)
            input()
        if result:
            log("theres a result")
            if i>(len(result)-1):
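
For context: the new block above is a break-on-failure aid. When the item labelled "TITLE!!" matches nothing and DEBUG is on, finder() dumps every candidate result via log() and blocks on input() so the page can be inspected before scraping continues. Here is a minimal standalone sketch of the same pattern; debug_find and its arguments are made-up names, not the project's finder() signature.

from bs4 import BeautifulSoup

DEBUG = True

def log(*s):
    if DEBUG:
        print(*s)

def debug_find(results, tag, selector, name):
    # Mirrors the pattern added to finder(): per entry, look the tag up by class,
    # and when the named lookup comes back empty, dump the candidates and pause.
    found = []
    for entry in results:
        result = entry.find_all(tag, class_=selector)
        log("found count results:", len(result))
        if name == "TITLE!!" and len(result) == 0 and DEBUG:
            for x in results:
                log(x)
            input()  # pause so the dumped HTML can be read
        found.extend(result)
    return found

soup = BeautifulSoup('<div><span class="jlFpCz">Seilbahn Mechaniker</span></div>', "html.parser")
print(debug_find([soup], "span", "wrong-class", "TITLE!!"))  # empty lookup triggers the dump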

@ -1,5 +1,5 @@
from helpers import *
DEBUG = False
DEBUG = True
def log(*s):
    if DEBUG:
@ -43,7 +43,7 @@ def scrap_jobs(url,entry,session):
    soup = BeautifulSoup(page.content,"html.parser")
    #print(soup.prettify())
    results = soup.find_all("div",attrs={"data-feat":"searched_jobs"})
    results = soup.find_all("div",attrs={'data-feat':'searched_jobs'})
    location_class = "P-sc-hyu5hk-0 Text__p2-sc-1lu7urs-10 Span-sc-1ybanni-0 Text__span-sc-1lu7urs-12 Text-sc-1lu7urs-13 jZCxUn"
    location = item("p",location_class,0)
@ -53,14 +53,14 @@ def scrap_jobs(url,entry,session):
    company = item("p",company_class,3)
    ar_company = finder(results,company,DEFAULT=1)
    title = item("span","Span-sc-1ybanni-0 Text__span-sc-1lu7urs-12 Text-sc-1lu7urs-13 VacancyItem___StyledText2-sc-iugtv6-5 iaJYDR jlFpCz dMwMcR",0)
    title = item("span","jlFpCz",0,"TITLE!!")
    ar_title = finder(results,title,DEFAULT=1)
    date = item("span","Span-sc-1ybanni-0 Text__span-sc-1lu7urs-12 Text-sc-1lu7urs-13 krGudM hUhFmL",0)
    ar_date = finder(results,date,CLEANDATE=1)
    link = item("a","VacancyLink___StyledLink-sc-ufp08j-0",0)
    ar_link = finder(results,link,LINK=1,BASEURL="https://jobs.ch")
    link = item("a",{'data-cy' :'job-link'},0)
    ar_link = finder(results,link,LINK=1,ATTRS=1,BASEURL="https://jobs.ch")
    tag = entry.tag#get from config
    return arrayToClass(ar_title,ar_company,ar_location,ar_date,ar_link,tag)
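
A note on the last two replacements: the job link is now located through the stable data-cy="job-link" attribute (hence ATTRS=1) instead of a generated VacancyLink class, and the relative href is presumably joined onto the BASEURL "https://jobs.ch" that is passed to finder(). A rough standalone sketch of that step follows; it uses invented HTML and urljoin, and is not the project's finder() implementation.

from urllib.parse import urljoin
from bs4 import BeautifulSoup

BASEURL = "https://jobs.ch"
html = '<a data-cy="job-link" href="/en/vacancies/detail/123/">Seilbahn Mechaniker</a>'

soup = BeautifulSoup(html, "html.parser")
anchor = soup.find("a", attrs={"data-cy": "job-link"})
if anchor is not None:
    print(urljoin(BASEURL, anchor["href"]))  # https://jobs.ch/en/vacancies/detail/123/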