- change search classes
- change some to attributes
- implement better debugging solution in finder()
This commit is contained in:
parent
2794dc8ed3
commit
4ede40c37c
6
lib/conf
6
lib/conf
@ -1,3 +1,9 @@
|
|||||||
|
[jobs.ch_seilbahn]
|
||||||
|
USER = NONE
|
||||||
|
PW = NONE
|
||||||
|
LOGINURL = NONE
|
||||||
|
SCRAPURL = https://www.jobs.ch/en/vacancies/?term=seilbahn
|
||||||
|
TAG = Seilbahn
|
||||||
|
|
||||||
[jobagent.ch]
|
[jobagent.ch]
|
||||||
USER = j.wyss@kolabnow.ch
|
USER = j.wyss@kolabnow.ch
|
||||||
|
@ -5,7 +5,7 @@ from enum import Enum
|
|||||||
import re
|
import re
|
||||||
from dateconverter import *
|
from dateconverter import *
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
DEBUG = False
|
DEBUG = True
|
||||||
|
|
||||||
def log(*s):
|
def log(*s):
|
||||||
if DEBUG:
|
if DEBUG:
|
||||||
@ -35,10 +35,14 @@ months = [
|
|||||||
('November','11'),
|
('November','11'),
|
||||||
('December','12')]
|
('December','12')]
|
||||||
class item():
|
class item():
|
||||||
def __init__(self,tag,tag_content,index):
|
def __init__(self,tag,tag_content,index,name=None):
|
||||||
self.tag = tag
|
self.tag = tag
|
||||||
self.tag_content = tag_content
|
self.tag_content = tag_content
|
||||||
self.index = index
|
self.index = index
|
||||||
|
if name is not None:
|
||||||
|
self.name = name
|
||||||
|
else:
|
||||||
|
self.name = "not defined"
|
||||||
|
|
||||||
class job():
|
class job():
|
||||||
def __init__(self,title,profession,company,location,date,description,link,tag,starred):
|
def __init__(self,title,profession,company,location,date,description,link,tag,starred):
|
||||||
@ -63,15 +67,22 @@ def finder(results,item,**modes):
|
|||||||
BASEURL = modes.get('BASEURL','')
|
BASEURL = modes.get('BASEURL','')
|
||||||
content = []
|
content = []
|
||||||
i = item.index
|
i = item.index
|
||||||
|
log("name",item.name)
|
||||||
|
log("Item tag: ",item.tag)
|
||||||
log("Modes:",modes)
|
log("Modes:",modes)
|
||||||
|
log("tag_content: ",item.tag_content)
|
||||||
|
|
||||||
for entry in results:
|
for entry in results:
|
||||||
if ATTRS==1:
|
if ATTRS==1:
|
||||||
result = entry.findAll(item.tag,attrs=item.tag_content)
|
result = entry.findAll(item.tag,attrs=item.tag_content)
|
||||||
log(item.tag_content)
|
log(item.tag_content)
|
||||||
else:
|
else:
|
||||||
result = entry.findAll(item.tag,class_=item.tag_content)
|
result = entry.findAll(item.tag,class_=item.tag_content)
|
||||||
log("found count count results:",len(result))
|
log("found count results:",len(result))
|
||||||
|
if item.name == "TITLE!!" and len(result) == 0 and DEBUG == True:
|
||||||
|
for x in results:
|
||||||
|
log(x)
|
||||||
|
input()
|
||||||
if result:
|
if result:
|
||||||
log("theres a result")
|
log("theres a result")
|
||||||
if i>(len(result)-1):
|
if i>(len(result)-1):
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
from helpers import *
|
from helpers import *
|
||||||
DEBUG = False
|
DEBUG = True
|
||||||
|
|
||||||
def log(*s):
|
def log(*s):
|
||||||
if DEBUG:
|
if DEBUG:
|
||||||
@ -43,7 +43,7 @@ def scrap_jobs(url,entry,session):
|
|||||||
soup = BeautifulSoup(page.content,"html.parser")
|
soup = BeautifulSoup(page.content,"html.parser")
|
||||||
#print(soup.prettify())
|
#print(soup.prettify())
|
||||||
|
|
||||||
results = soup.find_all("div",attrs={"data-feat":"searched_jobs"})
|
results = soup.find_all("div",attrs={'data-feat':'searched_jobs'})
|
||||||
|
|
||||||
location_class = "P-sc-hyu5hk-0 Text__p2-sc-1lu7urs-10 Span-sc-1ybanni-0 Text__span-sc-1lu7urs-12 Text-sc-1lu7urs-13 jZCxUn"
|
location_class = "P-sc-hyu5hk-0 Text__p2-sc-1lu7urs-10 Span-sc-1ybanni-0 Text__span-sc-1lu7urs-12 Text-sc-1lu7urs-13 jZCxUn"
|
||||||
location = item("p",location_class,0)
|
location = item("p",location_class,0)
|
||||||
@ -53,14 +53,14 @@ def scrap_jobs(url,entry,session):
|
|||||||
company = item("p",company_class,3)
|
company = item("p",company_class,3)
|
||||||
ar_company = finder(results,company,DEFAULT=1)
|
ar_company = finder(results,company,DEFAULT=1)
|
||||||
|
|
||||||
title = item("span","Span-sc-1ybanni-0 Text__span-sc-1lu7urs-12 Text-sc-1lu7urs-13 VacancyItem___StyledText2-sc-iugtv6-5 iaJYDR jlFpCz dMwMcR",0)
|
title = item("span","jlFpCz",0,"TITLE!!")
|
||||||
ar_title = finder(results,title,DEFAULT=1)
|
ar_title = finder(results,title,DEFAULT=1)
|
||||||
|
|
||||||
date = item("span","Span-sc-1ybanni-0 Text__span-sc-1lu7urs-12 Text-sc-1lu7urs-13 krGudM hUhFmL",0)
|
date = item("span","Span-sc-1ybanni-0 Text__span-sc-1lu7urs-12 Text-sc-1lu7urs-13 krGudM hUhFmL",0)
|
||||||
ar_date = finder(results,date,CLEANDATE=1)
|
ar_date = finder(results,date,CLEANDATE=1)
|
||||||
|
|
||||||
link = item("a","VacancyLink___StyledLink-sc-ufp08j-0",0)
|
link = item("a",{'data-cy' :'job-link'},0)
|
||||||
ar_link = finder(results,link,LINK=1,BASEURL="https://jobs.ch")
|
ar_link = finder(results,link,LINK=1,ATTRS=1,BASEURL="https://jobs.ch")
|
||||||
|
|
||||||
tag = entry.tag#get from config
|
tag = entry.tag#get from config
|
||||||
return arrayToClass(ar_title,ar_company,ar_location,ar_date,ar_link,tag)
|
return arrayToClass(ar_title,ar_company,ar_location,ar_date,ar_link,tag)
|
||||||
|
Loading…
Reference in New Issue
Block a user