- Implement the GETCHILDREN mode: extract the text of a child tag, e.g. <strong>sdfsafd</strong>

This commit is contained in:
ccppi 2024-07-18 13:30:20 +02:00
parent 4ede40c37c
commit e3701c44ae
2 changed files with 17 additions and 6 deletions

View File

@ -59,6 +59,7 @@ class job():
return "%s| %s|%s|%s|%s|%s|%s" % (self.title,self.profession,self.company,self.location,self.date,self.description,self.link) return "%s| %s|%s|%s|%s|%s|%s" % (self.title,self.profession,self.company,self.location,self.date,self.description,self.link)
def finder(results,item,**modes): def finder(results,item,**modes):
GETCHILDREN = modes.get("GETCHILDREN",'')
ATTRS = modes.get('ATTRS',0) ATTRS = modes.get('ATTRS',0)
LOCATION_CLEANUP = modes.get('LOCATION_CLEANUP',0) LOCATION_CLEANUP = modes.get('LOCATION_CLEANUP',0)
LINK = modes.get('LINK',0) LINK = modes.get('LINK',0)
@ -71,7 +72,6 @@ def finder(results,item,**modes):
log("Item tag: ",item.tag) log("Item tag: ",item.tag)
log("Modes:",modes) log("Modes:",modes)
log("tag_content: ",item.tag_content) log("tag_content: ",item.tag_content)
for entry in results: for entry in results:
if ATTRS==1: if ATTRS==1:
result = entry.findAll(item.tag,attrs=item.tag_content) result = entry.findAll(item.tag,attrs=item.tag_content)
@ -79,7 +79,7 @@ def finder(results,item,**modes):
else: else:
result = entry.findAll(item.tag,class_=item.tag_content) result = entry.findAll(item.tag,class_=item.tag_content)
log("found count results:",len(result)) log("found count results:",len(result))
if item.name == "TITLE!!" and len(result) == 0 and DEBUG == True: if len(result) == 0 and DEBUG == True:
for x in results: for x in results:
log(x) log(x)
input() input()
@ -91,7 +91,19 @@ def finder(results,item,**modes):
# input("Press Enter..") # input("Press Enter..")
i=(len(result)-1) i=(len(result)-1)
result2 = result[i] result2 = result[i]
if LOCATION_CLEANUP==1: if GETCHILDREN!='':
found = False
for results in result:
child = results.find(GETCHILDREN)
log(child)
if child != None and found == False:
log("CHILD: ",child.text.strip())
found = True
content.append(child.text.strip())
if found == False:
content.append("CHILD_NOT_FOUND: " + GETCHILDREN)
elif LOCATION_CLEANUP==1:
location = CleanLocation(result2.text.strip()) location = CleanLocation(result2.text.strip())
content.append(location) content.append(location)
elif LINK==1: elif LINK==1:

View File

@ -48,10 +48,9 @@ def scrap_jobs(url,entry,session):
location_class = "P-sc-hyu5hk-0 Text__p2-sc-1lu7urs-10 Span-sc-1ybanni-0 Text__span-sc-1lu7urs-12 Text-sc-1lu7urs-13 jZCxUn" location_class = "P-sc-hyu5hk-0 Text__p2-sc-1lu7urs-10 Span-sc-1ybanni-0 Text__span-sc-1lu7urs-12 Text-sc-1lu7urs-13 jZCxUn"
location = item("p",location_class,0) location = item("p",location_class,0)
ar_location = finder(results,location,LOCATION_CLEANUP=1) ar_location = finder(results,location,LOCATION_CLEANUP=1)
company_class = "P-sc-hyu5hk-0 Text__p2-sc-1lu7urs-10 Span-sc-1ybanni-0 Text__span-sc-1lu7urs-12 Text-sc-1lu7urs-13 jZCxUn" company_class = "P-sc-hyu5hk-0 Text__p2-sc-1lu7urs-10 Span-sc-1ybanni-0 Text__span-sc-1lu7urs-12 Text-sc-1lu7urs-13 jZCxUn"
company = item("p",company_class,3) company = item("p",company_class,0)
ar_company = finder(results,company,DEFAULT=1) ar_company = finder(results,company,DEFAULT=1,GETCHILDREN='strong')
title = item("span","jlFpCz",0,"TITLE!!") title = item("span","jlFpCz",0,"TITLE!!")
ar_title = finder(results,title,DEFAULT=1) ar_title = finder(results,title,DEFAULT=1)