- Implement GETCHILDREN: read the text from a child tag of the matched element, e.g. <strong>sdfsafd</strong>
This commit is contained in:
parent
4ede40c37c
commit
e3701c44ae
@ -59,6 +59,7 @@ class job():
|
|||||||
return "%s| %s|%s|%s|%s|%s|%s" % (self.title,self.profession,self.company,self.location,self.date,self.description,self.link)
|
return "%s| %s|%s|%s|%s|%s|%s" % (self.title,self.profession,self.company,self.location,self.date,self.description,self.link)
|
||||||
|
|
||||||
def finder(results,item,**modes):
|
def finder(results,item,**modes):
|
||||||
|
GETCHILDREN = modes.get("GETCHILDREN",'')
|
||||||
ATTRS = modes.get('ATTRS',0)
|
ATTRS = modes.get('ATTRS',0)
|
||||||
LOCATION_CLEANUP = modes.get('LOCATION_CLEANUP',0)
|
LOCATION_CLEANUP = modes.get('LOCATION_CLEANUP',0)
|
||||||
LINK = modes.get('LINK',0)
|
LINK = modes.get('LINK',0)
|
||||||
@ -71,7 +72,6 @@ def finder(results,item,**modes):
|
|||||||
log("Item tag: ",item.tag)
|
log("Item tag: ",item.tag)
|
||||||
log("Modes:",modes)
|
log("Modes:",modes)
|
||||||
log("tag_content: ",item.tag_content)
|
log("tag_content: ",item.tag_content)
|
||||||
|
|
||||||
for entry in results:
|
for entry in results:
|
||||||
if ATTRS==1:
|
if ATTRS==1:
|
||||||
result = entry.findAll(item.tag,attrs=item.tag_content)
|
result = entry.findAll(item.tag,attrs=item.tag_content)
|
||||||
@ -79,7 +79,7 @@ def finder(results,item,**modes):
|
|||||||
else:
|
else:
|
||||||
result = entry.findAll(item.tag,class_=item.tag_content)
|
result = entry.findAll(item.tag,class_=item.tag_content)
|
||||||
log("found count results:",len(result))
|
log("found count results:",len(result))
|
||||||
if item.name == "TITLE!!" and len(result) == 0 and DEBUG == True:
|
if len(result) == 0 and DEBUG == True:
|
||||||
for x in results:
|
for x in results:
|
||||||
log(x)
|
log(x)
|
||||||
input()
|
input()
|
||||||
@ -91,7 +91,19 @@ def finder(results,item,**modes):
|
|||||||
# input("Press Enter..")
|
# input("Press Enter..")
|
||||||
i=(len(result)-1)
|
i=(len(result)-1)
|
||||||
result2 = result[i]
|
result2 = result[i]
|
||||||
if LOCATION_CLEANUP==1:
|
if GETCHILDREN!='':
|
||||||
|
found = False
|
||||||
|
for results in result:
|
||||||
|
child = results.find(GETCHILDREN)
|
||||||
|
log(child)
|
||||||
|
if child != None and found == False:
|
||||||
|
log("CHILD: ",child.text.strip())
|
||||||
|
found = True
|
||||||
|
content.append(child.text.strip())
|
||||||
|
if found == False:
|
||||||
|
content.append("CHILD_NOT_FOUND: " + GETCHILDREN)
|
||||||
|
|
||||||
|
elif LOCATION_CLEANUP==1:
|
||||||
location = CleanLocation(result2.text.strip())
|
location = CleanLocation(result2.text.strip())
|
||||||
content.append(location)
|
content.append(location)
|
||||||
elif LINK==1:
|
elif LINK==1:
|
||||||
|
@ -48,10 +48,9 @@ def scrap_jobs(url,entry,session):
|
|||||||
location_class = "P-sc-hyu5hk-0 Text__p2-sc-1lu7urs-10 Span-sc-1ybanni-0 Text__span-sc-1lu7urs-12 Text-sc-1lu7urs-13 jZCxUn"
|
location_class = "P-sc-hyu5hk-0 Text__p2-sc-1lu7urs-10 Span-sc-1ybanni-0 Text__span-sc-1lu7urs-12 Text-sc-1lu7urs-13 jZCxUn"
|
||||||
location = item("p",location_class,0)
|
location = item("p",location_class,0)
|
||||||
ar_location = finder(results,location,LOCATION_CLEANUP=1)
|
ar_location = finder(results,location,LOCATION_CLEANUP=1)
|
||||||
|
|
||||||
company_class = "P-sc-hyu5hk-0 Text__p2-sc-1lu7urs-10 Span-sc-1ybanni-0 Text__span-sc-1lu7urs-12 Text-sc-1lu7urs-13 jZCxUn"
|
company_class = "P-sc-hyu5hk-0 Text__p2-sc-1lu7urs-10 Span-sc-1ybanni-0 Text__span-sc-1lu7urs-12 Text-sc-1lu7urs-13 jZCxUn"
|
||||||
company = item("p",company_class,3)
|
company = item("p",company_class,0)
|
||||||
ar_company = finder(results,company,DEFAULT=1)
|
ar_company = finder(results,company,DEFAULT=1,GETCHILDREN='strong')
|
||||||
|
|
||||||
title = item("span","jlFpCz",0,"TITLE!!")
|
title = item("span","jlFpCz",0,"TITLE!!")
|
||||||
ar_title = finder(results,title,DEFAULT=1)
|
ar_title = finder(results,title,DEFAULT=1)
|
||||||
|
Loading…
Reference in New Issue
Block a user