main file
This commit is contained in:
parent
aaff6694fd
commit
6598218abe
5
main.py
Normal file
5
main.py
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
import scrap
|
||||||
|
|
||||||
|
# Step 1: hand the Comparis rental-marketplace URL for Lenzburg to scrap.scrap,
# together with the tag name and CSS class string of the containers to collect.
# NOTE(review): scrap.scrap is defined elsewhere; presumably it returns the
# matching elements from the page -- confirm against scrap.py.
content = scrap.scrap(
    "https://www.comparis.ch/immobilien/marktplatz/lenzburg/mieten",
    'div',
    'css-1d60yla ehesakb4',
)

# Step 2: pass the result through scrap.filter, asking for "text" content of
# <p> elements with class "Wohnung".
content = scrap.filter(
    content,
    "p",
    "Wohnung",
    "text",
)

# Step 3: show whatever the filter returned.
print(content)
|
14
scrap.py
14
scrap.py
@ -47,13 +47,19 @@ def page_iterator(base_url,main_container_type,main_container_class,next_type,ne
|
|||||||
|
|
||||||
#url gets href param, text gets text param stripped and formatted
|
#url gets href param, text gets text param stripped and formatted
|
||||||
def filter(data,type_t,class_t,type_content):
|
def filter(data,type_t,class_t,type_content):
|
||||||
|
text=[]
|
||||||
|
url=[]
|
||||||
for entry in data:
|
for entry in data:
|
||||||
item = entry.find(type_t,class_=class_t)
|
item = entry.find_all(type_t,class_=class_t)
|
||||||
if item != None:
|
if item != None:
|
||||||
if type_content == "url":
|
if type_content == "url":
|
||||||
return item.get("href")
|
for it in item:
|
||||||
if type_of_sub_content == "text":
|
url.append(it.get("href"))
|
||||||
return item.text.strip()
|
return url
|
||||||
|
if type_content == "text":
|
||||||
|
for it in item:
|
||||||
|
text.append(it.text.strip())
|
||||||
|
return text
|
||||||
else:
|
else:
|
||||||
return item
|
return item
|
||||||
if item == None:
|
if item == None:
|
||||||
|
Loading…
Reference in New Issue
Block a user