From 6598218abeb73089d7e96099430c546b5c8c518a Mon Sep 17 00:00:00 2001 From: ccppi Date: Wed, 3 Apr 2024 08:50:24 +0200 Subject: [PATCH] main file --- main.py | 5 +++++ scrap.py | 14 ++++++++++---- 2 files changed, 15 insertions(+), 4 deletions(-) create mode 100644 main.py diff --git a/main.py b/main.py new file mode 100644 index 0000000..fbbc281 --- /dev/null +++ b/main.py @@ -0,0 +1,5 @@ +import scrap + +content = scrap.scrap("https://www.comparis.ch/immobilien/marktplatz/lenzburg/mieten",'div','css-1d60yla ehesakb4') +content = scrap.filter(content,"p","Wohnung","text") +print(content) diff --git a/scrap.py b/scrap.py index 5a44d35..abf7d5a 100644 --- a/scrap.py +++ b/scrap.py @@ -47,13 +47,19 @@ def page_iterator(base_url,main_container_type,main_container_class,next_type,ne #url gets href param, text gets textparam stripped and formated def filter(data,type_t,class_t,type_content): + text=[] + url=[] for entry in data: - item = entry.find(type_t,class_=class_t) + item = entry.find_all(type_t,class_=class_t) if item != None: if type_content == "url": - return item.get("href") - if type_of_sub_content == "text": - return item.text.strip() + for it in item: + url.append(it.get("href")) + return url + if type_content == "text": + for it in item: + text.append(it.text.strip()) + return text else: return item if item == None: