diff --git a/main b/main index 2e523c6..a70366f 100755 Binary files a/main and b/main differ diff --git a/main.nim b/main.nim index bc11146..4d11c8d 100644 --- a/main.nim +++ b/main.nim @@ -3,6 +3,7 @@ import std/htmlparser import std/xmltree import std/strtabs import std/os +import scrap var client = newHttpClient() var html: string @@ -13,17 +14,23 @@ var url: string = readLine(stdin) echo "given url is: ",url try: - html = client.getContent(url) - let node = parseHtml(html) - echo node - for a in node.findAll("a"): - if a.attrs.hasKey "href": - echo "striping" - let (dir,filename,ext) = splitFile(a.attrs["href"]) - echo "found a link!",dir & "/" & filename - else: - echo "Key has no attribute href" + html = client.getContent(url) + let node = parseHtml(html) + + var htmlnode: XmlNode + var entry : Entry + entry.name = Descriptor(html_context_tag : "div", html_tag : "a",attrs : "href") + + entry.getEntryFromHtml(node) +# echo node +# for a in node.findAll("a"): +# if a.attrs.hasKey "href": +# echo "striping" +# let (dir,filename,ext) = splitFile(a.attrs["href"]) +# echo "found a link!",dir & "/" & filename +# else: +# echo "Key has no attribute href" finally: client.close() diff --git a/scrap b/scrap index 51d1ac1..cd63fd3 100755 Binary files a/scrap and b/scrap differ diff --git a/scrap.nim b/scrap.nim index 1a62914..e83040d 100644 --- a/scrap.nim +++ b/scrap.nim @@ -3,51 +3,46 @@ import std/xmltree import std/strtabs type - Descriptor = object - name : string - html_tag : string - html_context_tag : string - contains_string : string - attrs : string + Descriptor* = object + name* : string + html_tag* : string + html_context_tag* : string + contains_string* : string + attrs* : string type - Entry = object - name, tag, description, link, category : Descriptor + Entry* = object + name*, tag*, description*, link*, category* : Descriptor -proc setDescriptor(desc : var Descriptor, descToChange : string, value : string) = - case descToChange: - of "name": - desc.name = value - of "html_tag": - desc.html_tag = value - of "html_context_tag": - desc.html_context_tag = value - of "contains_string": - desc.contains_string = value - of "attrs": - desc.attrs = value - - -#proc newEntry() : Entry = -# newDescriptor() - -proc getEntryFromHtml(entry : Entry, node : XmlNode) = +proc getEntryFromHtml*(entry : Entry, node : XmlNode) = echo(entry.description) - var childrens = node.findAll(entry.name.html_tag) + + let context = node.findAll(entry.name.html_context_tag) + for a in context: + + let subContext = a.findAll(entry.name.html_tag) + for b in subContext: + echo(b) + if entry.name.attrs != "": + if b.attrs.hasKey(entry.name.attrs): + echo("found key") + if entry.name.contains_string != "": + echo(" and string") + #check if b contains contains_string + #return function + if entry.name.contains_string != "": + echo("found string") + #check if b contains contains_string + #return function proc test() = var htmlnode : XmlNode var str_html : string var entry: Entry -# entry = Entry(name : "testname",description : "testdescription",link : "testlink",category : "testcategory") - entry.name.setDescriptor("name","testname") - entry.name.setDescriptor("html_tag","p") - entry.link.setDescriptor("name","inserat_link") - entry.link.setDescriptor("html_tag","a") - entry.link.setDescriptor("attrs","href") + entry.name = Descriptor(html_tag : "p", contains_string : "test_container") echo entry htmlnode = parseHtml(str_html) - #entry.getEntryFromHtml() + entry.getEntryFromHtml(htmlnode) -test() +#test()