2024-06-27 10:02:25 +00:00
|
|
|
import std/httpclient
|
|
|
|
import std/htmlparser
|
|
|
|
import std/xmltree
|
|
|
|
import std/strtabs
|
|
|
|
import std/os
|
2024-06-28 11:19:47 +00:00
|
|
|
import scrap
|
2024-06-27 10:02:25 +00:00
|
|
|
|
|
|
|
# Interactive entry point: ask for a URL on stdin, download that page, parse
# it as HTML and pass the parsed tree to `getEntryFromHtml` together with a
# list of descriptors stored in `entry.desc`.

echo "URL:"
let url = readLine(stdin)
echo "given url is: ", url

# The client is created only after the blocking stdin read, so a failed read
# (e.g. EOF) cannot leave an open client behind. From here on every exit path
# goes through the `finally` below.
let client = newHttpClient()
try:
  # `getContent` raises on transport errors and on 4xx/5xx responses; the
  # exception is intentionally left to propagate after the client is closed.
  let html = client.getContent(url)
  let node = parseHtml(html)

  var entry: Entry

  # Descriptors handed to `getEntryFromHtml`. Their exact matching semantics
  # are defined by `Descriptor` / `getEntryFromHtml`, which are not declared
  # in this file.
  add(entry.desc, Descriptor(
    name: "alink",
    html_context_tag: "div",
    html_tag: "a",
    attrs: "href"))
  add(entry.desc, Descriptor(
    name: "key_and_string_search",
    contains_string: "a",
    html_context_tag: "div",
    html_tag: "a",
    attrs: "href"))
  add(entry.desc, Descriptor(
    name: "anotherlin",
    html_context_tag: "div",
    html_tag: "a",
    attrs: "href"))
  # NOTE(review): same name as the previous descriptor and tag/attribute
  # values that look like placeholders - presumably a deliberate
  # "matches nothing" case; confirm before relying on it.
  add(entry.desc, Descriptor(
    name: "anotherlin",
    html_context_tag: "didfgv",
    html_tag: "sdfga",
    attrs: "hdsfgref"))

  # Dump the configured entry before running the extraction.
  echo entry

  entry.getEntryFromHtml(node)

  # Earlier inline link extraction, kept disabled for reference:
  # echo node
  # for a in node.findAll("a"):
  #   if a.attrs.hasKey "href":
  #     echo "striping"
  #     let (dir,filename,ext) = splitFile(a.attrs["href"])
  #     echo "found a link!",dir & "/" & filename
  #   else:
  #     echo "Key has no attribute href"
finally:
  # Always release the underlying connection, even when the request or the
  # extraction raised.
  client.close()
|
|
|
|
|
|
|
|
|
|
|
|
|