restructuring
This commit is contained in:
parent
a2b97e9f21
commit
2d9b7afa9d
@ -9,15 +9,13 @@ from datetime import datetime
|
|||||||
import os
|
import os
|
||||||
import sqlite3
|
import sqlite3
|
||||||
import webbrowser
|
import webbrowser
|
||||||
from time import sleep
|
import mozilla
|
||||||
|
|
||||||
DEBUG = True
|
DEBUG = True
|
||||||
number = ['0','1','2','3','4','5','6','7','8','9']
|
number = ['0','1','2','3','4','5','6','7','8','9']
|
||||||
homePath = os.path.expanduser('~')
|
|
||||||
cookiePath = homePath + "/.mozilla/firefox/imibizoh.default/cookies.sqlite"
|
|
||||||
tmpPath = "/tmp/cookies.sqlite"
|
|
||||||
DBFILE = "../db/sqlite3.db"
|
|
||||||
winFirefoxPath = f"""C:\Program Files\Mozilla Firefox\firefox.exe"""
|
|
||||||
def log(*s):
|
def log(*s):
|
||||||
if DEBUG:
|
if DEBUG:
|
||||||
print(s)
|
print(s)
|
||||||
@ -226,77 +224,4 @@ def indeedExtractDays(datestr):
|
|||||||
#print("int:",cleannumint,"today:",today,"cleandate:",datetime.fromtimestamp(cleandate).strftime('%Y-%m-%d'))
|
#print("int:",cleannumint,"today:",today,"cleandate:",datetime.fromtimestamp(cleandate).strftime('%Y-%m-%d'))
|
||||||
return datetime.fromtimestamp(cleandate).strftime('%Y-%m-%d')
|
return datetime.fromtimestamp(cleandate).strftime('%Y-%m-%d')
|
||||||
return "NOTFound"
|
return "NOTFound"
|
||||||
def getCookiesFromBrowser(url):
    """Return the Firefox cookies stored for the domain of ``url``.

    The cookie database at the module-level ``cookiePath`` is copied to
    ``tmpPath`` before it is opened (workaround for the locked database
    while Firefox is running). When no cookie is found, ``url`` is opened
    in Firefox and the lookup is attempted once more.

    Args:
        url: page whose cookies are wanted; the host pattern is derived
            with ``urlToDomain``.

    Returns:
        A string of ``name=value;`` pairs, or ``''`` when nothing was
        found after two attempts.
    """
    cmd_read_cookies = "SELECT name,value FROM moz_cookies WHERE host like ?;"
    tries = 0
    cookie = ''
    while cookie == '' and tries < 2:
        tries += 1
        # workaround for locked database: always read from a fresh copy
        shutil.copyfile(cookiePath, tmpPath)
        # sqlite3's context manager only commits/rolls back, so the
        # connection is closed explicitly.
        connection = sqlite3.connect(tmpPath)
        try:
            print(cmd_read_cookies)
            cursor = connection.cursor()
            cursor.execute(cmd_read_cookies, (urlToDomain(url),))
            # Rows are fetched anew on every attempt; the previous version
            # reused an exhausted ``rows`` list and skipped the second read.
            rows = cursor.fetchall()
        finally:
            connection.close()

        for row in rows:
            print("row:", row)
            cookie += row[0] + '=' + row[1] + ";"

        print("Cookies:", cookie)
        if cookie == '':
            # Let Firefox visit the page so it can store the cookies.
            if os.name == 'posix':
                webbrowser.register("firefox", None, webbrowser.BackgroundBrowser("firefox"))
                webbrowser.get('firefox').open(url)
            elif os.name == 'nt':
                webbrowser.register("firefox", None, webbrowser.BackgroundBrowser(winFirefoxPath))
                webbrowser.get('firefox').open(url)
            sleep(1)
    return cookie
|
|
||||||
#access cookies from firefox:
|
|
||||||
#copy (because locked): cp .mozilla/firefox/imibizoh.default/cookies.sqlite cookies.sqlite
|
|
||||||
#Select value from moz_cookies where host like '%indeed%'
|
|
||||||
def urlToDomain(url):
    """Return the host of ``url`` without its first label, with a leading dot.

    ``https://www.indeed.com/jobs`` becomes ``.indeed.com``. The result is
    used as the host pattern when looking up cookies.

    Args:
        url: absolute URL including its scheme.

    Returns:
        The host from its first ``.`` onwards, or ``''`` when the URL has
        no host or the host contains no dot.
    """
    from urllib.parse import urlsplit

    host = urlsplit(url).hostname or ''
    dot = host.find('.')
    # Only the first label is dropped, so a bare "indeed.com" yields ".com".
    urlCut = host[dot:] if dot != -1 else ''
    print("url after cut dot:", urlCut)
    return urlCut
|
|
||||||
|
|
||||||
def patternSearch(url,pattern):
    """Return the index of the last character of the first ``pattern`` match.

    Args:
        url: string to search in.
        pattern: substring to look for.

    Returns:
        Index in ``url`` of the final character of the first occurrence of
        ``pattern``. When ``pattern`` does not occur, the last index of
        ``url`` (``len(url) - 1``) is returned, as before; this is ``-1``
        for an empty ``url``.
    """
    start = url.find(pattern)
    if start == -1:
        # Kept for backward compatibility: "not found" is reported as the
        # last index of the searched string.
        return len(url) - 1
    end = start + len(pattern) - 1
    print("FULL PATTERN FOUND at pos :", end)
    return end
|
|
||||||
def skipAfterChar(aString,char):
    """Return the index of the first occurrence of ``char`` in ``aString``.

    Args:
        aString: string to scan.
        char: character to look for.

    Returns:
        The index of the first ``char``. When ``char`` is absent, the last
        index of ``aString`` (``len(aString) - 1``) is returned, as before;
        this is ``-1`` for an empty string instead of an error.
    """
    index = aString.find(char)
    if index == -1:
        return len(aString) - 1
    return index
|
|
||||||
def dropBeforePos(aString,pos):
    """Return the part of ``aString`` that follows index ``pos``.

    The character at ``pos`` itself is dropped as well.

    Args:
        aString: source string.
        pos: index of the last character to drop; values below ``0`` drop
            nothing.

    Returns:
        ``aString[pos + 1:]``, or ``''`` when ``pos`` is the last index or
        beyond.
    """
    # Clamp so a negative position cannot wrap around to the string's end.
    return aString[max(pos + 1, 0):]
|
|
||||||
def dropAfterPos(aString,pos):
    """Return the part of ``aString`` that precedes index ``pos``.

    The character at ``pos`` itself is dropped as well.

    Args:
        aString: source string.
        pos: index of the first character to drop.

    Returns:
        ``aString[:pos]`` when ``pos`` is a valid index of ``aString``;
        otherwise ``''``.
    """
    # NOTE(review): an out-of-range pos yields '' rather than the whole
    # string; kept as-is because callers may rely on it.
    if 0 <= pos < len(aString):
        return aString[:pos]
    return ''
|
|
||||||
|
42
lib/manipulateString.py
Normal file
42
lib/manipulateString.py
Normal file
@ -0,0 +1,42 @@
|
|||||||
|
|
||||||
|
def urlToDomain(url):
    """Return the host of ``url`` without its first label, with a leading dot.

    ``https://www.indeed.com/jobs`` becomes ``.indeed.com``. The result is
    used as the host pattern when looking up cookies.

    Args:
        url: absolute URL including its scheme.

    Returns:
        The host from its first ``.`` onwards, or ``''`` when the URL has
        no host or the host contains no dot.
    """
    from urllib.parse import urlsplit

    host = urlsplit(url).hostname or ''
    dot = host.find('.')
    # Only the first label is dropped, so a bare "indeed.com" yields ".com".
    urlCut = host[dot:] if dot != -1 else ''
    print("url after cut dot:", urlCut)
    return urlCut
|
||||||
|
|
||||||
|
def patternSearch(url,pattern):
    """Return the index of the last character of the first ``pattern`` match.

    Args:
        url: string to search in.
        pattern: substring to look for.

    Returns:
        Index in ``url`` of the final character of the first occurrence of
        ``pattern``. When ``pattern`` does not occur, the last index of
        ``url`` (``len(url) - 1``) is returned, as before; this is ``-1``
        for an empty ``url``.
    """
    start = url.find(pattern)
    if start == -1:
        # Kept for backward compatibility: "not found" is reported as the
        # last index of the searched string.
        return len(url) - 1
    end = start + len(pattern) - 1
    print("FULL PATTERN FOUND at pos :", end)
    return end
|
||||||
|
def skipAfterChar(aString,char):
    """Return the index of the first occurrence of ``char`` in ``aString``.

    Args:
        aString: string to scan.
        char: character to look for.

    Returns:
        The index of the first ``char``. When ``char`` is absent, the last
        index of ``aString`` (``len(aString) - 1``) is returned, as before;
        this is ``-1`` for an empty string instead of an error.
    """
    index = aString.find(char)
    if index == -1:
        return len(aString) - 1
    return index
|
||||||
|
def dropBeforePos(aString,pos):
    """Return the part of ``aString`` that follows index ``pos``.

    The character at ``pos`` itself is dropped as well.

    Args:
        aString: source string.
        pos: index of the last character to drop; values below ``0`` drop
            nothing.

    Returns:
        ``aString[pos + 1:]``, or ``''`` when ``pos`` is the last index or
        beyond.
    """
    # Clamp so a negative position cannot wrap around to the string's end.
    return aString[max(pos + 1, 0):]
|
||||||
|
def dropAfterPos(aString,pos):
    """Return the part of ``aString`` that precedes index ``pos``.

    The character at ``pos`` itself is dropped as well.

    Args:
        aString: source string.
        pos: index of the first character to drop.

    Returns:
        ``aString[:pos]`` when ``pos`` is a valid index of ``aString``;
        otherwise ``''``.
    """
    # NOTE(review): an out-of-range pos yields '' rather than the whole
    # string; kept as-is because callers may rely on it.
    if 0 <= pos < len(aString):
        return aString[:pos]
    return ''
|
72
lib/mozilla.py
Normal file
72
lib/mozilla.py
Normal file
@ -0,0 +1,72 @@
|
|||||||
|
#access cookies from firefox:
|
||||||
|
#copy (because locked): cp .mozilla/firefox/imibizoh.default/cookies.sqlite cookies.sqlite
|
||||||
|
#Select value from moz_cookies where host like '%indeed%'
|
||||||
|
import webbrowser
|
||||||
|
import tempfile
|
||||||
|
import os
|
||||||
|
import sqlite3
|
||||||
|
import shutil
|
||||||
|
from time import sleep
|
||||||
|
import manipulateString as ms
|
||||||
|
DEBUG = True
|
||||||
|
def log(*s):
    """Print the positional arguments as a single tuple when DEBUG is set."""
    if not DEBUG:
        return
    print(s)
|
||||||
|
|
||||||
|
def findDefaultProfile(path):
    """Return the name of the default Firefox profile directory in ``path``.

    An entry qualifies when it is a directory and the text after its first
    ``.`` is exactly ``default`` (for example ``imibizoh.default``).

    Args:
        path: directory that contains the Firefox profile directories.

    Returns:
        The matching entry name (not the full path), or ``-1`` when no
        entry matches. ``-1`` is kept as the not-found value for backward
        compatibility.

    Raises:
        OSError: propagated from ``os.listdir`` when ``path`` cannot be read.
    """
    # NOTE(review): profiles named "<id>.default-release" are not matched;
    # confirm whether they should be.
    # os.listdir order is arbitrary; sorting makes the choice reproducible
    # when several entries qualify.
    for directory in sorted(os.listdir(path)):
        stringParse = directory.partition('.')[2]
        log(stringParse)
        if stringParse == "default" and os.path.isdir(os.path.join(path, directory)):
            return directory
    return -1
|
||||||
|
|
||||||
|
def getCookiesFromBrowser(url):
    """Return the Firefox cookies stored for the domain of ``url``.

    The cookie database of the default Firefox profile is copied to a
    temporary file before it is opened (workaround for the locked database
    while Firefox is running). When no cookie is found, ``url`` is opened
    in Firefox and the lookup is attempted once more.

    Args:
        url: page whose cookies are wanted; the host pattern is derived
            with ``ms.urlToDomain``.

    Returns:
        A string of ``name=value;`` pairs, or ``''`` when nothing was
        found after two attempts.

    Raises:
        FileNotFoundError: when no default profile directory is found.
    """
    winFirefoxPath = "C:\\Program Files\\Mozilla Firefox\\firefox.exe"

    # Choose the profile location and the scratch copy per platform.
    if os.name == 'nt':
        profilesDir = os.path.join(os.getenv('APPDATA', ''), "Mozilla", "Firefox", "Profiles")
        # copyfile needs a file path as destination, not the temp directory.
        tmpPath = os.path.join(tempfile.gettempdir(), "cookies.sqlite")
    else:
        profilesDir = os.path.join(os.path.expanduser('~'), ".mozilla", "firefox")
        tmpPath = "/tmp/cookies.sqlite"

    profile = findDefaultProfile(profilesDir)
    if profile == -1:
        # findDefaultProfile reports "not found" as -1; fail clearly instead
        # of concatenating it into a path.
        raise FileNotFoundError("no Firefox default profile found in " + profilesDir)
    cookiePath = os.path.join(profilesDir, profile, "cookies.sqlite")

    cmd_read_cookies = "SELECT name,value FROM moz_cookies WHERE host like ?;"
    domain = ms.urlToDomain(url)

    tries = 0
    cookie = ''
    while cookie == '' and tries < 2:
        tries += 1
        # workaround for locked database: always read from a fresh copy
        shutil.copyfile(cookiePath, tmpPath)
        # sqlite3's context manager only commits/rolls back, so the
        # connection is closed explicitly.
        connection = sqlite3.connect(tmpPath)
        try:
            print(cmd_read_cookies)
            cursor = connection.cursor()
            cursor.execute(cmd_read_cookies, (domain,))
            # Rows are fetched anew on every attempt; the previous version
            # reused an exhausted ``rows`` list and skipped the second read.
            rows = cursor.fetchall()
        finally:
            connection.close()

        for row in rows:
            print("row:", row)
            cookie += row[0] + '=' + row[1] + ";"

        print("Cookies:", cookie)
        if cookie == '':
            # Let Firefox visit the page so it can store the cookies.
            if os.name == 'posix':
                webbrowser.register("firefox", None, webbrowser.BackgroundBrowser("firefox"))
                webbrowser.get('firefox').open(url)
            elif os.name == 'nt':
                webbrowser.register("firefox", None, webbrowser.BackgroundBrowser(winFirefoxPath))
                webbrowser.get('firefox').open(url)
            sleep(1)
    return cookie
|
@ -1,5 +1,6 @@
|
|||||||
from helpers import *
|
from helpers import *
|
||||||
from login import solveCaptcha
|
from login import solveCaptcha
|
||||||
|
import mozilla
|
||||||
DEBUG = True
|
DEBUG = True
|
||||||
|
|
||||||
def log(*s):
|
def log(*s):
|
||||||
@ -7,7 +8,7 @@ def log(*s):
|
|||||||
print(s)
|
print(s)
|
||||||
|
|
||||||
def scrap_indeed_com(url,entry,session):
|
def scrap_indeed_com(url,entry,session):
|
||||||
moz_cookies = getCookiesFromBrowser(url)
|
moz_cookies = mozilla.getCookiesFromBrowser(url)
|
||||||
print("[scrap]cookies:", moz_cookies)
|
print("[scrap]cookies:", moz_cookies)
|
||||||
session.headers = {
|
session.headers = {
|
||||||
"User-Agent" : "Mozilla/5.0 (X11; Linux x86_64; rv:127.0) Gecko/20100101 Firefox/127.0",
|
"User-Agent" : "Mozilla/5.0 (X11; Linux x86_64; rv:127.0) Gecko/20100101 Firefox/127.0",
|
||||||
|
Loading…
Reference in New Issue
Block a user