Compare commits
No commits in common. "threading" and "master" have entirely different histories.
2
.gitignore
vendored
2
.gitignore
vendored
@ -4,5 +4,3 @@ output/
|
||||
*.db
|
||||
*.csv
|
||||
__pycache__
|
||||
dist/
|
||||
build/
|
||||
|
31
Readme.md
31
Readme.md
@ -1,31 +0,0 @@
|
||||
# Warning!!
|
||||
If you run the sync to often it is very likely that your IP gets banned as bot. It is wise to use a vpn or a proxy, or just sync once or twice a day.
|
||||
|
||||
# Dependencies
|
||||
Install the requirements.txt
|
||||
|
||||
We depend on firefox, the reason for this is to workaround cloundflares bot protection. This works by stealing cookies from the browser.
|
||||
|
||||
# First time usage
|
||||
1. cd lib/
|
||||
|
||||
### Set up the database
|
||||
|
||||
2. python main.py --createdb
|
||||
3. python main.py --importregiondb
|
||||
|
||||
# Config
|
||||
edit lib/conf
|
||||
|
||||
|
||||
# Main scripts
|
||||
- cd lib/
|
||||
|
||||
For the cmd line use:
|
||||
- python main.py --help
|
||||
|
||||
For the gui run:
|
||||
- python gui.py
|
||||
|
||||
# Windows
|
||||
There is a windows build in (https://5ccppi.org:3000/ccppi/job-scrapper/src/branch/threading/dist/wine-nuitka)
|
BIN
dist/wine-nuitka/win64-nuitka.zip
vendored
BIN
dist/wine-nuitka/win64-nuitka.zip
vendored
Binary file not shown.
44
gui.spec
44
gui.spec
@ -1,44 +0,0 @@
|
||||
# -*- mode: python ; coding: utf-8 -*-
|
||||
|
||||
|
||||
a = Analysis(
|
||||
['lib\\gui.py'],
|
||||
pathex=[],
|
||||
binaries=[],
|
||||
datas=[],
|
||||
hiddenimports=[],
|
||||
hookspath=[],
|
||||
hooksconfig={},
|
||||
runtime_hooks=[],
|
||||
excludes=[],
|
||||
noarchive=False,
|
||||
optimize=0,
|
||||
)
|
||||
pyz = PYZ(a.pure)
|
||||
|
||||
exe = EXE(
|
||||
pyz,
|
||||
a.scripts,
|
||||
[],
|
||||
exclude_binaries=True,
|
||||
name='gui',
|
||||
debug=False,
|
||||
bootloader_ignore_signals=False,
|
||||
strip=False,
|
||||
upx=True,
|
||||
console=True,
|
||||
disable_windowed_traceback=False,
|
||||
argv_emulation=False,
|
||||
target_arch=None,
|
||||
codesign_identity=None,
|
||||
entitlements_file=None,
|
||||
)
|
||||
coll = COLLECT(
|
||||
exe,
|
||||
a.binaries,
|
||||
a.datas,
|
||||
strip=False,
|
||||
upx=True,
|
||||
upx_exclude=[],
|
||||
name='gui',
|
||||
)
|
24
lib/conf
24
lib/conf
@ -1,26 +1,6 @@
|
||||
[python_qt_indeed]
|
||||
USER = NONE
|
||||
PW = NONE
|
||||
LOGINURL = NONE
|
||||
SCRAPURL = https://ch.indeed.com/jobs?q=python+qt&
|
||||
TAG = Informatiker, Python, qt
|
||||
|
||||
[quereinsteiger_indeed]
|
||||
USER = NONE
|
||||
PW = NONE
|
||||
LOGINURL = NONE
|
||||
SCRAPURL = https://ch.indeed.com/jobs?q=quereinsteiger+it
|
||||
TAG = Informatiker, Quereinsteiger
|
||||
|
||||
[jobs.ch_seilbahn]
|
||||
USER = NONE
|
||||
PW = NONE
|
||||
LOGINURL = NONE
|
||||
SCRAPURL = https://www.jobs.ch/en/vacancies/?term=seilbahn
|
||||
TAG = Seilbahn
|
||||
|
||||
[jobagent.ch]
|
||||
USER = j.wyss@kolabnow.ch
|
||||
USER = test@gmx.ch
|
||||
PW = ASK
|
||||
LOGINURL = https://www.jobagent.ch/user/login
|
||||
SCRAPURL = https://www.jobagent.ch/search?terms=Automatiker&lra=0&as=0
|
||||
@ -50,7 +30,7 @@ TAG = Informatiker,Linux
|
||||
|
||||
|
||||
[jobagent.ch-2]
|
||||
USER = j.wyss@kolabnow.ch
|
||||
USER = test@gmx.ch
|
||||
PW = ASK
|
||||
LOGINURL = https://www.jobagent.ch/user/login
|
||||
SCRAPURL = https://www.jobagent.ch/search?terms=Informatiker&lra=0&as=0
|
||||
|
@ -1,55 +0,0 @@
|
||||
|
||||
[jobagent.ch]
|
||||
USER = test@gmx.ch
|
||||
PW = ASK
|
||||
LOGINURL = https://www.jobagent.ch/user/login
|
||||
SCRAPURL = https://www.jobagent.ch/search?terms=Automatiker&lra=0&as=0
|
||||
TAG = Automatiker
|
||||
|
||||
|
||||
[software-job.ch-application-engineer]
|
||||
USER = NONE
|
||||
PW = NONE
|
||||
LOGINURL = NONE
|
||||
SCRAPURL = https://software-job.ch/application-engineer
|
||||
TAG = Informatiker
|
||||
|
||||
[software-job.ch]
|
||||
USER = NONE
|
||||
PW = NONE
|
||||
LOGINURL = NONE
|
||||
SCRAPURL = https://software-job.ch/python-entwicklung
|
||||
TAG = Informatiker,Python
|
||||
|
||||
[jobs.ch_linux]
|
||||
USER = NONE
|
||||
PW = NONE
|
||||
LOGINURL = NONE
|
||||
SCRAPURL = https://www.jobs.ch/en/vacancies/?term=linux
|
||||
TAG = Informatiker,Linux
|
||||
|
||||
|
||||
[jobagent.ch-2]
|
||||
USER = test@gmx.ch
|
||||
PW = ASK
|
||||
LOGINURL = https://www.jobagent.ch/user/login
|
||||
SCRAPURL = https://www.jobagent.ch/search?terms=Informatiker&lra=0&as=0
|
||||
TAG = Informatiker
|
||||
|
||||
[jobs.ch]
|
||||
USER = NONE
|
||||
PW = NONE
|
||||
LOGINURL = NONE
|
||||
SCRAPURL= https://www.jobs.ch/en/vacancies/?term=automatiker
|
||||
TAG = Automatiker
|
||||
|
||||
[jobs.ch_informatiker]
|
||||
USER = NONE
|
||||
PW = NONE
|
||||
LOGINURL = NONE
|
||||
SCRAPURL= https://www.jobs.ch/en/vacancies/?term=informatiker
|
||||
TAG = Informatiker
|
||||
|
||||
|
||||
|
||||
#https://www.jobagent.ch/search?terms=Automatiker&workload=60-100&lra=0&as=0
|
@ -18,7 +18,7 @@ class Entry:
|
||||
if self.gui:
|
||||
worker.messageContent = self.scrapurl
|
||||
worker.dialog_closed=False
|
||||
worker.pwprompt.emit() #signal to mainthread run showDialog and wait for close
|
||||
worker.pwprompt.emit() #signal to mainthread
|
||||
while not worker.dialog_closed:
|
||||
time.sleep(1)
|
||||
pass
|
||||
|
@ -1,5 +1,3 @@
|
||||
import re
|
||||
import datetime
|
||||
def DateCHToUS(date):
|
||||
#01.02.2010 --> 2010-02-01
|
||||
day=""
|
||||
@ -14,10 +12,3 @@ def DateCHToUS(date):
|
||||
newdate = year+"-"+month+"-"+day
|
||||
return(newdate)
|
||||
|
||||
def indeed_date(date):
|
||||
redate = re.match('\d+',date)
|
||||
fixdate = today().strftime("%Y/%m%d") - timedelta(days=redate.group())
|
||||
print("date: today")
|
||||
return fixdate
|
||||
|
||||
|
||||
|
89
lib/db.py
89
lib/db.py
@ -1,8 +1,6 @@
|
||||
import sqlite3
|
||||
import mmh3
|
||||
import sys
|
||||
#import requests
|
||||
import httplib2
|
||||
DEBUG = False
|
||||
|
||||
def log(*s):
|
||||
@ -12,7 +10,7 @@ def initdb(file):
|
||||
with sqlite3.connect(file) as connection:
|
||||
print("db connection", connection.total_changes)
|
||||
cursor = connection.cursor()
|
||||
cursor.execute("CREATE TABLE jobs (star INT,tag INT ,title TEXT, location TEXT, company TEXT,link TEXT,pubdate TEXT,hash INT,viewed INT)")
|
||||
cursor.execute("CREATE TABLE jobs (star TEXT,tag INT ,title TEXT, location TEXT, company TEXT,link TEXT,pubdate TEXT,hash INT)")
|
||||
sys.exit()
|
||||
def rmdb(file,table):
|
||||
with sqlite3.connect(file) as connection:
|
||||
@ -48,7 +46,7 @@ def importdb(file,importdb,table):
|
||||
|
||||
def createnwview(file):
|
||||
with sqlite3.connect(file) as connection:
|
||||
cmd_create_nw_table = f"""CREATE VIEW IF NOT EXISTS "Nordwest-SCHWEIZ" AS SELECT * FROM jobs as b
|
||||
cmd_create_nw_table = f"""CREATE VIEW "Nordwest-SCHWEIZ" AS SELECT * FROM jobs as b
|
||||
WHERE EXISTS
|
||||
(SELECT GDENAME FROM Cantons as w
|
||||
where w.GDEKT = 'ZH' AND
|
||||
@ -64,25 +62,9 @@ def createnwview(file):
|
||||
cursor = connection.cursor()
|
||||
cursor.execute(cmd_create_nw_table)
|
||||
print("db connection",connection.total_changes)
|
||||
createFilterTable(file)
|
||||
|
||||
def createFilterTable(file):
|
||||
with sqlite3.connect(file,timeout=10) as connection:
|
||||
cmd_create_filter_table = f"""CREATE TABLE IF NOT EXISTS filters(cmd TEXT);"""
|
||||
cursor = connection.cursor()
|
||||
cursor.execute(cmd_create_filter_table)
|
||||
print("db connection:",connection.total_changes)
|
||||
def addFineFilter(file,table,filterstr):
|
||||
with sqlite3.connect(file,timeout=10) as connection:
|
||||
cmd_createFineFilter = f"""INSERT INTO {table}(cmd) VALUES(?);"""
|
||||
cmd_checkIfExists = f"""SELECT * FROM {table} WHERE cmd = ?"""
|
||||
cursor = connection.cursor()
|
||||
if cursor.execute(cmd_checkIfExists,(filterstr,)).fetchone() == None:
|
||||
cursor.execute(cmd_createFineFilter,(filterstr,))
|
||||
|
||||
def writedb(jobs):
|
||||
with sqlite3.connect("../db/sqlite3.db",timeout=10) as connection:
|
||||
connection.execute("pragma journal_mode=wal")
|
||||
with sqlite3.connect("../db/sqlite3.db") as connection:
|
||||
print("db connection", connection.total_changes)
|
||||
cursor = connection.cursor()
|
||||
# cursor.execute("CREATE TABLE jobs (title TEXT, location TEXT, company TEXT,link TEXT,hash INT)")
|
||||
@ -91,67 +73,6 @@ def writedb(jobs):
|
||||
log(hash1);
|
||||
if(cursor.execute("SELECT * FROM jobs WHERE hash = ?",(hash1,)).fetchone() != None):
|
||||
log("Hash already exist")
|
||||
elif(cursor.execute("SELECT * FROM jobs where link = ?",(job.link,)).fetchone() != None):
|
||||
log("link already exist")
|
||||
else:
|
||||
log("NEW_ENTRY")
|
||||
cursor.execute("INSERT INTO jobs (star,tag,title,company,location,link,pubdate,hash,viewed) VALUES (?,?,?,?,?,?,?,?,?)",(job.starred,job.tag,job.title,job.company,job.location,job.link,job.date,hash1,0))
|
||||
|
||||
def viewedEntry(hash1):
|
||||
viewedEntry.list = []
|
||||
viewedEntry.list.append(hash1)
|
||||
print("viewedEntry.list:",viewedEntry.list)
|
||||
if len(viewedEntry.list) >= 5:
|
||||
with sqlite3.connect("../db/sqlite3.db",timeout=10) as connection:
|
||||
cursor = connection.cursor()
|
||||
for x in viewedEntry.list:
|
||||
print("hash:",x)
|
||||
cursor.execute("UPDATE jobs SET viewed = '1' WHERE hash = ?",(x,))
|
||||
viewedEntry.list = []
|
||||
print("modified rows: ",cursor.rowcount)
|
||||
|
||||
|
||||
def isStillValid(file,skiprows):
|
||||
rows = [0,0,0]
|
||||
with sqlite3.connect(file,timeout=10) as connection:
|
||||
cmd_read_chunk = f"""SELECT link from jobs;"""
|
||||
connection.execute("pragma journal_mode=wal")
|
||||
cursor = connection.cursor()
|
||||
cursor.execute(cmd_read_chunk)
|
||||
|
||||
#cursor.fetchmany(skiprows)#drop rows
|
||||
while(len(rows)!=0):
|
||||
isLink = True
|
||||
rows = []
|
||||
|
||||
rows = cursor.fetchmany(256)
|
||||
h = httplib2.Http()
|
||||
for row in rows:
|
||||
print("row: ",row[0])
|
||||
try:
|
||||
(resp,content) = h.request(row[0], 'HEAD')
|
||||
except IOError as e:
|
||||
print("link is no valid URL so remove item")
|
||||
print("error: ",e)
|
||||
isLink = False
|
||||
except httplib2.error.RelativeURIError:
|
||||
isLink = False
|
||||
print("RelativeURIError: Not a valid link")
|
||||
#rm_cursor = connection.cursor()
|
||||
#rm_itm = rm_cursor.execute(f"""DELETE from jobs WHERE link = ?;""",(row[0],))
|
||||
finally:
|
||||
try:
|
||||
resp
|
||||
except NameError:
|
||||
print("Not a valid link")
|
||||
rm_cursor = connection.cursor()
|
||||
rm_itm = rm_cursor.execute(f"""DELETE from jobs WHERE link = ? AND star != 1;""",(row[0],))
|
||||
else:
|
||||
if resp.status >= 400 or isLink == False:
|
||||
print("link is no more valid, remove item")
|
||||
rm_cursor = connection.cursor()
|
||||
rm_itm = rm_cursor.execute("DELETE from jobs WHERE link = ? AND star != 1;",(row[0],))
|
||||
print ("Deletion resultet in: ", rm_itm)
|
||||
print("result of commit: ", connection.commit())
|
||||
return 0
|
||||
|
||||
print("NEW_ENTRY")
|
||||
cursor.execute("INSERT INTO jobs (star,tag,title,company,location,link,pubdate,hash) VALUES (?,?,?,?,?,?,?,?)",(job.starred,job.tag,job.title,job.company,job.location,job.link,job.date,hash1))
|
||||
|
194
lib/gui.py
194
lib/gui.py
@ -1,61 +1,20 @@
|
||||
from PySide6.QtWidgets import QApplication, QWidget, QMainWindow, QTableWidget, QVBoxLayout, QTableWidgetItem, QPushButton, QHBoxLayout, QTableView, QLineEdit, QDialog, QLabel, QTextEdit, QCheckBox, QComboBox, QStyledItemDelegate
|
||||
from PySide6.QtWidgets import QApplication, QWidget, QMainWindow, QTableWidget, QVBoxLayout, QTableWidgetItem, QPushButton, QHBoxLayout, QTableView, QLineEdit, QDialog, QLabel, QTextEdit, QCheckBox, QComboBox
|
||||
from PySide6.QtWebEngineWidgets import QWebEngineView
|
||||
from PySide6.QtCore import QUrl,Qt,QSortFilterProxyModel, qDebug, QSize,QObject,QThread,Signal,QAbstractTableModel, Slot
|
||||
from PySide6.QtCore import QUrl,Qt,QSortFilterProxyModel, qDebug, QSize,QObject,QThread,Signal
|
||||
from PySide6.QtSql import QSqlDatabase, QSqlTableModel, QSqlQueryModel, QSqlQuery
|
||||
from PySide6 import QtGui
|
||||
from db import addFineFilter
|
||||
|
||||
|
||||
import sysparse
|
||||
import sys
|
||||
import db as db
|
||||
from qsqlmod import SqlQueryModel_editable
|
||||
|
||||
DEBUG = True
|
||||
def log(*s):
|
||||
if DEBUG:
|
||||
print(s)
|
||||
|
||||
DBFILE = "../db/sqlite3.db"
|
||||
|
||||
Cantons = ["AG","ZH","BE","SG","SO"]
|
||||
|
||||
class ColorDelegate(QStyledItemDelegate):
|
||||
currentRow = 0
|
||||
starred = 0
|
||||
|
||||
def __init__(self,main):
|
||||
super().__init__()
|
||||
print("initialice overload init of ColorDelegate")
|
||||
self.main = main
|
||||
def initStyleOption(self,option,index):
|
||||
super().initStyleOption(option,index)
|
||||
data = index.data()
|
||||
column = index.column()
|
||||
flag_viewed = self.main.sel_model.model().index(index.row(),8).data()
|
||||
try:
|
||||
flag_starred = int(self.main.sel_model.model().index(index.row(),0).data())
|
||||
except ValueError:
|
||||
print("probably empty string asign zero")
|
||||
flag_starred = 0
|
||||
if flag_starred == 1:
|
||||
option.backgroundBrush = QtGui.QColor("red")
|
||||
elif flag_viewed != 1:
|
||||
option.backgroundBrush = QtGui.QColor("green")
|
||||
else:
|
||||
option.backgroundBrush = QtGui.QColor("white")
|
||||
|
||||
class ValidationWorker(QObject):
|
||||
finished = Signal()
|
||||
def run(self):
|
||||
ret = db.isStillValid(DBFILE,0)
|
||||
if ret == 0:
|
||||
self.finished.emit()
|
||||
|
||||
class Worker(QObject):
|
||||
pwprompt = Signal()
|
||||
pw = Signal(str)
|
||||
finished = Signal()
|
||||
dialog_closed = True
|
||||
dialog_rejected = False
|
||||
password = ['empty']
|
||||
|
||||
def run(self):
|
||||
@ -70,11 +29,10 @@ class MainWindow(QMainWindow):
|
||||
self.w = None
|
||||
|
||||
self.cmdCanton = ''
|
||||
self.initcmd = 'SELECT * FROM jobs as b '
|
||||
self.initcmd = 'SELECT * FROM jobs as b'
|
||||
self.customcmd = ''
|
||||
self.cmd = ''
|
||||
self.setWindowTitle("DB_Inspector")
|
||||
self.isAWhere = False
|
||||
|
||||
self.layout = QVBoxLayout()
|
||||
self.layout2 = QHBoxLayout()
|
||||
@ -85,46 +43,26 @@ class MainWindow(QMainWindow):
|
||||
self.browser = QWebEngineView()
|
||||
self.browser.setUrl(QUrl("https://jobagent.ch"))
|
||||
|
||||
#self.EditQuery = QLineEdit()
|
||||
self.CEditQuery = QComboBox()
|
||||
self.CEditQuery.setEditable(True)
|
||||
self.CEditQuery.setInsertPolicy(QComboBox.InsertAtBottom)
|
||||
self.CEditQuery.setDuplicatesEnabled(True)
|
||||
self.queryFineFilers()
|
||||
self.EditQuery = self.CEditQuery.lineEdit()
|
||||
self.EditQuery = QLineEdit()
|
||||
self.EditQuery.returnPressed.connect(self.queryEditLine)
|
||||
|
||||
editables = {0 : ("UPDATE jobs SET star = '{}' WHERE hash = '{}'",7)}
|
||||
self.model = SqlQueryModel_editable(editables)
|
||||
|
||||
self.model = QSqlTableModel(self)
|
||||
self.model.setTable("jobs")
|
||||
self.model.select()
|
||||
|
||||
self.view = QTableView()
|
||||
self.view.setModel(self.model)
|
||||
|
||||
self.proxymodel2 = QSortFilterProxyModel(self)
|
||||
self.proxymodel2.setSourceModel(self.model)
|
||||
self.view.setModel(self.proxymodel2)
|
||||
self.setProxyViewSettings()
|
||||
|
||||
self.delegate = ColorDelegate(self)
|
||||
self.view.setItemDelegate(self.delegate)
|
||||
|
||||
self.setProxyViewSettings()
|
||||
self.view.activated.connect(self.cell_clicked)
|
||||
self.view.clicked.connect(self.cell_clicked)
|
||||
|
||||
self.sel_model = self.view.selectionModel()
|
||||
self.sel_model.selectionChanged.connect(self.cell_clicked)
|
||||
|
||||
self.PValidate = QPushButton("links valid")
|
||||
self.PValidate.clicked.connect(self.runValidation)
|
||||
|
||||
self.PsyncDB = QPushButton("Perform sync acording to config file")
|
||||
self.PsyncDB.clicked.connect(self.runWorker)
|
||||
|
||||
self.layout.addWidget(self.view)
|
||||
self.layout.addWidget(self.b_canton)
|
||||
self.layout.addWidget(self.CEditQuery)
|
||||
self.layout.addWidget(self.EditQuery)
|
||||
self.layout.addWidget(self.PsyncDB)
|
||||
self.layout.addWidget(self.PValidate)
|
||||
self.layout2.addLayout(self.layout)
|
||||
self.layout2.addWidget(self.browser)
|
||||
|
||||
@ -138,9 +76,7 @@ class MainWindow(QMainWindow):
|
||||
self.view.setColumnWidth(5,10)
|
||||
self.view.hideColumn(7)
|
||||
self.view.setSortingEnabled(True)
|
||||
#self.sel_model = self.view.selectionModel()
|
||||
#self.sel_model.selectionChanged.connect(self.cell_clicked)
|
||||
|
||||
self.view.clicked.connect(self.cell_clicked)
|
||||
def runWorker(self):
|
||||
self.thread = QThread()
|
||||
self.worker = Worker()
|
||||
@ -153,25 +89,9 @@ class MainWindow(QMainWindow):
|
||||
self.worker.pwprompt.connect(self.showDialog)
|
||||
self.worker.finished.connect(self.thread.quit)
|
||||
self.worker.finished.connect(self.enable_PsyncDB)
|
||||
|
||||
|
||||
self.thread.start()
|
||||
def runValidation(self):
|
||||
self.validationThread = QThread()
|
||||
self.validationWorker = ValidationWorker()
|
||||
|
||||
self.validationWorker.moveToThread(self.validationThread)
|
||||
|
||||
self.validationThread.started.connect(self.disableValidationButton)
|
||||
self.validationThread.started.connect(self.validationWorker.run)
|
||||
|
||||
self.validationThread.start()
|
||||
self.validationWorker.finished.connect(self.validationThread.quit)
|
||||
self.validationWorker.finished.connect(self.enableValidationButton)
|
||||
|
||||
def enableValidationButton(self):
|
||||
self.PValidate.setEnabled(True)
|
||||
def disableValidationButton(self):
|
||||
self.PValidate.setEnabled(False)
|
||||
|
||||
def disable_PsyncDB(self):
|
||||
self.PsyncDB.setText("Sync Running...")
|
||||
self.PsyncDB.setEnabled(False)
|
||||
@ -182,19 +102,17 @@ class MainWindow(QMainWindow):
|
||||
w = PWPrompt()
|
||||
w.set_MSG(self.worker.messageContent)
|
||||
ret = w.exec()
|
||||
if ret == QDialog.Rejected:
|
||||
self.worker.dialog_rejected = True
|
||||
log("[gui] qdialog.rejected set to TRUE")
|
||||
self.pw = w.pw
|
||||
self.worker.password = w.pw
|
||||
log("showDialog,self.pw:",self.pw)
|
||||
print("showDialog,self.pw:",self.pw)
|
||||
self.worker.dialog_closed=True
|
||||
if ret == QDialog.Rejected:
|
||||
return 1
|
||||
|
||||
def showQueryWindow(self,checked):
|
||||
if self.w is None:
|
||||
self.w = QueryWindow()
|
||||
self.w.show()
|
||||
self.w.queryFineFilers()
|
||||
def filter_canton(self,canton):
|
||||
if canton != "ALL":
|
||||
self.cmdCanton = f"""
|
||||
@ -205,25 +123,14 @@ class MainWindow(QMainWindow):
|
||||
print("cmd canton:", self.cmdCanton)
|
||||
|
||||
else:
|
||||
self.cmdCanton = ''
|
||||
print("disable fil§.ter")
|
||||
self.cmdCanton = ' '
|
||||
print("disable filter")
|
||||
# self.customSQL(self.cmd)
|
||||
|
||||
def queryEditLine(self):
|
||||
self.cmd = self.EditQuery.text()
|
||||
if self.customcmd or self.cmd:
|
||||
if self.cmdCanton:
|
||||
self.isAWhere = True
|
||||
connectingstring = " AND "
|
||||
else:
|
||||
self.isAWhere = False
|
||||
connectingstring = " WHERE "
|
||||
else:
|
||||
connectingstring = ' '
|
||||
print(self.initcmd + self.cmdCanton +connectingstring +self.customcmd + self.cmd)
|
||||
self.customSQL(self.initcmd+ self.cmdCanton + connectingstring + self.customcmd + self.cmd)
|
||||
if self.customcmd or self.cmd:
|
||||
addFineFilter("../db/sqlite3.db","filters",self.customcmd + self.cmd)
|
||||
print(self.initcmd + self.cmdCanton +self.customcmd + self.cmd)
|
||||
self.customSQL(self.initcmd+ self.cmdCanton + self.customcmd + self.cmd)
|
||||
|
||||
def cell_clicked(self):
|
||||
x = self.view.selectionModel().currentIndex().row()
|
||||
@ -231,37 +138,16 @@ class MainWindow(QMainWindow):
|
||||
data = self.view.model().index(x,5).data()
|
||||
print("cell clicked:",x," / ",y, "-->",data)
|
||||
self.browser.setUrl(QUrl(data))
|
||||
hash1 = self.view.model().index(x,7).data()
|
||||
print("hash of selected: ",hash1)
|
||||
#db.viewedEntry(hash1)
|
||||
self.view.selectionModel().currentIndex()
|
||||
self.model.setData({0,8},hash1,role=1001)
|
||||
self.view.updateGeometries()
|
||||
self.view.viewport().repaint()
|
||||
|
||||
def queryFineFilers(self):
|
||||
FineFilterItems = self.getFineFilters()
|
||||
for item in FineFilterItems:
|
||||
self.CEditQuery.addItem(item)
|
||||
def getFineFilters(self):
|
||||
item = []
|
||||
statement = f"""Select cmd FROM filters;"""
|
||||
query = QSqlQuery(statement)
|
||||
while query.next():
|
||||
item.append(query.value(0))
|
||||
return item
|
||||
|
||||
def customSQL(self,cmd):
|
||||
print("Run SQL Query",cmd)
|
||||
#self.model.setTable("")
|
||||
self.model.setTable("")
|
||||
self.model.setQuery(cmd +" ;")
|
||||
#self.model.setTable("jobs")
|
||||
while (self.model.canFetchMore()):
|
||||
print("fetch iterations++")
|
||||
self.model.fetchMore()
|
||||
|
||||
|
||||
self.view.show()
|
||||
self.proxymodel2 = QSortFilterProxyModel(self)
|
||||
self.proxymodel2.setSourceModel(self.model)
|
||||
self.view.setModel(self.proxymodel2)
|
||||
self.setProxyViewSettings()
|
||||
class PWPrompt(QDialog):
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
@ -309,7 +195,6 @@ class QueryWindow(QWidget):
|
||||
self.CFilter.addItem("ALL")
|
||||
for Canton in Cantons:
|
||||
self.CFilter.addItem(Canton)
|
||||
|
||||
self.CFilter.currentTextChanged.connect(window.filter_canton)
|
||||
self.CFilter.currentTextChanged.connect(self.setTFilter)
|
||||
|
||||
@ -331,30 +216,25 @@ class QueryWindow(QWidget):
|
||||
items = self.getViews()
|
||||
for item in items:
|
||||
self.CShowViews.addItem(item)
|
||||
|
||||
self.CShowViews.currentTextChanged.connect(self.setView)
|
||||
|
||||
self.CShowFineFilters = QComboBox()
|
||||
|
||||
self.queryFineFilers()
|
||||
self.CShowFineFilters.currentTextChanged.connect(window.EditQuery.setText)
|
||||
|
||||
self.PApplyView = QCheckBox()
|
||||
self.PApplyView.setText("Apply View")
|
||||
self.PApplyView.clicked.connect(self.setView)
|
||||
|
||||
|
||||
self.vrLayout = QVBoxLayout()
|
||||
self.vrLayout.addWidget(self.LFilter)
|
||||
self.vrLayout.addWidget(self.CFilter)
|
||||
self.vrLayout.addWidget(self.LShowViews)
|
||||
self.vrLayout.addWidget(self.CShowViews)
|
||||
self.vrLayout.addWidget(self.PApplyView)
|
||||
self.vrLayout.addWidget(self.CShowFineFilters)
|
||||
|
||||
self.WvrLayout = QWidget()
|
||||
self.WvrLayout.setLayout(self.vrLayout)
|
||||
self.WvrLayout.setMaximumSize(QSize(200,200))
|
||||
|
||||
|
||||
self.hLayout = QHBoxLayout()
|
||||
self.hLayout.addLayout(self.vLayout)
|
||||
self.hLayout.addWidget(self.WvrLayout)
|
||||
@ -364,18 +244,6 @@ class QueryWindow(QWidget):
|
||||
self.EditQuery.setText(window.customcmd)
|
||||
|
||||
print("Comboshowview:",self.CShowViews.currentText())
|
||||
def queryFineFilers(self):
|
||||
FineFilterItems = self.getFineFilters()
|
||||
for item in FineFilterItems:
|
||||
self.CShowFineFilters.addItem(item)
|
||||
def getFineFilters(self):
|
||||
item = []
|
||||
statement = f"""Select cmd FROM filters;"""
|
||||
query = QSqlQuery(statement)
|
||||
while query.next():
|
||||
item.append(query.value(0))
|
||||
return item
|
||||
|
||||
|
||||
def getViews(self):
|
||||
item = []
|
||||
@ -392,10 +260,10 @@ class QueryWindow(QWidget):
|
||||
if self.PApplyView.isChecked():
|
||||
self.view = self.CShowViews.currentText()
|
||||
print("Selected View:",self.view)
|
||||
window.initcmd = f"""SELECT * FROM '{self.view}' """
|
||||
window.initcmd = f"""SELECT * FROM '{self.view}'"""
|
||||
print("window.initcmd:", window.initcmd)
|
||||
else:
|
||||
window.initcmd = f"""SELECT * FROM jobs as b """
|
||||
window.initcmd = f"""SELECT * FROM jobs as b """
|
||||
print("View unchecked")
|
||||
self.TInitCmd.setText(window.initcmd)
|
||||
|
||||
|
@ -3,18 +3,9 @@ import requests
|
||||
from bs4 import BeautifulSoup
|
||||
from enum import Enum
|
||||
import re
|
||||
import shutil
|
||||
from dateconverter import *
|
||||
from datetime import datetime
|
||||
import os
|
||||
import sqlite3
|
||||
import webbrowser
|
||||
import mozilla
|
||||
|
||||
DEBUG = False
|
||||
number = ['0','1','2','3','4','5','6','7','8','9']
|
||||
|
||||
|
||||
|
||||
def log(*s):
|
||||
if DEBUG:
|
||||
@ -44,14 +35,10 @@ months = [
|
||||
('November','11'),
|
||||
('December','12')]
|
||||
class item():
|
||||
def __init__(self,tag,tag_content,index,name=None):
|
||||
def __init__(self,tag,tag_content,index):
|
||||
self.tag = tag
|
||||
self.tag_content = tag_content
|
||||
self.index = index
|
||||
if name is not None:
|
||||
self.name = name
|
||||
else:
|
||||
self.name = "not defined"
|
||||
|
||||
class job():
|
||||
def __init__(self,title,profession,company,location,date,description,link,tag,starred):
|
||||
@ -64,57 +51,37 @@ class job():
|
||||
self.link = link
|
||||
self.tag = tag
|
||||
self.starred = starred
|
||||
|
||||
def __str__(self):
|
||||
return "%s| %s|%s|%s|%s|%s|%s" % (self.title,self.profession,self.company,self.location,self.date,self.description,self.link)
|
||||
|
||||
def finder(results,item,**modes):
|
||||
GETCHILDREN = modes.get("GETCHILDREN",'')
|
||||
ATTRS = modes.get('ATTRS',0)
|
||||
LOCATION_CLEANUP = modes.get('LOCATION_CLEANUP',0)
|
||||
LINK = modes.get('LINK',0)
|
||||
SWAPDATE = modes.get('SWAPDATE',0)
|
||||
CLEANDATE = modes.get('CLEANDATE',0)
|
||||
BASEURL = modes.get('BASEURL','')
|
||||
INDEEDDATE = modes.get('INDEEDDATE',0)
|
||||
content = []
|
||||
i = item.index
|
||||
log("name",item.name)
|
||||
log("Item tag: ",item.tag)
|
||||
log("Modes:",modes)
|
||||
log("tag_content: ",item.tag_content)
|
||||
|
||||
for entry in results:
|
||||
if ATTRS==1:
|
||||
result = entry.findAll(item.tag,attrs=item.tag_content)
|
||||
log(item.tag_content)
|
||||
else:
|
||||
result = entry.findAll(item.tag,class_=item.tag_content)
|
||||
log("found count results:",len(result))
|
||||
if len(result)==0 and DEBUG == True:
|
||||
log("len result: ",len(result))
|
||||
for x in result:
|
||||
log("No entry found for: ",item.name,item.tag,item.tag_content," -->", x)
|
||||
input()
|
||||
log("found:",len(result))
|
||||
if result:
|
||||
log("theres a result")
|
||||
if i>(len(result)-1):
|
||||
log("len:",len(result)-1,"i:",i)
|
||||
log("index out of bounds fall back to the %d count",i)
|
||||
# input("Press Enter..")
|
||||
i=(len(result)-1)
|
||||
result2 = result[i]
|
||||
if GETCHILDREN != '':
|
||||
found = False
|
||||
for results in result:
|
||||
child = results.find(GETCHILDREN)
|
||||
log("[finder] search for '",GETCHILDREN,"' in: ",child)
|
||||
if child != None and found == False:
|
||||
log("CHILD text strip: ",child.text.strip())
|
||||
found = True
|
||||
content.append(child.text.strip())
|
||||
if found == False:
|
||||
log("[finder] No matching Child found: ",child)
|
||||
content.append("CHILD_NOT_FOUND: " + GETCHILDREN)
|
||||
|
||||
elif LOCATION_CLEANUP==1:
|
||||
if LOCATION_CLEANUP==1:
|
||||
location = CleanLocation(result2.text.strip())
|
||||
content.append(location)
|
||||
elif LINK==1:
|
||||
@ -125,20 +92,14 @@ def finder(results,item,**modes):
|
||||
elif SWAPDATE==1:
|
||||
content.append(DateCHToUS(result2.text.strip()))
|
||||
elif CLEANDATE==1:
|
||||
log("[finder] pre cleandate:",result2.text.strip)
|
||||
content.append(jobs_ch_clean_date(result2.text.strip()))
|
||||
elif INDEEDDATE==1:
|
||||
log("[finder] pre indeeddate:",result2.text.strip)
|
||||
content.append(indeedExtractDays(result2.text.strip()))
|
||||
else:
|
||||
log(result2)
|
||||
content.append(result2.text.strip())
|
||||
if not result:
|
||||
if item.tag_content == "pubdate":
|
||||
today = datetime.today().strftime('%Y-%m-%d')
|
||||
if CLEANDATE:
|
||||
today = datetime.today().strftime('%Y-%M-%D')
|
||||
content.append(today)
|
||||
else:
|
||||
content.append("NOTFound")
|
||||
content.append("NOTFound")
|
||||
return content
|
||||
|
||||
|
||||
@ -153,10 +114,10 @@ def arrayToClass(titles,companys,locations,dates,links,tag):
|
||||
log("len:",len(titles))
|
||||
for i, title in enumerate(titles):
|
||||
jobs.append(job(title,"test_prof",companys[i],locations[i],dates[i],"test_desc",links[i],tag,0))
|
||||
log("class job:",jobs[i])
|
||||
log(jobs[i])
|
||||
return jobs
|
||||
else:
|
||||
log("Something went wrong unequal length of data arrays: ",len(titles),len(companys),len(locations),len(dates))
|
||||
print("Something went wrong unequal length of data arrays")
|
||||
return 0
|
||||
def jobs_ch_clean_date(date):
|
||||
newdate=''
|
||||
@ -203,25 +164,3 @@ def makeSession(url):
|
||||
with requests.Session() as session:
|
||||
page = session.get(url)
|
||||
return session
|
||||
def indeedExtractDays(datestr):
|
||||
cleannumstr=''
|
||||
cleannumint=-1
|
||||
cleandate=''
|
||||
foundchar=False
|
||||
for a in datestr:
|
||||
print(a)
|
||||
if a in number and foundchar==False:
|
||||
foundchar=True
|
||||
cleannumstr+=a
|
||||
elif a in number and foundchar == True:
|
||||
cleannumstr+=a
|
||||
elif a not in number and foundchar == True:
|
||||
break
|
||||
if cleannumstr != '':
|
||||
cleannumint = int(cleannumstr)
|
||||
today = int(datetime.utcnow().timestamp())
|
||||
cleandate = today - cleannumint * 60 * 60 * 7 * 4
|
||||
#print("int:",cleannumint,"today:",today,"cleandate:",datetime.fromtimestamp(cleandate).strftime('%Y-%m-%d'))
|
||||
return datetime.fromtimestamp(cleandate).strftime('%Y-%m-%d')
|
||||
return "NOTFound"
|
||||
|
||||
|
44
lib/login.py
44
lib/login.py
@ -1,5 +1,4 @@
|
||||
import requests
|
||||
from requests_html import HTMLSession
|
||||
from helpers import *
|
||||
def login(entry):
|
||||
user = entry.user
|
||||
@ -7,18 +6,17 @@ def login(entry):
|
||||
loginurl = entry.loginurl
|
||||
scrapurl = entry.scrapurl
|
||||
with requests.Session() as session:
|
||||
session.headers = {
|
||||
headers = {
|
||||
"Host": "www.jobagent.ch",
|
||||
"User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:124.0) Gecko/20100101 Firefox/124.0",
|
||||
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
|
||||
"Accept-Language": "en-US,en;q=0.5",
|
||||
"Accept-Encoding": "gzip, deflate, br",
|
||||
# "Content-Type": "application/x-www-form-urlencoded",
|
||||
# "Content-Length": "58",
|
||||
"Content-Type": "application/x-www-form-urlencoded",
|
||||
"Content-Length": "58",
|
||||
"Origin": "https://www.jobagent.ch",
|
||||
"Connection": "keep-alive",
|
||||
"Referer": "https://jobagent.ch",
|
||||
#"Cookie": "datadome=BbGio7V9YBqYELb~B2a7DqE9Zr5EWb315OypbcxGQOFKbhkJR48etFSooYwtnKF2sK5leCh7Q_0o6W5YMwl0qEH~Fw3BU0m~48MgrkuaxO3Z1s5MTqCnTZVW3PcQv7KM; _uc=ad_storage=granted:analytics_storage=granted; _gcl_au=1.1.1328234550.1724056973.1502078804.1724062620.1724062680; _ga=GA1.1.1840632117.1724056971; _ga_T0E2JNNRW2=GS1.1.1724062555.3.1.1724062683.0.1.2098134382; JSESSIONID=AB8CC398C2576A6A87C53A74CCD8F7F5; _pin_unauth=dWlkPU56Y3pabU14WW1JdFptTTFNeTAwTkdFMkxUbGlZV0V0TWprNVkyTXpZemd4WldNNA; _fbp=fb.1.1724056975123.543612652217076856; _clck=16bp9by%7C2%7Cfog%7C0%7C1692; _clsk=1o7y6b9%7C1724062683361%7C9%7C1%7Cu.clarity.ms%2Fcollect; _rm=ai53eXNzJTQwa29sYWJub3cuY2g6MTcyNjY1NDY4MTA0NDpTSEEyNTY6ZGRkMmZhYTRjZWY3MWZkZDU1M2VlMTI4ZjYzOGY1NmFiYmRkNjNiMmI3ZjE1NWRhNmU3YzcwZWU1NjQ2Mjc0Mw; _uetsid=0737af805e0711efbe7bdd027b00b063; _uetvid=0737b3005e0711efb7c7035382896421",
|
||||
# "Connection": "keep-alive",
|
||||
"Referer": "https://www.jobagent.ch/user/login",
|
||||
# "Upgrade-Insecure-Requests": "1",
|
||||
# "Sec-Fetch-Dest": "document",
|
||||
# "Sec-Fetch-Mode": "navigate",
|
||||
@ -26,35 +24,15 @@ def login(entry):
|
||||
# "DNT": "1",
|
||||
# "Sec-GPC": "1"
|
||||
}
|
||||
|
||||
r = session.get(loginurl)
|
||||
payload = {"redirectUrl":"","email":user,"password":pw}
|
||||
resp = session.post(loginurl,data=payload)
|
||||
resp = session.post(loginurl,data=payload,headers=headers)
|
||||
print(payload)
|
||||
checkBlockers(session,resp)
|
||||
r = session.get(scrapurl)
|
||||
print(session.headers)
|
||||
print("response:",r)
|
||||
return session
|
||||
#solveCaptcha when :
|
||||
#string "captcha" is in response
|
||||
#search for <iframe
|
||||
#get src tag
|
||||
#open a webbrowser to solve the captcha
|
||||
#somehow getting the cookie, maybe?
|
||||
|
||||
def solveCaptcha(session,resp):
    """Detect whether a response looks like a captcha / JS-challenge page.

    Parameters:
        session: the requests session (currently unused, kept for interface
                 compatibility with callers such as checkBlockers).
        resp: a requests.Response or any object whose text should be scanned.

    Returns:
        1 when a captcha or "Enable JavaScript" wall is detected, else 0.
    """
    print("response:",resp)
    # Scan the page text when given a requests.Response; otherwise fall back
    # to the object's string form so plain strings also work.
    body = resp.text if hasattr(resp, "text") else str(resp)
    # BUG FIX: the original condition was `if "captcha" or "Enable JavaScript"
    # in resp` — the bare literal "captcha" is always truthy, so the function
    # unconditionally reported a captcha. Test each substring explicitly.
    if "captcha" in body or "Enable JavaScript" in body:
        print("captcha link!! found:")
        return 1
    else:
        return 0
|
||||
|
||||
|
||||
def checkBlockers(session,resp):
    """Inspect a login response for blockers (bad credentials, captcha walls).

    Parameters:
        session: the requests session used for the login.
        resp: the login response (may be falsy when the request failed).

    Returns:
        -1 when jobagent rejected the credentials, otherwise the session
        after re-fetching the scrape target URL.
    """
    print("response from login attempt",resp)
    if resp:
        print("response from login attempt",resp.url)
        # jobagent bounces a failed login back to .../user/login?error
        if resp.url == 'https://www.jobagent.ch/user/login?error':
            print("Error on login")
            return -1
    # Probe for captcha / JS walls; the result is currently only logged.
    solveCaptcha(session,resp)
    # Warm up the session against the scrape target (module-level scrapurl).
    probe = session.get(scrapurl)
    return session
|
||||
|
@ -1,42 +0,0 @@
|
||||
|
||||
def urlToDomain(url):
    """Reduce a full URL to a bare domain string for cookie-host matching.

    Strips the 'https://' scheme, drops the leading subdomain label
    (everything up to and including the first dot), then cuts away any
    path component after the first '/'.
    """
    # Skip past the scheme prefix.
    schemeEnd = patternSearch(url,"https://")
    trimmed = dropBeforePos(url,schemeEnd)
    # Drop the leading subdomain label; -1 keeps the dot's successor aligned.
    firstDot = skipAfterChar(trimmed,'.') - 1
    trimmed = dropBeforePos(trimmed,firstDot)
    # Cut away the path, if any.
    slashPos = skipAfterChar(trimmed,'/')
    trimmed = dropAfterPos(trimmed,slashPos)
    print("url after cut dot:",trimmed)
    return trimmed
||||
|
||||
def patternSearch(url,pattern):
    """Return the index of the LAST character of *pattern* inside *url*.

    Parameters:
        url: the string to scan.
        pattern: the substring to locate.

    Returns:
        Index of the final character of the first occurrence of *pattern*,
        or len(url) - 1 when the pattern is absent (mirroring the original
        scanner, which fell off the end and returned the last index seen).

    BUG FIX: the original hand-rolled matcher reset its pattern cursor on a
    mismatch without re-examining the current character, so overlapping
    prefixes (e.g. finding "https://" in "hhttps://x") were missed; it also
    raised NameError on an empty *url* and printed every scanned character.
    str.find handles all of that correctly.
    """
    start = url.find(pattern)
    if start != -1:
        return start + len(pattern) - 1
    # Compatibility fallback for "not found" (and -1 for an empty url).
    return len(url) - 1
|
||||
def skipAfterChar(aString,char):
    """Return the index of the first occurrence of *char* in *aString*.

    Returns len(aString) - 1 when *char* is absent (matching the original
    loop, which broke out at the last index), and -1 for an empty string —
    the original raised NameError there because the loop variable was never
    bound; callers like dropBeforePos treat -1 as "keep everything".
    """
    pos = aString.find(char)
    if pos != -1:
        return pos
    return len(aString) - 1
|
||||
def dropBeforePos(aString,pos):
    """Return the part of *aString* strictly after index *pos*.

    Yields '' when pos+1 reaches past the end of the string.
    """
    start = pos + 1
    if start >= len(aString):
        return ''
    # Walk the indices exactly like the original accumulator loop did
    # (range() semantics also cover the pos == -1 "keep everything" case).
    pieces = [aString[i] for i in range(start, len(aString))]
    return ''.join(pieces)
|
||||
def dropAfterPos(aString,pos):
    """Return the first *pos* characters of *aString*.

    Yields '' when *pos* is negative or not strictly less than the string
    length (the original loop produced an empty accumulator in both cases).
    """
    if 0 <= pos < len(aString):
        return aString[:pos]
    return ''
|
@ -1,74 +0,0 @@
|
||||
#access cookies from firefox:
|
||||
#copy (because locked): cp .mozilla/firefox/imibizoh.default/cookies.sqlite cookies.sqlite
|
||||
#Select value from moz_cookies where host like '%indeed%'
|
||||
import webbrowser
|
||||
import tempfile
|
||||
import os
|
||||
import sqlite3
|
||||
import shutil
|
||||
from time import sleep
|
||||
import manipulateString as ms
|
||||
|
||||
# Module-wide debug switch for the log() helper below.
DEBUG = True

def log(*s):
    """Debug helper: echo the argument tuple when DEBUG is enabled."""
    if not DEBUG:
        return
    print(s)
|
||||
|
||||
def findDefaultProfile(path):
    """Locate the Firefox profile directory to read cookies from.

    Scans *path* for a directory whose name, after the first dot, is exactly
    "default" (e.g. "imibizoh.default").

    Returns:
        The matching directory name, or -1 when none is found.
    """
    for candidate in os.listdir(path):
        # Split off everything after the first '.' in the directory name.
        dotPos = ms.skipAfterChar(candidate,'.')
        suffix = ms.dropBeforePos(candidate,dotPos)
        log(suffix)
        if suffix == "default":
            return candidate
    return -1
|
||||
|
||||
def getCookiesFromBrowser(url,force=False):
    """Harvest Firefox's cookies for *url*'s domain from its cookie database.

    Copies the (locked) cookies.sqlite aside, reads name/value pairs for the
    domain, and — when nothing is found and *force* is False — opens the URL
    in Firefox so the user can solve any challenge, then retries once.

    Parameters:
        url: the site whose cookies are wanted.
        force: when True, never open a browser; just return whatever is found.

    Returns:
        A single "name=value;name=value;" cookie-header string, '' if none.
    """
    DBFILE = "../db/sqlite3.db"  # NOTE(review): unused in this function — confirm before removing
    if os.name == 'posix':
        homePath = os.path.expanduser('~')
        profileRoot = homePath + "/.mozilla/firefox/"
        cookiePath = profileRoot + findDefaultProfile(profileRoot) + "/cookies.sqlite"
        tmpPath = "/tmp/cookies.sqlite"
    if os.name == 'nt':
        appdata = os.getenv('APPDATA')
        winProfileRoot = appdata + "\\Mozilla\\Firefox\\Profiles\\"
        winCookiePath = winProfileRoot + findDefaultProfile(winProfileRoot) + "cookies.sqlite"
        winFirefoxPath = "C:\\Program Files\\Mozilla Firefox\\firefox.exe"
        tmpPath = tempfile.gettempdir() + "\\cookies.sqlite"

    attempts = 0
    cookie = ''
    batch = [0]  # sentinel so the fetch loop below runs at least once
    # NOTE(review): batch is only reset here, so on the second attempt the
    # inner fetch loop may be skipped — preserved as-is; confirm intent.
    while cookie == '' and attempts < 2:
        attempts += 1
        # Copy the DB aside: Firefox keeps the live cookies.sqlite locked.
        if os.name == 'posix':
            shutil.copyfile(cookiePath,tmpPath)
        elif os.name == 'nt':
            shutil.copyfile(winCookiePath,tmpPath)
        with sqlite3.connect(tmpPath) as connection:
            cmd_read_cookies = f"""SELECT name,value FROM moz_cookies WHERE host like ?;"""
            print(cmd_read_cookies)
            cur = connection.cursor()
            # Parameterized query; ms.urlToDomain reduces the URL to its host.
            cur.execute(cmd_read_cookies,(ms.urlToDomain(url),))
            while len(batch) != 0:
                batch = cur.fetchmany(25)
                for name_value in batch:
                    print("row:",name_value)
                    # Assemble a Cookie-header style "name=value;" string.
                    cookie = cookie + name_value[0] + '=' + name_value[1]
                    cookie += ";"

        print("Cookies:",cookie)
        if cookie == '' and force == False:
            # Nothing harvested: let the user visit the site in Firefox so
            # fresh cookies get written, then loop for one retry.
            if os.name == 'posix':
                webbrowser.register("firefox",None,webbrowser.BackgroundBrowser("firefox"))
                webbrowser.get('firefox').open(url)
            elif os.name == 'nt':
                webbrowser.register("firefox",None,webbrowser.BackgroundBrowser(winFirefoxPath))
                webbrowser.get('firefox').open(url)
            sleep(1)
    return cookie
|
@ -1,82 +0,0 @@
|
||||
#modifie the QSqlQueryModel to be editable and sets them to the database
|
||||
from PySide6.QtSql import QSqlQueryModel, QSqlQuery
|
||||
from PySide6.QtCore import Qt
|
||||
#credits to :
|
||||
#https://stackoverflow.com/questions/49752388/editable-qtableview-of-complex-sql-query
|
||||
|
||||
|
||||
class SqlQueryModel_editable(QSqlQueryModel):
    """a subclass of QSqlQueryModel where individual columns can be defined as editable
    """
    def __init__(self, editables):
        """editables should be a dict of format:
        {INT editable_column_nr : (STR update query to be performed when changes are made on this column
        INT model's column number for the filter-column (used in the where-clause),
        )}
        """
        super().__init__()
        # Per-column edit configuration: {column_nr: (UPDATE query template, filter column)}.
        self.editables = editables
        # Buffer of row hashes flagged as "viewed"; flushed to the DB in batches of 5.
        self.updatelist = []

    def flags(self, index):
        # Mark the configured columns as editable on top of the read-only base flags.
        fl = QSqlQueryModel.flags(self, index)
        if index.column() in self.editables:
            fl |= Qt.ItemIsEditable
        return fl

    def setData(self, index, value, role=Qt.EditRole):
        """Write edits back to the database.

        Qt.EditRole: runs the configured UPDATE for the edited column, using
        the value of the filter column in the same row for the WHERE clause.
        Custom role 1001: buffers *value* (a row hash) and, once 5 are queued,
        flags them all as viewed in the jobs table.
        """
        print("role: ",role)
        if role == Qt.EditRole:
            mycolumn = index.column()
            if mycolumn in self.editables:
                (query, filter_col) = self.editables[mycolumn]
                # Value of the filter column in the edited row, used in the WHERE clause.
                filter_value = self.index(index.row(), filter_col).data()
                # NOTE(review): values are interpolated via str.format, not bound
                # parameters — SQL-injection prone if values are user-controlled.
                q = QSqlQuery(query.format(value, filter_value))
                result = q.exec_()
                if result:
                    # Re-run the select so the view reflects the update.
                    self.query().exec_()
                    # print("filter_value:",filter_value)
                    print("setdata query: ",query.format(value, filter_value))
                    print("index.row:",index.row(), "filter_col:",filter_col)
                else:
                    print(self.query().lastError().text())
                return result
        elif role == 1001:
            result = 0
            self.updatelist.append(value)
            print(self.updatelist)
            # Flush only when at least 5 hashes are queued, to batch DB writes.
            if len(self.updatelist) >= 5:
                for x in self.updatelist:
                    print("Atempt flaging view")
                    q = QSqlQuery("UPDATE jobs SET viewed = '1' WHERE hash = {}".format(x))
                    print("QSQLQuery: ", "UPDATE jobs SET viewed = '1' WHERE hash = {}".format(x))
                    result = q.exec_()
                    if result:
                        self.query().exec_()
                    else:
                        # Abort the batch on the first failed UPDATE.
                        print("Error:", self.query().lastError().text())
                        return result
                self.updatelist = []
            return result
        # NOTE(review): index-100/index+100 applies integer arithmetic to a
        # QModelIndex, which Qt does not support — this path only runs for
        # roles other than EditRole/1001 and looks unreachable/buggy; confirm.
        self.dataChanged.emit(index-100,index+100)
        self.layoutChanged.emit()
        return QSqlQueryModel.setData(self, index, value, role)
|
||||
|
||||
|
||||
# view = QTableView()
|
||||
#
|
||||
# editables = {1 : ("UPDATE Manufacturers SET Country = '{}' WHERE Company = '{}'", 2)}
|
||||
# model = SqlQueryModel_editable(editables)
|
||||
# query = '''
|
||||
# SELECT (comp.company || " " || cars.model) as Car,
|
||||
# comp.Country,
|
||||
# cars.company,
|
||||
# (CASE WHEN cars.Year > 2000 THEN 'yes' ELSE 'no' END) as this_century
|
||||
# from manufacturers comp left join cars
|
||||
# on comp.company = cars.company
|
||||
# '''
|
||||
# q = QSqlQuery(query)
|
||||
# model.setQuery(q)
|
||||
# model.setFilter("cars.Company = 'VW'")
|
||||
# view.setModel(model)
|
||||
# view.hideColumn(2)
|
@ -1,117 +1,70 @@
|
||||
from helpers import *
|
||||
from login import solveCaptcha
|
||||
import mozilla
|
||||
DEBUG = True
|
||||
DEBUG = False
|
||||
|
||||
def log(*s):
|
||||
if DEBUG:
|
||||
print(s)
|
||||
|
||||
def scrap_indeed_com(url,entry,session):
|
||||
moz_cookies = mozilla.getCookiesFromBrowser(url)
|
||||
print("[scrap]cookies:", moz_cookies)
|
||||
session.headers = {
|
||||
"User-Agent" : "Mozilla/5.0 (X11; Linux x86_64; rv:127.0) Gecko/20100101 Firefox/127.0",
|
||||
"Referer" : "https://ch.indeed.com/jobs?&from=searchOnHP",
|
||||
"Cookie" : moz_cookies
|
||||
}
|
||||
def indeed_com(url,session):
|
||||
jobs = []
|
||||
log("in scrap jobs,url",url)
|
||||
if(session == 0 or session == -1):
|
||||
with requests.Session() as session:
|
||||
page = session.get(url)
|
||||
log(page)
|
||||
else:
|
||||
page = session.get(url)
|
||||
log(page)
|
||||
if solveCaptcha(session,page) == 1:
|
||||
print("Cookie stealing unsuccesfull retry with force")
|
||||
moz_cookies = mozilla.getCookiesFromBrowser(url,force=True)
|
||||
|
||||
soup = BeautifulSoup(page.content,"html.parser")
|
||||
#print(soup.prettify())
|
||||
|
||||
|
||||
results = soup.find_all("li",class_= 'css-5lfssm eu4oa1w0') #top level list element
|
||||
|
||||
location = item("div",{'data-testid':'text-location'},0,"indeed location")
|
||||
ar_location = finder(results,location,ATTRS=1,LOCATION_CLEANUP=1)
|
||||
|
||||
company = item("span",{'data-testid':'company-name'},0,"indeed company")
|
||||
ar_company = finder(results,company,ATTRS=1)
|
||||
|
||||
title = item("a",'jcs-JobTitle',0,"indeed title")
|
||||
ar_title = finder(results,title,GETCHILDREN="span")
|
||||
|
||||
date = item("span",{'data-testid':'myJobsStateDate'},0,"indeed date")
|
||||
ar_date = finder(results,date,ATTRS=1,INDEEDDATE=1)
|
||||
|
||||
link = item("a",'jcs-JobTitle',0,"link")
|
||||
ar_link = finder(results,link,LINK=1,BASEURL="https://ch.indeed.com")
|
||||
|
||||
tag = entry.tag#get from config
|
||||
return arrayToClass(ar_title,ar_company,ar_location,ar_date,ar_link,tag)
|
||||
|
||||
def scrap_jobs(url,entry,session):
|
||||
session.headers = {
|
||||
"User-Agent" : "Mozilla/5.0 (X11; Linux x86_64; rv:127.0) Gecko/20100101 Firefox/127.0"
|
||||
}
|
||||
jobs = []
|
||||
log("in scrap jobs,url",url)
|
||||
if(session == 0 or session == -1):
|
||||
with requests.Session() as session:
|
||||
page = session.get(url)
|
||||
log(page)
|
||||
else:
|
||||
page = session.get(url)
|
||||
log(page)
|
||||
soup = BeautifulSoup(page.content,"html.parser")
|
||||
#print(soup.prettify())
|
||||
|
||||
results = soup.find_all("div",attrs={'data-feat':'searched_jobs'})
|
||||
|
||||
location_class = "d_grid items_start gap_s12 grid-cols_[auto_1fr] px_s8"
|
||||
location = item("div",location_class,0,"location")
|
||||
ar_location = finder(results,location,GETCHILDREN='p',LOCATION_CLEANUP=1)
|
||||
|
||||
company_class = "mb_s12 lastOfType:mb_s0 textStyle_p2"
|
||||
company = item("p",company_class,0,"company")
|
||||
ar_company = finder(results,company,DEFAULT=1,GETCHILDREN='strong')
|
||||
|
||||
title = item("span","text_link.brand.base",0,"TITLE")
|
||||
ar_title = finder(results,title,DEFAULT=1)
|
||||
|
||||
date = item("span","pos_absolute",0,"date")
|
||||
ar_date = finder(results,date,CLEANDATE=1)
|
||||
|
||||
link = item("a",{'data-cy' :'job-link'},0,"link")
|
||||
ar_link = finder(results,link,LINK=1,ATTRS=1,BASEURL="https://jobs.ch")
|
||||
|
||||
tag = entry.tag#get from config
|
||||
return arrayToClass(ar_title,ar_company,ar_location,ar_date,ar_link,tag)
|
||||
|
||||
def next_url_indeed_com(url,session,baseurl):
|
||||
next_link_str = ''
|
||||
if(session == 0):
|
||||
with requests.Session() as session:
|
||||
page = session.get(url)
|
||||
print(page)
|
||||
else:
|
||||
page = requests.get(url)
|
||||
page = session.get(url)
|
||||
print(page)
|
||||
soup = BeautifulSoup(page.content,"html.parser")
|
||||
result_next = soup.findAll("nav",attrs={"role":"navigation"})
|
||||
next_=item("a",{'data-testid':'pagination-page-next'},0)
|
||||
next_link = finder(result_next,next_,ATTRS=1,LINK=1)
|
||||
if next_link:
|
||||
if(next_link[0] != "NOTFound"):
|
||||
next_link_str = str(next_link[0])
|
||||
next_link_str = baseurl + next_link_str
|
||||
log(next_link_str)
|
||||
#print(soup.prettify())
|
||||
|
||||
results = soup.find_all("li",class_= 'css-5lfssm eu4oa1w0')
|
||||
|
||||
location = item("p",{'data-testid':'text-location'},0)
|
||||
ar_location = finder(results,location,LOCATION_CLEANUP=1,ATTRS=1)
|
||||
|
||||
company = item("p",{'data-testid':'company-name'},0)
|
||||
ar_company = finder(results,location,ATTRS=1)
|
||||
|
||||
title = item("a",'jobTitle',0)
|
||||
ar_title = finder(results,location)
|
||||
|
||||
date = item("span","Span-sc-1ybanni-0 Text__span-sc-1lu7urs-12 Text-sc-1lu7urs-13 krGudM hUhFmL",0)
|
||||
ar_date = finder(results,date,CLEANDATE=1)
|
||||
|
||||
def scrap_jobs(url,entry,session):
|
||||
jobs = []
|
||||
log("in scrap jobs,url",url)
|
||||
if(session == 0):
|
||||
with requests.Session() as session:
|
||||
page = session.get(url)
|
||||
log(page)
|
||||
else:
|
||||
return 0
|
||||
if next_link_str != '':
|
||||
return next_link_str
|
||||
else:
|
||||
return 0
|
||||
page = session.get(url)
|
||||
log(page)
|
||||
soup = BeautifulSoup(page.content,"html.parser")
|
||||
#print(soup.prettify())
|
||||
|
||||
results = soup.find_all("div",attrs={"data-feat":"searched_jobs"})
|
||||
|
||||
location_class = "P-sc-hyu5hk-0 Text__p2-sc-1lu7urs-10 Span-sc-1ybanni-0 Text__span-sc-1lu7urs-12 Text-sc-1lu7urs-13 jZCxUn"
|
||||
location = item("p",location_class,0)
|
||||
ar_location = finder(results,location,LOCATION_CLEANUP=1)
|
||||
|
||||
company_class = "P-sc-hyu5hk-0 Text__p2-sc-1lu7urs-10 Span-sc-1ybanni-0 Text__span-sc-1lu7urs-12 Text-sc-1lu7urs-13 jZCxUn"
|
||||
company = item("p",company_class,3)
|
||||
ar_company = finder(results,company,DEFAULT=1)
|
||||
|
||||
title = item("span","Span-sc-1ybanni-0 Text__span-sc-1lu7urs-12 Text-sc-1lu7urs-13 VacancyItem___StyledText2-sc-iugtv6-5 iaJYDR jlFpCz dMwMcR",0)
|
||||
ar_title = finder(results,title,DEFAULT=1)
|
||||
|
||||
date = item("span","Span-sc-1ybanni-0 Text__span-sc-1lu7urs-12 Text-sc-1lu7urs-13 krGudM hUhFmL",0)
|
||||
ar_date = finder(results,date,CLEANDATE=1)
|
||||
|
||||
link = item("a","Link__ExtendedRR6Link-sc-czsz28-1 khAvCu Link-sc-czsz28-2 VacancyLink___StyledLink-sc-ufp08j-0 dXKwhi dDgwgk",0)
|
||||
ar_link = finder(results,link,LINK=1,BASEURL="https://jobs.ch")
|
||||
|
||||
tag = entry.tag#get from config
|
||||
return arrayToClass(ar_title,ar_company,ar_location,ar_date,ar_link,tag)
|
||||
|
||||
def next_url_jobs_ch(url,session,baseurl):
|
||||
next_link_str = ''
|
||||
if(session == 0):
|
||||
@ -156,7 +109,7 @@ def next_url_jobagent(base_url,session,c):#depreacted will be removed in the fut
|
||||
for i2 in next_url_names:
|
||||
striped_string = i2.text.strip()
|
||||
log(i2.text.strip(),"stripped:",striped_string)
|
||||
log("Printable characters?",striped_string.isprintable())
|
||||
# print("Printable characters?",striped_string.isprintable())
|
||||
if (striped_string) == "Nächste Seite":
|
||||
log(i2)
|
||||
next_url = i2.get("href")
|
||||
@ -170,34 +123,29 @@ def next_url_jobagent(base_url,session,c):#depreacted will be removed in the fut
|
||||
|
||||
def scrap_jobagent(url,entry,session):
|
||||
jobs = []
|
||||
log("[scrap_jobagent],url",url)
|
||||
if(session == 0 or session == -1):
|
||||
log("session not sucessful transmitted ",session)
|
||||
log("in scrap jobs,url",url)
|
||||
if(session == 0):
|
||||
with requests.Session() as session:
|
||||
page = session.get(url)
|
||||
log(page)
|
||||
else:
|
||||
page = session.get(url)
|
||||
page = session.get(url)
|
||||
log("[scrap_jobagent]page:",page)
|
||||
log(page)
|
||||
soup = BeautifulSoup(page.content,"html.parser")
|
||||
print(soup.prettify())
|
||||
print(session.headers)
|
||||
#print(soup.prettify())
|
||||
|
||||
results = soup.find_all("li",class_="item")
|
||||
if not results:
|
||||
print("no li items found")
|
||||
log("page:",page)
|
||||
|
||||
title = item("span","jobtitle",0,"jobagent title")
|
||||
title = item("span","jobtitle",0)
|
||||
ar_title = finder(results,title)
|
||||
|
||||
location = item("span","location",0,"jobagent location")
|
||||
location = item("span","location",0)
|
||||
ar_location = finder(results,location,LOCATION_CLEANUP=1)
|
||||
|
||||
company = item("span","company",0,"jobagent company")
|
||||
company = item("span","company",0)
|
||||
ar_company = finder(results,company,DEFAULT=1)
|
||||
|
||||
link = item("a","title",0,"jobagent link")
|
||||
link = item("a","title",0)
|
||||
ar_link = finder(results,link,LINK=1)
|
||||
|
||||
date = item("span","pubdate",0)
|
||||
@ -206,4 +154,3 @@ def scrap_jobagent(url,entry,session):
|
||||
|
||||
return arrayToClass(ar_title,ar_company,ar_location,ar_date,ar_link,tag)
|
||||
|
||||
|
||||
|
@ -7,11 +7,6 @@ from login import *
|
||||
from time import sleep
|
||||
from db import *
|
||||
|
||||
DEBUG = True
|
||||
def log(*s):
|
||||
if DEBUG:
|
||||
print(s)
|
||||
|
||||
def choose_scraper(entry,session):
|
||||
if not session:
|
||||
session = requests.Session()
|
||||
@ -23,8 +18,6 @@ def choose_scraper(entry,session):
|
||||
runner(entry,session,scrap_jobagent,next_url_jobagent)
|
||||
case 'https://www.jobagent.ch':
|
||||
runner(entry,session,scrap_jobagent,next_url_jobagent)
|
||||
case 'https://ch.indeed.com':
|
||||
runner(entry,session,scrap_indeed_com,next_url_indeed_com)
|
||||
|
||||
def parse(**kwargs):
|
||||
session=0
|
||||
@ -39,11 +32,11 @@ def parse(**kwargs):
|
||||
# parser.add_argument("--help",help = "print help")
|
||||
parser.add_argument("--login",nargs=3,help = "login by specifing login and passwor by a given url",metavar=('USERNAME','PASSWORD','URL'))
|
||||
parser.add_argument("--createnwview",help = "Create a VIEW for the Region Nordwest Schweiz",action="store_true")
|
||||
parser.add_argument("-VC","--ValidationCheck",help = "Check if links are still valid, if not remove them",action="store_true")
|
||||
args = parser.parse_args()
|
||||
|
||||
if args.test:
|
||||
addFineFilter("../db/sqlite3.db","filters","testfilterentry")
|
||||
session = makeSession(sys.argv[args.test])
|
||||
choose_scraper(arg.test,session)
|
||||
if args.importregiondb:
|
||||
importdb("../db/sqlite3.db","../db/Cantons.db","Cantons")
|
||||
if args.initdb:
|
||||
@ -58,8 +51,7 @@ def parse(**kwargs):
|
||||
login_loop(args.config,False,worker)
|
||||
if args.createnwview:
|
||||
createnwview("../db/sqlite3.db")
|
||||
if args.ValidationCheck:
|
||||
isStillValid("../db/sqlite3.db")
|
||||
|
||||
if len(kwargs)>0:
|
||||
print("no sysargs fiven, running as a module")
|
||||
vconfig = kwargs.get('config')
|
||||
@ -76,65 +68,33 @@ def login_loop(config_file,gui,worker):
|
||||
ret_login = 0
|
||||
session = 0
|
||||
while (ret != 0):
|
||||
if gui:
|
||||
worker.dialog_rejected = False
|
||||
ret = entry2 = config.readConfig(config_file,gui,worker)
|
||||
print(entry2)
|
||||
if(ret != 0 and ret_login != 1):
|
||||
if(entry2.loginurl != 'NONE'):
|
||||
session = -1
|
||||
log("[pre while] worker.dialog_rejected = ",worker.dialog_rejected)
|
||||
worker.dialog_rejected = False
|
||||
while (session == -1 and worker.dialog_rejected == False):
|
||||
log("worker.dialog_rejected = ",worker.dialog_rejected)
|
||||
session = login(entry2)
|
||||
ret = entry2 = config.readConfig(config_file,gui,worker)
|
||||
print(entry2)
|
||||
if(ret != 0 and ret_login != 1):
|
||||
if(entry2.loginurl != 'NONE'):
|
||||
session = -1
|
||||
while session == -1:
|
||||
session = login(entry2)
|
||||
if session == -1:
|
||||
ret_login = entry2.input_pw(gui,entry2.user,worker)
|
||||
if worker.dialog_rejected == False:
|
||||
choose_scraper(entry2,session)
|
||||
if not gui:
|
||||
ret = entry2 = config.readConfig(config_file,gui,worker)
|
||||
#print(entry2)
|
||||
if(ret != 0 and ret_login != 1):
|
||||
if(entry2.loginurl != 'NONE'):
|
||||
session = -1
|
||||
while (session == -1):
|
||||
session = login(entry2)
|
||||
if session == -1:
|
||||
ret_login = entry2.input_pw(gui,entry2.user,worker)
|
||||
log("[login_loop] session:",session)
|
||||
choose_scraper(entry2,session)
|
||||
choose_scraper(entry2,session)
|
||||
|
||||
def runner(entry,session,scrap_func,next_url_func):
|
||||
i=0
|
||||
b_url = entry.scrapurl
|
||||
while b_url != 0 and i<50:
|
||||
sleep(0.5)
|
||||
sleep(0.3)
|
||||
if b_url:
|
||||
domain = extractDomain(b_url)
|
||||
print(domain)
|
||||
if domain == 'https://www.jobagent.ch' or domain == 'https://software-job.ch':
|
||||
jobs = scrap_func(b_url,entry,session)
|
||||
if jobs:
|
||||
writedb(jobs)
|
||||
else:
|
||||
print("nothing found on this page")
|
||||
writedb(jobs)
|
||||
b_url = next_url_func(b_url,session,0)
|
||||
elif domain == 'https://www.jobs.ch':
|
||||
jobs = scrap_func(b_url,entry,session)
|
||||
if jobs:
|
||||
writedb(jobs)
|
||||
else:
|
||||
print("nothing found on this page")
|
||||
writedb(jobs)
|
||||
b_url = next_url_func(b_url,session,"https://www.jobs.ch")
|
||||
elif domain == 'https://ch.indeed.com':
|
||||
jobs = scrap_func(b_url,entry,session)
|
||||
if jobs:
|
||||
writedb(jobs)
|
||||
else:
|
||||
print("nothing found on this page")
|
||||
b_url = next_url_func(b_url,session,domain)
|
||||
|
||||
|
||||
|
||||
if b_url != 0:
|
||||
print("main:" + b_url)
|
||||
|
28
makefile
28
makefile
@ -1,28 +0,0 @@
|
||||
#Works but the resulting bundle is very large
|
||||
#Single file version:
|
||||
#pyinstaller lib/gui.py --onefile --add-data ./db/:./db
|
||||
pypath := /home/ccppi2/.wine/drive_c/users/ccppi2/AppData/Local/Programs/Python/Python312/
|
||||
all:gui cli datafiles
|
||||
|
||||
gui:
|
||||
pyinstaller lib/gui.py
|
||||
make datafiles
|
||||
cli:
|
||||
pyinstaller lib/main.py
|
||||
make datafiles
|
||||
datafiles:
|
||||
make db-dir
|
||||
cp ./db/Cantons.db dist/db/
|
||||
cp ./db/sqlite3.db dist/db/
|
||||
cp ./lib/conf dist/gui/
|
||||
db-dir:
|
||||
mkdir -p dist/db
|
||||
wine-build:
|
||||
wine64 $(pypath)/python.exe $(pypath)/Lib/site-packages/pyinstaller lib/gui.py
|
||||
wine-nukita:
|
||||
wine64 nuitka --standalone --enable-plugin=pyside6 --include-qt-plugins=sqldrivers --output-dir=dist/ lib/gui.py
|
||||
make datafiles
|
||||
wine-nuitka-zip:
|
||||
cd dist/; zip -r wine-nuitka/win64-nuitka.zip db/ gui.dist/
|
||||
clean:
|
||||
rm dist/db -r
|
@ -1,9 +1,5 @@
|
||||
beautifulsoup4==4.12.3
|
||||
httplib2==0.22.0
|
||||
mmh3==4.1.0
|
||||
#PySide6==6.7.1
|
||||
#PySide6==6.7.2
|
||||
#PySide6.egg==info
|
||||
#PySide6_Addons==6.7.1
|
||||
#PySide6_Essentials==6.7.1
|
||||
Requests==2.32.3
|
||||
numpy==1.26.4
|
||||
Requests==2.31.0
|
||||
pyside6
|
||||
|
Loading…
Reference in New Issue
Block a user