Compare commits


No commits in common. "threading" and "master" have entirely different histories.

19 changed files with 144 additions and 922 deletions

.gitignore

@ -4,5 +4,3 @@ output/
*.db
*.csv
__pycache__
dist/
build/


@ -1,31 +0,0 @@
# Warning!!
If you run the sync too often, it is very likely that your IP gets banned as a bot. It is wise to use a VPN or a proxy, or just sync once or twice a day.
# Dependencies
Install the packages from requirements.txt.
We depend on Firefox as a workaround for Cloudflare's bot protection; the scraper works by reusing (stealing) cookies from the browser.
# First time usage
1. cd lib/
### Set up the database
2. python main.py --createdb
3. python main.py --importregiondb
# Config
edit lib/conf
# Main scripts
- cd lib/
For the command line, use:
- python main.py --help
For the GUI, run:
- python gui.py
# Windows
There is a Windows build at https://5ccppi.org:3000/ccppi/job-scrapper/src/branch/threading/dist/wine-nuitka
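
For orientation, each section of lib/conf pairs a SCRAPURL with optional login credentials (USER, PW, LOGINURL) and a TAG; the conf hunks further down in this diff show the real entries. The project reads the file through its own config.readConfig() helper, so the following is only a minimal sketch, assuming the file parses as plain INI with Python's standard configparser:

```python
# Minimal sketch: list the configured scrape targets from lib/conf.
# Assumes plain INI syntax; the project itself uses config.readConfig().
import configparser

cfg = configparser.ConfigParser()
cfg.read("lib/conf")
for section in cfg.sections():
    print(section, "->", cfg[section].get("SCRAPURL"), "| tag:", cfg[section].get("TAG"))
```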

Binary file not shown.


@ -1,44 +0,0 @@
# -*- mode: python ; coding: utf-8 -*-
a = Analysis(
['lib\\gui.py'],
pathex=[],
binaries=[],
datas=[],
hiddenimports=[],
hookspath=[],
hooksconfig={},
runtime_hooks=[],
excludes=[],
noarchive=False,
optimize=0,
)
pyz = PYZ(a.pure)
exe = EXE(
pyz,
a.scripts,
[],
exclude_binaries=True,
name='gui',
debug=False,
bootloader_ignore_signals=False,
strip=False,
upx=True,
console=True,
disable_windowed_traceback=False,
argv_emulation=False,
target_arch=None,
codesign_identity=None,
entitlements_file=None,
)
coll = COLLECT(
exe,
a.binaries,
a.datas,
strip=False,
upx=True,
upx_exclude=[],
name='gui',
)


@ -1,26 +1,6 @@
[python_qt_indeed]
USER = NONE
PW = NONE
LOGINURL = NONE
SCRAPURL = https://ch.indeed.com/jobs?q=python+qt&
TAG = Informatiker, Python, qt
[quereinsteiger_indeed]
USER = NONE
PW = NONE
LOGINURL = NONE
SCRAPURL = https://ch.indeed.com/jobs?q=quereinsteiger+it
TAG = Informatiker, Quereinsteiger
[jobs.ch_seilbahn]
USER = NONE
PW = NONE
LOGINURL = NONE
SCRAPURL = https://www.jobs.ch/en/vacancies/?term=seilbahn
TAG = Seilbahn
[jobagent.ch]
USER = j.wyss@kolabnow.ch
USER = test@gmx.ch
PW = ASK
LOGINURL = https://www.jobagent.ch/user/login
SCRAPURL = https://www.jobagent.ch/search?terms=Automatiker&lra=0&as=0
@ -50,7 +30,7 @@ TAG = Informatiker,Linux
[jobagent.ch-2]
USER = j.wyss@kolabnow.ch
USER = test@gmx.ch
PW = ASK
LOGINURL = https://www.jobagent.ch/user/login
SCRAPURL = https://www.jobagent.ch/search?terms=Informatiker&lra=0&as=0


@ -1,55 +0,0 @@
[jobagent.ch]
USER = test@gmx.ch
PW = ASK
LOGINURL = https://www.jobagent.ch/user/login
SCRAPURL = https://www.jobagent.ch/search?terms=Automatiker&lra=0&as=0
TAG = Automatiker
[software-job.ch-application-engineer]
USER = NONE
PW = NONE
LOGINURL = NONE
SCRAPURL = https://software-job.ch/application-engineer
TAG = Informatiker
[software-job.ch]
USER = NONE
PW = NONE
LOGINURL = NONE
SCRAPURL = https://software-job.ch/python-entwicklung
TAG = Informatiker,Python
[jobs.ch_linux]
USER = NONE
PW = NONE
LOGINURL = NONE
SCRAPURL = https://www.jobs.ch/en/vacancies/?term=linux
TAG = Informatiker,Linux
[jobagent.ch-2]
USER = test@gmx.ch
PW = ASK
LOGINURL = https://www.jobagent.ch/user/login
SCRAPURL = https://www.jobagent.ch/search?terms=Informatiker&lra=0&as=0
TAG = Informatiker
[jobs.ch]
USER = NONE
PW = NONE
LOGINURL = NONE
SCRAPURL= https://www.jobs.ch/en/vacancies/?term=automatiker
TAG = Automatiker
[jobs.ch_informatiker]
USER = NONE
PW = NONE
LOGINURL = NONE
SCRAPURL= https://www.jobs.ch/en/vacancies/?term=informatiker
TAG = Informatiker
#https://www.jobagent.ch/search?terms=Automatiker&workload=60-100&lra=0&as=0


@ -18,7 +18,7 @@ class Entry:
if self.gui:
worker.messageContent = self.scrapurl
worker.dialog_closed=False
worker.pwprompt.emit() #signal to mainthread run showDialog and wait for close
worker.pwprompt.emit() #signal to mainthread
while not worker.dialog_closed:
time.sleep(1)
pass


@ -1,5 +1,3 @@
import re
import datetime
def DateCHToUS(date):
#01.02.2010 --> 2010-02-01
day=""
@ -14,10 +12,3 @@ def DateCHToUS(date):
newdate = year+"-"+month+"-"+day
return(newdate)
def indeed_date(date):
redate = re.match('\d+',date)
fixdate = today().strftime("%Y/%m%d") - timedelta(days=redate.group())
print("date: today")
return fixdate
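
The removed indeed_date() above is only a sketch: today() is never imported, strftime() returns a string that cannot have a timedelta subtracted from it, and redate.group() is also a string. A self-contained version of the same idea (pull the leading number of days out of the text and subtract it from today's date) might look like the following; the function name is kept from the original, everything else is an assumption about the intent:

```python
import re
from datetime import date, timedelta

def indeed_date(datestr):
    # "3 days ago" / "vor 3 Tagen" -> ISO date string; fall back like finder() does.
    match = re.search(r"\d+", datestr)
    if match is None:
        return "NOTFound"
    days = int(match.group())
    return (date.today() - timedelta(days=days)).strftime("%Y-%m-%d")
```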


@ -1,8 +1,6 @@
import sqlite3
import mmh3
import sys
#import requests
import httplib2
DEBUG = False
def log(*s):
@ -12,7 +10,7 @@ def initdb(file):
with sqlite3.connect(file) as connection:
print("db connection", connection.total_changes)
cursor = connection.cursor()
cursor.execute("CREATE TABLE jobs (star INT,tag INT ,title TEXT, location TEXT, company TEXT,link TEXT,pubdate TEXT,hash INT,viewed INT)")
cursor.execute("CREATE TABLE jobs (star TEXT,tag INT ,title TEXT, location TEXT, company TEXT,link TEXT,pubdate TEXT,hash INT)")
sys.exit()
def rmdb(file,table):
with sqlite3.connect(file) as connection:
@ -48,7 +46,7 @@ def importdb(file,importdb,table):
def createnwview(file):
with sqlite3.connect(file) as connection:
cmd_create_nw_table = f"""CREATE VIEW IF NOT EXISTS "Nordwest-SCHWEIZ" AS SELECT * FROM jobs as b
cmd_create_nw_table = f"""CREATE VIEW "Nordwest-SCHWEIZ" AS SELECT * FROM jobs as b
WHERE EXISTS
(SELECT GDENAME FROM Cantons as w
where w.GDEKT = 'ZH' AND
@ -64,25 +62,9 @@ def createnwview(file):
cursor = connection.cursor()
cursor.execute(cmd_create_nw_table)
print("db connection",connection.total_changes)
createFilterTable(file)
def createFilterTable(file):
with sqlite3.connect(file,timeout=10) as connection:
cmd_create_filter_table = f"""CREATE TABLE IF NOT EXISTS filters(cmd TEXT);"""
cursor = connection.cursor()
cursor.execute(cmd_create_filter_table)
print("db connection:",connection.total_changes)
def addFineFilter(file,table,filterstr):
with sqlite3.connect(file,timeout=10) as connection:
cmd_createFineFilter = f"""INSERT INTO {table}(cmd) VALUES(?);"""
cmd_checkIfExists = f"""SELECT * FROM {table} WHERE cmd = ?"""
cursor = connection.cursor()
if cursor.execute(cmd_checkIfExists,(filterstr,)).fetchone() == None:
cursor.execute(cmd_createFineFilter,(filterstr,))
def writedb(jobs):
with sqlite3.connect("../db/sqlite3.db",timeout=10) as connection:
connection.execute("pragma journal_mode=wal")
with sqlite3.connect("../db/sqlite3.db") as connection:
print("db connection", connection.total_changes)
cursor = connection.cursor()
# cursor.execute("CREATE TABLE jobs (title TEXT, location TEXT, company TEXT,link TEXT,hash INT)")
@ -91,67 +73,6 @@ def writedb(jobs):
log(hash1);
if(cursor.execute("SELECT * FROM jobs WHERE hash = ?",(hash1,)).fetchone() != None):
log("Hash already exist")
elif(cursor.execute("SELECT * FROM jobs where link = ?",(job.link,)).fetchone() != None):
log("link already exist")
else:
log("NEW_ENTRY")
cursor.execute("INSERT INTO jobs (star,tag,title,company,location,link,pubdate,hash,viewed) VALUES (?,?,?,?,?,?,?,?,?)",(job.starred,job.tag,job.title,job.company,job.location,job.link,job.date,hash1,0))
def viewedEntry(hash1):
viewedEntry.list = []
viewedEntry.list.append(hash1)
print("viewedEntry.list:",viewedEntry.list)
if len(viewedEntry.list) >= 5:
with sqlite3.connect("../db/sqlite3.db",timeout=10) as connection:
cursor = connection.cursor()
for x in viewedEntry.list:
print("hash:",x)
cursor.execute("UPDATE jobs SET viewed = '1' WHERE hash = ?",(x,))
viewedEntry.list = []
print("modified rows: ",cursor.rowcount)
def isStillValid(file,skiprows):
rows = [0,0,0]
with sqlite3.connect(file,timeout=10) as connection:
cmd_read_chunk = f"""SELECT link from jobs;"""
connection.execute("pragma journal_mode=wal")
cursor = connection.cursor()
cursor.execute(cmd_read_chunk)
#cursor.fetchmany(skiprows)#drop rows
while(len(rows)!=0):
isLink = True
rows = []
rows = cursor.fetchmany(256)
h = httplib2.Http()
for row in rows:
print("row: ",row[0])
try:
(resp,content) = h.request(row[0], 'HEAD')
except IOError as e:
print("link is no valid URL so remove item")
print("error: ",e)
isLink = False
except httplib2.error.RelativeURIError:
isLink = False
print("RelativeURIError: Not a valid link")
#rm_cursor = connection.cursor()
#rm_itm = rm_cursor.execute(f"""DELETE from jobs WHERE link = ?;""",(row[0],))
finally:
try:
resp
except NameError:
print("Not a valid link")
rm_cursor = connection.cursor()
rm_itm = rm_cursor.execute(f"""DELETE from jobs WHERE link = ? AND star != 1;""",(row[0],))
else:
if resp.status >= 400 or isLink == False:
print("link is no more valid, remove item")
rm_cursor = connection.cursor()
rm_itm = rm_cursor.execute("DELETE from jobs WHERE link = ? AND star != 1;",(row[0],))
print ("Deletion resultet in: ", rm_itm)
print("result of commit: ", connection.commit())
return 0
print("NEW_ENTRY")
cursor.execute("INSERT INTO jobs (star,tag,title,company,location,link,pubdate,hash) VALUES (?,?,?,?,?,?,?,?)",(job.starred,job.tag,job.title,job.company,job.location,job.link,job.date,hash1))


@ -1,61 +1,20 @@
from PySide6.QtWidgets import QApplication, QWidget, QMainWindow, QTableWidget, QVBoxLayout, QTableWidgetItem, QPushButton, QHBoxLayout, QTableView, QLineEdit, QDialog, QLabel, QTextEdit, QCheckBox, QComboBox, QStyledItemDelegate
from PySide6.QtWidgets import QApplication, QWidget, QMainWindow, QTableWidget, QVBoxLayout, QTableWidgetItem, QPushButton, QHBoxLayout, QTableView, QLineEdit, QDialog, QLabel, QTextEdit, QCheckBox, QComboBox
from PySide6.QtWebEngineWidgets import QWebEngineView
from PySide6.QtCore import QUrl,Qt,QSortFilterProxyModel, qDebug, QSize,QObject,QThread,Signal,QAbstractTableModel, Slot
from PySide6.QtCore import QUrl,Qt,QSortFilterProxyModel, qDebug, QSize,QObject,QThread,Signal
from PySide6.QtSql import QSqlDatabase, QSqlTableModel, QSqlQueryModel, QSqlQuery
from PySide6 import QtGui
from db import addFineFilter
import sysparse
import sys
import db as db
from qsqlmod import SqlQueryModel_editable
DEBUG = True
def log(*s):
if DEBUG:
print(s)
DBFILE = "../db/sqlite3.db"
Cantons = ["AG","ZH","BE","SG","SO"]
class ColorDelegate(QStyledItemDelegate):
currentRow = 0
starred = 0
def __init__(self,main):
super().__init__()
print("initialice overload init of ColorDelegate")
self.main = main
def initStyleOption(self,option,index):
super().initStyleOption(option,index)
data = index.data()
column = index.column()
flag_viewed = self.main.sel_model.model().index(index.row(),8).data()
try:
flag_starred = int(self.main.sel_model.model().index(index.row(),0).data())
except ValueError:
print("probably empty string asign zero")
flag_starred = 0
if flag_starred == 1:
option.backgroundBrush = QtGui.QColor("red")
elif flag_viewed != 1:
option.backgroundBrush = QtGui.QColor("green")
else:
option.backgroundBrush = QtGui.QColor("white")
class ValidationWorker(QObject):
finished = Signal()
def run(self):
ret = db.isStillValid(DBFILE,0)
if ret == 0:
self.finished.emit()
class Worker(QObject):
pwprompt = Signal()
pw = Signal(str)
finished = Signal()
dialog_closed = True
dialog_rejected = False
password = ['empty']
def run(self):
@ -70,11 +29,10 @@ class MainWindow(QMainWindow):
self.w = None
self.cmdCanton = ''
self.initcmd = 'SELECT * FROM jobs as b '
self.initcmd = 'SELECT * FROM jobs as b'
self.customcmd = ''
self.cmd = ''
self.setWindowTitle("DB_Inspector")
self.isAWhere = False
self.layout = QVBoxLayout()
self.layout2 = QHBoxLayout()
@ -85,46 +43,26 @@ class MainWindow(QMainWindow):
self.browser = QWebEngineView()
self.browser.setUrl(QUrl("https://jobagent.ch"))
#self.EditQuery = QLineEdit()
self.CEditQuery = QComboBox()
self.CEditQuery.setEditable(True)
self.CEditQuery.setInsertPolicy(QComboBox.InsertAtBottom)
self.CEditQuery.setDuplicatesEnabled(True)
self.queryFineFilers()
self.EditQuery = self.CEditQuery.lineEdit()
self.EditQuery = QLineEdit()
self.EditQuery.returnPressed.connect(self.queryEditLine)
editables = {0 : ("UPDATE jobs SET star = '{}' WHERE hash = '{}'",7)}
self.model = SqlQueryModel_editable(editables)
self.model = QSqlTableModel(self)
self.model.setTable("jobs")
self.model.select()
self.view = QTableView()
self.view.setModel(self.model)
self.proxymodel2 = QSortFilterProxyModel(self)
self.proxymodel2.setSourceModel(self.model)
self.view.setModel(self.proxymodel2)
self.setProxyViewSettings()
self.delegate = ColorDelegate(self)
self.view.setItemDelegate(self.delegate)
self.setProxyViewSettings()
self.view.activated.connect(self.cell_clicked)
self.view.clicked.connect(self.cell_clicked)
self.sel_model = self.view.selectionModel()
self.sel_model.selectionChanged.connect(self.cell_clicked)
self.PValidate = QPushButton("links valid")
self.PValidate.clicked.connect(self.runValidation)
self.PsyncDB = QPushButton("Perform sync acording to config file")
self.PsyncDB.clicked.connect(self.runWorker)
self.layout.addWidget(self.view)
self.layout.addWidget(self.b_canton)
self.layout.addWidget(self.CEditQuery)
self.layout.addWidget(self.EditQuery)
self.layout.addWidget(self.PsyncDB)
self.layout.addWidget(self.PValidate)
self.layout2.addLayout(self.layout)
self.layout2.addWidget(self.browser)
@ -138,9 +76,7 @@ class MainWindow(QMainWindow):
self.view.setColumnWidth(5,10)
self.view.hideColumn(7)
self.view.setSortingEnabled(True)
#self.sel_model = self.view.selectionModel()
#self.sel_model.selectionChanged.connect(self.cell_clicked)
self.view.clicked.connect(self.cell_clicked)
def runWorker(self):
self.thread = QThread()
self.worker = Worker()
@ -153,25 +89,9 @@ class MainWindow(QMainWindow):
self.worker.pwprompt.connect(self.showDialog)
self.worker.finished.connect(self.thread.quit)
self.worker.finished.connect(self.enable_PsyncDB)
self.thread.start()
def runValidation(self):
self.validationThread = QThread()
self.validationWorker = ValidationWorker()
self.validationWorker.moveToThread(self.validationThread)
self.validationThread.started.connect(self.disableValidationButton)
self.validationThread.started.connect(self.validationWorker.run)
self.validationThread.start()
self.validationWorker.finished.connect(self.validationThread.quit)
self.validationWorker.finished.connect(self.enableValidationButton)
def enableValidationButton(self):
self.PValidate.setEnabled(True)
def disableValidationButton(self):
self.PValidate.setEnabled(False)
def disable_PsyncDB(self):
self.PsyncDB.setText("Sync Running...")
self.PsyncDB.setEnabled(False)
@ -182,19 +102,17 @@ class MainWindow(QMainWindow):
w = PWPrompt()
w.set_MSG(self.worker.messageContent)
ret = w.exec()
if ret == QDialog.Rejected:
self.worker.dialog_rejected = True
log("[gui] qdialog.rejected set to TRUE")
self.pw = w.pw
self.worker.password = w.pw
log("showDialog,self.pw:",self.pw)
print("showDialog,self.pw:",self.pw)
self.worker.dialog_closed=True
if ret == QDialog.Rejected:
return 1
def showQueryWindow(self,checked):
if self.w is None:
self.w = QueryWindow()
self.w.show()
self.w.queryFineFilers()
def filter_canton(self,canton):
if canton != "ALL":
self.cmdCanton = f"""
@ -205,25 +123,14 @@ class MainWindow(QMainWindow):
print("cmd canton:", self.cmdCanton)
else:
self.cmdCanton = ''
print("disable fil§.ter")
self.cmdCanton = ' '
print("disable filter")
# self.customSQL(self.cmd)
def queryEditLine(self):
self.cmd = self.EditQuery.text()
if self.customcmd or self.cmd:
if self.cmdCanton:
self.isAWhere = True
connectingstring = " AND "
else:
self.isAWhere = False
connectingstring = " WHERE "
else:
connectingstring = ' '
print(self.initcmd + self.cmdCanton +connectingstring +self.customcmd + self.cmd)
self.customSQL(self.initcmd+ self.cmdCanton + connectingstring + self.customcmd + self.cmd)
if self.customcmd or self.cmd:
addFineFilter("../db/sqlite3.db","filters",self.customcmd + self.cmd)
print(self.initcmd + self.cmdCanton +self.customcmd + self.cmd)
self.customSQL(self.initcmd+ self.cmdCanton + self.customcmd + self.cmd)
def cell_clicked(self):
x = self.view.selectionModel().currentIndex().row()
@ -231,37 +138,16 @@ class MainWindow(QMainWindow):
data = self.view.model().index(x,5).data()
print("cell clicked:",x," / ",y, "-->",data)
self.browser.setUrl(QUrl(data))
hash1 = self.view.model().index(x,7).data()
print("hash of selected: ",hash1)
#db.viewedEntry(hash1)
self.view.selectionModel().currentIndex()
self.model.setData({0,8},hash1,role=1001)
self.view.updateGeometries()
self.view.viewport().repaint()
def queryFineFilers(self):
FineFilterItems = self.getFineFilters()
for item in FineFilterItems:
self.CEditQuery.addItem(item)
def getFineFilters(self):
item = []
statement = f"""Select cmd FROM filters;"""
query = QSqlQuery(statement)
while query.next():
item.append(query.value(0))
return item
def customSQL(self,cmd):
print("Run SQL Query",cmd)
#self.model.setTable("")
self.model.setTable("")
self.model.setQuery(cmd +" ;")
#self.model.setTable("jobs")
while (self.model.canFetchMore()):
print("fetch iterations++")
self.model.fetchMore()
self.view.show()
self.proxymodel2 = QSortFilterProxyModel(self)
self.proxymodel2.setSourceModel(self.model)
self.view.setModel(self.proxymodel2)
self.setProxyViewSettings()
class PWPrompt(QDialog):
def __init__(self):
super().__init__()
@ -309,7 +195,6 @@ class QueryWindow(QWidget):
self.CFilter.addItem("ALL")
for Canton in Cantons:
self.CFilter.addItem(Canton)
self.CFilter.currentTextChanged.connect(window.filter_canton)
self.CFilter.currentTextChanged.connect(self.setTFilter)
@ -331,30 +216,25 @@ class QueryWindow(QWidget):
items = self.getViews()
for item in items:
self.CShowViews.addItem(item)
self.CShowViews.currentTextChanged.connect(self.setView)
self.CShowFineFilters = QComboBox()
self.queryFineFilers()
self.CShowFineFilters.currentTextChanged.connect(window.EditQuery.setText)
self.PApplyView = QCheckBox()
self.PApplyView.setText("Apply View")
self.PApplyView.clicked.connect(self.setView)
self.vrLayout = QVBoxLayout()
self.vrLayout.addWidget(self.LFilter)
self.vrLayout.addWidget(self.CFilter)
self.vrLayout.addWidget(self.LShowViews)
self.vrLayout.addWidget(self.CShowViews)
self.vrLayout.addWidget(self.PApplyView)
self.vrLayout.addWidget(self.CShowFineFilters)
self.WvrLayout = QWidget()
self.WvrLayout.setLayout(self.vrLayout)
self.WvrLayout.setMaximumSize(QSize(200,200))
self.hLayout = QHBoxLayout()
self.hLayout.addLayout(self.vLayout)
self.hLayout.addWidget(self.WvrLayout)
@ -364,18 +244,6 @@ class QueryWindow(QWidget):
self.EditQuery.setText(window.customcmd)
print("Comboshowview:",self.CShowViews.currentText())
def queryFineFilers(self):
FineFilterItems = self.getFineFilters()
for item in FineFilterItems:
self.CShowFineFilters.addItem(item)
def getFineFilters(self):
item = []
statement = f"""Select cmd FROM filters;"""
query = QSqlQuery(statement)
while query.next():
item.append(query.value(0))
return item
def getViews(self):
item = []
@ -392,10 +260,10 @@ class QueryWindow(QWidget):
if self.PApplyView.isChecked():
self.view = self.CShowViews.currentText()
print("Selected View:",self.view)
window.initcmd = f"""SELECT * FROM '{self.view}' """
window.initcmd = f"""SELECT * FROM '{self.view}'"""
print("window.initcmd:", window.initcmd)
else:
window.initcmd = f"""SELECT * FROM jobs as b """
window.initcmd = f"""SELECT * FROM jobs as b """
print("View unchecked")
self.TInitCmd.setText(window.initcmd)


@ -3,18 +3,9 @@ import requests
from bs4 import BeautifulSoup
from enum import Enum
import re
import shutil
from dateconverter import *
from datetime import datetime
import os
import sqlite3
import webbrowser
import mozilla
DEBUG = False
number = ['0','1','2','3','4','5','6','7','8','9']
def log(*s):
if DEBUG:
@ -44,14 +35,10 @@ months = [
('November','11'),
('December','12')]
class item():
def __init__(self,tag,tag_content,index,name=None):
def __init__(self,tag,tag_content,index):
self.tag = tag
self.tag_content = tag_content
self.index = index
if name is not None:
self.name = name
else:
self.name = "not defined"
class job():
def __init__(self,title,profession,company,location,date,description,link,tag,starred):
@ -64,57 +51,37 @@ class job():
self.link = link
self.tag = tag
self.starred = starred
def __str__(self):
return "%s| %s|%s|%s|%s|%s|%s" % (self.title,self.profession,self.company,self.location,self.date,self.description,self.link)
def finder(results,item,**modes):
GETCHILDREN = modes.get("GETCHILDREN",'')
ATTRS = modes.get('ATTRS',0)
LOCATION_CLEANUP = modes.get('LOCATION_CLEANUP',0)
LINK = modes.get('LINK',0)
SWAPDATE = modes.get('SWAPDATE',0)
CLEANDATE = modes.get('CLEANDATE',0)
BASEURL = modes.get('BASEURL','')
INDEEDDATE = modes.get('INDEEDDATE',0)
content = []
i = item.index
log("name",item.name)
log("Item tag: ",item.tag)
log("Modes:",modes)
log("tag_content: ",item.tag_content)
for entry in results:
if ATTRS==1:
result = entry.findAll(item.tag,attrs=item.tag_content)
log(item.tag_content)
else:
result = entry.findAll(item.tag,class_=item.tag_content)
log("found count results:",len(result))
if len(result)==0 and DEBUG == True:
log("len result: ",len(result))
for x in result:
log("No entry found for: ",item.name,item.tag,item.tag_content," -->", x)
input()
log("found:",len(result))
if result:
log("theres a result")
if i>(len(result)-1):
log("len:",len(result)-1,"i:",i)
log("index out of bounds fall back to the %d count",i)
# input("Press Enter..")
i=(len(result)-1)
result2 = result[i]
if GETCHILDREN != '':
found = False
for results in result:
child = results.find(GETCHILDREN)
log("[finder] search for '",GETCHILDREN,"' in: ",child)
if child != None and found == False:
log("CHILD text strip: ",child.text.strip())
found = True
content.append(child.text.strip())
if found == False:
log("[finder] No matching Child found: ",child)
content.append("CHILD_NOT_FOUND: " + GETCHILDREN)
elif LOCATION_CLEANUP==1:
if LOCATION_CLEANUP==1:
location = CleanLocation(result2.text.strip())
content.append(location)
elif LINK==1:
@ -125,20 +92,14 @@ def finder(results,item,**modes):
elif SWAPDATE==1:
content.append(DateCHToUS(result2.text.strip()))
elif CLEANDATE==1:
log("[finder] pre cleandate:",result2.text.strip)
content.append(jobs_ch_clean_date(result2.text.strip()))
elif INDEEDDATE==1:
log("[finder] pre indeeddate:",result2.text.strip)
content.append(indeedExtractDays(result2.text.strip()))
else:
log(result2)
content.append(result2.text.strip())
if not result:
if item.tag_content == "pubdate":
today = datetime.today().strftime('%Y-%m-%d')
if CLEANDATE:
today = datetime.today().strftime('%Y-%M-%D')
content.append(today)
else:
content.append("NOTFound")
content.append("NOTFound")
return content
@ -153,10 +114,10 @@ def arrayToClass(titles,companys,locations,dates,links,tag):
log("len:",len(titles))
for i, title in enumerate(titles):
jobs.append(job(title,"test_prof",companys[i],locations[i],dates[i],"test_desc",links[i],tag,0))
log("class job:",jobs[i])
log(jobs[i])
return jobs
else:
log("Something went wrong unequal length of data arrays: ",len(titles),len(companys),len(locations),len(dates))
print("Something went wrong unequal length of data arrays")
return 0
def jobs_ch_clean_date(date):
newdate=''
@ -203,25 +164,3 @@ def makeSession(url):
with requests.Session() as session:
page = session.get(url)
return session
def indeedExtractDays(datestr):
cleannumstr=''
cleannumint=-1
cleandate=''
foundchar=False
for a in datestr:
print(a)
if a in number and foundchar==False:
foundchar=True
cleannumstr+=a
elif a in number and foundchar == True:
cleannumstr+=a
elif a not in number and foundchar == True:
break
if cleannumstr != '':
cleannumint = int(cleannumstr)
today = int(datetime.utcnow().timestamp())
cleandate = today - cleannumint * 60 * 60 * 7 * 4
#print("int:",cleannumint,"today:",today,"cleandate:",datetime.fromtimestamp(cleandate).strftime('%Y-%m-%d'))
return datetime.fromtimestamp(cleandate).strftime('%Y-%m-%d')
return "NOTFound"


@ -1,5 +1,4 @@
import requests
from requests_html import HTMLSession
from helpers import *
def login(entry):
user = entry.user
@ -7,18 +6,17 @@ def login(entry):
loginurl = entry.loginurl
scrapurl = entry.scrapurl
with requests.Session() as session:
session.headers = {
headers = {
"Host": "www.jobagent.ch",
"User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:124.0) Gecko/20100101 Firefox/124.0",
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
"Accept-Language": "en-US,en;q=0.5",
"Accept-Encoding": "gzip, deflate, br",
# "Content-Type": "application/x-www-form-urlencoded",
# "Content-Length": "58",
"Content-Type": "application/x-www-form-urlencoded",
"Content-Length": "58",
"Origin": "https://www.jobagent.ch",
"Connection": "keep-alive",
"Referer": "https://jobagent.ch",
#"Cookie": "datadome=BbGio7V9YBqYELb~B2a7DqE9Zr5EWb315OypbcxGQOFKbhkJR48etFSooYwtnKF2sK5leCh7Q_0o6W5YMwl0qEH~Fw3BU0m~48MgrkuaxO3Z1s5MTqCnTZVW3PcQv7KM; _uc=ad_storage=granted:analytics_storage=granted; _gcl_au=1.1.1328234550.1724056973.1502078804.1724062620.1724062680; _ga=GA1.1.1840632117.1724056971; _ga_T0E2JNNRW2=GS1.1.1724062555.3.1.1724062683.0.1.2098134382; JSESSIONID=AB8CC398C2576A6A87C53A74CCD8F7F5; _pin_unauth=dWlkPU56Y3pabU14WW1JdFptTTFNeTAwTkdFMkxUbGlZV0V0TWprNVkyTXpZemd4WldNNA; _fbp=fb.1.1724056975123.543612652217076856; _clck=16bp9by%7C2%7Cfog%7C0%7C1692; _clsk=1o7y6b9%7C1724062683361%7C9%7C1%7Cu.clarity.ms%2Fcollect; _rm=ai53eXNzJTQwa29sYWJub3cuY2g6MTcyNjY1NDY4MTA0NDpTSEEyNTY6ZGRkMmZhYTRjZWY3MWZkZDU1M2VlMTI4ZjYzOGY1NmFiYmRkNjNiMmI3ZjE1NWRhNmU3YzcwZWU1NjQ2Mjc0Mw; _uetsid=0737af805e0711efbe7bdd027b00b063; _uetvid=0737b3005e0711efb7c7035382896421",
# "Connection": "keep-alive",
"Referer": "https://www.jobagent.ch/user/login",
# "Upgrade-Insecure-Requests": "1",
# "Sec-Fetch-Dest": "document",
# "Sec-Fetch-Mode": "navigate",
@ -26,35 +24,15 @@ def login(entry):
# "DNT": "1",
# "Sec-GPC": "1"
}
r = session.get(loginurl)
payload = {"redirectUrl":"","email":user,"password":pw}
resp = session.post(loginurl,data=payload)
resp = session.post(loginurl,data=payload,headers=headers)
print(payload)
checkBlockers(session,resp)
r = session.get(scrapurl)
print(session.headers)
print("response:",r)
return session
#solveCaptcha when :
#string "captcha" is in response
#search for <iframe
#get src tag
#open a webbrowser to solve the captcha
#somehow getting the cookie maype?
def solveCaptcha(session,resp):
print("response:",resp)
if "captcha" or "Enable JavaScript" in resp :
print("captcha link!! found:")
return 1
else:
return 0
def checkBlockers(session,resp):
print("response from login attempt",resp)
if resp:
print("response from login attempt",resp.url)
if resp.url == 'https://www.jobagent.ch/user/login?error':
print("Error on login")
return -1
solveCaptcha(session,resp)
r = session.get(scrapurl)
return session
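
One note on solveCaptcha() above: the test 'if "captcha" or "Enable JavaScript" in resp' always takes the true branch, because the non-empty literal "captcha" is truthy on its own. If the intent is to look for either marker in the response body, a corrected sketch (assuming resp is a requests.Response) would be:

```python
def solveCaptcha(session, resp):
    # Check both markers against the response text instead of a bare truthy literal.
    body = resp.text if hasattr(resp, "text") else str(resp)
    if "captcha" in body or "Enable JavaScript" in body:
        print("captcha link!! found:")
        return 1
    return 0
```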


@ -1,42 +0,0 @@
def urlToDomain(url):
pos = patternSearch(url,"https://")
urlCut = dropBeforePos(url,pos)
posDot = skipAfterChar(urlCut,'.') - 1
urlCut = dropBeforePos(urlCut,posDot)
posDot = skipAfterChar(urlCut,'/')
urlCut = dropAfterPos(urlCut,posDot)
print("url after cut dot:",urlCut)
return urlCut
def patternSearch(url,pattern):
x = 0
for a,i in enumerate(url):
print("i:",i)
if i == pattern[x]:
if x<len(pattern)-1:
x = x + 1
elif x==len(pattern)-1:
print("FULL PATTERN FOUND at pos :",a)
break
else:
x = 0
return a
def skipAfterChar(aString,char):
for a,i in enumerate(aString):
if i == char:
break
return a
def dropBeforePos(aString,pos):
aString2=''
pos+=1
if pos < len(aString):
for i in range(pos,len(aString)):
aString2 += aString[i]
return aString2
def dropAfterPos(aString,pos):
aString2=''
if pos < len(aString):
for i in range(0,pos):
aString2 += aString[i]
return aString2


@ -1,74 +0,0 @@
#access cookies from firefox:
#copy (because locked): cp .mozilla/firefox/imibizoh.default/cookies.sqlite cookies.sqlite
#Select value from moz_cookies where host like '%indeed%'
import webbrowser
import tempfile
import os
import sqlite3
import shutil
from time import sleep
import manipulateString as ms
DEBUG = True
def log(*s):
if DEBUG:
print(s)
def findDefaultProfile(path):
target = ''
dirlist = os.listdir(path)
for directory in dirlist:
posDot = ms.skipAfterChar(directory,'.')
stringParse = ms.dropBeforePos(directory,posDot)
log(stringParse)
if stringParse == "default":
target = directory
break;
if target == '':
return -1
else:
return target
def getCookiesFromBrowser(url,force=False):
DBFILE = "../db/sqlite3.db"
if os.name == 'posix':
homePath = os.path.expanduser('~')
cookiePath = homePath + "/.mozilla/firefox/" + findDefaultProfile(homePath + "/.mozilla/firefox/") + "/cookies.sqlite"
tmpPath = "/tmp/cookies.sqlite"
if os.name == 'nt':
appdata = os.getenv('APPDATA')
winCookiePath = appdata + "\\Mozilla\\Firefox\\Profiles\\" + findDefaultProfile(appdata + "\\Mozilla\\Firefox\\Profiles\\") + "cookies.sqlite"
winFirefoxPath = "C:\\Program Files\\Mozilla Firefox\\firefox.exe"
tmpPath = tempfile.gettempdir() + "\\cookies.sqlite"
tries=0
cookie = ''
rows = [0]
while(cookie == '' and tries < 2):
tries+=1
if os.name == 'posix':
shutil.copyfile(cookiePath,tmpPath)
elif os.name == 'nt':
shutil.copyfile(winCookiePath,tmpPath)#workaround for loked database
with sqlite3.connect(tmpPath) as connection:
cmd_read_cookies = f"""SELECT name,value FROM moz_cookies WHERE host like ?;"""
print(cmd_read_cookies)
cursor = connection.cursor()
cursor.execute(cmd_read_cookies,(ms.urlToDomain(url),))
while len(rows)!=0:
rows = cursor.fetchmany(25)
for row in rows:
print("row:",row)
cookie = cookie + row[0] + '=' + row[1]
cookie += ";"
print("Cookies:",cookie)
if cookie == '' and force == False:
if os.name == 'posix':
webbrowser.register("firefox",None,webbrowser.BackgroundBrowser("firefox"))
webbrowser.get('firefox').open(url)
elif os.name == 'nt':
webbrowser.register("firefox",None,webbrowser.BackgroundBrowser(winFirefoxPath))
webbrowser.get('firefox').open(url)
sleep(1)
return cookie
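
The comment block at the top of this file describes the approach: copy the locked cookies.sqlite out of the Firefox profile, read moz_cookies, and hand the assembled Cookie header to the scraper session. That is what scrap_indeed_com() does further down; a condensed usage sketch (the URL is only an example):

```python
import requests
import mozilla

url = "https://ch.indeed.com/jobs?q=python"
session = requests.Session()
session.headers.update({
    "User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:127.0) Gecko/20100101 Firefox/127.0",
    "Cookie": mozilla.getCookiesFromBrowser(url),  # opens Firefox once if no cookie is found
})
page = session.get(url)
```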


@ -1,82 +0,0 @@
#modifie the QSqlQueryModel to be editable and sets them to the database
from PySide6.QtSql import QSqlQueryModel, QSqlQuery
from PySide6.QtCore import Qt
#credits to :
#https://stackoverflow.com/questions/49752388/editable-qtableview-of-complex-sql-query
class SqlQueryModel_editable(QSqlQueryModel):
"""a subclass of QSqlQueryModel where individual columns can be defined as editable
"""
def __init__(self, editables):
"""editables should be a dict of format:
{INT editable_column_nr : (STR update query to be performed when changes are made on this column
INT model's column number for the filter-column (used in the where-clause),
)}
"""
super().__init__()
self.editables = editables
self.updatelist = []
def flags(self, index):
fl = QSqlQueryModel.flags(self, index)
if index.column() in self.editables:
fl |= Qt.ItemIsEditable
return fl
def setData(self, index, value, role=Qt.EditRole):
print("role: ",role)
if role == Qt.EditRole:
mycolumn = index.column()
if mycolumn in self.editables:
(query, filter_col) = self.editables[mycolumn]
filter_value = self.index(index.row(), filter_col).data()
q = QSqlQuery(query.format(value, filter_value))
result = q.exec_()
if result:
self.query().exec_()
# print("filter_value:",filter_value)
print("setdata query: ",query.format(value, filter_value))
print("index.row:",index.row(), "filter_col:",filter_col)
else:
print(self.query().lastError().text())
return result
elif role == 1001:
result = 0
self.updatelist.append(value)
print(self.updatelist)
if len(self.updatelist) >= 5:
for x in self.updatelist:
print("Atempt flaging view")
q = QSqlQuery("UPDATE jobs SET viewed = '1' WHERE hash = {}".format(x))
print("QSQLQuery: ", "UPDATE jobs SET viewed = '1' WHERE hash = {}".format(x))
result = q.exec_()
if result:
self.query().exec_()
else:
print("Error:", self.query().lastError().text())
return result
self.updatelist = []
return result
self.dataChanged.emit(index-100,index+100)
self.layoutChanged.emit()
return QSqlQueryModel.setData(self, index, value, role)
# view = QTableView()
#
# editables = {1 : ("UPDATE Manufacturers SET Country = '{}' WHERE Company = '{}'", 2)}
# model = SqlQueryModel_editable(editables)
# query = '''
# SELECT (comp.company || " " || cars.model) as Car,
# comp.Country,
# cars.company,
# (CASE WHEN cars.Year > 2000 THEN 'yes' ELSE 'no' END) as this_century
# from manufacturers comp left join cars
# on comp.company = cars.company
# '''
# q = QSqlQuery(query)
# model.setQuery(q)
# model.setFilter("cars.Company = 'VW'")
# view.setModel(model)
# view.hideColumn(2)
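
The commented block above appears to be carried over from the credited Stack Overflow answer; the instantiation actually used in gui.py (shown earlier in this diff) makes column 0 ("star") editable and keys the UPDATE on the hash value in column 7:

```python
from qsqlmod import SqlQueryModel_editable

# Column 0 ("star") becomes editable; updates are keyed on the hash in column 7.
editables = {0: ("UPDATE jobs SET star = '{}' WHERE hash = '{}'", 7)}
model = SqlQueryModel_editable(editables)
```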


@ -1,117 +1,70 @@
from helpers import *
from login import solveCaptcha
import mozilla
DEBUG = True
DEBUG = False
def log(*s):
if DEBUG:
print(s)
def scrap_indeed_com(url,entry,session):
moz_cookies = mozilla.getCookiesFromBrowser(url)
print("[scrap]cookies:", moz_cookies)
session.headers = {
"User-Agent" : "Mozilla/5.0 (X11; Linux x86_64; rv:127.0) Gecko/20100101 Firefox/127.0",
"Referer" : "https://ch.indeed.com/jobs?&from=searchOnHP",
"Cookie" : moz_cookies
}
def indeed_com(url,session):
jobs = []
log("in scrap jobs,url",url)
if(session == 0 or session == -1):
with requests.Session() as session:
page = session.get(url)
log(page)
else:
page = session.get(url)
log(page)
if solveCaptcha(session,page) == 1:
print("Cookie stealing unsuccesfull retry with force")
moz_cookies = mozilla.getCookiesFromBrowser(url,force=True)
soup = BeautifulSoup(page.content,"html.parser")
#print(soup.prettify())
results = soup.find_all("li",class_= 'css-5lfssm eu4oa1w0') #top level list element
location = item("div",{'data-testid':'text-location'},0,"indeed location")
ar_location = finder(results,location,ATTRS=1,LOCATION_CLEANUP=1)
company = item("span",{'data-testid':'company-name'},0,"indeed company")
ar_company = finder(results,company,ATTRS=1)
title = item("a",'jcs-JobTitle',0,"indeed title")
ar_title = finder(results,title,GETCHILDREN="span")
date = item("span",{'data-testid':'myJobsStateDate'},0,"indeed date")
ar_date = finder(results,date,ATTRS=1,INDEEDDATE=1)
link = item("a",'jcs-JobTitle',0,"link")
ar_link = finder(results,link,LINK=1,BASEURL="https://ch.indeed.com")
tag = entry.tag#get from config
return arrayToClass(ar_title,ar_company,ar_location,ar_date,ar_link,tag)
def scrap_jobs(url,entry,session):
session.headers = {
"User-Agent" : "Mozilla/5.0 (X11; Linux x86_64; rv:127.0) Gecko/20100101 Firefox/127.0"
}
jobs = []
log("in scrap jobs,url",url)
if(session == 0 or session == -1):
with requests.Session() as session:
page = session.get(url)
log(page)
else:
page = session.get(url)
log(page)
soup = BeautifulSoup(page.content,"html.parser")
#print(soup.prettify())
results = soup.find_all("div",attrs={'data-feat':'searched_jobs'})
location_class = "d_grid items_start gap_s12 grid-cols_[auto_1fr] px_s8"
location = item("div",location_class,0,"location")
ar_location = finder(results,location,GETCHILDREN='p',LOCATION_CLEANUP=1)
company_class = "mb_s12 lastOfType:mb_s0 textStyle_p2"
company = item("p",company_class,0,"company")
ar_company = finder(results,company,DEFAULT=1,GETCHILDREN='strong')
title = item("span","text_link.brand.base",0,"TITLE")
ar_title = finder(results,title,DEFAULT=1)
date = item("span","pos_absolute",0,"date")
ar_date = finder(results,date,CLEANDATE=1)
link = item("a",{'data-cy' :'job-link'},0,"link")
ar_link = finder(results,link,LINK=1,ATTRS=1,BASEURL="https://jobs.ch")
tag = entry.tag#get from config
return arrayToClass(ar_title,ar_company,ar_location,ar_date,ar_link,tag)
def next_url_indeed_com(url,session,baseurl):
next_link_str = ''
if(session == 0):
with requests.Session() as session:
page = session.get(url)
print(page)
else:
page = requests.get(url)
page = session.get(url)
print(page)
soup = BeautifulSoup(page.content,"html.parser")
result_next = soup.findAll("nav",attrs={"role":"navigation"})
next_=item("a",{'data-testid':'pagination-page-next'},0)
next_link = finder(result_next,next_,ATTRS=1,LINK=1)
if next_link:
if(next_link[0] != "NOTFound"):
next_link_str = str(next_link[0])
next_link_str = baseurl + next_link_str
log(next_link_str)
#print(soup.prettify())
results = soup.find_all("li",class_= 'css-5lfssm eu4oa1w0')
location = item("p",{'data-testid':'text-location'},0)
ar_location = finder(results,location,LOCATION_CLEANUP=1,ATTRS=1)
company = item("p",{'data-testid':'company-name'},0)
ar_company = finder(results,location,ATTRS=1)
title = item("a",'jobTitle',0)
ar_title = finder(results,location)
date = item("span","Span-sc-1ybanni-0 Text__span-sc-1lu7urs-12 Text-sc-1lu7urs-13 krGudM hUhFmL",0)
ar_date = finder(results,date,CLEANDATE=1)
def scrap_jobs(url,entry,session):
jobs = []
log("in scrap jobs,url",url)
if(session == 0):
with requests.Session() as session:
page = session.get(url)
log(page)
else:
return 0
if next_link_str != '':
return next_link_str
else:
return 0
page = session.get(url)
log(page)
soup = BeautifulSoup(page.content,"html.parser")
#print(soup.prettify())
results = soup.find_all("div",attrs={"data-feat":"searched_jobs"})
location_class = "P-sc-hyu5hk-0 Text__p2-sc-1lu7urs-10 Span-sc-1ybanni-0 Text__span-sc-1lu7urs-12 Text-sc-1lu7urs-13 jZCxUn"
location = item("p",location_class,0)
ar_location = finder(results,location,LOCATION_CLEANUP=1)
company_class = "P-sc-hyu5hk-0 Text__p2-sc-1lu7urs-10 Span-sc-1ybanni-0 Text__span-sc-1lu7urs-12 Text-sc-1lu7urs-13 jZCxUn"
company = item("p",company_class,3)
ar_company = finder(results,company,DEFAULT=1)
title = item("span","Span-sc-1ybanni-0 Text__span-sc-1lu7urs-12 Text-sc-1lu7urs-13 VacancyItem___StyledText2-sc-iugtv6-5 iaJYDR jlFpCz dMwMcR",0)
ar_title = finder(results,title,DEFAULT=1)
date = item("span","Span-sc-1ybanni-0 Text__span-sc-1lu7urs-12 Text-sc-1lu7urs-13 krGudM hUhFmL",0)
ar_date = finder(results,date,CLEANDATE=1)
link = item("a","Link__ExtendedRR6Link-sc-czsz28-1 khAvCu Link-sc-czsz28-2 VacancyLink___StyledLink-sc-ufp08j-0 dXKwhi dDgwgk",0)
ar_link = finder(results,link,LINK=1,BASEURL="https://jobs.ch")
tag = entry.tag#get from config
return arrayToClass(ar_title,ar_company,ar_location,ar_date,ar_link,tag)
def next_url_jobs_ch(url,session,baseurl):
next_link_str = ''
if(session == 0):
@ -156,7 +109,7 @@ def next_url_jobagent(base_url,session,c):#depreacted will be removed in the fut
for i2 in next_url_names:
striped_string = i2.text.strip()
log(i2.text.strip(),"stripped:",striped_string)
log("Printable characters?",striped_string.isprintable())
# print("Printable characters?",striped_string.isprintable())
if (striped_string) == "Nächste Seite":
log(i2)
next_url = i2.get("href")
@ -170,34 +123,29 @@ def next_url_jobagent(base_url,session,c):#depreacted will be removed in the fut
def scrap_jobagent(url,entry,session):
jobs = []
log("[scrap_jobagent],url",url)
if(session == 0 or session == -1):
log("session not sucessful transmitted ",session)
log("in scrap jobs,url",url)
if(session == 0):
with requests.Session() as session:
page = session.get(url)
log(page)
else:
page = session.get(url)
page = session.get(url)
log("[scrap_jobagent]page:",page)
log(page)
soup = BeautifulSoup(page.content,"html.parser")
print(soup.prettify())
print(session.headers)
#print(soup.prettify())
results = soup.find_all("li",class_="item")
if not results:
print("no li items found")
log("page:",page)
title = item("span","jobtitle",0,"jobagent title")
title = item("span","jobtitle",0)
ar_title = finder(results,title)
location = item("span","location",0,"jobagent location")
location = item("span","location",0)
ar_location = finder(results,location,LOCATION_CLEANUP=1)
company = item("span","company",0,"jobagent company")
company = item("span","company",0)
ar_company = finder(results,company,DEFAULT=1)
link = item("a","title",0,"jobagent link")
link = item("a","title",0)
ar_link = finder(results,link,LINK=1)
date = item("span","pubdate",0)
@ -206,4 +154,3 @@ def scrap_jobagent(url,entry,session):
return arrayToClass(ar_title,ar_company,ar_location,ar_date,ar_link,tag)


@ -7,11 +7,6 @@ from login import *
from time import sleep
from db import *
DEBUG = True
def log(*s):
if DEBUG:
print(s)
def choose_scraper(entry,session):
if not session:
session = requests.Session()
@ -23,8 +18,6 @@ def choose_scraper(entry,session):
runner(entry,session,scrap_jobagent,next_url_jobagent)
case 'https://www.jobagent.ch':
runner(entry,session,scrap_jobagent,next_url_jobagent)
case 'https://ch.indeed.com':
runner(entry,session,scrap_indeed_com,next_url_indeed_com)
def parse(**kwargs):
session=0
@ -39,11 +32,11 @@ def parse(**kwargs):
# parser.add_argument("--help",help = "print help")
parser.add_argument("--login",nargs=3,help = "login by specifing login and passwor by a given url",metavar=('USERNAME','PASSWORD','URL'))
parser.add_argument("--createnwview",help = "Create a VIEW for the Region Nordwest Schweiz",action="store_true")
parser.add_argument("-VC","--ValidationCheck",help = "Check if links are still valid, if not remove them",action="store_true")
args = parser.parse_args()
if args.test:
addFineFilter("../db/sqlite3.db","filters","testfilterentry")
session = makeSession(sys.argv[args.test])
choose_scraper(arg.test,session)
if args.importregiondb:
importdb("../db/sqlite3.db","../db/Cantons.db","Cantons")
if args.initdb:
@ -58,8 +51,7 @@ def parse(**kwargs):
login_loop(args.config,False,worker)
if args.createnwview:
createnwview("../db/sqlite3.db")
if args.ValidationCheck:
isStillValid("../db/sqlite3.db")
if len(kwargs)>0:
print("no sysargs fiven, running as a module")
vconfig = kwargs.get('config')
@ -76,65 +68,33 @@ def login_loop(config_file,gui,worker):
ret_login = 0
session = 0
while (ret != 0):
if gui:
worker.dialog_rejected = False
ret = entry2 = config.readConfig(config_file,gui,worker)
print(entry2)
if(ret != 0 and ret_login != 1):
if(entry2.loginurl != 'NONE'):
session = -1
log("[pre while] worker.dialog_rejected = ",worker.dialog_rejected)
worker.dialog_rejected = False
while (session == -1 and worker.dialog_rejected == False):
log("worker.dialog_rejected = ",worker.dialog_rejected)
session = login(entry2)
ret = entry2 = config.readConfig(config_file,gui,worker)
print(entry2)
if(ret != 0 and ret_login != 1):
if(entry2.loginurl != 'NONE'):
session = -1
while session == -1:
session = login(entry2)
if session == -1:
ret_login = entry2.input_pw(gui,entry2.user,worker)
if worker.dialog_rejected == False:
choose_scraper(entry2,session)
if not gui:
ret = entry2 = config.readConfig(config_file,gui,worker)
#print(entry2)
if(ret != 0 and ret_login != 1):
if(entry2.loginurl != 'NONE'):
session = -1
while (session == -1):
session = login(entry2)
if session == -1:
ret_login = entry2.input_pw(gui,entry2.user,worker)
log("[login_loop] session:",session)
choose_scraper(entry2,session)
choose_scraper(entry2,session)
def runner(entry,session,scrap_func,next_url_func):
i=0
b_url = entry.scrapurl
while b_url != 0 and i<50:
sleep(0.5)
sleep(0.3)
if b_url:
domain = extractDomain(b_url)
print(domain)
if domain == 'https://www.jobagent.ch' or domain == 'https://software-job.ch':
jobs = scrap_func(b_url,entry,session)
if jobs:
writedb(jobs)
else:
print("nothing found on this page")
writedb(jobs)
b_url = next_url_func(b_url,session,0)
elif domain == 'https://www.jobs.ch':
jobs = scrap_func(b_url,entry,session)
if jobs:
writedb(jobs)
else:
print("nothing found on this page")
writedb(jobs)
b_url = next_url_func(b_url,session,"https://www.jobs.ch")
elif domain == 'https://ch.indeed.com':
jobs = scrap_func(b_url,entry,session)
if jobs:
writedb(jobs)
else:
print("nothing found on this page")
b_url = next_url_func(b_url,session,domain)
if b_url != 0:
print("main:" + b_url)


@ -1,28 +0,0 @@
#Works but is fucking big
#Single file version:
#pyinstaller lib/gui.py --onefile --add-data ./db/:./db
pypath := /home/ccppi2/.wine/drive_c/users/ccppi2/AppData/Local/Programs/Python/Python312/
all:gui cli datafiles
gui:
pyinstaller lib/gui.py
make datafiles
cli:
pyinstaller lib/main.py
make datafiles
datafiles:
make db-dir
cp ./db/Cantons.db dist/db/
cp ./db/sqlite3.db dist/db/
cp ./lib/conf dist/gui/
db-dir:
mkdir -p dist/db
wine-build:
wine64 $(pypath)/python.exe $(pypath)/Lib/site-packages/pyinstaller lib/gui.py
wine-nukita:
wine64 nuitka --standalone --enable-plugin=pyside6 --include-qt-plugins=sqldrivers --output-dir=dist/ lib/gui.py
make datafiles
wine-nuitka-zip:
cd dist/; zip -r wine-nuitka/win64-nuitka.zip db/ gui.dist/
clean:
rm dist/db -r


@ -1,9 +1,5 @@
beautifulsoup4==4.12.3
httplib2==0.22.0
mmh3==4.1.0
#PySide6==6.7.1
#PySide6==6.7.2
#PySide6.egg==info
#PySide6_Addons==6.7.1
#PySide6_Essentials==6.7.1
Requests==2.32.3
numpy==1.26.4
Requests==2.31.0
pyside6