job-scrapper/lib/manipulateString.py

43 lines
1.1 KiB
Python
Raw Permalink Normal View History

2024-08-23 10:13:15 +00:00
def urlToDomain(url: str) -> str:
    """Return the host of *url* minus its first label, keeping the leading dot.

    Example: ``"https://www.example.com/jobs/1"`` gives ``".example.com"``.
    The first dot-separated label is always dropped, so
    ``"https://example.com/jobs"`` gives ``".com"``.

    Args:
        url: The URL to reduce. A scheme (``https://``, ``http://``, ...) is
            skipped when present; a scheme-less string is used as-is.

    Returns:
        The text from the first ``.`` of the host up to (not including) the
        first ``/``, ``?`` or ``#``, or up to the end of the string when none
        of those follow the host. An empty string when the host has no dot.

    Side effects:
        Prints the result to stdout.
    """
    # Skip the scheme when there is one.
    scheme_sep = "://"
    scheme_at = url.find(scheme_sep)
    authority = url if scheme_at == -1 else url[scheme_at + len(scheme_sep):]

    # The host ends at the first path, query or fragment delimiter, or at the
    # end of the string: a URL without a trailing path must keep its last
    # character.
    for delimiter in "/?#":
        authority = authority.partition(delimiter)[0]

    first_dot = authority.find(".")
    urlCut = authority[first_dot:] if first_dot != -1 else ""
    print("url after cut dot:", urlCut)
    return urlCut
def patternSearch(url: str, pattern: str) -> int:
    """Return the index of the last character of the first *pattern* match.

    Args:
        url: The string to search in.
        pattern: The substring to look for.

    Returns:
        The index in *url* of the final character of the leftmost occurrence
        of *pattern*. When *pattern* does not occur, the last index of *url*
        (``len(url) - 1``, hence ``-1`` for an empty *url*) is returned, so a
        miss cannot be told apart from a match that ends on the last
        character. An empty *pattern* yields ``-1``.

    Side effects:
        Prints a one-line message to stdout when the pattern is found.
    """
    start = url.find(pattern)
    if start == -1:
        # Keep the historical "not found" result: the last index of url.
        return len(url) - 1

    end = start + len(pattern) - 1
    print("FULL PATTERN FOUND at pos :", end)
    return end
def skipAfterChar(aString: str, char: str) -> int:
    """Return the index of the first element of *aString* equal to *char*.

    Args:
        aString: The string to scan.
        char: The single character to look for; it is compared against each
            character of *aString* in turn.

    Returns:
        The index of the first match. When nothing matches, the last index of
        *aString* (``len(aString) - 1``, hence ``-1`` for an empty string) is
        returned, so a miss cannot be told apart from a match on the last
        character.
    """
    for index, current in enumerate(aString):
        if current == char:
            return index
    # No match: fall back to the last index, which is also defined (-1) for
    # empty input.
    return len(aString) - 1
def dropBeforePos(aString: str, pos: int) -> str:
    """Return *aString* without the characters at indexes ``0..pos``.

    The character at *pos* itself is dropped as well; the result starts at
    index ``pos + 1``.

    Args:
        aString: The string to cut.
        pos: Index of the last character to drop. ``-1`` or lower drops
            nothing.

    Returns:
        The remaining tail of *aString*, or an empty string when *pos* is at
        or beyond the last index.
    """
    # Clamp at 0 so a negative start never wraps around to the end of the
    # string.
    return aString[max(pos + 1, 0):]
def dropAfterPos(aString: str, pos: int) -> str:
    """Return the first *pos* characters of *aString*.

    The character at index *pos* and everything after it are dropped.

    Args:
        aString: The string to cut.
        pos: Index of the first character to drop. Zero or a negative value
            drops everything.

    Returns:
        The leading part of *aString*. When *pos* is at or beyond the end of
        the string there is nothing to drop and *aString* is returned whole.
    """
    # Clamp at 0 so a negative pos yields "" instead of counting from the end.
    return aString[:max(pos, 0)]