Initial release.
This commit is contained in:
parent
b0e125ffc9
commit
c5c8fccf8e
3 changed files with 95 additions and 0 deletions
4
webwatcher/test.csv
Normal file
4
webwatcher/test.csv
Normal file
|
@ -0,0 +1,4 @@
|
||||||
|
url,hash
|
||||||
|
https://packages.gentoo.org/packages/sys-kernel/gentoo-sources,d38c5d823f690f928ba31e739cbdbe1748321e120bbcbca3325e7f1e
|
||||||
|
https://packages.gentoo.org/packages/sys-kernel/git-sources,60869301e0b1beb470b1cf224568fe89c5d532a9e69898962286b96c
|
||||||
|
https://packages.gentoo.org/packages/sys-kernel/vanilla-sources,3cf20983f27b8412dd37748f65a7d26c01d6a779493cf0a2889b2dd3
|
|
2
webwatcher/test.csv.bak
Normal file
2
webwatcher/test.csv.bak
Normal file
|
@ -0,0 +1,2 @@
|
||||||
|
https://packages.gentoo.org/packages/sys-kernel/gentoo-sources,d38c5d823f690f928ba31e739cbdbe1748321e120bbcbca3325e7f1e
|
||||||
|
https://packages.gentoo.org/packages/sys-kernel/git-sources,60869301e0b1beb470b1cf224568fe89c5d532a9e69898962286b96c
|
89
webwatcher/webwatcher.py
Normal file
89
webwatcher/webwatcher.py
Normal file
|
@ -0,0 +1,89 @@
|
||||||
|
"""Watch a URL for content changes by comparing SHA-224 hashes.

Usage: webwatcher.py <url>

Fetches the page, compares its hash against the record stored in
``test.csv`` (columns: ``url,hash``), reports whether anything changed,
and rewrites ``test.csv`` with the updated url/hash pairs.
"""

import csv
import hashlib
import sys
import traceback  # BUG FIX: was used in the except handler without being imported
from urllib.request import urlopen, Request

from pandas import read_csv

if len(sys.argv) < 2:
    print("Sorry, need a url!")
    sys.exit(1)
else:
    checkurl = sys.argv[1]

# Spoof a browser user agent — some sites reject urllib's default agent.
url = Request(checkurl,
              headers={'User-Agent': 'Mozilla/5.0'})

# Perform a GET request, load the content of the website,
# and create the initial hash of it.
response = urlopen(url).read()
currentHash = hashlib.sha224(response).hexdigest()
keeprunning = True

while keeprunning:
    try:
        # Re-fetch the page and hash its content.
        response = urlopen(url).read()
        currentHash = hashlib.sha224(response).hexdigest()
        newHash = "null"   # sentinel: no stored hash found for this url yet
        currenturl = -1    # sentinel: url not present in the csv yet

        # Load the previously recorded url/hash pairs from the csv file
        # and convert the column data to lists.
        data = read_csv("test.csv")
        urls = data['url'].tolist()
        hashes = data['hash'].tolist()

        # Look up the stored hash for the url being checked.
        for link in range(len(urls)):
            if urls[link] == checkurl:
                newHash = hashes[link]
                currenturl = urls[link]

        # Check if the fresh hash is the same as the stored one.
        if newHash == currentHash:
            print("same hash!")
            keeprunning = False
            break
        # Something changed in the hashes.
        else:
            print("something changed")

            if currenturl == -1:
                # This is a new url: record it with its current hash.
                print("found new url")
                urls.append(checkurl)
                hashes.append(currentHash)
                keeprunning = False
                break
            else:
                # This is an update to an existing url: replace its hash.
                print("found update to existing url")
                for link in range(len(urls)):
                    if urls[link] == checkurl:
                        hashes[link] = currentHash
                keeprunning = False
                break

    # Handle exceptions: print the full traceback, then abort.
    # (Previously this raised a NameError because `traceback` was
    # never imported, masking the original error.)
    except Exception as e:
        print(traceback.format_exc())
        sys.exit(e)

# Rebuild the csv content: header row plus one "url,hash" line per entry.
finallist = ["url,hash"]
for i in range(len(urls)):
    finallist.append(urls[i] + "," + hashes[i])

# newline='' is required by the csv module; without it the writer
# emits blank rows on Windows.
with open('test.csv', mode='w', newline='') as csv_file:
    csv_writer = csv.writer(csv_file)
    reader = csv.reader(finallist, delimiter=',')
    for row in reader:
        csv_writer.writerow(row)
|
Loading…
Reference in a new issue