From c5c8fccf8ec164d9ab61e39e122e87dd782967cb Mon Sep 17 00:00:00 2001 From: Ronald Farrer Date: Wed, 11 Aug 2021 15:26:05 -0700 Subject: [PATCH] Initial release. --- webwatcher/test.csv | 4 ++ webwatcher/test.csv.bak | 2 + webwatcher/webwatcher.py | 89 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 95 insertions(+) create mode 100644 webwatcher/test.csv create mode 100644 webwatcher/test.csv.bak create mode 100644 webwatcher/webwatcher.py diff --git a/webwatcher/test.csv b/webwatcher/test.csv new file mode 100644 index 0000000..9cd4d34 --- /dev/null +++ b/webwatcher/test.csv @@ -0,0 +1,4 @@ +url,hash +https://packages.gentoo.org/packages/sys-kernel/gentoo-sources,d38c5d823f690f928ba31e739cbdbe1748321e120bbcbca3325e7f1e +https://packages.gentoo.org/packages/sys-kernel/git-sources,60869301e0b1beb470b1cf224568fe89c5d532a9e69898962286b96c +https://packages.gentoo.org/packages/sys-kernel/vanilla-sources,3cf20983f27b8412dd37748f65a7d26c01d6a779493cf0a2889b2dd3 diff --git a/webwatcher/test.csv.bak b/webwatcher/test.csv.bak new file mode 100644 index 0000000..9864c02 --- /dev/null +++ b/webwatcher/test.csv.bak @@ -0,0 +1,2 @@ +https://packages.gentoo.org/packages/sys-kernel/gentoo-sources,d38c5d823f690f928ba31e739cbdbe1748321e120bbcbca3325e7f1e +https://packages.gentoo.org/packages/sys-kernel/git-sources,60869301e0b1beb470b1cf224568fe89c5d532a9e69898962286b96c diff --git a/webwatcher/webwatcher.py b/webwatcher/webwatcher.py new file mode 100644 index 0000000..02e3e58 --- /dev/null +++ b/webwatcher/webwatcher.py @@ -0,0 +1,89 @@ +import sys +import time +import hashlib +from urllib.request import urlopen, Request +from pandas import * +import csv + +if len(sys.argv) < 2: + print("Sorry, need a url!") + exit(1) +else: + checkurl = sys.argv[1] + +url = Request(checkurl, + headers={'User-Agent': 'Mozilla/5.0'}) + +# to perform a GET request and load the +# content of the website and store it in a var +response = urlopen(url).read() + +# to create the initial hash 
# Hash of the page content fetched above; compared against the hash stored
# in test.csv to decide whether the page changed since the last run.
currentHash = hashlib.sha224(response).hexdigest()

try:
    # Load the known url/hash pairs recorded by previous runs.
    data = read_csv("test.csv")
    urls = data['url'].tolist()
    hashes = data['hash'].tolist()

    # Look up the stored hash for the url being checked (None if unseen).
    storedHash = None
    for i, u in enumerate(urls):
        if u == checkurl:
            storedHash = hashes[i]

    if storedHash == currentHash:
        # Page is unchanged since the last recorded hash.
        print("same hash!")
    else:
        print("something changed")
        if storedHash is None:
            # This is a new url: record it with its current hash.
            print("found new url")
            urls.append(checkurl)
            hashes.append(currentHash)
        else:
            # Existing url whose content changed: update its stored hash.
            print("found update to existing url")
            for i, u in enumerate(urls):
                if u == checkurl:
                    hashes[i] = currentHash
except Exception:
    # Local import so the handler cannot itself raise NameError; print the
    # full traceback so the real failure is visible, then exit non-zero.
    import traceback
    print(traceback.format_exc())
    sys.exit(1)

# Rewrite test.csv with the (possibly updated) url/hash table.
# newline='' is required for csv.writer to avoid blank rows on Windows.
with open('test.csv', mode='w', newline='') as csv_file:
    csv_writer = csv.writer(csv_file)
    csv_writer.writerow(["url", "hash"])
    for u, h in zip(urls, hashes):
        csv_writer.writerow([u, h])