90 lines
2 KiB
Python
90 lines
2 KiB
Python
|
import sys
|
||
|
import time
|
||
|
import hashlib
|
||
|
from urllib.request import urlopen, Request
|
||
|
from pandas import *
|
||
|
import csv
|
||
|
|
||
|
# CSV store used when no explicit path is given; columns are "url" and "hash".
_DEFAULT_CSV_PATH = "test.csv"


def _fetch_digest(target_url):
    """Download *target_url* once and return the SHA-224 hex digest of its body.

    The request carries a 'Mozilla/5.0' User-Agent header (presumably because
    some sites reject urllib's default agent -- not verifiable from this file).
    """
    request = Request(target_url, headers={'User-Agent': 'Mozilla/5.0'})
    with urlopen(request) as reply:
        return hashlib.sha224(reply.read()).hexdigest()


def _load_records(path):
    """Read the CSV store at *path* and return ``(urls, hashes)`` as two lists.

    A missing file is treated as an empty store so that the very first run can
    create it. Values are kept as plain strings. A file whose header lacks the
    ``url`` or ``hash`` column raises ``KeyError``.
    """
    try:
        csv_file = open(path, newline='', encoding='utf-8')
    except FileNotFoundError:
        return [], []
    with csv_file:
        rows = list(csv.DictReader(csv_file))
    return [row['url'] for row in rows], [row['hash'] for row in rows]


def _record_digest(urls, hashes, target_url, digest):
    """Merge *digest* for *target_url* into the parallel lists, in place.

    Returns ``"new"`` when the URL was not stored yet (it is appended),
    ``"same"`` when the stored hash already equals *digest* (nothing changes),
    and ``"updated"`` when the stored hash differed (every row for that URL is
    rewritten).
    """
    matches = [pos for pos, known in enumerate(urls) if known == target_url]
    if not matches:
        urls.append(target_url)
        hashes.append(digest)
        return "new"
    # When the URL is listed more than once, the last row decides, as before.
    if hashes[matches[-1]] == digest:
        return "same"
    for pos in matches:
        hashes[pos] = digest
    return "updated"


def _save_records(path, urls, hashes):
    """Write the header row plus one ``url,hash`` row per entry to *path*.

    Rows go straight through ``csv.writer`` so that fields containing commas
    or quotes are escaped instead of being split into extra columns.
    """
    with open(path, mode='w', newline='', encoding='utf-8') as csv_file:
        writer = csv.writer(csv_file)
        writer.writerow(["url", "hash"])
        writer.writerows(zip(urls, hashes))


def main(argv=None, csv_path=_DEFAULT_CSV_PATH):
    """Hash the page named on the command line and record it in the CSV store.

    Args:
        argv: Argument vector; defaults to ``sys.argv``. ``argv[1]`` is the URL.
        csv_path: Location of the CSV store.

    Returns:
        ``"new"``, ``"same"`` or ``"updated"`` (see ``_record_digest``).

    Raises:
        SystemExit: With status 1 when no URL is given, or when fetching the
            page or reading the store fails (the traceback is printed first).
    """
    args = sys.argv if argv is None else argv
    if len(args) < 2:
        print("Sorry, need a url!")
        sys.exit(1)
    checkurl = args[1]

    try:
        digest = _fetch_digest(checkurl)
        urls, hashes = _load_records(csv_path)
    except Exception:
        # Top-level boundary of the script: report the failure and stop.
        # Imported here because the file's import block does not provide it.
        import traceback
        traceback.print_exc()
        sys.exit(1)

    status = _record_digest(urls, hashes, checkurl, digest)
    if status == "same":
        print("same hash!")
    else:
        print("something changed")
        if status == "new":
            print("found new url")
        else:
            print("found update to existing url")

    # The store is rewritten on every successful run, changed or not.
    _save_records(csv_path, urls, hashes)
    return status


if __name__ == "__main__":
    main()
|