toolbox/webwatcher/webwatcher.py

import csv
import hashlib
import sys
import time
import traceback
from urllib.request import urlopen, Request

from pandas import *

# CSV file (columns: url, hash) that remembers the last hash seen per URL.
CSV_PATH = "test.csv"


def fetch_hash(request) -> str:
    """Return the SHA-224 hex digest of the body fetched for *request*.

    ``request`` is anything ``urlopen`` accepts (a URL string or a
    ``Request`` object). The response is closed once the body is read.
    """
    with urlopen(request) as response:
        return hashlib.sha224(response.read()).hexdigest()


def load_records(path: str = CSV_PATH) -> tuple:
    """Read the stored ``url``/``hash`` columns from *path*.

    Returns a ``(urls, hashes)`` pair of parallel lists. A missing or
    empty file yields two empty lists so the very first run can create
    the file. Values are kept as plain strings (no type inference).

    Raises:
        KeyError: if the file has rows but no ``url``/``hash`` header.
    """
    try:
        # newline="" lets the csv module handle line endings itself.
        with open(path, newline="", encoding="utf-8") as csv_file:
            rows = list(csv.DictReader(csv_file))
    except FileNotFoundError:
        return [], []
    return [row["url"] for row in rows], [row["hash"] for row in rows]


def record_hash(urls: list, hashes: list, checkurl: str, current_hash: str) -> str:
    """Merge *current_hash* for *checkurl* into the parallel lists in place.

    Returns:
        ``"new"`` if the URL was not stored yet (it is appended),
        ``"same"`` if the stored hash already equals *current_hash*,
        ``"updated"`` if the stored hash differed and was replaced.

    When the URL is stored more than once, the last entry decides whether
    the hash changed, and every entry is updated so that duplicates can
    never disagree on later runs.
    """
    matches = [index for index, url in enumerate(urls) if url == checkurl]
    if not matches:
        urls.append(checkurl)
        hashes.append(current_hash)
        return "new"
    if hashes[matches[-1]] == current_hash:
        return "same"
    for index in matches:
        hashes[index] = current_hash
    return "updated"


def save_records(urls: list, hashes: list, path: str = CSV_PATH) -> None:
    """Write the parallel *urls*/*hashes* lists to *path* with a header row.

    Rows go straight through ``csv.writer`` so values containing commas
    or quotes are escaped instead of being split into extra columns.
    """
    with open(path, mode="w", newline="", encoding="utf-8") as csv_file:
        writer = csv.writer(csv_file)
        writer.writerow(["url", "hash"])
        writer.writerows(zip(urls, hashes))


def main(argv=None) -> int:
    """Check one URL against the stored hashes and update the CSV file.

    Args:
        argv: argument vector; defaults to ``sys.argv``. ``argv[1]`` is
            the URL to check.

    Returns:
        Process exit status: 0 on success, 1 when the URL argument is
        missing or any step (fetch, read, write) fails.
    """
    args = sys.argv if argv is None else argv
    if len(args) < 2:
        print("Sorry, need a url!")
        return 1
    checkurl = args[1]

    try:
        # Some sites reject urllib's default agent, so send a browser-like one.
        request = Request(checkurl, headers={'User-Agent': 'Mozilla/5.0'})
        current_hash = fetch_hash(request)
        urls, hashes = load_records()

        status = record_hash(urls, hashes, checkurl, current_hash)
        if status == "same":
            print("same hash!")
        else:
            print("something changed")
            if status == "new":
                print("found new url")
            else:
                print("found update to existing url")

        save_records(urls, hashes)
    except Exception:
        # Top-level boundary: report the failure and exit non-zero.
        print(traceback.format_exc(), file=sys.stderr)
        return 1
    return 0


if __name__ == "__main__":
    sys.exit(main())
Initial release. 2021-08-11 15:26:05 -07:00
			`import sys`
			`import time`
			`import hashlib`
			`from urllib.request import urlopen, Request`
			`from pandas import *`
			`import csv`

			`if len(sys.argv) < 2:`
			`print("Sorry, need a url!")`
			`exit(1)`
			`else:`
			`checkurl = sys.argv[1]`

			`url = Request(checkurl,`
			`headers={'User-Agent': 'Mozilla/5.0'})`

			`# to perform a GET request and load the`
			`# content of the website and store it in a var`
			`response = urlopen(url).read()`

			`# to create the initial hash`
			`currentHash = hashlib.sha224(response).hexdigest()`
			`keeprunning = True`

			`while keeprunning == True:`
			`try:`
			`# perform the get request and store it in a var`
			`response = urlopen(url).read()`

			`# create a hash`
			`currentHash = hashlib.sha224(response).hexdigest()`
			`newHash = "null"`
			`currenturl = -1`

			`# csv file name`
			`data = read_csv("test.csv")`

			`# converting column data to list`
			`urls = data['url'].tolist()`
			`hashes = data['hash'].tolist()`

			`for link in range(len(urls)):`
			`if urls[link] == checkurl:`
			`newHash = hashes[link]`
			`currenturl = urls[link]`

			`# check if new hash is same as the previous hash`
			`if newHash == currentHash:`
			`print("same hash!")`
			`keeprunning = False`
			`break`
			`# if something changed in the hashes`
			`else:`
			`print("something changed")`

			`if currenturl == -1:`
			`# this ia a new url`
			`print("found new url")`
			`urls.append(checkurl)`
			`hashes.append(hashlib.sha224(response).hexdigest())`
			`keeprunning = False`
			`break`
			`else:`
			`# this is an update to an existing url`
			`print("found update to existing url")`
			`for link in range(len(urls)):`
			`if urls[link] == checkurl:`
			`hashes[link] = hashlib.sha224(response).hexdigest()`
			`keeprunning = False`
			`break`


			`# To handle exceptions`
			`except Exception as e:`
			`print(traceback.format_exc())`
			`#print("error:")`
			`exit(e)`

			`finallist = []`
			`finallist.append("url,hash")`
			`for i in range(len(urls)):`
			`finallist.append(urls[i] + "," + hashes[i])`

			`with open('test.csv', mode='w') as csv_file:`
			`csv_writer = csv.writer(csv_file)`
			`reader = csv.reader(finallist, delimiter=',')`
			`for row in reader:`
			`#print('\t'.join(row))`
			`csv_writer.writerow(row)`