Actually working traceroute parser. Todo: import data to the new database schema.

This commit is contained in:
kalzu rekku 2024-05-27 19:49:37 +03:00
parent 0e0cba8426
commit 6e9d11dcde
2 changed files with 172 additions and 21 deletions

96
test.py Normal file
View File

@ -0,0 +1,96 @@
import subprocess
import re
import json
import ipaddress
from datetime import datetime
from sys import hash_info
import pprint
def run_traceroute(host):
    """Run the system ``traceroute`` command against *host*.

    Only standard output is captured; it is decoded with the default
    codec and returned as a single string.  The exit status of the
    command is not inspected.
    """
    command = ['traceroute', host]
    completed = subprocess.run(command, stdout=subprocess.PIPE)
    raw_bytes = completed.stdout
    return raw_bytes.decode()
def _is_ip_address(text):
    """Return True when *text* parses as an IPv4 or IPv6 address."""
    try:
        ipaddress.ip_address(text)
    except ValueError:
        return False
    return True


def parse_traceroute_output(output):
    """Parse the text printed by ``traceroute`` into a target and hop rows.

    The first line is expected to look like
    ``traceroute to <target> (<ip>), ...``; its third word is returned as
    the target.  Every following line that starts with a hop number is
    turned into one row ``[hop_number, hop_name, hop_ip, hop_latency]``:

    * ``hop_name`` is the first host column on the line.  When the hop
      printed no host at all, the IP is used, and when the hop did not
      answer any probe the placeholder ``hop.<hop_number>.<target>`` is
      used so that silent hops stay distinguishable from each other.
    * ``hop_ip`` is the first valid address found in parentheses, or the
      host column itself when it is already a bare address (numeric
      output); ``None`` for a silent hop.
    * ``hop_latency`` is the mean of all ``<number> ms`` readings on the
      line, or ``None`` when there are none.

    Lines without a leading hop number (blank or continuation lines) are
    skipped.

    Raises:
        ValueError: if *output* has no header line with a target in it.
    """
    lines = output.strip().splitlines()
    header = lines[0].split() if lines else []
    if len(header) < 3:
        raise ValueError(
            "traceroute output has no 'traceroute to <target>' header line"
        )
    target = header[2]

    hops = []
    for line in lines[1:]:
        fields = line.split()
        if not fields:
            continue
        try:
            hop_number = int(fields[0])
        except ValueError:
            # Not a hop line; nothing to attribute the columns to.
            continue

        hop_name = None
        hop_ip = None
        latencies = []
        columns = fields[1:]

        for index, part in enumerate(columns):
            # '*' is a single lost probe and 'ms' is only a unit marker;
            # neither ends the line, later columns may still hold data.
            if part in ('*', 'ms'):
                continue

            # Addresses are printed in parentheses after the host name.
            if part.startswith('(') and part.endswith(')'):
                candidate = part[1:-1]
                if hop_ip is None and _is_ip_address(candidate):
                    hop_ip = candidate
                continue

            # A latency reading is a number directly followed by 'ms'.
            if columns[index + 1:index + 2] == ['ms']:
                try:
                    latencies.append(float(part))
                except ValueError:
                    pass
                continue

            # The first remaining column is the host; later ones are
            # repeats from other responders or probe annotations.
            if hop_name is None:
                hop_name = part

        # Numeric output prints the bare address where the name would be.
        if hop_ip is None and hop_name is not None and _is_ip_address(hop_name):
            hop_ip = hop_name

        if hop_name is None:
            if hop_ip is not None:
                hop_name = hop_ip
            else:
                # The hop dropped or blocked every probe.
                hop_name = 'hop.' + str(hop_number) + '.' + str(target)

        hop_latency = sum(latencies) / len(latencies) if latencies else None
        hops.append([hop_number, hop_name, hop_ip, hop_latency])

    return target, hops
if __name__ == '__main__':
    # Manual smoke test: trace a fixed address, then show the target that
    # the parser read back from the header together with the hop rows.
    target = '8.8.8.8'
    raw_output = run_traceroute(target)
    target, hops = parse_traceroute_output(raw_output)
    print(f'>> {target}')
    pprint.pprint(hops)

View File

@ -5,34 +5,87 @@ import subprocess
import sqlite3
import re
import json
import ipaddress
from datetime import datetime
def run_traceroute(host):
timestamp = datetime.now().timestamp()
result = subprocess.run(['traceroute', host], stdout=subprocess.PIPE)
return result.stdout.decode()
return result.stdout.decode(), timestamp
def parse_traceroute_output(output):
def parse_traceroute_output(output, timestamp):
lines = output.strip().split('\n')
hops = []
ip_regex = r"\((.*?)\)" # ipaddress are in ()
target = output.strip().split('\n')[0].split()[2]
for line in lines[1:]:
hop = {}
hop_info = line.split()
hop_number = int(hop_info[0])
hop_ips = []
hop_latencies = []
hop_name = None
hop_ip = None
hop_latency = None
latencies = []
#print("##### "+str(hop_info))
count = 0
for part in hop_info[1:]:
if re.match(r'\d+\.\d+\.\d+\.\d+', part): # Match IP address
hop_ips.append(part)
elif re.match(r'\d+(\.\d+)? ms', part): # Match latency
hop_latencies.append(float(part.replace(' ms', '')))
count += 1
# source node drops or blocks icmp packages
# We will give funny to name to hop for not answering and move on.
if part == '*':
hop_name = 'hop.'+str(count)+'.'+str(target)
break
# If first colum is either name or ip-address
if count == 1:
match = re.search(ip_regex, part)
if match:
hop_ip = part.strip('()')
else:
hop_name = part
# Second colum is ip-address first latency reading
if count == 2:
if re.search(ip_regex, part):
try:
_ip = ipaddress.ip_address(part.strip('()'))
hop_ip = part.strip('()')
except ValueError:
pass # Ignore if it's not a valid IP address
# Rest of the input colums are either latency floats, 'ms' or
# reruns of the hop_name and hop_ip...
# We only need the latency floats anymore.
else:
try:
latency = float(part)
latencies.append(latency)
except ValueError:
pass
hop_latency = sum(latencies) / len(latencies) if latencies else None
hop['timestamp'] = timestamp
hop['hop_number'] = hop_number
if not hop_name == None:
hop['hop_name'] = hop_name
hop['hop_ip'] = hop_ip
hop['hop_latency'] = hop_latency
# If multiple IPs are present, we consider the first as primary
primary_ip = hop_ips[0] if hop_ips else None
avg_latency = sum(hop_latencies) / len(hop_latencies) if hop_latencies else None
hops.append((primary_ip, avg_latency))
hops.append(target)
hops.append(hop)
return hops
def create_tables(databasefile):
# Connect to the SQLite database
conn = sqlite3.connect(databasefile)
@ -97,6 +150,7 @@ def store_traceroute(db_file, start_ip, end_ip, hops):
# Insert links and get their IDs
link_ids = []
for hop in hops:
print(hop)
source_ip = start_ip if not link_ids else hops[len(link_ids)-1][0]
destination_ip = hop[0]
latency = hop[1]
@ -107,7 +161,11 @@ def store_traceroute(db_file, start_ip, end_ip, hops):
cursor.execute("""
SELECT id FROM Links WHERE source_ip = ? AND destination_ip = ?
""", (source_ip, destination_ip))
link_id = cursor.fetchone()[0]
result = cursor.fetchone()
if result is None:
print(hop)
raise ValueError(f"Failed to insert of find link between {source_ip} and {destination_ip}")
link_id = result[0]
link_ids.append(link_id)
cursor.execute("""
@ -144,14 +202,11 @@ if __name__ == '__main__':
target='vi.fi'
traceroute_output = run_traceroute(target)
hops = parse_traceroute_output(traceroute_output)
if hops:
start_ip = hops[0][0]
store_traceroute(databasefile, start_ip, target, hops)
# stored_hops = retrieve_traceroute()
# for hop in stored_hops:
# print(f"Link: {hop[0]} -> {hop[1]}, Latency: {hop[2]} ms, Timestamp: {hop[3]}")
traceroute_output, timestamp = run_traceroute(target)
hops = parse_traceroute_output(traceroute_output, timestamp)
print("#####")
print(hops)
print("#####")
exit(0)