diff --git a/test.py b/test.py new file mode 100644 index 0000000..8772466 --- /dev/null +++ b/test.py @@ -0,0 +1,96 @@ +import subprocess +import re +import json +import ipaddress +from datetime import datetime +from sys import hash_info +import pprint + +def run_traceroute(host): + result = subprocess.run(['traceroute', host], stdout=subprocess.PIPE) + return result.stdout.decode() + +def parse_traceroute_output(output): + + lines = output.strip().split('\n') + hops = [] + + ip_regex = r"\((.*?)\)" # ipaddress are in () + + target = output.strip().split('\n')[0].split()[2] + + for line in lines[1:]: + hop = [] + hop_info = line.split() + hop_number = int(hop_info[0]) + hop_name = None + hop_ip = None + hop_latency = None + + latencies = [] + + #print("##### "+str(hop_info)) + count = 0 + for part in hop_info[1:]: + count += 1 + + # source node drops or blocks icmp packages + # We will give funny to name to hop for not answering and move on. + if part == '*': + hop_name = 'hop.'+str(count)+'.'+str(target) + break + + + # If first colum is either name or ip-address + if count == 1: + print(part) + match = re.search(ip_regex, part) + if match: + hop_ip = part.strip('()') + else: + print('do ever here?') + hop_name = part + + # Second colum is ip-address first latency reading + if count == 2: + print(part) + if re.search(ip_regex, part): + try: + _ip = ipaddress.ip_address(part.strip('()')) + hop_ip = part.strip('()') + except ValueError: + pass # Ignore if it's not a valid IP address + + # Rest of the input colums are either latency floats, 'ms' or + # reruns of the hop_name and hop_ip... + # We only need the latency floats anymore. + else: + print(part) + try: + latency = float(part) + latencies.append(latency) + except ValueError: + pass + + hop_latency = sum(latencies) / len(latencies) if latencies else None + + + hop.append(hop_number) + if not hop_name == None: + hop.append(hop_name) + hop.append(hop_ip) + hop.append(hop_latency) + + + hops.append(hop) + + return target, hops + +if __name__ == '__main__': + + target='8.8.8.8' + traceroute_output = run_traceroute(target) + + target, hops = parse_traceroute_output(traceroute_output) + print('>> '+target) + pprint.pprint(hops) diff --git a/traceroute_collector.py b/traceroute_collector.py index c8550c2..32a6ff6 100755 --- a/traceroute_collector.py +++ b/traceroute_collector.py @@ -5,34 +5,87 @@ import subprocess import sqlite3 import re import json +import ipaddress from datetime import datetime def run_traceroute(host): + timestamp = datetime.now().timestamp() result = subprocess.run(['traceroute', host], stdout=subprocess.PIPE) - return result.stdout.decode() + return result.stdout.decode(), timestamp -def parse_traceroute_output(output): +def parse_traceroute_output(output, timestamp): lines = output.strip().split('\n') hops = [] + + ip_regex = r"\((.*?)\)" # ipaddress are in () + + target = output.strip().split('\n')[0].split()[2] + for line in lines[1:]: + hop = {} hop_info = line.split() hop_number = int(hop_info[0]) - hop_ips = [] - hop_latencies = [] + hop_name = None + hop_ip = None + hop_latency = None + + latencies = [] + + #print("##### "+str(hop_info)) + count = 0 for part in hop_info[1:]: - if re.match(r'\d+\.\d+\.\d+\.\d+', part): # Match IP address - hop_ips.append(part) - elif re.match(r'\d+(\.\d+)? ms', part): # Match latency - hop_latencies.append(float(part.replace(' ms', ''))) + count += 1 + + # source node drops or blocks icmp packages + # We will give funny to name to hop for not answering and move on. + if part == '*': + hop_name = 'hop.'+str(count)+'.'+str(target) + break + + + # If first colum is either name or ip-address + if count == 1: + match = re.search(ip_regex, part) + if match: + hop_ip = part.strip('()') + else: + hop_name = part + + # Second colum is ip-address first latency reading + if count == 2: + if re.search(ip_regex, part): + try: + _ip = ipaddress.ip_address(part.strip('()')) + hop_ip = part.strip('()') + except ValueError: + pass # Ignore if it's not a valid IP address + + # Rest of the input colums are either latency floats, 'ms' or + # reruns of the hop_name and hop_ip... + # We only need the latency floats anymore. + else: + try: + latency = float(part) + latencies.append(latency) + except ValueError: + pass + + hop_latency = sum(latencies) / len(latencies) if latencies else None + + hop['timestamp'] = timestamp + hop['hop_number'] = hop_number + if not hop_name == None: + hop['hop_name'] = hop_name + hop['hop_ip'] = hop_ip + hop['hop_latency'] = hop_latency - # If multiple IPs are present, we consider the first as primary - primary_ip = hop_ips[0] if hop_ips else None - avg_latency = sum(hop_latencies) / len(hop_latencies) if hop_latencies else None - hops.append((primary_ip, avg_latency)) + hops.append(target) + hops.append(hop) return hops + def create_tables(databasefile): # Connect to the SQLite database conn = sqlite3.connect(databasefile) @@ -97,6 +150,7 @@ def store_traceroute(db_file, start_ip, end_ip, hops): # Insert links and get their IDs link_ids = [] for hop in hops: + print(hop) source_ip = start_ip if not link_ids else hops[len(link_ids)-1][0] destination_ip = hop[0] latency = hop[1] @@ -107,7 +161,11 @@ def store_traceroute(db_file, start_ip, end_ip, hops): cursor.execute(""" SELECT id FROM Links WHERE source_ip = ? AND destination_ip = ? """, (source_ip, destination_ip)) - link_id = cursor.fetchone()[0] + result = cursor.fetchone() + if result is None: + print(hop) + raise ValueError(f"Failed to insert of find link between {source_ip} and {destination_ip}") + link_id = result[0] link_ids.append(link_id) cursor.execute(""" @@ -144,14 +202,11 @@ if __name__ == '__main__': target='vi.fi' - traceroute_output = run_traceroute(target) - hops = parse_traceroute_output(traceroute_output) - if hops: - start_ip = hops[0][0] - store_traceroute(databasefile, start_ip, target, hops) -# stored_hops = retrieve_traceroute() -# for hop in stored_hops: -# print(f"Link: {hop[0]} -> {hop[1]}, Latency: {hop[2]} ms, Timestamp: {hop[3]}") + traceroute_output, timestamp = run_traceroute(target) + hops = parse_traceroute_output(traceroute_output, timestamp) + print("#####") + print(hops) + print("#####") exit(0)