"""Dynamic multi-node test client.

Simulates a fluctuating fleet of nodes that periodically report status
(uptime, load average, memory usage, peer ping latencies) to a central
server.  Each simulated node binds its HTTP traffic to a distinct
loopback source IP (127.0.0.x) so the server observes them as separate
hosts.  A manager thread randomly grows and shrinks the fleet between a
configured minimum and maximum, occasionally bringing back previously
departed node UUIDs.
"""

import os
import uuid
import time
import requests
import random
import json
import logging
import threading
import socket
from datetime import datetime, timezone
from concurrent.futures import ThreadPoolExecutor
import argparse

from requests.adapters import HTTPAdapter

# --- Multi-Node Client Configuration ---
TARGET_SERVICE_UUID = os.environ.get(
    "TARGET_SERVICE_UUID", "ab73d00a-8169-46bb-997d-f13e5f760973"
)
SERVER_BASE_URL = os.environ.get("SERVER_URL", "http://localhost:8000")
UPDATE_INTERVAL_SECONDS = int(os.environ.get("UPDATE_INTERVAL_SECONDS", 5))
NUM_NODES = int(os.environ.get("NUM_NODES", 3))

# Base IP for loopback binding (127.0.0.x where x starts from this base)
LOOPBACK_IP_BASE = int(os.environ.get("LOOPBACK_IP_BASE", 2))  # Start from 127.0.0.2

# Dynamic node management settings
DYNAMIC_MIN_NODES = int(os.environ.get("DYNAMIC_MIN_NODES", 3))
DYNAMIC_MAX_NODES = int(os.environ.get("DYNAMIC_MAX_NODES", 7))
NODE_CHANGE_INTERVAL = int(os.environ.get("NODE_CHANGE_INTERVAL", 30))  # seconds
NODE_LIFECYCLE_VARIANCE = int(os.environ.get("NODE_LIFECYCLE_VARIANCE", 10))  # seconds

# --- Logging Configuration ---
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(name)s - %(levelname)s - [%(thread)d] - %(message)s",
)
logger = logging.getLogger("MultiNodeClient")


# --- Custom HTTP Adapter for Source IP Binding ---
class SourceIPHTTPAdapter(HTTPAdapter):
    """Transport adapter that binds outgoing connections to a given local IP.

    BUG FIX: the previous implementation monkey-patched
    ``socket.create_connection`` only for the duration of
    ``init_poolmanager``.  urllib3 opens sockets lazily — at request time,
    long after the pool manager is initialised — so the patch was already
    restored before any connection was made and the binding never took
    effect; the process-global patch was also racy when several node
    threads started simultaneously.  urllib3's ``HTTPConnection`` natively
    accepts a ``source_address``, so we simply thread it through the pool
    manager's (and proxy manager's) connection kwargs instead.
    """

    def __init__(self, source_ip, *args, **kwargs):
        # Local IP to bind before connecting; must be assigned to an interface.
        self.source_ip = source_ip
        super().__init__(*args, **kwargs)

    def init_poolmanager(self, *args, **kwargs):
        # Port 0 lets the OS choose any available local port.
        kwargs["source_address"] = (self.source_ip, 0)
        return super().init_poolmanager(*args, **kwargs)

    def proxy_manager_for(self, *args, **kwargs):
        # Preserve the source-IP binding when requests routes via a proxy.
        kwargs["source_address"] = (self.source_ip, 0)
        return super().proxy_manager_for(*args, **kwargs)


# --- Enhanced Node Class with IP Binding ---
class SimulatedNode:
    """One simulated cluster node reporting status from a dedicated source IP."""

    def __init__(
        self,
        node_id: int,
        total_nodes: int,
        server_url: str,
        service_uuid: str,
        update_interval: int,
        ip_base: int,
        persistent_uuid: str = None,  # Allow reusing UUIDs for returning nodes
        source_ip: str = None,  # Explicit source IP (overrides ip_base arithmetic)
    ):
        """Create a node.

        Args:
            node_id: Monotonically increasing display/thread id.
            total_nodes: Expected cluster size (used for peer-count logging).
            server_url: Base URL of the status server.
            service_uuid: Service identifier used in the endpoint path.
            update_interval: Seconds between status updates.
            ip_base: Last-octet base for 127.0.0.x when ``source_ip`` is None.
            persistent_uuid: Reuse this UUID (node "returning" to the cluster).
            source_ip: Exact loopback IP to bind; preferred over ``ip_base``
                because ``node_id`` grows without bound across the run (see
                DynamicMultiNodeManager.create_new_node).
        """
        self.node_id = node_id
        self.node_uuid = persistent_uuid or str(uuid.uuid4())
        self.server_url = server_url  # Store server URL
        self.service_uuid = service_uuid  # Store service UUID
        self.update_interval = update_interval  # Store update interval
        self.uptime_seconds = 0
        self.known_peers = {}
        self.total_nodes = total_nodes
        self.running = False
        self.is_persistent = (
            persistent_uuid is not None
        )  # Track if this is a returning node

        # Assign the loopback IP: prefer the explicit one chosen by the
        # manager; fall back to the legacy node_id-based arithmetic.
        if source_ip is not None:
            self.source_ip = source_ip
        else:
            self.source_ip = f"127.0.0.{ip_base + node_id - 1}"

        # Create requests session with custom adapter for IP binding
        self.session = requests.Session()
        adapter = SourceIPHTTPAdapter(self.source_ip)
        self.session.mount("http://", adapter)
        self.session.mount("https://", adapter)

        # Each node gets slightly different characteristics
        if not self.is_persistent:  # Only randomize for new nodes
            self.base_load = random.uniform(0.2, 1.0)
            self.base_memory = random.uniform(40.0, 70.0)
            self.load_variance = random.uniform(0.1, 0.5)
            self.memory_variance = random.uniform(5.0, 15.0)

            # Some nodes might be "problematic" (higher load/memory)
            if random.random() < 0.2:  # 20% chance of being a "problematic" node
                self.base_load *= 2.0
                self.base_memory += 20.0
                logger.info(
                    f"Node {self.node_id} ({self.node_uuid[:8]}) will simulate high resource usage (IP: {self.source_ip})"
                )
        else:
            # Returning nodes keep some consistency but with variation
            self.base_load = random.uniform(0.3, 0.8)
            self.base_memory = random.uniform(45.0, 65.0)
            self.load_variance = random.uniform(0.1, 0.4)
            self.memory_variance = random.uniform(5.0, 12.0)

        logger.info(
            f"Node {self.node_id} ({'returning' if self.is_persistent else 'new'}) will bind to source IP: {self.source_ip}"
        )

    def test_ip_binding(self):
        """Test if we can bind to the assigned IP address."""
        try:
            # Try to create a socket and bind to the IP
            test_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            test_socket.bind((self.source_ip, 0))  # Bind to any available port
            test_socket.close()
            logger.debug(
                f"Node {self.node_id} successfully tested binding to {self.source_ip}"
            )
            return True
        except OSError as e:
            logger.error(f"Node {self.node_id} cannot bind to {self.source_ip}: {e}")
            logger.error(
                "Make sure the IP address is available on your loopback interface."
            )
            logger.error(
                f"You might need to add it with: sudo ifconfig lo0 alias {self.source_ip} (macOS)"
            )
            logger.error(f"Or: sudo ip addr add {self.source_ip}/8 dev lo (Linux)")
            return False

    def generate_node_status_data(self):
        """Generates simulated node status metrics with per-node characteristics."""
        self.uptime_seconds += self.update_interval + random.randint(-1, 2)

        # Generate load with some randomness but consistent per-node baseline
        load_1min = max(
            0.1,
            self.base_load + random.uniform(-self.load_variance, self.load_variance),
        )
        load_5min = max(0.1, load_1min * random.uniform(0.8, 1.0))
        load_15min = max(0.1, load_5min * random.uniform(0.8, 1.0))
        load_avg = [round(load_1min, 2), round(load_5min, 2), round(load_15min, 2)]

        # Generate memory usage with baseline + variance
        memory_usage = max(
            10.0,
            min(
                95.0,
                self.base_memory
                + random.uniform(-self.memory_variance, self.memory_variance),
            ),
        )

        return {
            "uptime_seconds": self.uptime_seconds,
            "load_avg": load_avg,
            "memory_usage_percent": round(memory_usage, 2),
        }

    def generate_ping_data(self):
        """Generates simulated ping latencies to known peers."""
        pings = {}

        # Ping to self (loopback)
        pings[self.node_uuid] = round(random.uniform(0.1, 1.5), 2)

        # Ping to known peers.  NOTE(review): the base latency is re-drawn on
        # every call, so per-peer latencies are independent between updates —
        # there is no per-peer consistency despite the earlier comment.
        for peer_uuid in self.known_peers.keys():
            if peer_uuid != self.node_uuid:
                base_latency = random.uniform(5.0, 100.0)
                variation = random.uniform(-10.0, 10.0)
                latency = max(0.1, base_latency + variation)
                pings[peer_uuid] = round(latency, 2)

        return pings

    def send_update(self):
        """Sends a single status update to the server using bound IP.

        Returns True on HTTP 200 (and merges the server's peer list into
        ``known_peers``); False on any error.
        """
        try:
            status_data = self.generate_node_status_data()
            ping_data = self.generate_ping_data()

            payload = {
                "node": self.node_uuid,
                "timestamp": datetime.now(timezone.utc).isoformat(),
                "status": status_data,
                "pings": ping_data,
            }

            endpoint_url = f"{self.server_url}/{self.service_uuid}/{self.node_uuid}/"

            logger.debug(
                f"Node {self.node_id} ({self.source_ip}) sending update. "
                f"Uptime: {status_data['uptime_seconds']}s, "
                f"Load: {status_data['load_avg'][0]}, Memory: {status_data['memory_usage_percent']}%, "
                f"Pings: {len(ping_data)}"
            )

            # Use the custom session with IP binding
            response = self.session.put(endpoint_url, json=payload, timeout=10)

            if response.status_code == 200:
                response_data = response.json()
                if "peers" in response_data and isinstance(
                    response_data["peers"], dict
                ):
                    new_peers = {k: v for k, v in response_data["peers"].items()}

                    # Log new peer discoveries
                    newly_discovered = set(new_peers.keys()) - set(
                        self.known_peers.keys()
                    )
                    if newly_discovered:
                        logger.info(
                            f"Node {self.node_id} ({self.source_ip}) discovered {len(newly_discovered)} new peer(s)"
                        )

                    self.known_peers = new_peers

                    if (
                        len(newly_discovered) > 0
                        or len(self.known_peers) != self.total_nodes - 1
                    ):
                        logger.debug(
                            f"Node {self.node_id} ({self.source_ip}) knows {len(self.known_peers)} peers "
                            f"(expected {self.total_nodes - 1})"
                        )

                return True
            else:
                logger.error(
                    f"Node {self.node_id} ({self.source_ip}) failed to send update. "
                    f"Status: {response.status_code}, Response: {response.text}"
                )
                return False

        except requests.exceptions.Timeout:
            logger.error(f"Node {self.node_id} ({self.source_ip}) request timed out")
            return False
        except requests.exceptions.ConnectionError as e:
            logger.error(
                f"Node {self.node_id} ({self.source_ip}) connection error: {e}"
            )
            return False
        except Exception as e:
            logger.error(
                f"Node {self.node_id} ({self.source_ip}) unexpected error: {e}"
            )
            return False

    def run(self):
        """Main loop for this simulated node."""
        # Test IP binding before starting
        if not self.test_ip_binding():
            logger.error(f"Node {self.node_id} cannot start due to IP binding failure")
            return

        self.running = True
        logger.info(
            f"Starting Node {self.node_id} ({'returning' if self.is_persistent else 'new'}) with UUID: {self.node_uuid[:8]}... (IP: {self.source_ip})"
        )

        # Add some initial delay to stagger node starts
        initial_delay = self.node_id * 0.5
        time.sleep(initial_delay)

        consecutive_failures = 0

        while self.running:
            try:
                success = self.send_update()

                if success:
                    consecutive_failures = 0
                else:
                    consecutive_failures += 1
                    if consecutive_failures >= 3:
                        logger.warning(
                            f"Node {self.node_id} ({self.source_ip}) has failed {consecutive_failures} consecutive updates"
                        )

                # Add some jitter to prevent thundering herd
                jitter = random.uniform(-1.0, 1.0)
                sleep_time = max(1.0, self.update_interval + jitter)
                time.sleep(sleep_time)

            except KeyboardInterrupt:
                logger.info(
                    f"Node {self.node_id} ({self.source_ip}) received interrupt signal"
                )
                break
            except Exception as e:
                logger.error(
                    f"Node {self.node_id} ({self.source_ip}) unexpected error in main loop: {e}"
                )
                time.sleep(self.update_interval)

        logger.info(f"Node {self.node_id} ({self.source_ip}) stopped")

    def stop(self):
        """Stop the node."""
        self.running = False


# --- Dynamic Multi-Node Manager ---
class DynamicMultiNodeManager:
    """Owns the fleet of SimulatedNodes and randomly grows/shrinks it."""

    def __init__(
        self,
        min_nodes: int,
        max_nodes: int,
        server_url: str,
        service_uuid: str,
        update_interval: int,
        ip_base: int,
        change_interval: int,
        lifecycle_variance: int,
    ):
        self.min_nodes = min_nodes
        self.max_nodes = max_nodes
        self.server_url = server_url
        self.service_uuid = service_uuid
        self.update_interval = update_interval
        self.ip_base = ip_base
        self.change_interval = change_interval
        self.lifecycle_variance = lifecycle_variance

        # Track active nodes and their threads
        self.active_nodes = {}  # node_id -> SimulatedNode
        self.node_threads = {}  # node_id -> Thread
        self.node_counter = 1  # For assigning unique node IDs

        # Track node history for potential returns
        self.departed_nodes = {}  # UUID -> {'characteristics', 'last_seen'}

        self.running = False
        self.manager_thread = None

    def get_available_ip_addresses(self):
        """Get list of (node_id_offset, ip) pairs not used by active nodes."""
        max_possible_ips = self.max_nodes * 2  # Allow some extra IPs
        available_ips = []
        used_ips = {node.source_ip for node in self.active_nodes.values()}

        for i in range(max_possible_ips):
            ip = f"127.0.0.{self.ip_base + i}"
            if ip not in used_ips:
                available_ips.append((i + 1, ip))  # (node_id_offset, ip)

        return available_ips

    def check_ip_availability(self, num_ips_needed):
        """Check if required number of IP addresses are available."""
        logger.info(f"Checking availability of {num_ips_needed} IP addresses...")

        available_ips = self.get_available_ip_addresses()
        if len(available_ips) < num_ips_needed:
            logger.error(
                f"Only {len(available_ips)} IP addresses available, need {num_ips_needed}"
            )
            return False

        # Test the first few IPs we'd actually use
        test_ips = available_ips[:num_ips_needed]
        all_available = True

        for node_id_offset, ip in test_ips:
            try:
                test_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
                test_socket.bind((ip, 0))
                test_socket.close()
            except OSError as e:
                logger.error(f"Cannot bind to {ip}: {e}")
                all_available = False

        if not all_available:
            logger.error("Some IP addresses are not available.")
            self.show_setup_commands()
            return False

        logger.info(f"All {num_ips_needed} IP addresses are available!")
        return True

    def show_setup_commands(self):
        """Show commands for setting up loopback IPs."""
        logger.info("=== Loopback IP Setup Commands ===")
        logger.info("Run these commands to add the required loopback IP addresses:")

        import platform

        system = platform.system().lower()
        max_ips_needed = self.max_nodes * 2

        for i in range(max_ips_needed):
            ip = f"127.0.0.{self.ip_base + i}"
            if system == "linux":
                logger.info(f"sudo ip addr add {ip}/8 dev lo")
            elif system == "darwin":  # macOS
                logger.info(f"sudo ifconfig lo0 alias {ip}")
            else:
                logger.info(f"Add {ip} to loopback interface (OS: {system})")

        logger.info("=" * 40)

    def create_new_node(self, return_existing=False):
        """Create a new node, optionally bringing back a departed node."""
        available_ips = self.get_available_ip_addresses()
        if not available_ips:
            logger.warning("No available IP addresses for new nodes")
            return None

        node_id_offset, source_ip = available_ips[0]
        node_id = self.node_counter
        self.node_counter += 1

        # Decide if we should bring back an old node (30% chance if we have departed nodes)
        persistent_uuid = None
        if return_existing and self.departed_nodes and random.random() < 0.3:
            # Pick a random departed node to bring back
            persistent_uuid = random.choice(list(self.departed_nodes.keys()))
            logger.info(
                f"Bringing back departed node {persistent_uuid[:8]}... as Node {node_id}"
            )
            # Remove from departed list since it's returning.
            # NOTE(review): the stored 'characteristics' are currently never
            # restored — the returning node re-randomizes its baselines.
            del self.departed_nodes[persistent_uuid]

        # BUG FIX: previously ip_base was passed as
        # ``self.ip_base + node_id_offset - 1`` while SimulatedNode added
        # ``node_id - 1`` on top.  node_id (node_counter) grows without
        # bound, so assigned IPs drifted past the provisioned loopback
        # range.  We now hand the node the exact free IP we selected.
        node = SimulatedNode(
            node_id=node_id,
            total_nodes=len(self.active_nodes) + 1,  # +1 for this new node
            server_url=self.server_url,
            service_uuid=self.service_uuid,
            update_interval=self.update_interval,
            ip_base=self.ip_base,
            persistent_uuid=persistent_uuid,
            source_ip=source_ip,
        )

        return node

    def start_node(self, node):
        """Start a single node in its own thread."""
        thread = threading.Thread(target=node.run, name=f"Node-{node.node_id}")
        thread.daemon = True
        thread.start()

        self.active_nodes[node.node_id] = node
        self.node_threads[node.node_id] = thread

        logger.info(
            f"βœ… Started Node {node.node_id} ({node.node_uuid[:8]}...) on {node.source_ip}"
        )

    def stop_node(self, node_id, permanently=False):
        """Stop a specific node."""
        if node_id not in self.active_nodes:
            return False

        node = self.active_nodes[node_id]
        thread = self.node_threads[node_id]

        # Store node info for potential return (unless it's permanently leaving)
        if not permanently and random.random() < 0.7:  # 70% chance node might return
            self.departed_nodes[node.node_uuid] = {
                # Timezone-aware for consistency with the payload timestamps.
                "last_seen": datetime.now(timezone.utc),
                "characteristics": {
                    "base_load": node.base_load,
                    "base_memory": node.base_memory,
                    "load_variance": node.load_variance,
                    "memory_variance": node.memory_variance,
                },
            }
            logger.info(
                f"⏸️ Node {node_id} ({node.node_uuid[:8]}...) departing temporarily"
            )
        else:
            logger.info(
                f"❌ Node {node_id} ({node.node_uuid[:8]}...) leaving permanently"
            )

        node.stop()
        thread.join(timeout=5.0)

        del self.active_nodes[node_id]
        del self.node_threads[node_id]

        return True

    def adjust_node_count(self):
        """Randomly adjust the number of active nodes within the specified range."""
        current_count = len(self.active_nodes)

        # Decide on target count
        target_count = random.randint(self.min_nodes, self.max_nodes)

        if target_count == current_count:
            logger.debug(f"Node count staying at {current_count}")
            return

        logger.info(f"πŸ”„ Adjusting node count: {current_count} β†’ {target_count}")

        if target_count > current_count:
            # Add nodes
            nodes_to_add = target_count - current_count
            for _ in range(nodes_to_add):
                node = self.create_new_node(return_existing=True)
                if node:
                    self.start_node(node)
                    time.sleep(random.uniform(1, 3))  # Stagger starts

        elif target_count < current_count:
            # Remove nodes
            nodes_to_remove = current_count - target_count
            active_node_ids = list(self.active_nodes.keys())
            nodes_to_stop = random.sample(active_node_ids, nodes_to_remove)

            for node_id in nodes_to_stop:
                # 20% chance node leaves permanently
                permanently = random.random() < 0.2
                self.stop_node(node_id, permanently=permanently)
                time.sleep(random.uniform(0.5, 2))  # Stagger stops

    def manage_node_lifecycle(self):
        """Main loop for managing node lifecycle changes."""
        logger.info("πŸš€ Starting dynamic node lifecycle management")

        # Start with minimum nodes
        logger.info(f"Initializing with {self.min_nodes} nodes...")
        for _ in range(self.min_nodes):
            node = self.create_new_node()
            if node:
                self.start_node(node)
                time.sleep(1)  # Brief delay between starts

        # Main management loop
        while self.running:
            try:
                # Wait for the change interval (with some randomness)
                variance = random.randint(
                    -self.lifecycle_variance, self.lifecycle_variance
                )
                sleep_time = max(
                    10, self.change_interval + variance
                )  # Minimum 10 seconds

                logger.debug(
                    f"Waiting {sleep_time} seconds until next potential change..."
                )
                time.sleep(sleep_time)

                if not self.running:
                    break

                # Randomly decide if we should make a change (70% chance)
                if random.random() < 0.7:
                    self.adjust_node_count()
                else:
                    logger.debug("Skipping this change cycle")

            except KeyboardInterrupt:
                logger.info("Node lifecycle manager received interrupt signal")
                break
            except Exception as e:
                logger.error(f"Error in node lifecycle management: {e}", exc_info=True)
                time.sleep(5)  # Brief pause before continuing

    def start_dynamic_management(self):
        """Start the dynamic node management system."""
        if not self.check_ip_availability(self.max_nodes * 2):
            logger.error(
                "Cannot start dynamic management due to IP availability issues"
            )
            return False

        self.running = True
        self.manager_thread = threading.Thread(
            target=self.manage_node_lifecycle, name="NodeLifecycleManager"
        )
        self.manager_thread.daemon = True
        self.manager_thread.start()

        return True

    def stop_all_nodes(self):
        """Stop all nodes and the management system."""
        logger.info("πŸ›‘ Stopping dynamic node management...")
        self.running = False

        # Stop the manager thread
        if self.manager_thread and self.manager_thread.is_alive():
            self.manager_thread.join(timeout=5.0)

        # Stop all active nodes
        for node_id in list(self.active_nodes.keys()):
            self.stop_node(node_id, permanently=True)

        logger.info("All nodes stopped")

    def print_status(self):
        """Print current status of the dynamic system."""
        active_count = len(self.active_nodes)
        departed_count = len(self.departed_nodes)

        logger.info(f"=== Dynamic Multi-Node Status ===")
        logger.info(
            f"Active nodes: {active_count} (range: {self.min_nodes}-{self.max_nodes})"
        )
        logger.info(f"Departed nodes (may return): {departed_count}")

        for node_id, node in self.active_nodes.items():
            status = "returning" if node.is_persistent else "new"
            logger.info(
                f"  Node {node_id}: {status}, IP={node.source_ip}, UUID={node.node_uuid[:8]}..., Uptime={node.uptime_seconds}s"
            )

        if departed_count > 0:
            logger.info("Departed nodes that might return:")
            for uuid_val in list(self.departed_nodes.keys())[:5]:  # Show first 5
                logger.info(f"  UUID={uuid_val[:8]}...")


def main():
    """CLI entry point: parse arguments, validate, and run the manager."""
    parser = argparse.ArgumentParser(
        description="Dynamic multi-node test client with fluctuating node counts"
    )
    parser.add_argument(
        "--min-nodes",
        type=int,
        default=DYNAMIC_MIN_NODES,
        help=f"Minimum number of nodes (default: {DYNAMIC_MIN_NODES})",
    )
    parser.add_argument(
        "--max-nodes",
        type=int,
        default=DYNAMIC_MAX_NODES,
        help=f"Maximum number of nodes (default: {DYNAMIC_MAX_NODES})",
    )
    parser.add_argument(
        "--change-interval",
        type=int,
        default=NODE_CHANGE_INTERVAL,
        help=f"Average seconds between node changes (default: {NODE_CHANGE_INTERVAL})",
    )
    parser.add_argument(
        "--lifecycle-variance",
        type=int,
        default=NODE_LIFECYCLE_VARIANCE,
        help=f"Random variance in change timing (default: {NODE_LIFECYCLE_VARIANCE})",
    )
    parser.add_argument(
        "--interval",
        type=int,
        default=UPDATE_INTERVAL_SECONDS,
        help=f"Update interval in seconds (default: {UPDATE_INTERVAL_SECONDS})",
    )
    parser.add_argument(
        "--server",
        type=str,
        default=SERVER_BASE_URL,
        help=f"Server URL (default: {SERVER_BASE_URL})",
    )
    parser.add_argument(
        "--service-uuid",
        type=str,
        default=TARGET_SERVICE_UUID,
        help="Target service UUID",
    )
    parser.add_argument(
        "--ip-base",
        type=int,
        default=LOOPBACK_IP_BASE,
        help=f"Starting IP for 127.0.0.X (default: {LOOPBACK_IP_BASE})",
    )
    parser.add_argument(
        "--setup-help",
        action="store_true",
        help="Show commands to set up loopback IP addresses",
    )
    parser.add_argument(
        "--verbose", "-v", action="store_true", help="Enable verbose logging"
    )

    # Keep the old --nodes argument for compatibility, but make it set max nodes
    parser.add_argument(
        "--nodes",
        type=int,
        help="Set max nodes (compatibility mode, same as --max-nodes)",
    )

    args = parser.parse_args()

    # Handle compatibility.  BUG FIX: explicit None check so --nodes 0 is
    # not silently ignored by a falsy test.
    if args.nodes is not None:
        args.max_nodes = args.nodes
        if (
            args.min_nodes == DYNAMIC_MIN_NODES
        ):  # Only adjust min if it wasn't explicitly set
            args.min_nodes = max(
                1, args.nodes - 2
            )  # Set min to a reasonable lower bound

    min_nodes = args.min_nodes
    max_nodes = args.max_nodes
    change_interval = args.change_interval
    lifecycle_variance = args.lifecycle_variance
    update_interval = args.interval
    server_url = args.server
    service_uuid = args.service_uuid
    ip_base = args.ip_base

    if args.verbose:
        logging.getLogger().setLevel(logging.DEBUG)

    # Validate configuration
    if min_nodes >= max_nodes:
        logger.error("Minimum nodes must be less than maximum nodes")
        return

    if max_nodes < 1:
        logger.error("Maximum nodes must be at least 1")
        return

    if args.setup_help:
        # Create a temporary manager just to show setup commands
        temp_manager = DynamicMultiNodeManager(
            min_nodes,
            max_nodes,
            server_url,
            service_uuid,
            update_interval,
            ip_base,
            change_interval,
            lifecycle_variance,
        )
        temp_manager.show_setup_commands()
        return

    if service_uuid == "REPLACE_ME_WITH_YOUR_SERVER_SERVICE_UUID":
        logger.error("=" * 60)
        logger.error("ERROR: TARGET_SERVICE_UUID is not set correctly!")
        logger.error(
            "Please set it via --service-uuid argument or TARGET_SERVICE_UUID environment variable."
        )
        logger.error("=" * 60)
        return

    logger.info("=" * 60)
    logger.info("Dynamic Multi-Node Test Client Configuration:")
    logger.info(f"  Node range: {min_nodes} - {max_nodes} nodes")
    logger.info(
        f"  Change interval: {change_interval}s (Β±{lifecycle_variance}s variance)"
    )
    logger.info(f"  Update interval: {update_interval} seconds per node")
    logger.info(f"  Server URL: {server_url}")
    logger.info(f"  Target Service UUID: {service_uuid}")
    logger.info(
        f"  IP range: 127.0.0.{ip_base} - 127.0.0.{ip_base + max_nodes * 2 - 1}"
    )
    logger.info("=" * 60)

    # Create and start the dynamic multi-node manager
    manager = DynamicMultiNodeManager(
        min_nodes,
        max_nodes,
        server_url,
        service_uuid,
        update_interval,
        ip_base,
        change_interval,
        lifecycle_variance,
    )

    try:
        if not manager.start_dynamic_management():
            logger.error("Failed to start dynamic node management")
            return

        # Main monitoring loop
        status_interval = 30  # Print status every 30 seconds
        last_status_time = time.time()

        logger.info(
            "🎯 Dynamic node management started! Nodes will fluctuate between {} and {} over time.".format(
                min_nodes, max_nodes
            )
        )
        logger.info("Press Ctrl+C to stop...")

        while True:
            time.sleep(5)  # Check every 5 seconds

            current_time = time.time()
            if current_time - last_status_time >= status_interval:
                manager.print_status()
                last_status_time = current_time

    except KeyboardInterrupt:
        logger.info("Received interrupt signal, shutting down...")
    except Exception as e:
        logger.error(f"Unexpected error in main: {e}", exc_info=True)
    finally:
        manager.stop_all_nodes()


if __name__ == "__main__":
    main()