package main import ( "bytes" "context" "encoding/json" "flag" "fmt" "io" "log" "net" "net/http" "os" "os/exec" "os/signal" "regexp" "strconv" "strings" "sync" "syscall" "time" "github.com/go-ping/ping" "gopkg.in/yaml.v3" ) const VERSION = "1.0.0" type Config struct { InputFile string `yaml:"input_file"` OutputFile string `yaml:"output_file"` IntervalSeconds int `yaml:"interval_seconds"` CooldownMinutes int `yaml:"cooldown_minutes"` EnableTraceroute bool `yaml:"enable_traceroute"` TracerouteMaxHops int `yaml:"traceroute_max_hops"` HealthCheckPort int `yaml:"health_check_port"` } type PingResult struct { IP string `json:"ip"` Sent int `json:"sent"` Received int `json:"received"` PacketLoss float64 `json:"packet_loss"` AvgRtt time.Duration `json:"avg_rtt"` Timestamp time.Time `json:"timestamp"` Error string `json:"error,omitempty"` Traceroute *TracerouteResult `json:"traceroute,omitempty"` } type TracerouteResult struct { Method string `json:"method"` Hops []TracerouteHop `json:"hops"` Completed bool `json:"completed"` Error string `json:"error,omitempty"` } type TracerouteHop struct { TTL int `json:"ttl"` IP string `json:"ip"` Rtt time.Duration `json:"rtt,omitempty"` Timeout bool `json:"timeout,omitempty"` } type HealthStatus struct { Status string `json:"status"` Version string `json:"version"` Uptime string `json:"uptime"` LastRun time.Time `json:"last_run"` TotalPings int64 `json:"total_pings"` SuccessfulPings int64 `json:"successful_pings"` FailedPings int64 `json:"failed_pings"` } var ( cooldownCache = make(map[string]time.Time) cacheMux sync.Mutex verbose bool startTime time.Time health HealthStatus healthMux sync.RWMutex ) func main() { // CLI flags configPath := flag.String("config", "config.yaml", "Path to config file") verboseFlag := flag.Bool("v", false, "Enable verbose logging") flag.BoolVar(verboseFlag, "verbose", false, "Enable verbose logging") versionFlag := flag.Bool("version", false, "Show version") help := flag.Bool("help", false, "Show help message") flag.Parse() if *versionFlag { fmt.Printf("ping-service version %s\n", VERSION) os.Exit(0) } if *help { fmt.Println("Ping Service - Monitor network endpoints via ping and traceroute") fmt.Printf("Version: %s\n\n", VERSION) fmt.Println("Usage:") flag.PrintDefaults() fmt.Println("\nConfig file format (YAML):") fmt.Println(" input_file: Path/URL to file containing IPs (one per line)") fmt.Println(" output_file: Path/URL/socket for JSON results") fmt.Println(" interval_seconds: How often to check for new IPs") fmt.Println(" cooldown_minutes: Minimum time between pings for same IP") fmt.Println(" enable_traceroute: Enable traceroute after successful ping") fmt.Println(" traceroute_max_hops: Maximum TTL for traceroute (default: 30)") fmt.Println(" health_check_port: HTTP port for health checks (default: 8090)") os.Exit(0) } verbose = *verboseFlag startTime = time.Now() // Initialize health status health = HealthStatus{ Status: "starting", Version: VERSION, } config := loadConfig(*configPath) if config.CooldownMinutes == 0 { config.CooldownMinutes = 10 } if config.TracerouteMaxHops == 0 { config.TracerouteMaxHops = 30 } if config.HealthCheckPort == 0 { config.HealthCheckPort = 8090 } // Start health check server go startHealthCheckServer(config.HealthCheckPort) // Setup graceful shutdown ctx, cancel := context.WithCancel(context.Background()) defer cancel() sigChan := make(chan os.Signal, 1) signal.Notify(sigChan, os.Interrupt, syscall.SIGTERM) go func() { sig := <-sigChan log.Printf("Received signal %v, shutting down gracefully...", sig) updateHealth("shutting_down") cancel() }() ticker := time.NewTicker(time.Duration(config.IntervalSeconds) * time.Second) defer ticker.Stop() log.Printf("App started. Version: %s", VERSION) log.Printf("Cooldown set to %d mins. Polling every %ds...", config.CooldownMinutes, config.IntervalSeconds) if config.EnableTraceroute { log.Printf("Traceroute enabled (max %d hops)", config.TracerouteMaxHops) } log.Printf("Health check available at http://localhost:%d/health", config.HealthCheckPort) updateHealth("running") // Main loop for { select { case <-ctx.Done(): log.Println("Shutdown complete") return case <-ticker.C: process(config) } } } func startHealthCheckServer(port int) { http.HandleFunc("/health", healthCheckHandler) http.HandleFunc("/ready", readinessHandler) http.HandleFunc("/metrics", metricsHandler) addr := fmt.Sprintf(":%d", port) log.Printf("Starting health check server on %s", addr) server := &http.Server{ Addr: addr, ReadTimeout: 5 * time.Second, WriteTimeout: 5 * time.Second, } if err := server.ListenAndServe(); err != nil && err != http.ErrServerClosed { log.Printf("Health check server error: %v", err) } } func healthCheckHandler(w http.ResponseWriter, r *http.Request) { healthMux.RLock() defer healthMux.RUnlock() health.Uptime = time.Since(startTime).String() w.Header().Set("Content-Type", "application/json") json.NewEncoder(w).Encode(health) } func readinessHandler(w http.ResponseWriter, r *http.Request) { healthMux.RLock() defer healthMux.RUnlock() if health.Status == "running" { w.WriteHeader(http.StatusOK) w.Write([]byte(`{"ready": true}`)) } else { w.WriteHeader(http.StatusServiceUnavailable) w.Write([]byte(`{"ready": false}`)) } } func metricsHandler(w http.ResponseWriter, r *http.Request) { healthMux.RLock() defer healthMux.RUnlock() // Prometheus-style metrics w.Header().Set("Content-Type", "text/plain") fmt.Fprintf(w, "# HELP ping_service_total_pings Total number of pings\n") fmt.Fprintf(w, "# TYPE ping_service_total_pings counter\n") fmt.Fprintf(w, "ping_service_total_pings %d\n", health.TotalPings) fmt.Fprintf(w, "# HELP ping_service_successful_pings Successful pings\n") fmt.Fprintf(w, "# TYPE ping_service_successful_pings counter\n") fmt.Fprintf(w, "ping_service_successful_pings %d\n", health.SuccessfulPings) fmt.Fprintf(w, "# HELP ping_service_failed_pings Failed pings\n") fmt.Fprintf(w, "# TYPE ping_service_failed_pings counter\n") fmt.Fprintf(w, "ping_service_failed_pings %d\n", health.FailedPings) fmt.Fprintf(w, "# HELP ping_service_uptime_seconds Uptime in seconds\n") fmt.Fprintf(w, "# TYPE ping_service_uptime_seconds gauge\n") fmt.Fprintf(w, "ping_service_uptime_seconds %.0f\n", time.Since(startTime).Seconds()) } func updateHealth(status string) { healthMux.Lock() defer healthMux.Unlock() health.Status = status } func updateStats(lastRun time.Time, successful, failed int) { healthMux.Lock() defer healthMux.Unlock() health.LastRun = lastRun health.TotalPings += int64(successful + failed) health.SuccessfulPings += int64(successful) health.FailedPings += int64(failed) } func loadConfig(path string) *Config { f, err := os.ReadFile(path) if err != nil { log.Fatalf("Error reading config: %v", err) } var cfg Config err = yaml.Unmarshal(f, &cfg) if err != nil { log.Fatalf("Error parsing config: %v", err) } return &cfg } func process(cfg *Config) { startTime := time.Now() // 1. Read IPs data, err := readSource(cfg.InputFile) if err != nil { log.Printf("Input Error: %v", err) return } rawIps := strings.Fields(string(data)) if verbose { log.Printf("Read %d IPs from %s", len(rawIps), cfg.InputFile) } // Filter IPs based on cooldown var ipsToPing []string cacheMux.Lock() for _, ip := range rawIps { if last, ok := cooldownCache[ip]; ok { if time.Since(last) < time.Duration(cfg.CooldownMinutes)*time.Minute { if verbose { log.Printf("Skipping %s (cooldown: %v remaining)", ip, time.Duration(cfg.CooldownMinutes)*time.Minute-time.Since(last)) } continue } } ipsToPing = append(ipsToPing, ip) cooldownCache[ip] = time.Now() } cacheMux.Unlock() if len(ipsToPing) == 0 { if verbose { log.Println("No IPs to ping (all in cooldown)") } return } if verbose { log.Printf("Pinging %d IPs: %v", len(ipsToPing), ipsToPing) } // 2. Perform Pings var wg sync.WaitGroup results := make([]PingResult, len(ipsToPing)) successful := 0 failed := 0 for i, ip := range ipsToPing { wg.Add(1) go func(idx int, targetIP string) { defer wg.Done() results[idx] = runPing(targetIP) if results[idx].Error == "" && results[idx].Received > 0 { successful++ } else { failed++ } // If ping successful and traceroute enabled, do traceroute if cfg.EnableTraceroute && results[idx].Error == "" && results[idx].Received > 0 { if verbose { log.Printf("Running traceroute to %s...", targetIP) } results[idx].Traceroute = runTraceroute(targetIP, cfg.TracerouteMaxHops) } if verbose { if results[idx].Error != "" { log.Printf("Pinged %s: ERROR - %s", results[idx].IP, results[idx].Error) } else { log.Printf("Pinged %s: %d/%d packets, %.2f%% loss, avg RTT: %v", results[idx].IP, results[idx].Received, results[idx].Sent, results[idx].PacketLoss, results[idx].AvgRtt) if results[idx].Traceroute != nil { log.Printf(" Traceroute: %d hops via %s", len(results[idx].Traceroute.Hops), results[idx].Traceroute.Method) } } } }(i, ip) } wg.Wait() // Update stats updateStats(time.Now(), successful, failed) // 3. Write Output outputData, _ := json.MarshalIndent(results, "", " ") err = writeDestination(cfg.OutputFile, outputData) if err != nil { log.Printf("Output Error: %v", err) } else if verbose { log.Printf("Wrote results to %s (took %v)", cfg.OutputFile, time.Since(startTime)) } } func handleSocket(path string, data []byte, mode string) ([]byte, error) { if _, err := os.Stat(path); err == nil { conn, err := net.DialTimeout("unix", path, 2*time.Second) if err != nil { os.Remove(path) return handleSocket(path, data, mode) } defer conn.Close() if mode == "read" { return io.ReadAll(conn) } else { _, err = conn.Write(data) return nil, err } } else { l, err := net.Listen("unix", path) if err != nil { return nil, err } defer l.Close() defer os.Remove(path) l.(*net.UnixListener).SetDeadline(time.Now().Add(10 * time.Second)) conn, err := l.Accept() if err != nil { return nil, fmt.Errorf("timeout waiting for socket connection: %v", err) } defer conn.Close() if mode == "read" { return io.ReadAll(conn) } else { _, err = conn.Write(data) return nil, err } } } func readSource(src string) ([]byte, error) { if strings.HasPrefix(src, "http") { resp, err := http.Get(src) if err != nil { return nil, err } defer resp.Body.Close() return io.ReadAll(resp.Body) } if strings.HasSuffix(src, ".sock") { return handleSocket(src, nil, "read") } return os.ReadFile(src) } func writeDestination(dest string, data []byte) error { if strings.HasPrefix(dest, "http") { resp, err := http.Post(dest, "application/json", bytes.NewBuffer(data)) if err != nil { return err } defer resp.Body.Close() return nil } if strings.HasSuffix(dest, ".sock") { _, err := handleSocket(dest, data, "write") return err } return os.WriteFile(dest, data, 0644) } func runPing(ip string) PingResult { pinger, err := ping.NewPinger(ip) res := PingResult{IP: ip, Timestamp: time.Now()} if err != nil { res.Error = err.Error() return res } pinger.Count = 3 pinger.Timeout = time.Second * 5 err = pinger.Run() if err != nil { res.Error = err.Error() return res } stats := pinger.Statistics() res.Sent = stats.PacketsSent res.Received = stats.PacketsRecv res.PacketLoss = stats.PacketLoss res.AvgRtt = stats.AvgRtt return res } func runTraceroute(ip string, maxHops int) *TracerouteResult { result := &TracerouteResult{ Method: "icmp", Hops: []TracerouteHop{}, } if tr := tryICMPTraceroute(ip, maxHops); tr != nil { return tr } if verbose { log.Printf("ICMP traceroute failed for %s, trying TCP...", ip) } for _, port := range []int{80, 443, 22} { if tr := tryTCPTraceroute(ip, port, maxHops); tr != nil { tr.Method = fmt.Sprintf("tcp/%d", port) return tr } } result.Error = "traceroute failed (both ICMP and TCP)" return result } func tryICMPTraceroute(ip string, maxHops int) *TracerouteResult { var cmd *exec.Cmd switch { case fileExists("/usr/bin/traceroute"): cmd = exec.Command("traceroute", "-m", strconv.Itoa(maxHops), "-n", "-q", "1", ip) case fileExists("/usr/sbin/traceroute"): cmd = exec.Command("traceroute", "-m", strconv.Itoa(maxHops), "-n", "-q", "1", ip) case fileExists("/bin/traceroute"): cmd = exec.Command("traceroute", "-m", strconv.Itoa(maxHops), "-n", "-q", "1", ip) default: cmd = exec.Command("traceroute", "-m", strconv.Itoa(maxHops), "-n", "-q", "1", ip) } output, err := cmd.CombinedOutput() if err != nil { return nil } return parseTracerouteOutput(string(output), "icmp") } func tryTCPTraceroute(ip string, port int, maxHops int) *TracerouteResult { var cmd *exec.Cmd if fileExists("/usr/bin/tcptraceroute") || fileExists("/usr/sbin/tcptraceroute") { cmd = exec.Command("tcptraceroute", "-m", strconv.Itoa(maxHops), "-n", "-q", "1", ip, strconv.Itoa(port)) } else { cmd = exec.Command("traceroute", "-T", "-p", strconv.Itoa(port), "-m", strconv.Itoa(maxHops), "-n", "-q", "1", ip) } output, err := cmd.CombinedOutput() if err != nil { return nil } return parseTracerouteOutput(string(output), fmt.Sprintf("tcp/%d", port)) } func parseTracerouteOutput(output string, method string) *TracerouteResult { result := &TracerouteResult{ Method: method, Hops: []TracerouteHop{}, Completed: false, } lines := strings.Split(output, "\n") hopRegex := regexp.MustCompile(`^\s*(\d+)\s+([0-9.]+)\s+([0-9.]+)\s*ms`) timeoutRegex := regexp.MustCompile(`^\s*(\d+)\s+\*`) for _, line := range lines { if match := hopRegex.FindStringSubmatch(line); match != nil { ttl, _ := strconv.Atoi(match[1]) hopIP := match[2] rttMs, _ := strconv.ParseFloat(match[3], 64) rtt := time.Duration(rttMs * float64(time.Millisecond)) result.Hops = append(result.Hops, TracerouteHop{ TTL: ttl, IP: hopIP, Rtt: rtt, }) } else if match := timeoutRegex.FindStringSubmatch(line); match != nil { ttl, _ := strconv.Atoi(match[1]) result.Hops = append(result.Hops, TracerouteHop{ TTL: ttl, Timeout: true, }) } } if len(result.Hops) > 0 { result.Completed = true } return result } func fileExists(path string) bool { _, err := os.Stat(path) return err == nil }