Bug fixes in input and onramp. Hot config reload on signals. Added example utility scripts for signals.

This commit is contained in:
Kalzu Rekku
2026-01-17 14:47:13 +02:00
parent 7d7038d6bd
commit aa216981d2
16 changed files with 2339 additions and 306 deletions

173
input/README.md Normal file
View File

@@ -0,0 +1,173 @@
# WebSocket Streamer (JSONL Logger)
This Go program connects to a WebSocket endpoint, subscribes to a topic, and continuously writes incoming messages to hourly-rotated `.jsonl` files.
It is designed for **long-running, low-overhead data capture** with basic observability and operational safety.
Typical use cases include:
* Market data capture (e.g. crypto trades)
* Event stream archiving
* Lightweight ingestion on small servers (VPS, Raspberry Pi, etc.)
---
## Features
* WebSocket client with automatic reconnect
* Topic subscription (configurable)
* Hourly file rotation (`.jsonl` format)
* Buffered channel to decouple network I/O from disk writes
* Atomic message counters
* Periodic status logging
* Unix domain socket for live status queries
* Graceful shutdown on SIGINT / SIGTERM
* Configurable logging (file and/or stdout)
---
## How It Works
1. Connects to a WebSocket endpoint
2. Sends a subscription message for the configured topic
3. Maintains connection with periodic `ping`
4. Reads messages and pushes them into a buffered channel
5. Writes messages line-by-line to hourly JSONL files
6. Exposes runtime status via:
* Periodic log output
* Unix socket query (`--status` mode)
---
## Output Format
Messages are written **verbatim** as received, one JSON object per line:
```
output/
├── publicTrade.BTCUSDT_1700000000.jsonl
├── publicTrade.BTCUSDT_1700003600.jsonl
└── ...
```
Each file contains data for exactly one UTC hour.
---
## Configuration
The application is configured via a JSON file.
### Example `config.json`
```json
{
"output_dir": "./output",
"topic": "publicTrade.BTCUSDT",
"ws_url": "wss://stream.bybit.com/v5/public/linear",
"buffer_size": 10000,
"status_interval": 30,
"log_file": "system.log",
"log_to_stdout": false,
"status_socket": "/tmp/streamer.sock"
}
```
### Configuration Fields
| Field | Description |
| ----------------- | ----------------------------------- |
| `output_dir` | Directory for JSONL output files |
| `topic` | WebSocket subscription topic |
| `ws_url` | WebSocket endpoint URL |
| `buffer_size` | Size of internal message buffer |
| `status_interval` | Seconds between status log messages |
| `log_file` | Log file path |
| `log_to_stdout` | Also log to stdout |
| `status_socket` | Unix socket path for status queries |
Defaults are applied automatically if fields are omitted.
---
## Command Line Flags
| Flag | Description |
| --------- | -------------------------------------------- |
| `-config` | Path to config file (default: `config.json`) |
| `-debug` | Force logs to stdout (overrides config) |
| `-status` | Query running instance status and exit |
---
## Running the Streamer
```bash
go run main.go -config config.json
```
Or build a binary:
```bash
go build -o streamer
./streamer -config config.json
```
---
## Querying Runtime Status
While the streamer is running:
```bash
./streamer -status -config config.json
```
Example output:
```
Uptime: 12m34s | Total Msgs: 152340 | Rate: 7260.12 msg/min
```
This works via a Unix domain socket and does **not** interrupt the running process.
---
## Logging
* Logs are written to `log_file`
* Optional stdout logging for debugging
* Includes:
* Startup information
* Connection errors and reconnects
* Buffer overflow warnings
* Periodic status summaries
---
## Graceful Shutdown
On `SIGINT` or `SIGTERM`:
* WebSocket connection closes
* Status socket is removed
* Current output file is flushed and closed
Safe to run under systemd, Docker, or supervisord.
---
## Dependencies
* Go 1.20+
* [`github.com/gorilla/websocket`](https://github.com/gorilla/websocket)
---
## Notes & Design Choices
* **JSONL** is used for easy streaming, compression, and downstream processing
* Hourly rotation avoids large files and simplifies retention policies
* Unix socket status avoids HTTP overhead and exposed ports
* Minimal memory footprint, suitable for low-end machines

View File

@@ -5,5 +5,7 @@
"buffer_size": 10000,
"log_file": "system.log",
"log_to_stdout": false,
"status_interval": 30
"status_interval": 30,
"gzip_after_hours": 6,
"gzip_check_interval": 3000
}

View File

@@ -2,4 +2,4 @@ module input
go 1.25.0
require github.com/gorilla/websocket v1.5.3 // indirect
require github.com/gorilla/websocket v1.5.3

View File

@@ -2,19 +2,22 @@ package main
import (
"encoding/json"
"compress/gzip"
"flag"
"fmt"
"io"
"log"
"net"
"os"
"sort"
"strconv"
"os/signal"
"path/filepath"
"strings"
"sync"
"sync/atomic"
"syscall"
"time"
"github.com/gorilla/websocket"
)
@@ -83,7 +86,7 @@ func main() {
stop := make(chan os.Signal, 1)
signal.Notify(stop, os.Interrupt, syscall.SIGTERM)
<-stop
log.Println("Shutting down gracefully...")
os.Remove(s.config.StatusSocket)
}
@@ -122,7 +125,7 @@ func (s *Streamer) statusLoop() {
}
func (s *Streamer) statusServer() {
os.Remove(s.config.StatusSocket)
os.Remove(s.config.StatusSocket)
l, err := net.Listen("unix", s.config.StatusSocket)
if err != nil {
log.Fatalf("Failed to listen on status socket: %v", err)
@@ -216,7 +219,7 @@ func (s *Streamer) rotate(t time.Time) {
if s.currentFile != nil {
s.currentFile.Close()
}
if err := os.MkdirAll(s.config.OutputDir, 0755); err != nil {
log.Printf("Error creating output dir: %v", err)
return
@@ -225,7 +228,7 @@ func (s *Streamer) rotate(t time.Time) {
s.currentHour = t.Hour()
name := fmt.Sprintf("%s_%d.jsonl", s.config.Topic, t.Truncate(time.Hour).Unix())
filePath := filepath.Join(s.config.OutputDir, name)
f, err := os.OpenFile(filePath, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644)
if err != nil {
log.Printf("Error opening data file: %v", err)
@@ -233,6 +236,100 @@ func (s *Streamer) rotate(t time.Time) {
return
}
s.currentFile = f
// After rotation, compress old files (keeping current and N-1 as plaintext)
go s.compressOldFiles()
}
func (s *Streamer) compressOldFiles() {
entries, err := os.ReadDir(s.config.OutputDir)
if err != nil {
log.Printf("Gzip scan error: %v", err)
return
}
// Collect all .jsonl files with their timestamps
type fileInfo struct {
path string
timestamp int64
}
var jsonlFiles []fileInfo
for _, e := range entries {
if e.IsDir() {
continue
}
name := e.Name()
if !strings.HasSuffix(name, ".jsonl") {
continue
}
// Extract timestamp from filename: topic_TIMESTAMP.jsonl
parts := strings.Split(name, "_")
if len(parts) < 2 {
continue
}
tsStr := strings.TrimSuffix(parts[len(parts)-1], ".jsonl")
ts, err := strconv.ParseInt(tsStr, 10, 64)
if err != nil {
continue
}
fullPath := filepath.Join(s.config.OutputDir, name)
jsonlFiles = append(jsonlFiles, fileInfo{path: fullPath, timestamp: ts})
}
// Sort by timestamp (newest first)
sort.Slice(jsonlFiles, func(i, j int) bool {
return jsonlFiles[i].timestamp > jsonlFiles[j].timestamp
})
// Keep the 2 newest files (current + N-1) as plaintext, gzip the rest
for i, fi := range jsonlFiles {
if i < 2 {
// Skip the 2 newest files
continue
}
if err := gzipFile(fi.path); err != nil {
log.Printf("Gzip failed for %s: %v", filepath.Base(fi.path), err)
} else {
log.Printf("Compressed %s", filepath.Base(fi.path))
}
}
}
func gzipFile(path string) error {
in, err := os.Open(path)
if err != nil {
return err
}
defer in.Close()
outPath := path + ".gz"
out, err := os.Create(outPath)
if err != nil {
return err
}
gw := gzip.NewWriter(out)
if _, err := io.Copy(gw, in); err != nil {
gw.Close()
out.Close()
return err
}
if err := gw.Close(); err != nil {
out.Close()
return err
}
if err := out.Close(); err != nil {
return err
}
return os.Remove(path)
}
func setupLogger(c *Config, debugFlag bool) {
@@ -273,4 +370,4 @@ func loadConfig(path string) (*Config, error) {
if conf.StatusSocket == "" { conf.StatusSocket = "/tmp/streamer.sock" }
return &conf, nil
}
}