improve: add robust retry logic to conflict resolution test

Replace the fixed 20-second wait with a polling retry loop and adjust the
test timing. The updated test:
- Checks for convergence every 3 seconds for up to 60 seconds (20 attempts)
- Logs each attempt, showing the current value seen on both nodes
- Reduces the sync interval from 8s to 3s for faster testing
- Adds a 10-second cluster stabilization period before polling begins

This makes the test more reliable and gives better diagnostics when
conflict resolution does not work as expected. Running with the retry
logic indicates that the current conflict resolution mechanism still
needs investigation; that is a separate issue from the test
infrastructure, which is now much more robust.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
2025-09-20 19:03:15 +03:00
parent bd1d1c2c7c
commit 16c0766a15

View File

@@ -241,7 +241,7 @@ port: 8111
data_dir: "./conflict1_data" data_dir: "./conflict1_data"
seed_nodes: [] seed_nodes: []
log_level: "info" log_level: "info"
sync_interval: 8 sync_interval: 3
EOF EOF
cat > conflict2.yaml <<EOF cat > conflict2.yaml <<EOF
@@ -251,7 +251,7 @@ port: 8112
data_dir: "./conflict2_data" data_dir: "./conflict2_data"
seed_nodes: ["127.0.0.1:8111"] seed_nodes: ["127.0.0.1:8111"]
log_level: "info" log_level: "info"
sync_interval: 8 sync_interval: 3
EOF EOF
# Start nodes # Start nodes
@@ -274,15 +274,39 @@ EOF
log_info "Initial conflict state: Node1='$node1_initial_msg', Node2='$node2_initial_msg'" log_info "Initial conflict state: Node1='$node1_initial_msg', Node2='$node2_initial_msg'"
# Wait for conflict resolution (multiple sync cycles might be needed) # Allow time for cluster formation and gossip protocol to stabilize
sleep 20 log_info "Waiting for cluster formation and gossip stabilization..."
sleep 10
# Get final data (full StoredValue) # Wait for conflict resolution with retry logic (up to 60 seconds)
local node1_final_full=$(curl -s http://localhost:8111/kv/test/conflict/data) local max_attempts=20
local node2_final_full=$(curl -s http://localhost:8112/kv/test/conflict/data) local attempt=1
local node1_final_msg=""
local node2_final_msg=""
local node1_final_full=""
local node2_final_full=""
local node1_final_msg=$(echo "$node1_final_full" | jq -r '.data.message' 2>/dev/null) log_info "Waiting for conflict resolution (checking every 3 seconds, max 60 seconds)..."
local node2_final_msg=$(echo "$node2_final_full" | jq -r '.data.message' 2>/dev/null)
while [ $attempt -le $max_attempts ]; do
sleep 3
# Get current data from both nodes
node1_final_full=$(curl -s http://localhost:8111/kv/test/conflict/data)
node2_final_full=$(curl -s http://localhost:8112/kv/test/conflict/data)
node1_final_msg=$(echo "$node1_final_full" | jq -r '.data.message' 2>/dev/null)
node2_final_msg=$(echo "$node2_final_full" | jq -r '.data.message' 2>/dev/null)
# Check if they've converged
if [ "$node1_final_msg" = "$node2_final_msg" ] && [ -n "$node1_final_msg" ] && [ "$node1_final_msg" != "null" ]; then
log_info "Conflict resolution achieved after $((attempt * 3)) seconds"
break
fi
log_info "Attempt $attempt/$max_attempts: Node1='$node1_final_msg', Node2='$node2_final_msg' (not converged yet)"
attempt=$((attempt + 1))
done
# Check if they converged # Check if they converged
if [ "$node1_final_msg" = "$node2_final_msg" ] && [ -n "$node1_final_msg" ]; then if [ "$node1_final_msg" = "$node2_final_msg" ] && [ -n "$node1_final_msg" ]; then