improve: add robust retry logic to conflict resolution test

Replace the fixed 20-second wait with intelligent retry logic. This change:
- Checks for convergence every 3 seconds for up to 60 seconds
- Provides detailed progress logging showing current state
- Reduces sync interval from 8s to 3s for faster testing
- Adds 10-second cluster stabilization period

This makes the test more reliable and provides better diagnostics when
conflict resolution doesn't work as expected. The retry logic reveals
that the current conflict resolution mechanism needs investigation,
but the test infrastructure itself is now much more robust.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
2025-09-20 19:03:15 +03:00
parent bd1d1c2c7c
commit 16c0766a15

View File

@@ -241,7 +241,7 @@ port: 8111
data_dir: "./conflict1_data"
seed_nodes: []
log_level: "info"
sync_interval: 8
sync_interval: 3
EOF
cat > conflict2.yaml <<EOF
@@ -251,7 +251,7 @@ port: 8112
data_dir: "./conflict2_data"
seed_nodes: ["127.0.0.1:8111"]
log_level: "info"
sync_interval: 8
sync_interval: 3
EOF
# Start nodes
@@ -274,15 +274,39 @@ EOF
log_info "Initial conflict state: Node1='$node1_initial_msg', Node2='$node2_initial_msg'"
# Wait for conflict resolution (multiple sync cycles might be needed)
sleep 20
# Allow time for cluster formation and gossip protocol to stabilize
log_info "Waiting for cluster formation and gossip stabilization..."
sleep 10
# Get final data (full StoredValue)
local node1_final_full=$(curl -s http://localhost:8111/kv/test/conflict/data)
local node2_final_full=$(curl -s http://localhost:8112/kv/test/conflict/data)
local node1_final_msg=$(echo "$node1_final_full" | jq -r '.data.message' 2>/dev/null)
local node2_final_msg=$(echo "$node2_final_full" | jq -r '.data.message' 2>/dev/null)
# Wait for conflict resolution with retry logic (up to 60 seconds)
local max_attempts=20
local attempt=1
local node1_final_msg=""
local node2_final_msg=""
local node1_final_full=""
local node2_final_full=""
log_info "Waiting for conflict resolution (checking every 3 seconds, max 60 seconds)..."
while [ $attempt -le $max_attempts ]; do
sleep 3
# Get current data from both nodes
node1_final_full=$(curl -s http://localhost:8111/kv/test/conflict/data)
node2_final_full=$(curl -s http://localhost:8112/kv/test/conflict/data)
node1_final_msg=$(echo "$node1_final_full" | jq -r '.data.message' 2>/dev/null)
node2_final_msg=$(echo "$node2_final_full" | jq -r '.data.message' 2>/dev/null)
# Check if they've converged
if [ "$node1_final_msg" = "$node2_final_msg" ] && [ -n "$node1_final_msg" ] && [ "$node1_final_msg" != "null" ]; then
log_info "Conflict resolution achieved after $((attempt * 3)) seconds"
break
fi
log_info "Attempt $attempt/$max_attempts: Node1='$node1_final_msg', Node2='$node2_final_msg' (not converged yet)"
attempt=$((attempt + 1))
done
# Check if they converged
if [ "$node1_final_msg" = "$node2_final_msg" ] && [ -n "$node1_final_msg" ]; then