forked from ryyst/kalzu-value-store
improve: add robust retry logic to conflict resolution test
Replace the fixed 20-second wait with intelligent retry logic that:

- Checks for convergence every 3 seconds for up to 60 seconds
- Provides detailed progress logging showing the current state
- Reduces the sync interval from 8s to 3s for faster testing
- Adds a 10-second cluster stabilization period

This makes the test more reliable and provides better diagnostics when conflict resolution doesn't work as expected. The retry logic reveals that the current conflict resolution mechanism needs investigation, but the test infrastructure itself is now much more robust.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@@ -241,7 +241,7 @@ port: 8111
|
||||
data_dir: "./conflict1_data"
|
||||
seed_nodes: []
|
||||
log_level: "info"
|
||||
sync_interval: 8
|
||||
sync_interval: 3
|
||||
EOF
|
||||
|
||||
cat > conflict2.yaml <<EOF
|
||||
@@ -251,7 +251,7 @@ port: 8112
|
||||
data_dir: "./conflict2_data"
|
||||
seed_nodes: ["127.0.0.1:8111"]
|
||||
log_level: "info"
|
||||
sync_interval: 8
|
||||
sync_interval: 3
|
||||
EOF
|
||||
|
||||
# Start nodes
|
||||
@@ -274,15 +274,39 @@ EOF
|
||||
|
||||
log_info "Initial conflict state: Node1='$node1_initial_msg', Node2='$node2_initial_msg'"
|
||||
|
||||
# Wait for conflict resolution (multiple sync cycles might be needed)
|
||||
sleep 20
|
||||
# Allow time for cluster formation and gossip protocol to stabilize
|
||||
log_info "Waiting for cluster formation and gossip stabilization..."
|
||||
sleep 10
|
||||
|
||||
# Get final data (full StoredValue)
|
||||
local node1_final_full=$(curl -s http://localhost:8111/kv/test/conflict/data)
|
||||
local node2_final_full=$(curl -s http://localhost:8112/kv/test/conflict/data)
|
||||
# Wait for conflict resolution with retry logic (up to 60 seconds)
|
||||
local max_attempts=20
|
||||
local attempt=1
|
||||
local node1_final_msg=""
|
||||
local node2_final_msg=""
|
||||
local node1_final_full=""
|
||||
local node2_final_full=""
|
||||
|
||||
local node1_final_msg=$(echo "$node1_final_full" | jq -r '.data.message' 2>/dev/null)
|
||||
local node2_final_msg=$(echo "$node2_final_full" | jq -r '.data.message' 2>/dev/null)
|
||||
log_info "Waiting for conflict resolution (checking every 3 seconds, max 60 seconds)..."
|
||||
|
||||
while [ $attempt -le $max_attempts ]; do
|
||||
sleep 3
|
||||
|
||||
# Get current data from both nodes
|
||||
node1_final_full=$(curl -s http://localhost:8111/kv/test/conflict/data)
|
||||
node2_final_full=$(curl -s http://localhost:8112/kv/test/conflict/data)
|
||||
|
||||
node1_final_msg=$(echo "$node1_final_full" | jq -r '.data.message' 2>/dev/null)
|
||||
node2_final_msg=$(echo "$node2_final_full" | jq -r '.data.message' 2>/dev/null)
|
||||
|
||||
# Check if they've converged
|
||||
if [ "$node1_final_msg" = "$node2_final_msg" ] && [ -n "$node1_final_msg" ] && [ "$node1_final_msg" != "null" ]; then
|
||||
log_info "Conflict resolution achieved after $((attempt * 3)) seconds"
|
||||
break
|
||||
fi
|
||||
|
||||
log_info "Attempt $attempt/$max_attempts: Node1='$node1_final_msg', Node2='$node2_final_msg' (not converged yet)"
|
||||
attempt=$((attempt + 1))
|
||||
done
|
||||
|
||||
# Check if they converged
|
||||
if [ "$node1_final_msg" = "$node2_final_msg" ] && [ -n "$node1_final_msg" ]; then
|
||||
|
Reference in New Issue
Block a user