Implemented a comprehensive secure authentication mechanism for inter-node cluster communication with the following features: 1. Global Cluster Secret (GCS) - Auto-generated cryptographically secure random secret (256-bit) - Configurable via YAML config file - Shared across all cluster nodes for authentication 2. Cluster Authentication Middleware - Validates X-Cluster-Secret and X-Node-ID headers - Applied to all cluster endpoints (/members/*, /merkle_tree/*, /kv_range) - Comprehensive logging of authentication attempts 3. Authenticated HTTP Client - Custom HTTP client with cluster auth headers - TLS support with configurable certificate verification - Protocol-aware (http/https based on TLS settings) 4. Secure Bootstrap Endpoint - New /auth/cluster-bootstrap endpoint - Protected by JWT authentication with admin scope - Allows new nodes to securely obtain cluster secret 5. Updated Cluster Communication - All gossip protocol requests include auth headers - All Merkle tree sync requests include auth headers - All data replication requests include auth headers 6. Configuration - cluster_secret: Shared secret (auto-generated if not provided) - cluster_tls_enabled: Enable TLS for inter-node communication - cluster_tls_cert_file: Path to TLS certificate - cluster_tls_key_file: Path to TLS private key - cluster_tls_skip_verify: Skip TLS verification (testing only) This implementation addresses the security vulnerability of unprotected cluster endpoints and provides a flexible, secure approach to protecting internal cluster communication while allowing for automated node bootstrapping. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
336 lines
9.7 KiB
Go
336 lines
9.7 KiB
Go
package server
|
||
|
||
import (
|
||
"context"
|
||
"encoding/json"
|
||
"fmt"
|
||
"net/http"
|
||
"os"
|
||
"path/filepath"
|
||
"strings"
|
||
"sync"
|
||
"time"
|
||
|
||
"github.com/dgraph-io/badger/v4"
|
||
"github.com/robfig/cron/v3"
|
||
"github.com/sirupsen/logrus"
|
||
|
||
"kvs/auth"
|
||
"kvs/cluster"
|
||
"kvs/storage"
|
||
"kvs/types"
|
||
"kvs/utils"
|
||
)
|
||
|
||
// Server represents the KVS node
|
||
type Server struct {
|
||
config *types.Config
|
||
db *badger.DB
|
||
mode string // "normal", "read-only", "syncing"
|
||
modeMu sync.RWMutex
|
||
logger *logrus.Logger
|
||
httpServer *http.Server
|
||
ctx context.Context
|
||
cancel context.CancelFunc
|
||
wg sync.WaitGroup
|
||
|
||
// Cluster services
|
||
gossipService *cluster.GossipService
|
||
syncService *cluster.SyncService
|
||
merkleService *cluster.MerkleService
|
||
bootstrapService *cluster.BootstrapService
|
||
|
||
// Storage services
|
||
storageService *storage.StorageService
|
||
revisionService *storage.RevisionService
|
||
|
||
// Backup system
|
||
cronScheduler *cron.Cron // Cron scheduler for backups
|
||
backupStatus types.BackupStatus // Current backup status
|
||
backupMu sync.RWMutex // Protects backup status
|
||
|
||
// Authentication service
|
||
authService *auth.AuthService
|
||
clusterAuthService *auth.ClusterAuthService
|
||
}
|
||
|
||
// NewServer initializes and returns a new Server instance
|
||
func NewServer(config *types.Config) (*Server, error) {
|
||
logger := logrus.New()
|
||
logger.SetFormatter(&logrus.JSONFormatter{})
|
||
|
||
level, err := logrus.ParseLevel(config.LogLevel)
|
||
if err != nil {
|
||
level = logrus.InfoLevel
|
||
}
|
||
logger.SetLevel(level)
|
||
|
||
// Create data directory
|
||
if err := os.MkdirAll(config.DataDir, 0755); err != nil {
|
||
return nil, fmt.Errorf("failed to create data directory: %v", err)
|
||
}
|
||
|
||
// Open BadgerDB
|
||
opts := badger.DefaultOptions(filepath.Join(config.DataDir, "badger"))
|
||
opts.Logger = nil // Disable badger's internal logging
|
||
db, err := badger.Open(opts)
|
||
if err != nil {
|
||
return nil, fmt.Errorf("failed to open BadgerDB: %v", err)
|
||
}
|
||
|
||
ctx, cancel := context.WithCancel(context.Background())
|
||
|
||
// Initialize cluster services
|
||
merkleService := cluster.NewMerkleService(db, logger)
|
||
gossipService := cluster.NewGossipService(config, logger)
|
||
syncService := cluster.NewSyncService(db, config, gossipService, merkleService, logger)
|
||
var server *Server // Forward declaration
|
||
bootstrapService := cluster.NewBootstrapService(config, gossipService, syncService, logger, func(mode string) {
|
||
if server != nil {
|
||
server.setMode(mode)
|
||
}
|
||
})
|
||
|
||
server = &Server{
|
||
config: config,
|
||
db: db,
|
||
mode: "normal",
|
||
logger: logger,
|
||
ctx: ctx,
|
||
cancel: cancel,
|
||
gossipService: gossipService,
|
||
syncService: syncService,
|
||
merkleService: merkleService,
|
||
bootstrapService: bootstrapService,
|
||
}
|
||
|
||
if config.ReadOnly {
|
||
server.setMode("read-only")
|
||
}
|
||
|
||
// Initialize storage services
|
||
storageService, err := storage.NewStorageService(db, config, logger)
|
||
if err != nil {
|
||
return nil, fmt.Errorf("failed to initialize storage service: %v", err)
|
||
}
|
||
server.storageService = storageService
|
||
|
||
// Initialize revision service
|
||
server.revisionService = storage.NewRevisionService(storageService)
|
||
|
||
// Initialize authentication service
|
||
server.authService = auth.NewAuthService(db, logger, config)
|
||
|
||
// Initialize cluster authentication service (Issue #13)
|
||
if config.ClusteringEnabled {
|
||
server.clusterAuthService = auth.NewClusterAuthService(config.ClusterSecret, logger)
|
||
}
|
||
|
||
// Setup initial root account if needed (Issue #3)
|
||
if config.AuthEnabled {
|
||
if err := server.setupRootAccount(); err != nil {
|
||
return nil, fmt.Errorf("failed to setup root account: %v", err)
|
||
}
|
||
}
|
||
|
||
// Initialize Merkle tree using cluster service
|
||
if err := server.syncService.InitializeMerkleTree(); err != nil {
|
||
return nil, fmt.Errorf("failed to initialize Merkle tree: %v", err)
|
||
}
|
||
|
||
return server, nil
|
||
}
|
||
|
||
// getMode returns the current server mode
|
||
func (s *Server) getMode() string {
|
||
s.modeMu.RLock()
|
||
defer s.modeMu.RUnlock()
|
||
return s.mode
|
||
}
|
||
|
||
// setMode sets the server mode
|
||
func (s *Server) setMode(mode string) {
|
||
s.modeMu.Lock()
|
||
defer s.modeMu.Unlock()
|
||
oldMode := s.mode
|
||
s.mode = mode
|
||
s.logger.WithFields(logrus.Fields{
|
||
"old_mode": oldMode,
|
||
"new_mode": mode,
|
||
}).Info("Mode changed")
|
||
}
|
||
|
||
// addMember adds a member using cluster service
|
||
func (s *Server) addMember(member *types.Member) {
|
||
s.gossipService.AddMember(member)
|
||
}
|
||
|
||
// removeMember removes a member using cluster service
|
||
func (s *Server) removeMember(nodeID string) {
|
||
s.gossipService.RemoveMember(nodeID)
|
||
}
|
||
|
||
// getMembers returns all cluster members
|
||
func (s *Server) getMembers() []*types.Member {
|
||
return s.gossipService.GetMembers()
|
||
}
|
||
|
||
// getJoinedTimestamp returns this node's joined timestamp (startup time)
|
||
func (s *Server) getJoinedTimestamp() int64 {
|
||
// For now, use a simple approach - this should be stored persistently
|
||
return time.Now().UnixMilli()
|
||
}
|
||
|
||
// getBackupStatus returns the current backup status
|
||
func (s *Server) getBackupStatus() types.BackupStatus {
|
||
s.backupMu.RLock()
|
||
defer s.backupMu.RUnlock()
|
||
|
||
status := s.backupStatus
|
||
|
||
// Calculate next backup time if scheduler is running
|
||
if s.cronScheduler != nil && len(s.cronScheduler.Entries()) > 0 {
|
||
nextRun := s.cronScheduler.Entries()[0].Next
|
||
if !nextRun.IsZero() {
|
||
status.NextBackupTime = nextRun.Unix()
|
||
}
|
||
}
|
||
|
||
return status
|
||
}
|
||
|
||
// setupRootAccount creates an initial root account if no users exist and no seed nodes are configured
|
||
func (s *Server) setupRootAccount() error {
|
||
// Only create root account if:
|
||
// 1. No users exist in the database
|
||
// 2. No seed nodes are configured (standalone mode)
|
||
hasUsers, err := s.authService.HasUsers()
|
||
if err != nil {
|
||
return fmt.Errorf("failed to check if users exist: %v", err)
|
||
}
|
||
|
||
// If users already exist or we have seed nodes, no need to create root account
|
||
if hasUsers || len(s.config.SeedNodes) > 0 {
|
||
return nil
|
||
}
|
||
|
||
s.logger.Info("Creating initial root account for empty database with no seed nodes")
|
||
|
||
// Import required packages for user creation
|
||
// Note: We need these imports at the top of the file
|
||
return s.createRootUserAndToken()
|
||
}
|
||
|
||
// createRootUserAndToken creates the root user, admin group, and initial token
|
||
func (s *Server) createRootUserAndToken() error {
|
||
rootNickname := "root"
|
||
adminGroupName := "admin"
|
||
|
||
// Generate UUIDs
|
||
rootUserUUID := "root-" + time.Now().Format("20060102-150405")
|
||
adminGroupUUID := "admin-" + time.Now().Format("20060102-150405")
|
||
now := time.Now().Unix()
|
||
|
||
// Create admin group
|
||
adminGroup := types.Group{
|
||
UUID: adminGroupUUID,
|
||
NameHash: hashGroupName(adminGroupName),
|
||
Members: []string{rootUserUUID},
|
||
CreatedAt: now,
|
||
UpdatedAt: now,
|
||
}
|
||
|
||
// Create root user
|
||
rootUser := types.User{
|
||
UUID: rootUserUUID,
|
||
NicknameHash: hashUserNickname(rootNickname),
|
||
Groups: []string{adminGroupUUID},
|
||
CreatedAt: now,
|
||
UpdatedAt: now,
|
||
}
|
||
|
||
// Store group and user in database
|
||
if err := s.storeUserAndGroup(&rootUser, &adminGroup); err != nil {
|
||
return fmt.Errorf("failed to store root user and admin group: %v", err)
|
||
}
|
||
|
||
// Create API token with full administrative scopes
|
||
adminScopes := []string{
|
||
"admin:users:create", "admin:users:read", "admin:users:update", "admin:users:delete",
|
||
"admin:groups:create", "admin:groups:read", "admin:groups:update", "admin:groups:delete",
|
||
"admin:tokens:create", "admin:tokens:revoke",
|
||
"read", "write", "delete",
|
||
}
|
||
|
||
// Generate token with 24 hour expiration for initial setup
|
||
tokenString, expiresAt, err := auth.GenerateJWT(rootUserUUID, adminScopes, 24)
|
||
if err != nil {
|
||
return fmt.Errorf("failed to generate root token: %v", err)
|
||
}
|
||
|
||
// Store token in database
|
||
if err := s.storeAPIToken(tokenString, rootUserUUID, adminScopes, expiresAt); err != nil {
|
||
return fmt.Errorf("failed to store root token: %v", err)
|
||
}
|
||
|
||
// Log the token securely (one-time display)
|
||
s.logger.WithFields(logrus.Fields{
|
||
"user_uuid": rootUserUUID,
|
||
"group_uuid": adminGroupUUID,
|
||
"expires_at": time.Unix(expiresAt, 0).Format(time.RFC3339),
|
||
"expires_in": "24 hours",
|
||
}).Warn("Root account created - SAVE THIS TOKEN:")
|
||
|
||
// Display token prominently
|
||
fmt.Printf("\n" + strings.Repeat("=", 80) + "\n")
|
||
fmt.Printf("🔐 ROOT ACCOUNT CREATED - INITIAL SETUP TOKEN\n")
|
||
fmt.Printf("===========================================\n")
|
||
fmt.Printf("User UUID: %s\n", rootUserUUID)
|
||
fmt.Printf("Group UUID: %s\n", adminGroupUUID)
|
||
fmt.Printf("Token: %s\n", tokenString)
|
||
fmt.Printf("Expires: %s (24 hours)\n", time.Unix(expiresAt, 0).Format(time.RFC3339))
|
||
fmt.Printf("\n⚠️ IMPORTANT: Save this token immediately!\n")
|
||
fmt.Printf(" This is the only time it will be displayed.\n")
|
||
fmt.Printf(" Use this token to authenticate and create additional users.\n")
|
||
fmt.Printf(strings.Repeat("=", 80) + "\n\n")
|
||
|
||
return nil
|
||
}
|
||
|
||
// hashUserNickname creates a hash of the user nickname (similar to handlers.go)
|
||
func hashUserNickname(nickname string) string {
|
||
return utils.HashSHA3512(nickname)
|
||
}
|
||
|
||
// hashGroupName creates a hash of the group name (similar to handlers.go)
|
||
func hashGroupName(groupname string) string {
|
||
return utils.HashSHA3512(groupname)
|
||
}
|
||
|
||
// storeUserAndGroup stores both user and group in the database
|
||
func (s *Server) storeUserAndGroup(user *types.User, group *types.Group) error {
|
||
return s.db.Update(func(txn *badger.Txn) error {
|
||
// Store user
|
||
userData, err := json.Marshal(user)
|
||
if err != nil {
|
||
return fmt.Errorf("failed to marshal user data: %v", err)
|
||
}
|
||
|
||
if err := txn.Set([]byte(auth.UserStorageKey(user.UUID)), userData); err != nil {
|
||
return fmt.Errorf("failed to store user: %v", err)
|
||
}
|
||
|
||
// Store group
|
||
groupData, err := json.Marshal(group)
|
||
if err != nil {
|
||
return fmt.Errorf("failed to marshal group data: %v", err)
|
||
}
|
||
|
||
if err := txn.Set([]byte(auth.GroupStorageKey(group.UUID)), groupData); err != nil {
|
||
return fmt.Errorf("failed to store group: %v", err)
|
||
}
|
||
|
||
return nil
|
||
})
|
||
}
|