@@ -526,9 +526,16 @@ Enable diagnostic logging during vector graph build progress (heap/off-heap memo
526526 "Example: localhost:2434:2480:10,192.168.0.1:2434:2480:0" ,
527527 String .class , "" ),
528528
// HA_SERVER_ROLE: server-scoped String setting keyed "arcadedb.ha.serverRole", default "any".
// The trailing Set.of(...) limits the accepted values to "any" and "replica".
// Per the description, "replica" sets the Raft peer priority to 0 so the node is never elected leader.
529+ HA_SERVER_ROLE ("arcadedb.ha.serverRole" , SCOPE .SERVER ,
530+ "Enforces a role in a cluster. 'any' (default) means this node can be elected leader. "
531+ + "'replica' sets the Raft peer priority to 0 so the node is never elected leader "
532+ + "(useful for read-scale or witness deployments)." ,
533+ String .class , "any" , Set .of ("any" , "replica" )),
534+
// HA_QUORUM: server-scoped String setting keyed "arcadedb.ha.quorum", default "majority".
// This hunk narrows the accepted set from none/one/two/three/majority/all to just "majority" and "all".
// NOTE(review): configurations that still carry a legacy value presumably fail validation now —
// confirm the failure message points users at the two supported values.
529535 HA_QUORUM ("arcadedb.ha.quorum" , SCOPE .SERVER ,
530- "Default quorum between 'none', one, two, three, 'majority' and 'all' servers. Default is majority" , String .class , "majority" ,
531- Set .of ("none" , "one" , "two" , "three" , "majority" , "all" )),
536+ "Write quorum: 'majority' (standard Raft, default) or 'all' (every configured peer must acknowledge). "
537+ + "Legacy values 'none', 'one', 'two', 'three' are no longer supported." ,
538+ String .class , "majority" , Set .of ("majority" , "all" )),
532539
// HA_QUORUM_TIMEOUT: server-scoped setting keyed "arcadedb.ha.quorumTimeout", declared as Long.
// NOTE(review): the default is written as the int literal 10000 while other Long-typed entries in
// this file use an L suffix — confirm the constructor normalizes it. The description also omits
// the unit (presumably milliseconds; verify against the consumer).
533540 HA_QUORUM_TIMEOUT ("arcadedb.ha.quorumTimeout" , SCOPE .SERVER , "Timeout waiting for the quorum" , Long .class , 10000 ),
534541
@@ -544,6 +551,21 @@ Enable diagnostic logging during vector graph build progress (heap/off-heap memo
// HA_APPEND_BUFFER_SIZE: server-scoped String setting keyed "arcadedb.ha.appendBufferSize".
// Holds a human-readable size (default "4MB") described as the AppendEntries batch byte limit.
544551 HA_APPEND_BUFFER_SIZE ("arcadedb.ha.appendBufferSize" , SCOPE .SERVER ,
545552 "AppendEntries batch byte limit for replication (e.g. '4MB')" , String .class , "4MB" ),
546553
// HA_WRITE_BUFFER_SIZE: server-scoped String setting keyed "arcadedb.ha.writeBufferSize", default "8MB".
// The description states it must be at least appendBufferSize + 8 bytes or startup fails with
// ConfigurationException; that check is not visible here, so the enforcement lives elsewhere.
554+ HA_WRITE_BUFFER_SIZE ("arcadedb.ha.writeBufferSize" , SCOPE .SERVER ,
555+ "Raft log write buffer size (e.g. '8MB'). Must be at least appendBufferSize + 8 bytes, "
556+ + "otherwise the server fails to start with ConfigurationException." ,
557+ String .class , "8MB" ),
558+
// HA_LOG_PURGE_GAP: server-scoped Integer setting keyed "arcadedb.ha.logPurgeGap", default 1024.
// Described as the count of Raft log entries kept after a snapshot for slightly lagging followers.
559+ HA_LOG_PURGE_GAP ("arcadedb.ha.logPurgeGap" , SCOPE .SERVER ,
560+ "Number of Raft log entries retained after a snapshot as a buffer for slightly lagging followers. "
561+ + "Lower values free disk faster but raise the chance a slow follower needs a full snapshot resync." ,
562+ Integer .class , 1024 ),
563+
// HA_LOG_PURGE_UPTO_SNAPSHOT: server-scoped Boolean setting keyed "arcadedb.ha.logPurgeUptoSnapshot".
// Defaults to true; per the description, false retains the full Raft log history.
564+ HA_LOG_PURGE_UPTO_SNAPSHOT ("arcadedb.ha.logPurgeUptoSnapshot" , SCOPE .SERVER ,
565+ "When true (default), deletes old Raft log segments after each snapshot to bound disk growth. "
566+ + "Set to false to retain full log history for debugging/auditing." ,
567+ Boolean .class , true ),
568+
// HA_REPLICATION_CHUNK_MAXSIZE: server-scoped Integer setting keyed "arcadedb.ha.replicationChunkMaxSize".
// The default expression 16384 * 1024 evaluates to 16777216, matching the figure quoted in the description.
547569 HA_REPLICATION_CHUNK_MAXSIZE ("arcadedb.ha.replicationChunkMaxSize" , SCOPE .SERVER ,
548570 "Maximum channel chunk size for replicating messages between servers. Default is 16777216" , Integer .class , 16384 * 1024 ),
549571
@@ -574,19 +596,30 @@ Enable diagnostic logging during vector graph build progress (heap/off-heap memo
574596 "set to true for durable deployments." ,
575597 Boolean .class , false ),
576598
577- HA_RAFT_SNAPSHOT_THRESHOLD ("arcadedb.ha.raftSnapshotThreshold " , SCOPE .SERVER ,
578- "Number of Raft log entries after which the leader automatically takes a snapshot. " +
579- "Lower values cause more frequent snapshots and earlier log compaction." ,
580- Long .class , 10000L ),
// HA_SNAPSHOT_THRESHOLD: server-scoped Long setting, default 100_000 Raft log entries between
// automatic leader snapshots.
// The key literal must not end in whitespace: a trailing space would make lookups by the documented
// name "arcadedb.ha.snapshotThreshold" miss this entry.
599+ HA_SNAPSHOT_THRESHOLD ("arcadedb.ha.snapshotThreshold" , SCOPE .SERVER ,
600+ "Number of Raft log entries after which the leader automatically takes a snapshot. "
601+ + "Lower values cause more frequent snapshots and earlier log compaction." ,
602+ Long .class , 100_000L ),
581603
// HA_LOG_VERBOSE: server-scoped Integer setting keyed "arcadedb.ha.logVerbose", default 0.
// The description defines four levels, 0 (off) through 3 (trace).
582604 HA_LOG_VERBOSE ("arcadedb.ha.logVerbose" , SCOPE .SERVER ,
583605 "HA verbose logging level: 0=off, 1=basic (elections, leader changes), 2=detailed (replication, forwarding), 3=trace (every state machine apply)" ,
584606 Integer .class , 0 ),
585607
// HA_GROUP_COMMIT_BATCH_SIZE: server-scoped Integer setting, default 500.
// This hunk renames both the constant (dropping the RAFT_ prefix) and the key
// ("arcadedb.ha.raftGroupCommitBatchSize" -> "arcadedb.ha.groupCommitBatchSize").
// NOTE(review): no alias for the old key is visible here — confirm existing deployments are migrated.
586- HA_RAFT_GROUP_COMMIT_BATCH_SIZE ("arcadedb.ha.raftGroupCommitBatchSize" , SCOPE .SERVER ,
587- "Maximum number of Raft log entries to batch in a single group commit flush. Higher values improve throughput under concurrent load." ,
608+ HA_GROUP_COMMIT_BATCH_SIZE ("arcadedb.ha.groupCommitBatchSize" , SCOPE .SERVER ,
609+ "Maximum number of Raft log entries to batch in a single group commit flush. "
610+ + "Higher values improve throughput under concurrent load." ,
588611 Integer .class , 500 ),
589612
// HA_GROUP_COMMIT_QUEUE_SIZE: server-scoped Integer setting keyed "arcadedb.ha.groupCommitQueueSize",
// default 10_000. Described as the cap on pending transactions in the Raft group-commit queue.
613+ HA_GROUP_COMMIT_QUEUE_SIZE ("arcadedb.ha.groupCommitQueueSize" , SCOPE .SERVER ,
614+ "Maximum pending transactions allowed in the Raft group-commit queue. "
615+ + "When the queue is full, the server applies backpressure by throwing ReplicationQueueFullException "
616+ + "(a NeedRetryException that clients can retry)." ,
617+ Integer .class , 10_000 ),
618+
// HA_GROUP_COMMIT_OFFER_TIMEOUT: server-scoped Integer setting keyed "arcadedb.ha.groupCommitOfferTimeout".
// Default is 100, expressed in milliseconds according to the description.
619+ HA_GROUP_COMMIT_OFFER_TIMEOUT ("arcadedb.ha.groupCommitOfferTimeout" , SCOPE .SERVER ,
620+ "Timeout in ms waiting for space in the group-commit queue before throwing ReplicationQueueFullException." ,
621+ Integer .class , 100 ),
622+
590623 HA_CLUSTER_TOKEN ("arcadedb.ha.clusterToken" , SCOPE .SERVER ,
591624 "Shared secret for inter-node request forwarding authentication. " +
592625 "Must be identical on all cluster nodes. " +
@@ -605,6 +638,10 @@ Enable diagnostic logging during vector graph build progress (heap/off-heap memo
605638 "Maximum number of concurrent snapshot downloads served by the leader. Requests over this limit receive HTTP 503." ,
606639 Integer .class , 2 ),
607640
// HA_SNAPSHOT_DOWNLOAD_TIMEOUT: server-scoped Integer setting keyed "arcadedb.ha.snapshotDownloadTimeout".
// Default is 300_000, a read timeout in milliseconds per the description (i.e. five minutes).
641+ HA_SNAPSHOT_DOWNLOAD_TIMEOUT ("arcadedb.ha.snapshotDownloadTimeout" , SCOPE .SERVER ,
642+ "Read timeout in ms for downloading a database snapshot from the leader during follower resync." ,
643+ Integer .class , 300_000 ),
644+
// HA_SNAPSHOT_INSTALL_RETRIES: server-scoped Integer setting keyed "arcadedb.ha.snapshotInstallRetries".
// Default is 3 retry attempts for the snapshot download during installation.
608645 HA_SNAPSHOT_INSTALL_RETRIES ("arcadedb.ha.snapshotInstallRetries" , SCOPE .SERVER ,
609646 "Maximum retry attempts for snapshot download from the leader during snapshot installation." ,
610647 Integer .class , 3 ),
@@ -633,10 +670,17 @@ Enable diagnostic logging during vector graph build progress (heap/off-heap memo
633670 "Delay in milliseconds between RemoteDatabase election retries." ,
634671 Long .class , 2000L ),
635672
// HA_RATIS_RESTART_MAX_RETRIES: server-scoped Integer setting keyed "arcadedb.ha.ratisRestartMaxRetries".
// Default is 10 consecutive restart attempts; the description says the server shuts down beyond that.
673+ HA_RATIS_RESTART_MAX_RETRIES ("arcadedb.ha.ratisRestartMaxRetries" , SCOPE .SERVER ,
674+ "Maximum consecutive Ratis restart attempts by the health monitor before the server shuts down "
675+ + "for cluster-level recovery. Raise when partition-recovery scenarios cause legitimate rapid restarts." ,
676+ Integer .class , 10 ),
677+
// HA_STOP_SERVER_ON_REPLICATION_FAILURE: server-scoped Boolean setting keyed
// "arcadedb.ha.stopServerOnReplicationFailure".
// This hunk flips the default from true to false and rewrites the description accordingly, so a
// node that exhausts step-down retries now keeps running unless the flag is explicitly enabled.
// NOTE(review): this is a behavior change for existing deployments relying on the old default.
636678 HA_STOP_SERVER_ON_REPLICATION_FAILURE ("arcadedb.ha.stopServerOnReplicationFailure" , SCOPE .SERVER ,
637- "If true, stops the JVM after exhausting step-down retries on a phase-2 replication failure. "
638- + "If false, logs CRITICAL but leaves the server running (useful for debugging)." ,
639- Boolean .class , true ),
679+ "After a phase-2 local commit fails on the leader while followers have applied the entry, step-down "
680+ + "is attempted first. If every step-down fails and this flag is true, the JVM exits so an "
681+ + "orchestrator can restart and let Raft log replay correct the state. "
682+ + "Default is false: the server keeps running and logs CRITICAL, useful for debugging without an orchestrator." ,
683+ Boolean .class , false ),
640684
641685 HA_SNAPSHOT_WRITE_TIMEOUT ("arcadedb.ha.snapshotWriteTimeout" , SCOPE .SERVER ,
642686 "Timeout in milliseconds for writing a snapshot to a follower. "
@@ -664,6 +708,12 @@ Enable diagnostic logging during vector graph build progress (heap/off-heap memo
664708 "Maximum number of entries in the HTTP idempotency cache. Oldest entry is evicted when full." ,
665709 Integer .class , 10_000 ),
666710
// HA_PEER_ALLOWLIST_ENABLED: server-scoped Boolean setting keyed "arcadedb.ha.peerAllowlist.enabled".
// Enabled (true) by default. The description is explicit that this is address filtering only and
// does not authenticate or encrypt peers.
711+ HA_PEER_ALLOWLIST_ENABLED ("arcadedb.ha.peerAllowlist.enabled" , SCOPE .SERVER ,
712+ "Reject inbound Raft gRPC connections whose remote address does not resolve to a host in "
713+ + "arcadedb.ha.serverList. Loopback is always allowed. Does not provide peer identity or encryption: "
714+ + "use mTLS on untrusted networks." ,
715+ Boolean .class , true ),
716+
// HA_GRPC_ALLOWLIST_REFRESH_MS: server-scoped Long setting keyed "arcadedb.ha.grpcAllowlistRefreshMs".
// Default is 30_000 milliseconds between DNS re-resolutions in the gRPC peer allowlist filter.
667717 HA_GRPC_ALLOWLIST_REFRESH_MS ("arcadedb.ha.grpcAllowlistRefreshMs" , SCOPE .SERVER ,
668718 "Rate-limiting interval in milliseconds for DNS re-resolution in the gRPC peer address allowlist filter." ,
669719 Long .class , 30_000L ),
0 commit comments