@@ -390,7 +390,16 @@ Status ConsensusCoordinator::UpdateSlave(const std::string& ip, int port, const
   }
   {
     std::lock_guard l(slave_ptr->slave_mu);
-    slave_ptr->acked_offset = end;
+    // Treat this ACK as confirming everything up to and including end
+    LogOffset updated_offset;
+    // An empty start means "from the beginning of the window"
+    slave_ptr->sync_win.Update(SyncWinItem(LogOffset()), SyncWinItem(end), &updated_offset);
+    if (!(updated_offset == LogOffset())) {
+      slave_ptr->acked_offset = updated_offset;
+    } else {
+      // Fall back to end if the window was empty or no progress was detected
+      slave_ptr->acked_offset = end;
+    }
     sync_pros_.AddMatchIndex(ip, port, slave_ptr->acked_offset);
     // LOG(INFO) << "PacificA slave ip: " << ip << ", port: " << port << ", slave acked_offset "
     //           << slave_ptr->acked_offset.ToString();
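
The ACK handling above assumes SyncWindow::Update marks every in-flight item in [start, end] as acknowledged, pops the contiguous acked prefix, and reports that prefix's highest offset. A minimal sketch of that contract, modeling LogOffset as a plain integer (0 standing in for the empty LogOffset()); the real classes live in pika's replication-manager code:

#include <cstdint>
#include <deque>
#include <iostream>

struct SyncWinItem {
  uint64_t offset = 0;  // 0 plays the role of the empty LogOffset()
  bool acked = false;
  explicit SyncWinItem(uint64_t off) : offset(off) {}
};

class SyncWindow {
 public:
  void Push(const SyncWinItem& item) { win_.push_back(item); }

  // Mark everything in [start, end] acked; report the largest offset of the
  // contiguous acked prefix through *updated, then drop that prefix.
  bool Update(const SyncWinItem& start, const SyncWinItem& end, uint64_t* updated) {
    *updated = 0;
    for (auto& item : win_) {
      bool after_start = (start.offset == 0) || (item.offset >= start.offset);
      if (after_start && item.offset <= end.offset) item.acked = true;
    }
    while (!win_.empty() && win_.front().acked) {
      *updated = win_.front().offset;
      win_.pop_front();
    }
    return *updated != 0;
  }

 private:
  std::deque<SyncWinItem> win_;
};

int main() {
  SyncWindow win;
  for (uint64_t off : {10, 20, 30}) win.Push(SyncWinItem(off));
  uint64_t acked = 0;
  // Cumulative ACK with empty start up to 20: items 10 and 20 leave the window.
  win.Update(SyncWinItem(0), SyncWinItem(20), &acked);
  std::cout << "acked_offset advances to " << acked << "\n";  // prints 20
}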
@@ -835,20 +844,44 @@ bool ConsensusCoordinator::checkFinished(const LogOffset& offset) {
 void ConsensusCoordinator::SyncBinlogLoop() {
   while (!thread_stop_.load()) {
     std::unique_lock<pstd::Mutex> lock(sync_mu_);
-    // timed wait to allow coalescing multiple appends
-    auto coalesce = std::chrono::milliseconds(g_pika_conf->consensus_timeout());
-    sync_cv_.wait_for(lock, coalesce, [this] { return needs_sync_.load() || thread_stop_.load(); });
-
+    // Wait until there is at least one pending append
+    sync_cv_.wait(lock, [this] { return needs_sync_.load() || thread_stop_.load(); });
     if (thread_stop_.load()) {
       break;
     }
-    if (!needs_sync_.load()) {
-      continue;
-    }
+    // Coalesce multiple appends within the next timeout window
+    auto coalesce = std::chrono::milliseconds(g_pika_conf->consensus_timeout());
+    lock.unlock();
+    std::this_thread::sleep_for(coalesce);
+    lock.lock();
 
     needs_sync_.store(false);
     pstd::Status s = stable_logger_->Logger()->Sync();
 
+    // Record the fsynced offset (never beyond prepared_id_)
+    {
+      std::shared_lock prep_lock(prepared_id_rwlock_);
+      std::lock_guard fs_lock(fsynced_id_rwlock_);
+      if (prepared_id_ > last_fsynced_id_) {
+        last_fsynced_id_ = prepared_id_;
+      }
+    }
+
+    // After fsync, try to advance committed_id up to min(desired, fsynced)
+    {
+      std::shared_lock fs_lock(fsynced_id_rwlock_);
+      std::lock_guard commit_lock(committed_id_rwlock_);
+      LogOffset target = desired_committed_id_;
+      if (target > last_fsynced_id_) {
+        target = last_fsynced_id_;
+      }
+      if (target > committed_id_) {
+        committed_id_ = target;
+        context_->UpdateAppliedIndex(committed_id_);
+        committed_id_cv_.notify_all();
+      }
+    }
+
     std::lock_guard<pstd::Mutex> guard(promises_mu_);
     for (auto& p : sync_promises_) {
       p.set_value(s);
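
The loop above replaces the timed wait with wait-then-sleep: wake on the first pending append, then deliberately sleep one full timeout window so every append arriving in that window shares a single fsync. A standalone sketch of the pattern, with standard-library types standing in for pstd::Mutex and the stable logger:

#include <atomic>
#include <chrono>
#include <condition_variable>
#include <iostream>
#include <mutex>
#include <thread>

std::mutex sync_mu;
std::condition_variable sync_cv;
std::atomic<bool> needs_sync{false};
std::atomic<bool> stop{false};

void SyncBinlogLoop(std::chrono::milliseconds coalesce) {
  while (!stop.load()) {
    std::unique_lock<std::mutex> lock(sync_mu);
    sync_cv.wait(lock, [] { return needs_sync.load() || stop.load(); });
    if (stop.load()) break;
    // Release the lock while sleeping so producers can keep queueing appends.
    lock.unlock();
    std::this_thread::sleep_for(coalesce);
    lock.lock();
    needs_sync.store(false);
    std::cout << "one fsync covering every append in the window\n";
  }
}

int main() {
  std::thread t(SyncBinlogLoop, std::chrono::milliseconds(50));
  for (int i = 0; i < 3; ++i) {  // three appends, ideally a single fsync
    { std::lock_guard<std::mutex> g(sync_mu); needs_sync.store(true); }
    sync_cv.notify_one();
    std::this_thread::sleep_for(std::chrono::milliseconds(5));
  }
  std::this_thread::sleep_for(std::chrono::milliseconds(100));
  stop.store(true);
  sync_cv.notify_one();
  t.join();
}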
@@ -920,7 +953,9 @@ Status ConsensusCoordinator::AppendSlaveEntries(const std::shared_ptr<Cmd>& cmd_
               << " cur last index " << last_index.l_offset.index;
     return Status::OK();
   }
+  auto start_us = pstd::NowMicros();
   Status s = PersistAppendBinlog(cmd_ptr);
+  auto end_us = pstd::NowMicros();
   if (!s.ok()) {
     return s;
   }
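
The hunk captures start_us and end_us around PersistAppendBinlog, but their consumer sits outside the visible context; presumably the delta feeds a latency metric further down. A sketch of the likely measurement, with a hypothetical NowMicros built on std::chrono in place of pstd::NowMicros:

#include <chrono>
#include <cstdint>
#include <iostream>

static uint64_t NowMicros() {
  return std::chrono::duration_cast<std::chrono::microseconds>(
             std::chrono::steady_clock::now().time_since_epoch())
      .count();
}

int main() {
  auto start_us = NowMicros();
  // ... PersistAppendBinlog(cmd_ptr) would run here ...
  auto end_us = NowMicros();
  std::cout << "binlog append took " << (end_us - start_us) << " us\n";
}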
@@ -961,7 +996,7 @@ Status ConsensusCoordinator::UpdateCommittedID() {
   LogOffset slave_prepared_id = LogOffset();
 
   for (const auto& slave : slaves) {
-    if (slave.second->slave_state == kSlaveBinlogSync) {
+    if (slave.second->slave_state == kSlaveBinlogSync || slave.second->slave_state == SlaveState::KCandidate) {
      if (slave_prepared_id == LogOffset()) {
        slave_prepared_id = slave.second->acked_offset;
      } else if (slave.second->acked_offset < slave_prepared_id) {
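
With the candidate state included, UpdateCommittedID takes the minimum acked offset across every slave that is either in binlog sync or a candidate, with an empty LogOffset() marking "no eligible slave seen yet". The fold, reduced to integers for illustration (enum names follow the diff):

#include <cstdint>
#include <iostream>
#include <map>

enum class SlaveState { kSlaveNotSync, kSlaveBinlogSync, KCandidate };

struct Slave { SlaveState state; uint64_t acked_offset; };

uint64_t MinEligibleAck(const std::map<int, Slave>& slaves) {
  uint64_t min_acked = 0;  // 0 models the empty LogOffset()
  for (const auto& [id, s] : slaves) {
    if (s.state != SlaveState::kSlaveBinlogSync && s.state != SlaveState::KCandidate) continue;
    if (min_acked == 0 || s.acked_offset < min_acked) min_acked = s.acked_offset;
  }
  return min_acked;
}

int main() {
  std::map<int, Slave> slaves{{1, {SlaveState::kSlaveBinlogSync, 120}},
                              {2, {SlaveState::KCandidate, 80}},      // now counts
                              {3, {SlaveState::kSlaveNotSync, 40}}};  // ignored
  std::cout << MinEligibleAck(slaves) << "\n";  // prints 80
}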
@@ -1029,8 +1064,9 @@ pstd::Status ConsensusCoordinator::SendBinlog(const std::shared_ptr<SlaveNode>&
     return Status::OK();
   }
 
+  // Gate: allow only one in-flight batch until an ACK clears the sync window
   int batch_size = g_pika_conf->consensus_batch_size();
-  for (int i = start_index; i < logs_->Size() && tasks.size() < batch_size; ++i) {
+  for (int i = start_index; i < logs_->Size() && static_cast<int>(tasks.size()) < batch_size; ++i) {
     const auto& item = logs_->At(i);
     tasks.emplace_back(RmNode(slave_ptr->Ip(), slave_ptr->Port(), db_name, slave_ptr->SessionId()),
                        BinlogChip(item.offset, item.binlog_), item.offset, committed_index);
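
The gate comment refers to a check outside this hunk that refuses to build a new batch while the previous one is still unacknowledged. A hypothetical shape for that gate, with a bare deque standing in for the real SyncWindow:

#include <deque>
#include <iostream>

struct SlaveNode { std::deque<int> sync_win; };

// One in-flight batch: send only once the previous batch has been fully
// acknowledged, i.e. the sync window has drained.
bool CanSendNextBatch(const SlaveNode& slave) { return slave.sync_win.empty(); }

int main() {
  SlaveNode slave;
  std::cout << CanSendNextBatch(slave) << "\n";  // 1: nothing in flight
  slave.sync_win.push_back(42);                  // batch sent, awaiting ACK
  std::cout << CanSendNextBatch(slave) << "\n";  // 0: gate closed
}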
@@ -1045,13 +1081,21 @@ pstd::Status ConsensusCoordinator::SendBinlog(const std::shared_ptr<SlaveNode>&
   // decide if we should send now based on size or timeout window
   bool size_triggered = (static_cast<int>(tasks.size()) >= batch_size);
   bool timeout_triggered = false;
-  if (slave_ptr->pending_since_us_ == 0 && !size_triggered) {
+
+  // one-shot immediate send to close the current window
+  bool force_now = immediate_send_once_.exchange(false);
+
+  if (slave_ptr->pending_since_us_ == 0 && !size_triggered && !force_now) {
     // start pending window and wait for more logs or timeout
     slave_ptr->pending_since_us_ = now;
     return Status::OK();
   }
-  if (slave_ptr->pending_since_us_ > 0) {
-    timeout_triggered = (now - slave_ptr->pending_since_us_) >= (static_cast<uint64_t>(g_pika_conf->consensus_timeout()) * 1000ULL);
+  if (!size_triggered) {
+    if (force_now) {
+      timeout_triggered = true;
+    } else if (slave_ptr->pending_since_us_ > 0) {
+      timeout_triggered = (now - slave_ptr->pending_since_us_) >= (static_cast<uint64_t>(g_pika_conf->consensus_timeout()) * 1000ULL);
+    }
   }
   if (!size_triggered && !timeout_triggered) {
     return Status::OK();
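
The send decision now has three triggers: the batch filled up, the pending window timed out, or the one-shot force flag was set. Lifted into a pure function for clarity (field names mirror the diff; timestamps are microseconds while the timeout config is milliseconds, hence the 1000ULL scaling):

#include <cstdint>
#include <iostream>

struct SendDecision { bool send; bool start_pending; };

SendDecision ShouldSend(int tasks, int batch_size, uint64_t now_us,
                        uint64_t pending_since_us, bool force_now,
                        uint64_t timeout_ms) {
  bool size_triggered = tasks >= batch_size;
  if (pending_since_us == 0 && !size_triggered && !force_now) {
    return {false, true};  // open the pending window, wait for more logs
  }
  bool timeout_triggered = false;
  if (!size_triggered) {
    if (force_now) {
      timeout_triggered = true;  // one-shot immediate send
    } else if (pending_since_us > 0) {
      timeout_triggered = (now_us - pending_since_us) >= timeout_ms * 1000ULL;
    }
  }
  return {size_triggered || timeout_triggered, false};
}

int main() {
  // 3 queued tasks, batch of 10, window opened 250 ms ago, 100 ms timeout:
  auto d = ShouldSend(3, 10, 1'000'000, 750'000, false, 100);
  std::cout << "send=" << d.send << "\n";  // 1: the timeout fired
}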
@@ -1068,14 +1112,21 @@ pstd::Status ConsensusCoordinator::SendBinlog(const std::shared_ptr<SlaveNode>&
   std::vector<WriteTask> final_tasks_to_send;
   final_tasks_to_send.push_back(batched_task);
   g_pika_rm->ProduceWriteQueue(slave_ptr->Ip(), slave_ptr->Port(), db_name, final_tasks_to_send);
+  // Immediately consume the write queue to send over the network
+  // g_pika_rm->ConsumeWriteQueue();
 
   // Update slave node's state
   slave_ptr->sent_offset = last_task.binlog_chip_.offset_;
+  // Track every log item so the ACK can consume the window in order
   for (const auto& task : tasks) {
-    slave_ptr->sync_win.Push(SyncWinItem(task.binlog_chip_.offset_));
+    slave_ptr->sync_win.Push(SyncWinItem(task.binlog_chip_.offset_, task.binlog_chip_.binlog_.size()));
   }
   // reset pending timer after sending
   slave_ptr->pending_since_us_ = 0;
+  // start ACK timeout tracking for this in-flight batch
+  if (slave_ptr->ack_timeout_start_time_us_ == 0) {
+    slave_ptr->ack_timeout_start_time_us_ = now;
+  }
 
   // trigger fsync coalesced with network send
   {
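
The block that opens here is cut off by the hunk; going by its comment and the SyncBinlogLoop changes above, it most likely flags a pending sync and wakes the loop so the fsync overlaps with the network send. A guess at its shape, reusing the globals from the SyncBinlogLoop sketch earlier:

{
  std::lock_guard<std::mutex> g(sync_mu);
  needs_sync.store(true);
}
sync_cv.notify_one();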