diff --git a/conf/pika.conf b/conf/pika.conf index 97d171d419..6486574996 100644 --- a/conf/pika.conf +++ b/conf/pika.conf @@ -153,6 +153,25 @@ replication-num : 0 # The default value of consensus-level is 0, which means this feature is not enabled. consensus-level : 0 +# Batch processing configuration (used by both command collection and consensus mechanism) +# The maximum number of items in a batch (both command collection and consensus) +# Default: 100 +batch-size : 100 + +# Batch processing configuration (used by both command collection and consensus mechanism) +# The maximum wait time in milliseconds before a partial batch is flushed (both command collection and consensus) +# Default: 5 +batch-max-wait-time : 5 + +# The timeout in milliseconds for waiting for a batch ACK from a slave. +# Default: 500 +replication-ack-timeout : 500 + +# Enable command batch processing for better performance +# When enabled, write commands will be collected and processed in batches +# Default: yes +command-batch-enabled : yes + +# The Prefix of dump file's name. +# All the files that generated by command "bgsave" will be name with this prefix. 
dump-prefix : diff --git a/include/pika_binlog.h b/include/pika_binlog.h index 43615ae0b4..888531807e 100644 --- a/include/pika_binlog.h +++ b/include/pika_binlog.h @@ -37,7 +37,7 @@ class Version final : public pstd::noncopyable { void debug() { std::shared_lock l(rwlock_); - printf("Current pro_num %u pro_offset %llu\n", pro_num_, pro_offset_); + printf("Current pro_num %u pro_offset %lu\n", pro_num_, pro_offset_); } private: @@ -61,6 +61,8 @@ class Binlog : public pstd::noncopyable { * Set Producer pro_num and pro_offset with lock */ pstd::Status SetProducerStatus(uint32_t pro_num, uint64_t pro_offset, uint32_t term = 0, uint64_t index = 0); + // Force sync data to disk + pstd::Status Sync(); // Need to hold Lock(); pstd::Status Truncate(uint32_t pro_num, uint64_t pro_offset, uint64_t index); diff --git a/include/pika_client_conn.h b/include/pika_client_conn.h index bc4c28db6a..eb2735029f 100644 --- a/include/pika_client_conn.h +++ b/include/pika_client_conn.h @@ -8,6 +8,7 @@ #include #include +#include #include "acl.h" #include "include/pika_command.h" @@ -52,6 +53,7 @@ class PikaClientConn : public net::RedisConn { bool cache_miss_in_rtc_; }; + struct TxnStateBitMask { public: static constexpr uint8_t Start = 0; diff --git a/include/pika_command_collector.h b/include/pika_command_collector.h new file mode 100644 index 0000000000..d65da913bf --- /dev/null +++ b/include/pika_command_collector.h @@ -0,0 +1,192 @@ +// Copyright (c) 2015-present, Qihoo, Inc. All rights reserved. +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. An additional grant +// of patent rights can be found in the PATENTS file in the same directory. 
+ +#ifndef PIKA_COMMAND_COLLECTOR_H_ +#define PIKA_COMMAND_COLLECTOR_H_ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "include/pika_command.h" +#include "include/pika_define.h" +#include "pstd/include/pstd_status.h" + +#include "include/pika_consensus.h" + +/** + * @brief PikaCommandCollector is used to collect write commands and process them in batches + * + * Main functions: + * 1. Collect write commands and process them in optimized batches after reaching the threshold + * 2. Handle the conflict of the same key (the later command will overwrite the earlier command) + * 3. Send commands in batches to the consensus coordinator with batch-level synchronization + * 4. Support asynchronous callback notification of command processing results + * 5. Track performance metrics for batch processing + * 6. Provide intelligent retry mechanisms for failed batches +*/ +class PikaCommandCollector { + public: + // Callback function type after command processing is completed + using CommandCallback = std::function; + + /** + * @brief constructor + * @param coordinator consensus coordinator reference + * @param batch_size batch size (number of commands) + * @param batch_max_wait_time forced flush interval (milliseconds) + */ + // Constructor with raw pointer (original) + PikaCommandCollector(ConsensusCoordinator* coordinator, size_t batch_size = 100, int batch_max_wait_time = 5); + + // Constructor with shared_ptr (for compatibility with make_shared calls) + PikaCommandCollector(std::shared_ptr coordinator, size_t batch_size = 100, int batch_max_wait_time = 5); + + + ~PikaCommandCollector(); + + /** + * @brief Add command to collector + * @param cmd_ptr command pointer + * @param callback callback function after processing is completed + * @return whether the addition was successful + */ + bool AddCommand(const std::shared_ptr& cmd_ptr, CommandCallback callback); + + /** + 
* @brief Called periodically by external systems to process batches + * @param force Force processing even if batch is not full or timeout not reached + * @return Number of commands processed + */ + + /** + * @brief Immediately process all currently collected commands + * @return The number of commands processed + */ + size_t FlushCommands(bool force = false); + + + /** + * @brief Get the current number of pending commands + * @return number of commands + */ + size_t PendingCommands() const; + + /** + * @brief Set the batch size + * @param batch_size batch size + */ + void SetBatchSize(size_t batch_size); + + /** + * @brief Set the batch max wait time + * @param batch_max_wait_time maximum wait time in milliseconds + */ + void SetBatchMaxWaitTime(int batch_max_wait_time); + + /** + * @brief Get batch processing statistics + * @return Pair of (total_processed_commands, total_batches) + */ + std::pair GetBatchStats() const; + + /** + * @brief Get average batch processing time in milliseconds + * @return Average processing time or nullopt if no batches processed + */ + std::optional GetAverageBatchTime() const; + + private: + + /** + * @brief batch processing command + * @param batch command batch + * @return Whether the processing is successful + */ + pstd::Status ProcessBatch(const std::vector>& commands, + const std::vector& callbacks); + + /** + * @brief Check for conflicts based on command type and key name + * @param cmd_ptr command pointer + * @return true if there is a conflict (should be replaced), false if there is no conflict + */ + bool CheckConflict(const std::shared_ptr& cmd_ptr) const; + + /** + * @brief Handle key conflicts and remove conflicting commands + * @param cmd_ptr new command + */ + void HandleConflict(const std::shared_ptr& cmd_ptr); + + /** + * @brief Retry batch processing commands + * @param commands List of commands to retry + * @param callbacks Corresponding callback function list + * @param priority Priority level for the retry (higher 
means more urgent) + * @return Whether the commands were successfully requeued + */ + bool RetryBatch(const std::vector>& commands, + const std::vector& callbacks, + int priority = 100); + + private: + //Consensus coordinator reference + ConsensusCoordinator* coordinator_; + + // Batch processing configuration + std::atomic batch_size_; + std::atomic batch_max_wait_time_; + + // Retry configuration + std::atomic max_retry_attempts_{3}; + std::atomic retry_backoff_ms_{50}; + + // Command collection and processing + mutable std::mutex mutex_; + + // Pending command queue and callbacks + std::list, CommandCallback>> pending_commands_; + + // Priority queue for retries + std::deque>, std::vector>> retry_queue_; + + // Command key mapping, used to handle same-key conflicts + std::unordered_map, CommandCallback>>::iterator> key_map_; + + // Batch statistics + std::atomic total_processed_{0}; + std::atomic total_batches_{0}; + std::atomic total_retries_{0}; + std::atomic total_conflicts_{0}; + std::atomic total_batch_time_ms_{0}; + std::chrono::time_point batch_start_time_; + + // Performance tracking + struct BatchMetrics { + uint64_t batch_size; + uint64_t processing_time_ms; + uint64_t wait_time_ms; + bool successful; + }; + + // Circular buffer for recent batch metrics + static constexpr size_t kMetricsBufferSize = 100; + std::vector recent_metrics_; + std::mutex metrics_mutex_; +}; + +#endif // PIKA_COMMAND_COLLECTOR_H_ \ No newline at end of file diff --git a/include/pika_conf.h b/include/pika_conf.h index 80d5abe8f0..d9cd9a91bd 100644 --- a/include/pika_conf.h +++ b/include/pika_conf.h @@ -69,6 +69,21 @@ class PikaConf : public pstd::BaseConf { std::shared_lock l(rwlock_); return sync_thread_num_; } + + bool command_batch_enabled() { + std::shared_lock l(rwlock_); + return command_batch_enabled_; + } + + int batch_size() { + std::shared_lock l(rwlock_); + return batch_size_; + } + + int batch_max_wait_time() { + std::shared_lock l(rwlock_); + return 
batch_max_wait_time_; + } int sync_binlog_thread_num() { std::shared_lock l(rwlock_); return sync_binlog_thread_num_; @@ -350,6 +365,16 @@ class PikaConf : public pstd::BaseConf { int max_conn_rbuf_size() { return max_conn_rbuf_size_.load(); } int consensus_level() { return consensus_level_.load(); } int replication_num() { return replication_num_.load(); } + int replication_ack_timeout() { + std::shared_lock l(rwlock_); + return replication_ack_timeout_; + } + + // Function to set replication acknowledgment timeout (used by batch system) + void SetReplicationAckTimeout(int timeout) { + std::lock_guard l(rwlock_); + replication_ack_timeout_ = timeout; + } int rate_limiter_mode() { std::shared_lock l(rwlock_); return rate_limiter_mode_; @@ -436,7 +461,6 @@ class PikaConf : public pstd::BaseConf { bool is_admin_cmd(const std::string& cmd) { return admin_cmd_set_.find(cmd) != admin_cmd_set_.end(); } - // Immutable config items, we don't use lock. bool daemonize() { return daemonize_; } bool rtc_cache_read_enabled() { return rtc_cache_read_enabled_; } @@ -462,6 +486,23 @@ class PikaConf : public pstd::BaseConf { std::lock_guard l(rwlock_); thread_num_ = value; } + + void SetCommandBatchEnabled(const bool value) { + std::lock_guard l(rwlock_); + TryPushDiffCommands("command-batch-enabled", value ? 
"yes" : "no"); + command_batch_enabled_ = value; + } + + void SetCommandBatchSize(const int value) { + std::lock_guard l(rwlock_); + TryPushDiffCommands("batch-size", std::to_string(value)); + batch_size_ = value; + } + void SetCommandBatchMaxWaitTime(const int value) { + std::lock_guard l(rwlock_); + TryPushDiffCommands("batch-max-wait-time", std::to_string(value)); + batch_max_wait_time_ = value; + } void SetTimeout(const int value) { std::lock_guard l(rwlock_); TryPushDiffCommands("timeout", std::to_string(value)); @@ -665,6 +706,17 @@ class PikaConf : public pstd::BaseConf { TryPushDiffCommands("max-conn-rbuf-size", std::to_string(value)); max_conn_rbuf_size_.store(value); } + void SetConsensusBatchSize(const int value) { + std::lock_guard l(rwlock_); + TryPushDiffCommands("batch-size", std::to_string(value)); + batch_size_ = value; + } + // This method is used by config update system + void UpdateReplicationAckTimeout(const int value) { + std::lock_guard l(rwlock_); + TryPushDiffCommands("replication-ack-timeout", std::to_string(value)); + replication_ack_timeout_ = value; + } void SetMaxCacheFiles(const int& value) { std::lock_guard l(rwlock_); TryPushDiffCommands("max-cache-files", std::to_string(value)); @@ -929,6 +981,12 @@ class PikaConf : public pstd::BaseConf { std::string server_id_; std::string run_id_; std::string replication_id_; + + // 命令批处理相关配置 + bool command_batch_enabled_ = true; + int batch_size_ = 100; + int batch_max_wait_time_ = 5; + int replication_ack_timeout_ = 5000; std::string requirepass_; std::string masterauth_; std::string userpass_; @@ -1047,7 +1105,7 @@ class PikaConf : public pstd::BaseConf { int throttle_bytes_per_second_ = 200 << 20; // 200MB/s int max_rsync_parallel_num_ = kMaxRsyncParallelNum; std::atomic_int64_t rsync_timeout_ms_ = 1000; - + /* kUninitialized = 0, // unknown setting kDisable = 1, // disable perf stats diff --git a/include/pika_consensus.h b/include/pika_consensus.h index 78e20eb3ab..a30ada1b07 100644 --- 
a/include/pika_consensus.h +++ b/include/pika_consensus.h @@ -155,6 +155,8 @@ class ConsensusCoordinator { pstd::Status Reset(const LogOffset& offset); pstd::Status ProposeLog(const std::shared_ptr& cmd_ptr); + // Batch processing of commands + pstd::Status BatchProposeLog(const std::vector>& cmd_ptrs, std::vector* offsets); pstd::Status UpdateSlave(const std::string& ip, int port, const LogOffset& start, const LogOffset& end); pstd::Status AddSlaveNode(const std::string& ip, int port, int session_id); pstd::Status RemoveSlaveNode(const std::string& ip, int port); @@ -244,6 +246,10 @@ class ConsensusCoordinator { SyncProgress sync_pros_; std::shared_ptr stable_logger_; std::shared_ptr mem_logger_; + + // Make db_name accessible to external classes + public: + const std::string& db_name() const { return db_name_; } // pacificA public: @@ -273,9 +279,16 @@ class ConsensusCoordinator { prepared_id_ = offset; } void SetCommittedId(const LogOffset& offset) { - std::lock_guard l(committed_id_rwlock_); - committed_id_ = offset; - context_->UpdateAppliedIndex(committed_id_); + { + std::lock_guard l(committed_id_rwlock_); + if (committed_id_ >= offset) { + return; + } + committed_id_ = offset; + context_->UpdateAppliedIndex(committed_id_); + } + notification_counter_.fetch_add(1); + LOG(INFO) << "SetCommittedId: Updated to " << offset.ToString(); } private: @@ -286,6 +299,7 @@ class ConsensusCoordinator { bool is_consistency_ = false; std::shared_mutex committed_id_rwlock_; LogOffset committed_id_ = LogOffset(); + std::atomic notification_counter_{0}; std::shared_mutex prepared_id__rwlock_; LogOffset prepared_id_ = LogOffset(); std::shared_ptr logs_; diff --git a/include/pika_define.h b/include/pika_define.h index c09d0d7c38..f3b4fe7023 100644 --- a/include/pika_define.h +++ b/include/pika_define.h @@ -140,7 +140,9 @@ struct LogOffset { bool operator<=(const LogOffset& other) const { return b_offset <= other.b_offset; } bool operator>=(const LogOffset& other) const { return 
b_offset >= other.b_offset; } bool operator>(const LogOffset& other) const { return b_offset > other.b_offset; } + bool operator!=(const LogOffset& other) const { return b_offset != other.b_offset; } std::string ToString() const { return b_offset.ToString() + " " + l_offset.ToString(); } + bool IsValid() const { return b_offset.filenum > 0 || b_offset.offset > 0; } BinlogOffset b_offset; LogicOffset l_offset; }; @@ -178,10 +180,18 @@ const std::string BinlogSyncStateMsg[] = {"NotSync", "ReadFromCache", "ReadFromF struct BinlogChip { LogOffset offset_; std::string binlog_; - BinlogChip(const LogOffset& offset, std::string binlog) : offset_(offset), binlog_(std::move(binlog)) {} + bool is_batch_ = false; + + BinlogChip(const LogOffset& offset, std::string binlog) + : offset_(offset), binlog_(std::move(binlog)), is_batch_(false) {} + + BinlogChip(const LogOffset& offset, std::string binlog, bool is_batch) + : offset_(offset), binlog_(std::move(binlog)), is_batch_(is_batch) {} + BinlogChip(const BinlogChip& binlog_chip) { offset_ = binlog_chip.offset_; binlog_ = binlog_chip.binlog_; + is_batch_ = binlog_chip.is_batch_; } }; @@ -257,7 +267,7 @@ class RmNode : public Node { void SetSessionId(int32_t session_id) { session_id_ = session_id; } int32_t SessionId() const { return session_id_; } std::string ToString() const { - return "db=" + DBName() + "_,ip_port=" + Ip() + ":" + + return "db=" + DBName() + ",ip_port=" + Ip() + ":" + std::to_string(Port()) + ",session id=" + std::to_string(SessionId()); } void SetLastSendTime(uint64_t last_send_time) { last_send_time_ = last_send_time; } @@ -354,6 +364,14 @@ const int64_t kPoolSize = 1073741824; const std::string kBinlogPrefix = "write2file"; const size_t kBinlogPrefixLen = 10; +/* + * PIKA_BATCH_MAGIC: Core identifier for binlog batch processing. 
+ * - Master: Prefixes batched binlogs with this magic in SendBinlog + * - Slave: Detects this magic in HandleBGWorkerWriteBinlog + * to switch between batch and single-binlog parsing modes. + */ +const uint32_t PIKA_BATCH_MAGIC = 0x42544348; // "BTCH" in ASCII + const std::string kPikaMeta = "meta"; const std::string kManifest = "manifest"; const std::string kContext = "context"; diff --git a/include/pika_repl_client.h b/include/pika_repl_client.h index 4faf8285a3..2eaccc5805 100644 --- a/include/pika_repl_client.h +++ b/include/pika_repl_client.h @@ -88,14 +88,22 @@ class PikaReplClient { async_write_db_task_counts_[db_index].fetch_sub(incr_step, std::memory_order::memory_order_seq_cst); } - int32_t GetUnfinishedAsyncWriteDBTaskCount(const std::string& db_name) { - int32_t db_index = db_name.back() - '0'; - assert(db_index >= 0 && db_index <= 7); - return async_write_db_task_counts_[db_index].load(std::memory_order_seq_cst); - } + int32_t GetUnfinishedAsyncWriteDBTaskCount(const std::string& db_name); + void SignalAsyncWriteDBTaskEnd(const std::string& db_name); + void WaitForAsyncWriteDBTaskEnd(const std::string& db_name); + + // unfinished_async_write_db_tasks related + pstd::Mutex unfinished_async_write_db_tasks_mu_; + std::unordered_map unfinished_async_write_db_tasks_; + pstd::CondVar async_write_db_tasks_cond_; + + // db_write_block_fds_ related + pstd::Mutex db_write_block_fds_mu_; + std::set db_write_block_fds_; private: - size_t GetBinlogWorkerIndexByDBName(const std::string &db_name); + size_t GetBinlogWorkerIndexByDBName(const std::string& db_name); + size_t GetDBWorkerIndexByDBName(const std::string& db_name); size_t GetHashIndexByKey(const std::string& key); void UpdateNextAvail() { next_avail_ = (next_avail_ + 1) % static_cast(write_binlog_workers_.size()); } diff --git a/include/pika_rm.h b/include/pika_rm.h index 709d5722cc..4f2208658a 100644 --- a/include/pika_rm.h +++ b/include/pika_rm.h @@ -16,13 +16,15 @@ #include "pstd/include/pstd_status.h" 
#include "include/pika_binlog_reader.h" -#include "include/pika_consensus.h" #include "include/pika_repl_client.h" #include "include/pika_repl_server.h" #include "include/pika_slave_node.h" #include "include/pika_stable_log.h" #include "include/rsync_client.h" +#include "include/pika_consensus.h" +#include "include/pika_command_collector.h" + #define kBinlogSendPacketNum 40 #define kBinlogSendBatchNum 100 @@ -30,7 +32,6 @@ #define kSendKeepAliveTimeout (2 * 1000000) #define kRecvKeepAliveTimeout (20 * 1000000) - class SyncDB { public: SyncDB(const std::string& db_name); @@ -69,6 +70,7 @@ class SyncMasterDB : public SyncDB { // consensus use pstd::Status ConsensusUpdateSlave(const std::string& ip, int port, const LogOffset& start, const LogOffset& end); pstd::Status ConsensusProposeLog(const std::shared_ptr& cmd_ptr); + pstd::Status ConsensusBatchProposeLog(const std::vector>& cmd_ptrs, std::vector* offsets); pstd::Status ConsensusProcessLeaderLog(const std::shared_ptr& cmd_ptr, const BinlogItem& attribute); LogOffset ConsensusCommittedIndex(); @@ -83,16 +85,21 @@ class SyncMasterDB : public SyncDB { return coordinator_.StableLogger()->Logger(); } + std::shared_ptr GetSlaveNode(const std::string& ip, int port); + // Make coordinator_ accessible to StableLog class + ConsensusCoordinator& GetCoordinator() { return coordinator_; } + std::shared_ptr GetCommandCollector(); + private: // invoker need to hold slave_mu_ pstd::Status ReadBinlogFileToWq(const std::shared_ptr& slave_ptr); - std::shared_ptr GetSlaveNode(const std::string& ip, int port); std::unordered_map> GetAllSlaveNodes(); pstd::Mutex session_mu_; int32_t session_id_ = 0; ConsensusCoordinator coordinator_; + std::shared_ptr command_collector_; //pacificA public: public: @@ -108,12 +115,14 @@ class SyncMasterDB : public SyncDB { LogOffset GetPreparedId(); LogOffset GetCommittedId(); pstd::Status AppendSlaveEntries(const std::shared_ptr& cmd_ptr, const BinlogItem& attribute); + pstd::Status 
BatchAppendSlaveEntries(const std::vector>& cmd_ptrs, const std::vector& attributes); pstd::Status AppendCandidateBinlog(const std::string& ip, int port, const LogOffset& offset); pstd::Status UpdateCommittedID(); pstd::Status CommitAppLog(const LogOffset& master_committed_id); pstd::Status Truncate(const LogOffset& offset); - - + pstd::Status WaitForSlaveAcks(const LogOffset& target_offset, int timeout_ms); + // last purge timestamp + std::atomic last_purge_time_us_ = 0; }; class SyncSlaveDB : public SyncDB { @@ -163,6 +172,9 @@ class PikaReplicaManager { pstd::Status SendMetaSyncRequest(); pstd::Status SendRemoveSlaveNodeRequest(const std::string& table); pstd::Status SendTrySyncRequest(const std::string& db_name); + + // 获取指定数据库的共识协调器 + std::shared_ptr GetConsensusCoordinator(const std::string& db_name); pstd::Status SendDBSyncRequest(const std::string& db_name); pstd::Status SendBinlogSyncAckRequest(const std::string& table, const LogOffset& ack_start, const LogOffset& ack_end, bool is_first_send = false); @@ -207,6 +219,8 @@ class PikaReplicaManager { const std::shared_ptr& res, const std::shared_ptr& conn, void* res_private_data); void ScheduleWriteDBTask(const std::shared_ptr& cmd_ptr, const std::string& db_name); + void SignalAsyncWriteDBTaskEnd(const std::string& db_name); + void WaitForAsyncWriteDBTaskEnd(const std::string& db_name); void ScheduleReplClientBGTaskByDBName(net::TaskFunc , void* arg, const std::string &db_name); void ReplServerRemoveClientConn(int fd); void ReplServerUpdateClientConnMap(const std::string& ip_port, int fd); @@ -243,10 +257,27 @@ class PikaReplicaManager { pstd::Mutex write_queue_mu_; + // db_name -> a queue of write task + using DBWriteTaskQueue = std::map>; + // ip:port -> a map of DBWriteTaskQueue + using SlaveWriteTaskQueue = std::map; + // every host owns a queue, the key is "ip + port" - std::unordered_map>> write_queues_; + SlaveWriteTaskQueue write_queues_; + + // client for replica std::unique_ptr pika_repl_client_; 
std::unique_ptr pika_repl_server_; + + // one-shot switch to force immediate send on next SendBinlog + std::atomic immediate_send_once_{false}; + + // Condition variable for signaling when the write queue has new items + pstd::CondVar write_queue_cv_; + + std::shared_mutex is_consistency_rwlock_; + bool is_consistency_ = false; + std::shared_mutex committed_id_rwlock_; }; #endif // PIKA_RM_H diff --git a/include/pika_server.h b/include/pika_server.h index 41a8c9b346..fd858696a8 100644 --- a/include/pika_server.h +++ b/include/pika_server.h @@ -78,6 +78,8 @@ struct TaskArg { void DoBgslotscleanup(void* arg); void DoBgslotsreload(void* arg); +class PikaCommandCollector; + class PikaServer : public pstd::noncopyable { public: PikaServer(); @@ -523,6 +525,11 @@ class PikaServer : public pstd::noncopyable { exec_stat_map.insert(std::make_pair(cmd_name, 0)); } } + + // 命令收集器访问 + std::shared_ptr CommandCollector() { + return command_collector_; + } private: /* @@ -575,6 +582,11 @@ class PikaServer : public pstd::noncopyable { std::unique_ptr pika_admin_cmd_thread_pool_; std::unique_ptr pika_dispatch_thread_ = nullptr; + /* + * command collector for batch processing + */ + std::shared_ptr command_collector_; + /* * Slave used */ diff --git a/include/pika_stable_log.h b/include/pika_stable_log.h index 300e0d0fc5..8cb6dc6267 100644 --- a/include/pika_stable_log.h +++ b/include/pika_stable_log.h @@ -11,6 +11,9 @@ #include "include/pika_binlog.h" +// Forward declaration to avoid circular dependency +class ConsensusCoordinator; + class StableLog : public std::enable_shared_from_this { public: StableLog(std::string table_name, std::string log_path); @@ -25,6 +28,9 @@ class StableLog : public std::enable_shared_from_this { std::shared_lock l(offset_rwlock_); return first_offset_; } + // Return a direct reference to the ConsensusCoordinator without copying + std::shared_ptr coordinator(); + void set_coordinator(std::shared_ptr coordinator); // Need to hold binlog lock pstd::Status 
TruncateTo(const LogOffset& offset); diff --git a/src/net/src/pb_conn.cc b/src/net/src/pb_conn.cc index 5185e8f51d..15e4024da0 100644 --- a/src/net/src/pb_conn.cc +++ b/src/net/src/pb_conn.cc @@ -153,7 +153,7 @@ WriteStatus PbConn::SendReply() { if (item_len - write_buf_.item_pos_ != 0) { return kWriteHalf; } - LOG(ERROR) << "write item success"; + //LOG(ERROR) << "write item success"; } return kWriteAll; } diff --git a/src/pika_auxiliary_thread.cc b/src/pika_auxiliary_thread.cc index e94104b442..685e370160 100644 --- a/src/pika_auxiliary_thread.cc +++ b/src/pika_auxiliary_thread.cc @@ -39,14 +39,26 @@ void* PikaAuxiliaryThread::ThreadMain() { if (!s.ok()) { LOG(WARNING) << s.ToString(); } - // send to peer - int res = g_pika_server->SendToPeer(); - if (res == 0) { - // sleep 100 ms + // send to peer (only for master nodes) + int current_role = g_pika_server->role(); + static int role_log_counter = 0; + // if (++role_log_counter % 100 == 0) { // Log role every 100 iterations + // LOG(INFO) << "PikaAuxiliaryThread: Current role=" << current_role << " (MASTER=" << PIKA_ROLE_MASTER << ")"; + // } + + if (current_role & PIKA_ROLE_MASTER) { + int res = g_pika_server->SendToPeer(); + if (res == 0) { + // sleep 100 ms + std::unique_lock lock(mu_); + cv_.wait_for(lock, 100ms); + } else { + LOG(INFO) << "PikaAuxiliaryThread: Processed " << res << " binlog tasks"; + } + } else { + // For slave nodes, just sleep std::unique_lock lock(mu_); cv_.wait_for(lock, 100ms); - } else { - // LOG_EVERY_N(INFO, 1000) << "Consume binlog number " << res; } } return nullptr; diff --git a/src/pika_binlog.cc b/src/pika_binlog.cc index 187d63d8ad..e051108317 100644 --- a/src/pika_binlog.cc +++ b/src/pika_binlog.cc @@ -469,3 +469,10 @@ Status Binlog::Truncate(uint32_t pro_num, uint64_t pro_offset, uint64_t index) { return Status::OK(); } + +Status Binlog::Sync() { + if (queue_) { + return queue_->Sync(); + } + return Status::Corruption("Logger not initialized"); +} \ No newline at end of file 
diff --git a/src/pika_client_conn.cc b/src/pika_client_conn.cc index a6cd5ec62f..7881c08da2 100644 --- a/src/pika_client_conn.cc +++ b/src/pika_client_conn.cc @@ -16,9 +16,11 @@ #include "include/pika_define.h" #include "include/pika_rm.h" #include "include/pika_server.h" +#include "include/pika_command_collector.h" #include "net/src/dispatch_thread.h" #include "net/src/worker_thread.h" #include "src/pstd/include/scope_record_lock.h" +#include #include "rocksdb/perf_context.h" #include "rocksdb/iostats_context.h" @@ -357,14 +359,84 @@ void PikaClientConn::DoBackgroundTask(void* arg) { } void PikaClientConn::BatchExecRedisCmd(const std::vector& argvs, bool cache_miss_in_rtc) { - resp_num.store(static_cast(argvs.size())); - for (const auto& argv : argvs) { - std::shared_ptr resp_ptr = std::make_shared(); - resp_array.push_back(resp_ptr); - ExecRedisCmd(argv, resp_ptr, cache_miss_in_rtc); + if (argvs.empty()) { + return; + } + + // Always use command collector batch processing when enabled, regardless of batch size + if (g_pika_conf->command_batch_enabled()) { + std::vector> responses(argvs.size()); + std::vector> cmd_ptrs(argvs.size()); + bool has_write_cmds = false; + + // First, prepare response objects and parse commands for each command in the batch + for (size_t i = 0; i < argvs.size(); i++) { + responses[i] = std::make_shared(); + + std::string opt = argvs[i][0]; + pstd::StringToLower(opt); + if (opt == kClusterPrefix && argvs[i].size() >= 2) { + opt += argvs[i][1]; + pstd::StringToLower(opt); + } + + cmd_ptrs[i] = DoCmd(argvs[i], opt, responses[i], cache_miss_in_rtc); + + // If it's a write command and not in a transaction, we'll need batch processing + if (cmd_ptrs[i]->is_write() && !IsInTxn()) { + has_write_cmds = true; + } else { + // For read commands or commands in transactions, process result immediately + *(responses[i]) = std::move(cmd_ptrs[i]->res().message()); + } + } + + // For writing commands, use the command collector for efficient batch 
processing + if (has_write_cmds) { + // Log batch processing metrics + LOG(INFO) << "BatchExecRedisCmd: Processing batch of " << argvs.size() << " commands, with write commands"; + + // Group all write commands for batch processing + for (size_t i = 0; i < cmd_ptrs.size(); i++) { + if (cmd_ptrs[i]->is_write() && !IsInTxn()) { + // Use shared responses array to avoid memory copy + g_pika_server->CommandCollector()->AddCommand(cmd_ptrs[i], + [responses, i, cmd = cmd_ptrs[i]](const LogOffset& offset, pstd::Status status) { + if (!status.ok()) { + LOG(WARNING) << "Command failed with status: " << status.ToString(); + *(responses[i]) = "-ERR " + status.ToString() + "\r\n"; + } else { + *(responses[i]) = cmd->res().message(); + } + }); + } + } + + // Flush the batch immediately to optimize latency + g_pika_server->CommandCollector()->FlushCommands(true); + } + + // Send all responses to client + for (const auto& resp : responses) { + WriteResp(*resp); + } + + if (write_completed_cb_) { + write_completed_cb_(); + write_completed_cb_ = nullptr; + } + NotifyEpoll(true); + } else { + // Legacy non-batched processing path + resp_num.store(static_cast(argvs.size())); + for (const auto& argv : argvs) { + std::shared_ptr resp_ptr = std::make_shared(); + resp_array.push_back(resp_ptr); + ExecRedisCmd(argv, resp_ptr, cache_miss_in_rtc); + } + time_stat_->process_done_ts_ = pstd::NowMicros(); + TryWriteResp(); } - time_stat_->process_done_ts_ = pstd::NowMicros(); - TryWriteResp(); } bool PikaClientConn::ReadCmdInCache(const net::RedisCmdArgsType& argv, const std::string& opt) { @@ -553,8 +625,20 @@ void PikaClientConn::ExecRedisCmd(const PikaCmdArgsType& argv, std::shared_ptr cmd_ptr = DoCmd(argv, opt, resp_ptr, cache_miss_in_rtc); - *resp_ptr = std::move(cmd_ptr->res().message()); - resp_num--; + if (cmd_ptr->is_write() && g_pika_conf->command_batch_enabled() && !IsInTxn()) { + // Use command collector for batch processing + g_pika_server->CommandCollector()->AddCommand(cmd_ptr, 
[this, resp_ptr](const LogOffset& offset, pstd::Status status) { + if (!status.ok()) { + *resp_ptr = "-ERR " + status.ToString() + "\r\n"; + } + // Decrease response count + resp_num--; + }); + } else { + // Handling for non-write commands or when batch processing is disabled + *resp_ptr = std::move(cmd_ptr->res().message()); + resp_num--; + } } std::queue> PikaClientConn::GetTxnCmdQue() { return txn_cmd_que_; } diff --git a/src/pika_command_collector.cc b/src/pika_command_collector.cc new file mode 100644 index 0000000000..1d91876010 --- /dev/null +++ b/src/pika_command_collector.cc @@ -0,0 +1,508 @@ +// Copyright (c) 2015-present, Qihoo, Inc. All rights reserved. +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. An additional grant +// of patent rights can be found in the PATENTS file in the same directory. + +#include "include/pika_command_collector.h" +#include "include/pika_conf.h" +#include "include/pika_server.h" +#include "include/pika_rm.h" +#include "include/pika_define.h" +#include +#include + +extern std::unique_ptr g_pika_conf; +extern std::unique_ptr g_pika_rm; + +PikaCommandCollector::PikaCommandCollector(ConsensusCoordinator* coordinator, size_t batch_size, int batch_max_wait_time) + : coordinator_(coordinator), + batch_size_(batch_size), + batch_max_wait_time_(batch_max_wait_time) { + + // Check if coordinator is null + if (!coordinator_) { + LOG(FATAL) << "PikaCommandCollector: ConsensusCoordinator cannot be null! 
" + << "This usually means SyncMasterDB is not initialized yet."; + return; + } + + LOG(INFO) << "PikaCommandCollector created with batch_size=" << batch_size << ", batch_max_wait_time=" << batch_max_wait_time << "ms"; + + // Initialize metrics buffer + recent_metrics_.reserve(kMetricsBufferSize); +} + +// Constructor with shared_ptr (for compatibility with make_shared calls) +PikaCommandCollector::PikaCommandCollector(std::shared_ptr coordinator, size_t batch_size, int batch_max_wait_time) + : coordinator_(coordinator.get()), + batch_size_(batch_size), + batch_max_wait_time_(batch_max_wait_time) { + + // Check if coordinator is null + if (!coordinator_) { + LOG(FATAL) << "PikaCommandCollector: ConsensusCoordinator cannot be null! " + << "This usually means SyncMasterDB is not initialized yet."; + return; + } + + LOG(INFO) << "PikaCommandCollector created from shared_ptr with batch_size=" << batch_size << ", batch_max_wait_time=" << batch_max_wait_time << "ms"; + + // Initialize metrics buffer + recent_metrics_.reserve(kMetricsBufferSize); +} + +PikaCommandCollector::~PikaCommandCollector() { + // Process any remaining commands + FlushCommands(true); + + LOG(INFO) << "PikaCommandCollector stopped, processed " << total_processed_.load() + << " commands in " << total_batches_.load() << " batches" + << ", retries: " << total_retries_.load() + << ", conflicts: " << total_conflicts_.load(); +} + +bool PikaCommandCollector::AddCommand(const std::shared_ptr& cmd_ptr, CommandCallback callback) { + if (!cmd_ptr || !cmd_ptr->is_write()) { + LOG(WARNING) << "Attempt to add non-write command to CommandCollector"; + return false; + } + + std::lock_guard lock(mutex_); + + if (pending_commands_.empty()) { + batch_start_time_ = std::chrono::steady_clock::now(); + } + + // Check if we should immediately flush the batch + bool should_flush = pending_commands_.size() >= static_cast(batch_size_.load()); + if (should_flush) { + FlushCommands(false); + } + + // Handle same Key conflict 
- counts updated inside HandleConflict + HandleConflict(cmd_ptr); + + // Add command to queue + pending_commands_.emplace_back(cmd_ptr, std::move(callback)); + + // Update Key Mapping + std::vector keys = cmd_ptr->current_key(); + for (const auto& key : keys) { + key_map_[key] = std::prev(pending_commands_.end()); + } + + + return true; +} + +size_t PikaCommandCollector::FlushCommands(bool force) { + std::vector> commands; + std::vector callbacks; + + // Record batch metrics variables + auto batch_start = std::chrono::steady_clock::now(); + uint64_t wait_time_ms = 0; + bool batch_successful = false; + + { + std::lock_guard lock(mutex_); + if (pending_commands_.empty()) { + // Check if there are any retries to process + if (!retry_queue_.empty() && force) { + auto& [pri, cmds, cbs] = retry_queue_.front(); + commands = cmds; + callbacks = cbs; + retry_queue_.pop_front(); + LOG(INFO) << "FlushCommands: Processing retry batch with priority " << pri << ", size: " << commands.size(); + } else { + return 0; + } + } else { + auto now = std::chrono::steady_clock::now(); + auto elapsed_ms = std::chrono::duration_cast(now - batch_start_time_).count(); + wait_time_ms = elapsed_ms; // Record wait time for metrics + + bool should_flush = force || + pending_commands_.size() >= static_cast(batch_size_.load()) || + elapsed_ms > batch_max_wait_time_.load(); + + if (!should_flush) { + return 0; + } + + size_t batch_count = pending_commands_.size(); + if (!force) { + batch_count = std::min(batch_count, static_cast(batch_size_.load())); + } + + commands.reserve(batch_count); + callbacks.reserve(batch_count); + + auto it = pending_commands_.begin(); + for (size_t i = 0; i < batch_count; ++i, ++it) { + commands.push_back(it->first); + callbacks.push_back(std::move(it->second)); + } + + // Clear queue and map + for (const auto& cmd : commands) { + std::vector keys = cmd->current_key(); + for (const auto& key : keys) { + key_map_.erase(key); + } + } + 
pending_commands_.erase(pending_commands_.begin(), std::next(pending_commands_.begin(), batch_count)); + + if (!pending_commands_.empty()) { + // Reset timer for the next batch + batch_start_time_ = std::chrono::steady_clock::now(); + } + } + } + + size_t batch_size = commands.size(); + if (batch_size > 0) { + LOG(INFO) << "Processing batch of " << batch_size << " commands"; + + auto process_start = std::chrono::steady_clock::now(); + pstd::Status status = ProcessBatch(commands, callbacks); + auto process_end = std::chrono::steady_clock::now(); + + uint64_t processing_time_ms = std::chrono::duration_cast(process_end - process_start).count(); + batch_successful = status.ok(); + + if (!batch_successful) { + LOG(ERROR) << "Error processing command batch: " << status.ToString(); + } else { + LOG(INFO) << "Successfully processed batch in " << processing_time_ms << "ms"; + } + + // Update statistics + total_processed_.fetch_add(batch_size); + total_batches_.fetch_add(1); + total_batch_time_ms_.fetch_add(processing_time_ms); + + // Record batch metrics + { + std::lock_guard metrics_lock(metrics_mutex_); + if (recent_metrics_.size() >= kMetricsBufferSize) { + recent_metrics_.erase(recent_metrics_.begin()); + } + recent_metrics_.push_back({batch_size, processing_time_ms, wait_time_ms, batch_successful}); + } + + // Process any retries if there were failures but we have pending retries + if (!batch_successful) { + std::lock_guard lock(mutex_); + if (!retry_queue_.empty()) { + LOG(INFO) << "FlushCommands: Processing retries due to batch failure"; + // Schedule immediate follow-up flush to process retries + return batch_size + FlushCommands(true); + } + } + } + + return batch_size; +} + +pstd::Status PikaCommandCollector::ProcessBatch( + const std::vector>& commands, + const std::vector& callbacks) { + + if (commands.empty()) { + return pstd::Status::OK(); + } + + // Implement batch processing logic here + // 1. Generate binlogs for each command + // 2. 
Write binlogs to production queue in batches + // 3. Main node will update memory data structures in batches + // 4. Trigger asynchronous persistence + + // Store the log offset for each command + std::vector offsets; + + // Check if coordinator is valid + if (!coordinator_) { + LOG(ERROR) << "ProcessBatch: ConsensusCoordinator is null"; + return pstd::Status::InvalidArgument("ConsensusCoordinator is null"); + } + + // Get SyncMasterDB and submit commands in batch + DBInfo db_info(coordinator_->db_name()); + auto master_db = g_pika_rm->GetSyncMasterDBByName(db_info); + if (!master_db) { + LOG(ERROR) << "Failed to get SyncMasterDB for " << coordinator_->db_name(); + return pstd::Status::NotFound("SyncMasterDB not found"); + } + + // Submit to consensus coordinator in batch + LOG(INFO) << "ProcessBatch: Processing " << commands.size() << " commands in batch"; + pstd::Status batch_status = master_db->ConsensusBatchProposeLog(commands, &offsets); + + // Log the batch status + if (!batch_status.ok()) { + LOG(WARNING) << "ProcessBatch: Batch operation failed with status: " << batch_status.ToString(); + if (batch_status.IsTimeout()) { + LOG(WARNING) << "ProcessBatch: Timeout occurred, triggering batch retry mechanism"; + + // Get the last command's offset + LogOffset last_offset; + if (!offsets.empty()) { + last_offset = offsets.back(); + } + + // Roll back committed_id on master and all slave nodes + if (last_offset.IsValid()) { + LOG(WARNING) << "ProcessBatch: Rolling back committed_id to before " << last_offset.ToString(); + + // Get current committed_id + LogOffset current_committed_id = master_db->GetCommittedId(); + + // Calculate rollback target committed_id (assuming rollback to the previous batch's committed_id) + // In actual implementation, you may need to adjust the rollback target based on specific situations + LogOffset rollback_target = current_committed_id; + rollback_target.l_offset.index -= commands.size(); // Simple rollback, may need more complex logic 
in practice + if (rollback_target.l_offset.index < 0) { + rollback_target.l_offset.index = 0; + } + + // Execute rollback operation + LOG(WARNING) << "ProcessBatch: Rolling back from " << current_committed_id.ToString() + << " to " << rollback_target.ToString(); + + // Call rollback API + pstd::Status truncate_status = master_db->Truncate(rollback_target); + if (!truncate_status.ok()) { + LOG(ERROR) << "ProcessBatch: Failed to rollback committed_id: " << truncate_status.ToString(); + } else { + LOG(INFO) << "ProcessBatch: Successfully rolled back committed_id"; + + // Call batch retry mechanism + LOG(INFO) << "ProcessBatch: Triggering batch retry mechanism for " << commands.size() << " commands"; + bool retry_result = RetryBatch(commands, callbacks, 100); // Use high priority for timeouts + if (retry_result) { + LOG(INFO) << "ProcessBatch: Successfully requeued commands for retry"; + // Already requeued, no need to execute callbacks + return batch_status; + } else { + LOG(ERROR) << "ProcessBatch: Failed to requeue commands for retry"; + } + } + } + } + } else { + LOG(INFO) << "ProcessBatch: Batch operation completed successfully"; + } + + // Execute callback for each command + for (size_t i = 0; i < commands.size() && i < callbacks.size(); ++i) { + if (callbacks[i]) { + pstd::Status cmd_status; + // If batch processing status failed, all commands should fail + if (!batch_status.ok()) { + // Pass the upper layer error status to the client + cmd_status = batch_status; + } else { + // If the offset is empty, it means the command failed to be added + cmd_status = offsets[i].IsValid() ? pstd::Status::OK() : pstd::Status::IOError("Failed to append command"); + } + + // Log information before executing each callback function + LOG(INFO) << "ProcessBatch: Executing callback for command " << i + << ", cmd=" << (commands[i] ? commands[i]->name() : "null") + << ", status=" << cmd_status.ToString() + << ", offset=" << (offsets[i].IsValid() ? 
offsets[i].ToString() : "invalid"); + // Execute callback + callbacks[i](offsets[i], cmd_status); + + // Log information after callback execution + LOG(INFO) << "ProcessBatch: Callback executed for command " << i; + } + } + return batch_status; +} + +bool PikaCommandCollector::CheckConflict(const std::shared_ptr& cmd_ptr) const { + if (!cmd_ptr) { + return false; + } + + std::vector keys = cmd_ptr->current_key(); + for (const auto& key : keys) { + if (key_map_.find(key) != key_map_.end()) { + return true; + } + } + + return false; +} + +void PikaCommandCollector::HandleConflict(const std::shared_ptr& cmd_ptr) { + if (!cmd_ptr) { + return; + } + + std::vector keys = cmd_ptr->current_key(); + std::vector, CommandCallback>>::iterator> to_remove; + + // Find all conflicting commands + for (const auto& key : keys) { + auto it = key_map_.find(key); + if (it != key_map_.end()) { + // Check if this iterator is already added + bool already_added = false; + for (const auto& iter : to_remove) { + if (iter == it->second) { + already_added = true; + break; + } + } + if (!already_added) { + to_remove.push_back(it->second); + } + key_map_.erase(it); + } + } + + // Track conflict metrics + if (!to_remove.empty()) { + total_conflicts_.fetch_add(to_remove.size()); + } + + // Check command importance to prevent important commands from being overwritten + for (auto it : to_remove) { + auto old_cmd = it->first; + auto new_cmd = cmd_ptr; + + // Determine command importance + // If the old command type exists in the important command list, consider it important + bool is_important_cmd = false; + // Check commands with keywords such as "EXEC", "MULTI", "WATCH", etc. 
+ std::string cmd_name = old_cmd->name(); + if (cmd_name == "MULTI" || cmd_name == "EXEC" || cmd_name == "WATCH") { + is_important_cmd = true; + } + + if (is_important_cmd) { + // 对于重要命令,我们保留原来的命令,并拒绝新命令 + // 恢复已移除的key映射 + std::vector old_keys = old_cmd->current_key(); + for (const auto& key : old_keys) { + key_map_[key] = it; + } + + // 从待删除列表中移除该命令 + for (auto iter = to_remove.begin(); iter != to_remove.end(); ++iter) { + if (*iter == it) { + to_remove.erase(iter); + break; + } + } + + } + } + + // 删除非重要的冲突命令 + for (auto it : to_remove) { + // 执行回调通知命令被取消 + if (it->second) { + it->second(LogOffset(), pstd::Status::Busy("Command replaced by newer command with same key")); + } + + // 从队列中移除 + pending_commands_.erase(it); + } +} + + + + + +void PikaCommandCollector::SetBatchSize(size_t batch_size) { + batch_size_.store(batch_size); + LOG(INFO) << "BatchSize set to " << batch_size; +} + +void PikaCommandCollector::SetBatchMaxWaitTime(int batch_max_wait_time) { + batch_max_wait_time_.store(batch_max_wait_time); + LOG(INFO) << "BatchMaxWaitTime set to " << batch_max_wait_time << "ms"; +} + +std::pair PikaCommandCollector::GetBatchStats() const { + return {total_processed_.load(), total_batches_.load()}; +} + +std::optional PikaCommandCollector::GetAverageBatchTime() const { + uint64_t total_batches = total_batches_.load(); + if (total_batches == 0) { + return std::nullopt; + } + return static_cast(total_batch_time_ms_.load()) / total_batches; +} + +size_t PikaCommandCollector::PendingCommands() const { + std::lock_guard lock(mutex_); + return pending_commands_.size(); +} + + + +bool PikaCommandCollector::RetryBatch( + const std::vector>& commands, + const std::vector& callbacks, + int priority) { + + if (commands.empty() || callbacks.empty() || commands.size() != callbacks.size()) { + LOG(WARNING) << "RetryBatch: Invalid input parameters, commands size: " << commands.size() + << ", callbacks size: " << callbacks.size(); + return false; + } + + std::lock_guard 
lock(mutex_); + + LOG(INFO) << "RetryBatch: Retrying " << commands.size() << " commands with priority " << priority; + + // Add to retry queue with priority + retry_queue_.emplace_front(std::make_tuple(priority, commands, callbacks)); + + // Update statistics + total_retries_.fetch_add(1); + + // Sort retry queue by priority (higher values first) + std::sort(retry_queue_.begin(), retry_queue_.end(), + [](const auto& a, const auto& b) { return std::get<0>(a) > std::get<0>(b); }); + + // Process highest priority retry immediately if possible + if (priority > 50 && pending_commands_.empty()) { + // Process retry queue directly + auto& [pri, cmds, cbs] = retry_queue_.front(); + + // Add each command to the queue + for (size_t i = 0; i < cmds.size(); ++i) { + if (cmds[i]) { + // Handle key conflicts + HandleConflict(cmds[i]); + + // Add to queue front for priority processing + pending_commands_.push_front(std::make_pair(cmds[i], cbs[i])); + + // Update key mapping + std::vector keys = cmds[i]->current_key(); + for (const auto& key : keys) { + key_map_[key] = pending_commands_.begin(); + } + + LOG(INFO) << "RetryBatch: Immediately requeued command " << i << ": " << cmds[i]->name(); + } + } + + // Remove from retry queue + retry_queue_.pop_front(); + + // Trigger immediate flush + FlushCommands(true); + } + + return true; +} \ No newline at end of file diff --git a/src/pika_conf.cc b/src/pika_conf.cc index 94071eac7f..a4a7178cb7 100644 --- a/src/pika_conf.cc +++ b/src/pika_conf.cc @@ -412,6 +412,20 @@ int PikaConf::Load() { max_cache_statistic_keys_ = 0; } + // 命令批处理相关配置 + std::string command_batch_enabled; + GetConfStr("command-batch-enabled", &command_batch_enabled); + command_batch_enabled_ = (command_batch_enabled == "yes"); + + GetConfInt("batch-size", &batch_size_); + if (batch_size_ <= 0) { + batch_size_ = 100; + } + + GetConfInt("batch_max_wait_time", &batch_max_wait_time_); + if (batch_max_wait_time_ <= 0) { + batch_max_wait_time_ = 5; + } // 
disable_auto_compactions GetConfBool("disable_auto_compactions", &disable_auto_compactions_); @@ -707,9 +721,13 @@ int PikaConf::Load() { rsync_timeout_ms_.store(tmp_rsync_timeout_ms); } - return ret; -} - + GetConfInt("replication-ack-timeout", &replication_ack_timeout_); + if (replication_ack_timeout_ <= 0) { + replication_ack_timeout_ = 5000; + } + return ret; + } + void PikaConf::TryPushDiffCommands(const std::string& command, const std::string& value) { if (!CheckConfExist(command)) { diff_commands_[command] = value; @@ -770,6 +788,9 @@ int PikaConf::ConfigRewrite() { SetConfStr("run-id", run_id_); SetConfStr("replication-id", replication_id_); SetConfInt("max-cache-statistic-keys", max_cache_statistic_keys_); + SetConfStr("command-batch-enabled", command_batch_enabled_ ? "yes" : "no"); + SetConfInt("batch-size", batch_size_); + SetConfInt("batch-max-wait-time", batch_max_wait_time_); SetConfInt("small-compaction-threshold", small_compaction_threshold_); SetConfInt("small-compaction-duration-threshold", small_compaction_duration_threshold_); SetConfInt("max-client-response-size", static_cast(max_client_response_size_)); @@ -790,6 +811,7 @@ int PikaConf::ConfigRewrite() { SetConfInt("replication-num", replication_num_.load()); SetConfStr("slow-cmd-list", pstd::Set2String(slow_cmd_set_, ',')); SetConfInt("max-conn-rbuf-size", max_conn_rbuf_size_.load()); + SetConfInt("replication-ack-timeout", replication_ack_timeout_); // options for storage engine SetConfInt("max-cache-files", max_cache_files_); SetConfInt("max-background-compactions", max_background_compactions_); diff --git a/src/pika_consensus.cc b/src/pika_consensus.cc index ef1960d589..64a0da4472 100644 --- a/src/pika_consensus.cc +++ b/src/pika_consensus.cc @@ -102,8 +102,9 @@ std::unordered_map> SyncProgress::GetAll Status SyncProgress::AddSlaveNode(const std::string& ip, int port, const std::string& db_name, int session_id) { std::string slave_key = MakeSlaveKey(ip, port); - std::shared_ptr exist_ptr = 
GetSlaveNode(ip, port); - if (exist_ptr) { + std::lock_guard l(rwlock_); + if (slaves_.find(slave_key) != slaves_.end()) { + std::shared_ptr exist_ptr = slaves_[slave_key]; LOG(WARNING) << "SlaveNode " << exist_ptr->ToString() << " already exist, set new session " << session_id; exist_ptr->SetSessionId(session_id); return Status::OK(); @@ -112,12 +113,11 @@ Status SyncProgress::AddSlaveNode(const std::string& ip, int port, const std::st slave_ptr->SetLastSendTime(pstd::NowMicros()); slave_ptr->SetLastRecvTime(pstd::NowMicros()); - { - std::lock_guard l(rwlock_); - slaves_[slave_key] = slave_ptr; - // add slave to match_index - match_index_[slave_key] = LogOffset(); - } + slaves_[slave_key] = slave_ptr; + // add slave to match_index + match_index_[slave_key] = LogOffset(); + + LOG(INFO) << "AddSlaveNode: Successfully added slave " << ip << ":" << port << " with session " << session_id; return Status::OK(); } @@ -228,6 +228,16 @@ ConsensusCoordinator::ConsensusCoordinator(const std::string& db_name) stable_logger_ = std::make_shared(db_name, log_path); mem_logger_ = std::make_shared(); logs_ = std::make_shared(); + + // Initialize prepared_id + LogOffset last_offset; + BinlogOffset b_offset; + Status s = stable_logger_->Logger()->GetProducerStatus(&b_offset.filenum, &b_offset.offset); + if (s.ok()) { + last_offset.b_offset = b_offset; + SetPreparedId(last_offset); + LOG(INFO) << "Init: Initialized prepared_id to " << last_offset.ToString(); + } } ConsensusCoordinator::~ConsensusCoordinator() = default; @@ -290,6 +300,7 @@ void ConsensusCoordinator::Init() { redis_parser.data = nullptr; mem_logger_->AppendLog(MemLog::LogItem(offset, cmd_ptr, nullptr, nullptr)); + logs_->AppendLog(Log::LogItem(offset, cmd_ptr, binlog)); } } @@ -335,6 +346,72 @@ Status ConsensusCoordinator::ProposeLog(const std::shared_ptr& cmd_ptr) { return Status::OK(); } +Status ConsensusCoordinator::BatchProposeLog(const std::vector>& cmd_ptrs, std::vector* offsets) { + if (cmd_ptrs.empty()) { + 
return Status::OK(); + } + + // Reserve space for all command offsets + offsets->resize(cmd_ptrs.size()); + + // Process each command in the batch + for (size_t i = 0; i < cmd_ptrs.size(); i++) { + const auto& cmd_ptr = cmd_ptrs[i]; + std::vector keys = cmd_ptr->current_key(); + + // Skip slot keys + if (cmd_ptr->name() == kCmdNameSAdd && !keys.empty() && + (keys[0].compare(0, SlotKeyPrefix.length(), SlotKeyPrefix) == 0 || + keys[0].compare(0, SlotTagPrefix.length(), SlotTagPrefix) == 0)) { + continue; + } + + // Generate binlog content + std::string content = cmd_ptr->ToRedisProtocol(); + std::string binlog; + + // Get offset for this command + LogOffset offset; + Status s = stable_logger_->Logger()->Put(content, &offset, binlog); + + if (!s.ok()) { + std::string db_name = cmd_ptr->db_name().empty() ? g_pika_conf->default_db() : cmd_ptr->db_name(); + std::shared_ptr db = g_pika_server->GetDB(db_name); + if (db) { + db->SetBinlogIoError(); + } + return s; + } + + // Store the offset for this command + (*offsets)[i] = offset; + + // If we're using PacificA consistency, append to logs + LOG(INFO) << "In BatchProposeLog, checking consistency mode for db " << db_name_ << ". IsConsistency: " << (GetISConsistency() ? 
"true" : "false"); + if (GetISConsistency()) { + LOG(INFO) << "BatchProposeLog: [Thread " << std::this_thread::get_id() << "] Adding log entry to logs_ with offset " << offset.ToString() << ", current logs size: " << logs_->Size() << ", logs_ addr: " << logs_.get(); + logs_->AppendLog(Log::LogItem(offset, cmd_ptr, binlog)); + LOG(INFO) << "BatchProposeLog: [Thread " << std::this_thread::get_id() << "] After adding, logs size: " << logs_->Size(); + } else { + LOG(INFO) << "BatchProposeLog: Consistency disabled, not adding to logs_"; + } + } + + // Signal auxiliary thread to handle these binlogs + g_pika_server->SignalAuxiliary(); + + // Master node batch Binlog flush disk - this ensures all data is persisted to disk + stable_logger_->Logger()->Sync(); + + // 成功提交日志后,更新prepared_id为最后一个日志的offset + if (!offsets->empty()) { + SetPreparedId(offsets->back()); + LOG(INFO) << "BatchProposeLog: Updated prepared_id to " << offsets->back().ToString(); + } + + return Status::OK(); +} + Status ConsensusCoordinator::InternalAppendLog(const std::shared_ptr& cmd_ptr) { return InternalAppendBinlog(cmd_ptr); } @@ -380,6 +457,15 @@ Status ConsensusCoordinator::UpdateSlave(const std::string& ip, int port, const } { std::lock_guard l(slave_ptr->slave_mu); + // Check if the received ACK is stale + if (end < slave_ptr->acked_offset) { + LOG(WARNING) << "Received older ACK from slave - ip: " << ip << ", port: " << port + << ", received: " << end.ToString() + << ", current: " << slave_ptr->acked_offset.ToString(); + // Ignore the stale ACK but do not return an error + return Status::OK(); + } + // Update the slave's acknowledged offset slave_ptr->acked_offset = end; sync_pros_.AddMatchIndex(ip, port, slave_ptr->acked_offset); LOG(INFO) << "PacificA slave ip: " << ip << ", port :" << port << ",slave acked_offset " @@ -823,6 +909,7 @@ bool ConsensusCoordinator::checkFinished(const LogOffset& offset) { //// pacificA private: Status ConsensusCoordinator::PersistAppendBinlog(const 
std::shared_ptr& cmd_ptr, LogOffset& cur_offset) { + std::lock_guard l(order_mu_); std::string content = cmd_ptr->ToRedisProtocol(); std::string binlog = std::string(); LogOffset offset = LogOffset(); @@ -868,9 +955,23 @@ Status ConsensusCoordinator::AppendEntries(const std::shared_ptr& cmd_ptr, } Status ConsensusCoordinator::AppendSlaveEntries(const std::shared_ptr& cmd_ptr, const BinlogItem& attribute) { LogOffset last_index = logs_->LastOffset(); + // Check for index gaps but allow normal continuous log appending if (attribute.logic_id() < last_index.l_offset.index) { + // If the log index gap is too large, it may be due to expired logs LOG(WARNING) << DBInfo(db_name_).ToString() << "Drop log from leader logic_id " << attribute.logic_id() << " cur last index " << last_index.l_offset.index; + // log the details of the received log + LOG(INFO) << "Leader log details: filenum: " << attribute.filenum() + << " offset: " << attribute.offset() + << " term: " << attribute.term_id() + << " logic_id: " << attribute.logic_id(); + return Status::InvalidArgument("log expired"); + } + + // 允许接收与当前索引相同的日志(防止重复发送的情况) + if (attribute.logic_id() == last_index.l_offset.index) { + LOG(INFO) << DBInfo(db_name_).ToString() << "Received duplicate log with logic_id " + << attribute.logic_id() << ", ignored"; return Status::OK(); } LogOffset offset = LogOffset(); @@ -885,46 +986,187 @@ Status ConsensusCoordinator::AppendSlaveEntries(const std::shared_ptr& cmd_ * @brief Commit logs up to the given offset and update the committed ID. 
*/ Status ConsensusCoordinator::CommitAppLog(const LogOffset& master_committed_id) { - int index = logs_->FindOffset(logs_->FirstOffset()); - int log_size = logs_->Size(); // Cache log size - for (int i = index; i < log_size; ++i) { - Log::LogItem log = logs_->At(i); - if (master_committed_id >= log.offset) { - LOG(INFO) << "PacificA master_committed_id: " << master_committed_id.ToString() - << ", ApplyLog: " << log.offset.ToString(); - ApplyBinlog(log.cmd_ptr); + LOG(INFO) << "CommitAppLog: Starting - master_committed_id: " << master_committed_id.ToString() + << ", db_name: " << db_name_; + + // 获取当前的committed_id + LogOffset current_committed_id = GetCommittedId(); + + // 如果收到的master_committed_id小于当前的committed_id,记录日志但不更新 + if (master_committed_id < current_committed_id) { + LOG(WARNING) << "CommitAppLog: Received master_committed_id " << master_committed_id.ToString() + << " is less than current committed_id " << current_committed_id.ToString() + << ", ignoring update"; + return Status::OK(); + } + + // 如果收到的master_committed_id等于当前的committed_id,无需更新 + if (master_committed_id == current_committed_id) { + LOG(INFO) << "CommitAppLog: Received master_committed_id equals current committed_id: " + << current_committed_id.ToString() << ", no update needed"; + return Status::OK(); + } + + // 记录日志条目数量和第一个条目的偏移量 + int logs_size = logs_->Size(); + LogOffset first_offset = logs_->FirstOffset(); + LOG(INFO) << "CommitAppLog: Found " << logs_size << " logs, starting from index 0, first_offset: " << first_offset.ToString(); + + // 应用所有小于等于master_committed_id的日志条目 + int applied_count = 0; + for (int i = 0; i < logs_size; ++i) { + Log::LogItem item = logs_->At(i); + + // 如果日志条目的偏移量大于master_committed_id,跳过 + if (item.offset > master_committed_id) { + LOG(INFO) << "CommitAppLog: Skipping log at offset " << item.offset.ToString() + << " because it's beyond committed_id: " << master_committed_id.ToString(); + continue; } + + // 如果日志条目的偏移量小于等于当前的committed_id,说明已经应用过,跳过 + if 
(item.offset <= current_committed_id) { + LOG(INFO) << "CommitAppLog: Skipping log at offset " << item.offset.ToString() + << " because it's already committed: " << current_committed_id.ToString(); + continue; + } + + // 应用日志条目 + LOG(INFO) << "CommitAppLog: Applying log at offset " << item.offset.ToString(); + Status s = ApplyBinlog(item.cmd_ptr); + if (!s.ok()) { + LOG(WARNING) << "CommitAppLog: Failed to apply log at offset " << item.offset.ToString() + << ", error: " << s.ToString(); + return s; + } + applied_count++; } - logs_->TruncateFrom(master_committed_id); // Truncate logs - SetCommittedId(master_committed_id); // Update committed ID + LOG(INFO) << "CommitAppLog: Applied " << applied_count << " logs"; + + // 更新committed_id + if (master_committed_id > current_committed_id) { + LOG(INFO) << "CommitAppLog: Updated committed_id from " << current_committed_id.ToString() + << " to " << master_committed_id.ToString(); + SetCommittedId(master_committed_id); + } + return Status::OK(); } /** - * @brief Update the committed ID based on the Prepared ID of the slave + * @brief Update the committed ID based on the acknowledged offsets of all slaves. 
*/ Status ConsensusCoordinator::UpdateCommittedID() { + // 获取当前的prepared_id + LogOffset current_prepared_id; + { + std::lock_guard l(prepared_id__rwlock_); + current_prepared_id = prepared_id_; + } + + // 获取当前的committed_id + LogOffset current_committed_id; + { + std::lock_guard l(committed_id_rwlock_); + current_committed_id = committed_id_; + } + std::unordered_map> slaves = sync_pros_.GetAllSlaveNodes(); - LogOffset slave_prepared_id = LogOffset(); - - for (const auto& slave : slaves) { - if (slave.second->slave_state == kSlaveBinlogSync) { - if (slave_prepared_id == LogOffset()) { - slave_prepared_id = slave.second->acked_offset; - } else if (slave.second->acked_offset < slave_prepared_id) { - slave_prepared_id = slave.second->acked_offset; + LogOffset min_acked_offset; + int active_slave_count = 0; + int total_slave_count = slaves.size(); + int binlog_sync_slave_count = 0; + + // 初始化min_acked_offset为主节点的prepared_id + min_acked_offset = current_prepared_id; + + LOG(INFO) << "UpdateCommittedID: Master prepared_id: " << current_prepared_id.ToString() + << ", current_committed_id: " << current_committed_id.ToString() + << ", Total slaves: " << total_slave_count; + + // 如果没有从节点或没有活跃从节点,且prepared_id > committed_id,直接更新committed_id + if (total_slave_count == 0 && current_prepared_id > current_committed_id) { + LOG(INFO) << "UpdateCommittedID: No slaves, updating committed_id to prepared_id: " + << current_prepared_id.ToString(); + SetCommittedId(current_prepared_id); + return Status::OK(); + } + + // 查找所有活跃从节点中的最小acked_offset + for (const auto& slave_pair : slaves) { + const auto& slave = slave_pair.second; + // 记录从节点的状态和acked_offset,无论状态如何 + LOG(INFO) << "UpdateCommittedID: Slave " << slave->Ip() << ":" << slave->Port() + << " state: " << SlaveStateMsg[slave->slave_state] + << " acked_offset: " << slave->acked_offset.ToString(); + + // 考虑所有状态的从节点,只要它们有有效的acked_offset + if (slave->acked_offset.IsValid()) { + if (active_slave_count == 0) { + min_acked_offset = 
slave->acked_offset; + } else { + if (slave->acked_offset < min_acked_offset) { + min_acked_offset = slave->acked_offset; + } + } + active_slave_count++; + + // 如果是BinlogSync或Candidate状态,计数 + if (slave->slave_state == kSlaveBinlogSync || slave->slave_state == KCandidate) { + binlog_sync_slave_count++; } } } - if (slave_prepared_id < GetCommittedId()) { - LOG(WARNING) << "Error: slave_prepared_id (" << slave_prepared_id.ToString() << ") < master_committedId (" - << GetCommittedId().ToString() << ")"; - return Status::Error("slave_prepared_id < master_committedId"); + + LOG(INFO) << "UpdateCommittedID: Active slaves: " << active_slave_count + << ", Total slaves: " << total_slave_count + << ", BinlogSync slaves: " << binlog_sync_slave_count; + + // 确定新的committed_id + LogOffset new_committed_id; + if (active_slave_count == 0) { + // 如果没有活跃的从节点,使用prepared_id作为new_committed_id + new_committed_id = current_prepared_id; + LOG(INFO) << "UpdateCommittedID: No active slaves, using prepared_id: " << current_prepared_id.ToString(); + } else if (active_slave_count < total_slave_count) { + // 如果有部分从节点活跃,使用min_acked_offset作为new_committed_id + new_committed_id = min_acked_offset; + LOG(INFO) << "UpdateCommittedID: Partial slaves acknowledged (" << active_slave_count << "/" << total_slave_count + << "), using minimum acked offset: " << min_acked_offset.ToString(); + } else { + // 如果所有从节点都活跃,使用min_acked_offset作为new_committed_id + new_committed_id = min_acked_offset; + LOG(INFO) << "UpdateCommittedID: All slaves acknowledged (" << active_slave_count << "/" << total_slave_count + << "), using minimum acked offset: " << min_acked_offset.ToString(); + } + + // 确保new_committed_id不低于当前的committed_id + if (new_committed_id < current_committed_id) { + LOG(WARNING) << "UpdateCommittedID: New committed_id " << new_committed_id.ToString() + << " is less than current committed_id " << current_committed_id.ToString() + << ", keeping current value"; + new_committed_id = current_committed_id; + } + 
+ // 确保new_committed_id不超过prepared_id + if (new_committed_id > current_prepared_id) { + LOG(WARNING) << "UpdateCommittedID: new_committed_id " << new_committed_id.ToString() + << " exceeds prepared_id " << current_prepared_id.ToString() + << ", adjusting to prepared_id"; + new_committed_id = current_prepared_id; + } + + // 如果committed_id有变化,更新它 + if (new_committed_id > current_committed_id) { + LOG(INFO) << "UpdateCommittedID: Updating committed_id from " << current_committed_id.ToString() + << " to " << new_committed_id.ToString(); + SetCommittedId(new_committed_id); + return Status::OK(); + } else { + LOG(INFO) << "UpdateCommittedID: No update needed, current committed ID: " << current_committed_id.ToString(); + return Status::OK(); } - SetCommittedId(slave_prepared_id); - LOG(INFO) << "PacificA update CommittedID: " << GetCommittedId().ToString(); - return Status::OK(); } Status ConsensusCoordinator::ProcessCoordination() { LogOffset offset = LogOffset(); @@ -966,29 +1208,98 @@ Status ConsensusCoordinator::ApplyBinlog(const std::shared_ptr& cmd_ptr) { Status ConsensusCoordinator::SendBinlog(std::shared_ptr slave_ptr, std::string db_name) { std::vector tasks; + const int MAX_BATCH_SIZE = 100; // Maximum number of logs to send in a single batch + + // Get current committed_id to ensure it's sent to the slave + LogOffset current_committed_id = GetCommittedId(); + LOG(INFO) << "SendBinlog: [Thread " << std::this_thread::get_id() << "] Current committed_id: " << current_committed_id.ToString() + << ", sending to slave " << slave_ptr->Ip() << ":" << slave_ptr->Port() + << ", logs_ addr: " << logs_.get() << ", db_name: " << db_name_; // Check if there are new log entries that need to be sent to the slave - if (logs_->LastOffset() >= slave_ptr->acked_offset) { + LOG(INFO) << "SendBinlog: logs_->LastOffset()=" << logs_->LastOffset().ToString() + << ", slave_ptr->acked_offset=" << slave_ptr->acked_offset.ToString() + << ", logs_->Size()=" << logs_->Size(); + + if 
(logs_->Size() > 0 && logs_->LastOffset() >= slave_ptr->acked_offset) { // Find the index of the log entry corresponding to the slave's acknowledged offset int index = logs_->FindOffset(slave_ptr->acked_offset); + int entries_to_send = logs_->Size() - index; + LOG(INFO) << "SendBinlog: Found " << entries_to_send << " new log entries to send, " + << "starting from index " << index << " of " << logs_->Size(); + if (index < logs_->Size()) { - for (int i = index; i < logs_->Size(); ++i) { - const Log::LogItem& item = logs_->At(i); - - slave_ptr->SetLastSendTime(pstd::NowMicros()); - - RmNode rm_node(slave_ptr->Ip(), slave_ptr->Port(), slave_ptr->DBName(), slave_ptr->SessionId()); - WriteTask task(rm_node, BinlogChip(item.offset, item.binlog_), slave_ptr->sent_offset, GetCommittedId()); - tasks.emplace_back(std::move(task)); - - slave_ptr->sent_offset = item.offset; + // Send log entries in optimized batches + RmNode rm_node(slave_ptr->Ip(), slave_ptr->Port(), db_name, slave_ptr->SessionId()); + + // For large batches, use specialized batch handling + if (entries_to_send > MAX_BATCH_SIZE) { + LOG(INFO) << "SendBinlog: Using optimized batch sending for " << entries_to_send << " entries"; + + // Process in chunks of MAX_BATCH_SIZE + for (int batch_start = index; batch_start < logs_->Size(); batch_start += MAX_BATCH_SIZE) { + int batch_end = std::min(batch_start + MAX_BATCH_SIZE, logs_->Size()); + std::string combined_binlog; + LogOffset first_offset, last_offset; + + // Combine binlogs in the batch + for (int i = batch_start; i < batch_end; ++i) { + Log::LogItem item = logs_->At(i); + if (i == batch_start) first_offset = item.offset; + if (i == batch_end - 1) last_offset = item.offset; + combined_binlog += item.binlog_; + } + + // Create a batch task with the combined binlog + // The offset field contains the first offset in the batch + // The task knows this is a batch through a special flag + WriteTask batch_task(rm_node, BinlogChip(first_offset, combined_binlog, true), 
+ last_offset, current_committed_id); + tasks.push_back(batch_task); + + LOG(INFO) << "SendBinlog: Created batch from index " << batch_start + << " to " << (batch_end-1) << " with offsets " + << first_offset.ToString() << " to " << last_offset.ToString(); + } + } else { + // For small batches, use the original approach + for (int i = index; i < logs_->Size(); ++i) { + Log::LogItem item = logs_->At(i); + WriteTask task(rm_node, BinlogChip(item.offset, item.binlog_), item.offset, current_committed_id); + tasks.push_back(task); + } } + } else { + LOG(INFO) << "SendBinlog: No new log entries to send, index " << index << " is out of range (logs size: " << logs_->Size() << ")"; + } + } else { + if (logs_->Size() == 0) { + LOG(INFO) << "SendBinlog: No logs available yet (logs_->Size()=0), will send empty binlog to maintain connection"; + } else { + LOG(INFO) << "SendBinlog: Slave is already up to date, last offset: " << logs_->LastOffset().ToString() + << ", slave acked offset: " << slave_ptr->acked_offset.ToString(); } } + // Even if there are no new log entries, send an empty binlog with the current committed_id + // This ensures slaves always receive the latest committed_id + if (tasks.empty()) { + LOG(INFO) << "SendBinlog: Sending empty binlog with current committed_id: " << current_committed_id.ToString(); + RmNode rm_node(slave_ptr->Ip(), slave_ptr->Port(), db_name, slave_ptr->SessionId()); + // Create an empty WriteTask that includes the current committed_id + WriteTask empty_task(rm_node, BinlogChip(LogOffset(), ""), LogOffset(), current_committed_id); + tasks.push_back(empty_task); + } + + // Send the tasks to the slave if (!tasks.empty()) { + LOG(INFO) << "SendBinlog: Sending " << tasks.size() << " tasks to slave " << slave_ptr->Ip() << ":" << slave_ptr->Port(); + extern std::unique_ptr g_pika_rm; g_pika_rm->ProduceWriteQueue(slave_ptr->Ip(), slave_ptr->Port(), db_name, tasks); + } else { + LOG(INFO) << "SendBinlog: No tasks to send to slave " << slave_ptr->Ip() 
<< ":" << slave_ptr->Port(); } + return Status::OK(); } diff --git a/src/pika_repl_bgworker.cc b/src/pika_repl_bgworker.cc index 5340533160..ca7633cff3 100644 --- a/src/pika_repl_bgworker.cc +++ b/src/pika_repl_bgworker.cc @@ -5,12 +5,16 @@ #include "include/pika_repl_bgworker.h" +#include +#include +#include #include #include "include/pika_cmd_table_manager.h" #include "include/pika_conf.h" #include "include/pika_rm.h" #include "include/pika_server.h" +#include "pstd/include/pstd_string.h" #include "pstd/include/pstd_defer.h" #include "src/pstd/include/scope_record_lock.h" #include "include/pika_conf.h" @@ -53,6 +57,10 @@ void PikaReplBgWorker::HandleBGWorkerWriteBinlog(void* arg) { PikaReplBgWorker* worker = task_arg->worker; worker->ip_port_ = conn->ip_port(); + LOG(INFO) << "HandleBGWorkerWriteBinlog: Received binlog from master " << worker->ip_port_ + << ", index size: " << index->size() + << ", binlog_sync_size: " << res->binlog_sync_size(); + DEFER { delete index; delete task_arg; @@ -94,8 +102,10 @@ void PikaReplBgWorker::HandleBGWorkerWriteBinlog(void* arg) { if (only_keepalive) { ack_start = LogOffset(); + ack_end = LogOffset(); } else { ack_start = pb_begin; + ack_end = pb_end; } // because DispatchBinlogRes() have been order them. 
@@ -115,6 +125,7 @@ void PikaReplBgWorker::HandleBGWorkerWriteBinlog(void* arg) { return; } + int processed_count = 0; for (int i : *index) { const InnerMessage::InnerResponse::BinlogSync& binlog_res = res->binlog_sync(i); // if pika are not current a slave or DB not in @@ -129,8 +140,8 @@ void PikaReplBgWorker::HandleBGWorkerWriteBinlog(void* arg) { << slave_db->MasterPort() << ", " << slave_db->SyncDBInfo().ToString() << " expected_session: " << binlog_res.session_id() << ", actual_session:" << slave_db->MasterSessionId(); - LOG(WARNING) << "Check Session failed " << binlog_res.slot().db_name(); - slave_db->SetReplState(ReplState::kTryConnect); + LOG(WARNING) << "Check Session failed " << binlog_res.slot().db_name() << ", setting to kError (no auto-reconnect)"; + slave_db->SetReplState(ReplState::kError); return; } if(db->GetISConsistency()){ @@ -143,23 +154,96 @@ void PikaReplBgWorker::HandleBGWorkerWriteBinlog(void* arg) { } // empty binlog treated as keepalive packet if (binlog_res.binlog().empty()) { + LOG(INFO) << "HandleBGWorkerWriteBinlog: Skipping empty binlog (keepalive packet) at index " << i; + continue; + } + // Handle binlog data with batch magic number detection + const std::string& received_binlog = binlog_res.binlog(); + std::string binlog_str = received_binlog; + + // Check if this is the first binlog entry in a batch (contains PIKA_BATCH_MAGIC) + bool has_batch_magic = false; + if (i == (*index)[0] && received_binlog.size() >= sizeof(uint32_t)) { + uint32_t magic_num = 0; + memcpy(&magic_num, received_binlog.data(), sizeof(uint32_t)); + if (magic_num == PIKA_BATCH_MAGIC) { + has_batch_magic = true; + // Remove the magic number from the binlog data + binlog_str = received_binlog.substr(sizeof(uint32_t)); + LOG(INFO) << "HandleBGWorkerWriteBinlog: Detected PIKA_BATCH_MAGIC in binlog entry " << i + << ", processing as batch, original size: " << received_binlog.size() + << ", new size after removing magic: " << binlog_str.size(); + } + } + + // 
Validate binlog data + if (binlog_str.empty()) { + LOG(WARNING) << "HandleBGWorkerWriteBinlog: Empty binlog data after processing"; continue; } - if (!PikaBinlogTransverter::BinlogItemWithoutContentDecode(TypeFirst, binlog_res.binlog(), &worker->binlog_item_)) { - LOG(WARNING) << "Binlog item decode failed"; + + if (binlog_str.size() < BINLOG_ENCODE_LEN) { + LOG(WARNING) << "HandleBGWorkerWriteBinlog: Binlog data too small (" << binlog_str.size() + << " bytes), minimum required: " << BINLOG_ENCODE_LEN; + continue; + } + + // Decode the binlog item + if (!PikaBinlogTransverter::BinlogItemWithoutContentDecode(TypeFirst, binlog_str, &worker->binlog_item_)) { + LOG(WARNING) << "HandleBGWorkerWriteBinlog: Binlog item decode failed for entry " << i; slave_db->SetReplState(ReplState::kTryConnect); return; } - const char* redis_parser_start = binlog_res.binlog().data() + BINLOG_ENCODE_LEN; - int redis_parser_len = static_cast(binlog_res.binlog().size()) - BINLOG_ENCODE_LEN; + + // Extract Redis command data + const char* redis_parser_start = binlog_str.data() + BINLOG_ENCODE_LEN; + int redis_parser_len = static_cast(binlog_str.size()) - BINLOG_ENCODE_LEN; + + if (redis_parser_len <= 0) { + LOG(WARNING) << "HandleBGWorkerWriteBinlog: No Redis command data after binlog header for entry " << i; + continue; + } + + // Validate Redis protocol data + if (redis_parser_start[0] != '*' && redis_parser_start[0] != '+' && + redis_parser_start[0] != '-' && redis_parser_start[0] != ':' && + redis_parser_start[0] != '$') { + LOG(WARNING) << "HandleBGWorkerWriteBinlog: Invalid Redis protocol start character: " + << static_cast(redis_parser_start[0]) << " for binlog entry " << i; + continue; + } + + // Create a new parser instance for each binlog entry to ensure clean state + net::RedisParser temp_parser; + net::RedisParserSettings settings; + settings.DealMessage = &(PikaReplBgWorker::HandleWriteBinlog); + temp_parser.RedisParserInit(REDIS_PARSER_REQUEST, settings); + temp_parser.data = 
worker; + int processed_len = 0; net::RedisParserStatus ret = - worker->redis_parser_.ProcessInputBuffer(redis_parser_start, redis_parser_len, &processed_len); + temp_parser.ProcessInputBuffer(redis_parser_start, redis_parser_len, &processed_len); + if (ret != net::kRedisParserDone) { - LOG(WARNING) << "Redis parser failed"; - slave_db->SetReplState(ReplState::kTryConnect); - return; - } + LOG(WARNING) << "HandleBGWorkerWriteBinlog: Redis parser failed for entry " << i + << ", status: " << static_cast(ret) + << ", processed: " << processed_len << " of " << redis_parser_len << " bytes" + << (has_batch_magic ? " (batch mode)" : ""); + // For batch processing, continue with other entries instead of failing completely + if (has_batch_magic && (*index).size() > 1) { + LOG(INFO) << "HandleBGWorkerWriteBinlog: Continuing with next entry after parser error in batch"; + continue; + } else { + slave_db->SetReplState(ReplState::kTryConnect); + return; + } + } + + // Successfully processed this binlog entry + processed_count++; + LOG(INFO) << "HandleBGWorkerWriteBinlog: Successfully processed binlog entry " << i + << (has_batch_magic ? " (batch mode)" : ""); + db = g_pika_rm->GetSyncMasterDBByName(DBInfo(worker->db_name_)); if (!db) { LOG(WARNING) << "DB " << worker->db_name_ << " Not Found"; @@ -169,17 +253,30 @@ void PikaReplBgWorker::HandleBGWorkerWriteBinlog(void* arg) { if (only_keepalive) { ack_end = LogOffset(); + LOG(INFO) << "HandleBGWorkerWriteBinlog: Only keepalive packets received, sending empty ACK"; + // Log more detailed debug information about the keepalive situation + LOG(WARNING) << "HandleBGWorkerWriteBinlog: No binlog entries were processed. 
This may indicate an issue with binlog transmission or format."; + LOG(INFO) << "HandleBGWorkerWriteBinlog: Total binlog entries received: " << index->size() + << ", processed: " << processed_count; } else { - LogOffset productor_status; - // Reply Ack to master immediately - std::shared_ptr logger = db->Logger(); - logger->GetProducerStatus(&productor_status.b_offset.filenum, &productor_status.b_offset.offset, - &productor_status.l_offset.term, &productor_status.l_offset.index); - ack_end = productor_status; - ack_end.l_offset.term = pb_end.l_offset.term; + ack_end = pb_end; + LOG(INFO) << "HandleBGWorkerWriteBinlog: Processed " << processed_count << " binlog entries, sending ACK" + << " for " << db_name + << ", ack_start: " << ack_start.ToString() + << ", ack_end: " << ack_end.ToString(); } - g_pika_rm->SendBinlogSyncAckRequest(db_name, ack_start, ack_end); + // Get current slave DB state before sending ACK + ReplState current_state = slave_db->State(); + LOG(INFO) << "HandleBGWorkerWriteBinlog: Current slave state before sending ACK: " + << static_cast(current_state) + << " (" << (current_state == ReplState::kConnected ? "Connected" : + (current_state == ReplState::kWaitDBSync ? "WaitDBSync" : "Other")) << ")"; + + // Send the ACK with detailed logging + Status ack_status = g_pika_rm->SendBinlogSyncAckRequest(db_name, ack_start, ack_end); + LOG(INFO) << "HandleBGWorkerWriteBinlog: ACK send result: " + << (ack_status.ok() ? 
"Success" : "Failed: " + ack_status.ToString()); } int PikaReplBgWorker::HandleWriteBinlog(net::RedisParser* parser, const net::RedisCmdArgsType& argv) { @@ -217,17 +314,32 @@ int PikaReplBgWorker::HandleWriteBinlog(net::RedisParser* parser, const net::Red LOG(WARNING) << worker->db_name_ << "Not found."; return -1; } - if(db->GetISConsistency()){ - db->AppendSlaveEntries(c_ptr, worker->binlog_item_); - }else{ + if (db->GetISConsistency()) { + Status s = db->AppendSlaveEntries(c_ptr, worker->binlog_item_); + if (s.IsInvalidArgument()) { + // This happens when the slave receives a binlog with an old index, + // which is likely caused by a lost ACK. + // We should resend our latest ACK to help the master correct its state. + LogOffset last_offset; + db->Logger()->GetProducerStatus(&last_offset.b_offset.filenum, &last_offset.b_offset.offset, &last_offset.l_offset.term, &last_offset.l_offset.index); + LOG(INFO) << "Resending ACK for " << worker->db_name_ << " because of expired log. ACK offset: " << last_offset.ToString(); + g_pika_rm->SendBinlogSyncAckRequest(worker->db_name_, last_offset, last_offset); + } + } else { db->ConsensusProcessLeaderLog(c_ptr, worker->binlog_item_); } + + // log details of the binlog item + LOG(INFO) << "Current binlog item from master - term: " << worker->binlog_item_.term_id() + << " logic_id: " << worker->binlog_item_.logic_id() + << " filenum: " << worker->binlog_item_.filenum() + << " offset: " << worker->binlog_item_.offset(); return 0; } void PikaReplBgWorker::HandleBGWorkerWriteDB(void* arg) { - std::unique_ptr task_arg(static_cast(arg)); - const std::shared_ptr c_ptr = task_arg->cmd_ptr; + std::unique_ptr> cmd_ptr_ptr(static_cast*>(arg)); + const std::shared_ptr c_ptr = *cmd_ptr_ptr; WriteDBInSyncWay(c_ptr); } diff --git a/src/pika_repl_client.cc b/src/pika_repl_client.cc index 80b9b4b7bb..80dc7f1ced 100644 --- a/src/pika_repl_client.cc +++ b/src/pika_repl_client.cc @@ -119,18 +119,53 @@ void 
PikaReplClient::ScheduleWriteBinlogTask(const std::string& db_name, } void PikaReplClient::ScheduleWriteDBTask(const std::shared_ptr& cmd_ptr, const std::string& db_name) { + std::lock_guard lock(unfinished_async_write_db_tasks_mu_); const PikaCmdArgsType& argv = cmd_ptr->argv(); std::string dispatch_key = argv.size() >= 2 ? argv[1] : argv[0]; size_t index = GetHashIndexByKey(dispatch_key); - auto task_arg = new ReplClientWriteDBTaskArg(cmd_ptr); + auto task_arg = new std::shared_ptr(cmd_ptr); - IncrAsyncWriteDBTaskCount(db_name, 1); - std::function task_finish_call_back = [this, db_name]() { this->DecrAsyncWriteDBTaskCount(db_name, 1); }; + unfinished_async_write_db_tasks_[db_name]++; + LOG(INFO) << "Scheduling WriteDB task for db " << db_name << ", command: " << argv[0] << ". Unfinished tasks: " + << unfinished_async_write_db_tasks_[db_name]; + std::function task_finish_call_back = [this, db_name]() { this->SignalAsyncWriteDBTaskEnd(db_name); }; write_db_workers_[index]->Schedule(&PikaReplBgWorker::HandleBGWorkerWriteDB, static_cast(task_arg), task_finish_call_back); } +int32_t PikaReplClient::GetUnfinishedAsyncWriteDBTaskCount(const std::string& db_name) { + std::lock_guard lock(unfinished_async_write_db_tasks_mu_); + if (unfinished_async_write_db_tasks_.find(db_name) == unfinished_async_write_db_tasks_.end()) { + return 0; + } + return unfinished_async_write_db_tasks_.at(db_name); +} + +void PikaReplClient::SignalAsyncWriteDBTaskEnd(const std::string& db_name) { + std::lock_guard lock(unfinished_async_write_db_tasks_mu_); + if (unfinished_async_write_db_tasks_.find(db_name) != unfinished_async_write_db_tasks_.end()) { + unfinished_async_write_db_tasks_[db_name]--; + LOG(INFO) << "Finished WriteDB task for db " << db_name << ". Unfinished tasks: " << unfinished_async_write_db_tasks_[db_name]; + if (unfinished_async_write_db_tasks_[db_name] == 0) { + LOG(INFO) << "All WriteDB tasks finished for db " << db_name << ". 
Notifying waiting threads."; + async_write_db_tasks_cond_.notify_all(); + } + } +} + +void PikaReplClient::WaitForAsyncWriteDBTaskEnd(const std::string& db_name) { + std::unique_lock lock(unfinished_async_write_db_tasks_mu_); + if (unfinished_async_write_db_tasks_.count(db_name) && unfinished_async_write_db_tasks_[db_name] > 0) { + LOG(INFO) << "Waiting for " << unfinished_async_write_db_tasks_[db_name] + << " async write DB tasks to end for db " << db_name; + } + while (unfinished_async_write_db_tasks_.count(db_name) && unfinished_async_write_db_tasks_[db_name] > 0) { + async_write_db_tasks_cond_.wait(lock); + } + LOG(INFO) << "Finished waiting for async write DB tasks for db " << db_name; +} + size_t PikaReplClient::GetBinlogWorkerIndexByDBName(const std::string &db_name) { char db_num_c = db_name.back(); int32_t db_num = db_num_c - '0'; @@ -144,6 +179,10 @@ size_t PikaReplClient::GetBinlogWorkerIndexByDBName(const std::string &db_name) return db_num % write_binlog_workers_.size(); } +size_t PikaReplClient::GetDBWorkerIndexByDBName(const std::string& db_name) { + return std::hash()(db_name) % write_db_workers_.size(); +} + size_t PikaReplClient::GetHashIndexByKey(const std::string& key) { size_t hash_base = write_db_workers_.size(); return (str_hash(key) % hash_base); diff --git a/src/pika_repl_server.cc b/src/pika_repl_server.cc index c8f1c9f9dc..66f3df9a90 100644 --- a/src/pika_repl_server.cc +++ b/src/pika_repl_server.cc @@ -53,6 +53,7 @@ int PikaReplServer::Stop() { pstd::Status PikaReplServer::SendSlaveBinlogChips(const std::string& ip, int port, const std::vector& tasks) { + LOG(INFO) << "SendSlaveBinlogChips: Preparing to send " << tasks.size() << " tasks to " << ip << ":" << port; InnerMessage::InnerResponse response; BuildBinlogSyncResp(tasks, &response); @@ -77,7 +78,10 @@ pstd::Status PikaReplServer::SendSlaveBinlogChips(const std::string& ip, int por } return pstd::Status::OK(); } - return Write(ip, port, binlog_chip_pb); + LOG(INFO) << 
"SendSlaveBinlogChips: Calling Write to send " << binlog_chip_pb.size() << " bytes to " << ip << ":" << port; + pstd::Status result = Write(ip, port, binlog_chip_pb); + LOG(INFO) << "SendSlaveBinlogChips: Write result: " << (result.ok() ? "SUCCESS" : result.ToString()); + return result; } void PikaReplServer::BuildBinlogOffset(const LogOffset& offset, InnerMessage::BinlogOffset* boffset) { @@ -87,12 +91,26 @@ void PikaReplServer::BuildBinlogOffset(const LogOffset& offset, InnerMessage::Bi boffset->set_index(offset.l_offset.index); } -void PikaReplServer::BuildBinlogSyncResp(const std::vector& tasks, InnerMessage::InnerResponse* response) { - response->set_code(InnerMessage::kOk); +void PikaReplServer::BuildBinlogSyncResp(const std::vector& tasks, InnerMessage::InnerResponse* response){ + if (tasks.empty()) { + return; + } + + LOG(INFO) << "BuildBinlogSyncResp: Building response for " << tasks.size() << " tasks"; + response->set_type(InnerMessage::Type::kBinlogSync); - for (const auto& task : tasks) { + response->set_code(InnerMessage::kOk); + + // Add batch magic number if there are multiple tasks + bool is_batch = tasks.size() > 1; + LOG(INFO) << "BuildBinlogSyncResp: is_batch=" << (is_batch ? 
"true" : "false") << ", batch_size=" << tasks.size(); + + for (size_t task_idx = 0; task_idx < tasks.size(); task_idx++) { + const auto& task = tasks[task_idx]; InnerMessage::InnerResponse::BinlogSync* binlog_sync = response->add_binlog_sync(); - binlog_sync->set_session_id(task.rm_node_.SessionId()); + const RmNode& node = task.rm_node_; + binlog_sync->set_session_id(node.SessionId()); + InnerMessage::Slot* db = binlog_sync->mutable_slot(); db->set_db_name(task.rm_node_.DBName()); /* @@ -103,11 +121,57 @@ void PikaReplServer::BuildBinlogSyncResp(const std::vector& tasks, In db->set_slot_id(0); InnerMessage::BinlogOffset* boffset = binlog_sync->mutable_binlog_offset(); BuildBinlogOffset(task.binlog_chip_.offset_, boffset); - if(g_pika_server->IsConsistency()){ + + LOG(INFO) << "BuildBinlogSyncResp: Task " << task_idx << " offset=" << task.binlog_chip_.offset_.ToString() + << " binlog_size=" << task.binlog_chip_.binlog_.size(); + + // Always add committed_id, regardless of strong consistency mode InnerMessage::BinlogOffset* committed_id = binlog_sync->mutable_committed_id(); BuildBinlogOffset(task.committed_id_, committed_id); + LOG(INFO) << "BuildBinlogSyncResp: Task " << task_idx << " committed_id=" << task.committed_id_.ToString(); + + // For batch binlog transmission, add PIKA_BATCH_MAGIC at the beginning of the first binlog entry + if (is_batch && binlog_sync == response->mutable_binlog_sync(0)) { + // Prepend the magic number to indicate this is a batch + std::string magic_binlog; + magic_binlog.resize(sizeof(uint32_t)); + memcpy(&magic_binlog[0], &PIKA_BATCH_MAGIC, sizeof(uint32_t)); + + // Log the magic number as hex for debugging + LOG(INFO) << "BuildBinlogSyncResp: Adding magic number: 0x" + << std::hex << PIKA_BATCH_MAGIC << std::dec; + + // Check if binlog is empty before appending + if (task.binlog_chip_.binlog_.empty()) { + LOG(WARNING) << "BuildBinlogSyncResp: WARNING - Empty binlog content in batch task " << task_idx; + } + + 
magic_binlog.append(task.binlog_chip_.binlog_); + binlog_sync->set_binlog(magic_binlog); + + // Detailed logging of the binlog content + LOG(INFO) << "BuildBinlogSyncResp: Added PIKA_BATCH_MAGIC (0x" << std::hex << PIKA_BATCH_MAGIC << std::dec + << ") to first binlog in batch of size " << tasks.size() + << ", original size=" << task.binlog_chip_.binlog_.size() + << ", new size=" << magic_binlog.size(); + + // Verify the magic number was correctly added by reading it back + if (magic_binlog.size() >= sizeof(uint32_t)) { + uint32_t verification = 0; + memcpy(&verification, magic_binlog.data(), sizeof(uint32_t)); + LOG(INFO) << "BuildBinlogSyncResp: Verified magic number in prepared binlog: 0x" + << std::hex << verification << std::dec; + } + } else { + // Check if binlog is empty before setting + if (task.binlog_chip_.binlog_.empty()) { + LOG(WARNING) << "BuildBinlogSyncResp: WARNING - Empty binlog content in regular task " << task_idx; + } + + binlog_sync->set_binlog(task.binlog_chip_.binlog_); + LOG(INFO) << "BuildBinlogSyncResp: Regular binlog for task " << task_idx + << ", size=" << task.binlog_chip_.binlog_.size(); } - binlog_sync->set_binlog(task.binlog_chip_.binlog_); } } diff --git a/src/pika_repl_server_conn.cc b/src/pika_repl_server_conn.cc index 091c85a0de..1227f4cb7b 100644 --- a/src/pika_repl_server_conn.cc +++ b/src/pika_repl_server_conn.cc @@ -21,6 +21,7 @@ PikaReplServerConn::PikaReplServerConn(int fd, const std::string& ip_port, net:: PikaReplServerConn::~PikaReplServerConn() = default; void PikaReplServerConn::HandleMetaSyncRequest(void* arg) { + //LOG(INFO) << "ReplServer BG thread handle MetaSync Request"; std::unique_ptr task_arg(static_cast(arg)); const std::shared_ptr req = task_arg->req; std::shared_ptr conn = task_arg->conn; @@ -100,6 +101,7 @@ void PikaReplServerConn::HandleMetaSyncRequest(void* arg) { } void PikaReplServerConn::HandleTrySyncRequest(void* arg) { + //LOG(INFO) << "ReplServer BG thread handle TrySync Request"; std::unique_ptr 
task_arg(static_cast(arg)); const std::shared_ptr req = task_arg->req; std::shared_ptr conn = task_arg->conn; @@ -138,8 +140,13 @@ void PikaReplServerConn::HandleTrySyncRequest(void* arg) { response.set_code(InnerMessage::kOk); } + LOG(INFO) << "HandleTrySyncRequest: pre_success=" << pre_success; if (pre_success && TrySyncOffsetCheck(db, try_sync_request, try_sync_response)) { + LOG(INFO) << "HandleTrySyncRequest: TrySyncOffsetCheck passed, calling TrySyncUpdateSlaveNode"; TrySyncUpdateSlaveNode(db, try_sync_request, conn, try_sync_response); + LOG(INFO) << "HandleTrySyncRequest: TrySyncUpdateSlaveNode completed"; + } else { + LOG(WARNING) << "HandleTrySyncRequest: TrySyncOffsetCheck failed or pre_success=false"; } std::string reply_str; @@ -148,6 +155,8 @@ void PikaReplServerConn::HandleTrySyncRequest(void* arg) { conn->NotifyClose(); return; } + LOG(INFO) << "HandleTrySyncRequest: Response sent successfully to " << try_sync_request.node().ip() + << ":" << try_sync_request.node().port(); conn->NotifyWrite(); } @@ -156,7 +165,13 @@ bool PikaReplServerConn::TrySyncUpdateSlaveNode(const std::shared_ptr& conn, InnerMessage::InnerResponse::TrySync* try_sync_response) { const InnerMessage::Node& node = try_sync_request.node(); - if (!db->CheckSlaveNodeExist(node.ip(), node.port())) { + LOG(INFO) << "TrySyncUpdateSlaveNode: Starting for slave " << node.ip() << ":" << node.port(); + + LOG(INFO) << "TrySyncUpdateSlaveNode: Checking if slave node exists..."; + bool slave_exists = db->CheckSlaveNodeExist(node.ip(), node.port()); + LOG(INFO) << "TrySyncUpdateSlaveNode: Slave exists check result: " << slave_exists; + + if (!slave_exists) { int32_t session_id = db->GenSessionId(); if (session_id == -1) { try_sync_response->set_reply_code(InnerMessage::InnerResponse::TrySync::kError); @@ -165,7 +180,9 @@ bool PikaReplServerConn::TrySyncUpdateSlaveNode(const std::shared_ptrset_session_id(session_id); // incremental sync + LOG(INFO) << "TrySyncUpdateSlaveNode: Adding new slave 
node with session_id=" << session_id; Status s = db->AddSlaveNode(node.ip(), node.port(), session_id); + LOG(INFO) << "TrySyncUpdateSlaveNode: AddSlaveNode result: " << s.ToString(); if (!s.ok()) { try_sync_response->set_reply_code(InnerMessage::InnerResponse::TrySync::kError); LOG(WARNING) << "DB: " << db->DBName() << " TrySync Failed, " << s.ToString(); @@ -176,8 +193,10 @@ bool PikaReplServerConn::TrySyncUpdateSlaveNode(const std::shared_ptrset_reply_code(InnerMessage::InnerResponse::TrySync::kOk); LOG(INFO) << "DB: " << db->DBName() << " TrySync Success, Session: " << session_id; } else { + LOG(INFO) << "TrySyncUpdateSlaveNode: Slave already exists, getting session ID..."; int32_t session_id; Status s = db->GetSlaveNodeSession(node.ip(), node.port(), &session_id); + LOG(INFO) << "TrySyncUpdateSlaveNode: GetSlaveNodeSession result: " << s.ToString(); if (!s.ok()) { try_sync_response->set_reply_code(InnerMessage::InnerResponse::TrySync::kError); LOG(WARNING) << "DB: " << db->DBName() << " Get Session id Failed" << s.ToString(); @@ -187,6 +206,7 @@ bool PikaReplServerConn::TrySyncUpdateSlaveNode(const std::shared_ptrset_session_id(session_id); LOG(INFO) << "DB: " << db->DBName() << " TrySync Success, Session: " << session_id; } + LOG(INFO) << "TrySyncUpdateSlaveNode: Completed successfully, returning true"; return true; } @@ -238,9 +258,13 @@ bool PikaReplServerConn::TrySyncOffsetCheck(const std::shared_ptr& return false; } LOG(INFO)<<"master_CommittedId >= slave committed_id"; + LOG(INFO) << "TrySyncOffsetCheck: Seeking to committed_id: " << committed_id.b_offset.filenum + << ":" << committed_id.b_offset.offset; reader.Seek(db->Logger(), committed_id.b_offset.filenum, committed_id.b_offset.offset); BinlogOffset seeked_offset; reader.GetReaderStatus(&(seeked_offset.filenum), &(seeked_offset.offset)); + LOG(INFO) << "TrySyncOffsetCheck: Seeked to: " << seeked_offset.filenum << ":" << seeked_offset.offset; + if (seeked_offset.filenum != committed_id.b_offset.filenum 
|| seeked_offset.offset != committed_id.b_offset.offset) { try_sync_response->set_reply_code(InnerMessage::InnerResponse::TrySync::kError); @@ -252,6 +276,7 @@ bool PikaReplServerConn::TrySyncOffsetCheck(const std::shared_ptr& << ", offset: " << seeked_offset.offset; return false; } + LOG(INFO) << "TrySyncOffsetCheck: Seek validation passed for committed_id"; InnerMessage::BinlogOffset* master_prepared_id = try_sync_response->mutable_prepared_id(); g_pika_rm->BuildBinlogOffset(db->GetPreparedId(), master_prepared_id); }else{ @@ -261,9 +286,13 @@ bool PikaReplServerConn::TrySyncOffsetCheck(const std::shared_ptr& } } + LOG(INFO) << "TrySyncOffsetCheck: Now checking slave binlog offset: " << slave_boffset.filenum() + << ":" << slave_boffset.offset(); reader.Seek(db->Logger(), slave_boffset.filenum(), slave_boffset.offset()); BinlogOffset seeked_offset; reader.GetReaderStatus(&(seeked_offset.filenum), &(seeked_offset.offset)); + LOG(INFO) << "TrySyncOffsetCheck: Slave offset seek result: " << seeked_offset.filenum << ":" << seeked_offset.offset; + if (seeked_offset.filenum != slave_boffset.filenum() || seeked_offset.offset != slave_boffset.offset()) { try_sync_response->set_reply_code(InnerMessage::InnerResponse::TrySync::kError); LOG(WARNING) << "Slave offset is not a start point of cur log, Slave ip: " << node.ip() @@ -271,10 +300,13 @@ bool PikaReplServerConn::TrySyncOffsetCheck(const std::shared_ptr& << seeked_offset.filenum << ", offset: " << seeked_offset.offset; return false; } + + LOG(INFO) << "TrySyncOffsetCheck: All validations passed, returning true"; return true; } void PikaReplServerConn::HandleDBSyncRequest(void* arg) { + //LOG(INFO) << "ReplServer BG thread handle DBSync Request"; std::unique_ptr task_arg(static_cast(arg)); const std::shared_ptr req = task_arg->req; std::shared_ptr conn = task_arg->conn; @@ -356,6 +388,7 @@ void PikaReplServerConn::HandleDBSyncRequest(void* arg) { } void PikaReplServerConn::HandleBinlogSyncRequest(void* arg) { + 
//LOG(INFO) << "ReplServer BG thread handle BinlogSync Request"; std::unique_ptr task_arg(static_cast(arg)); const std::shared_ptr req = task_arg->req; std::shared_ptr conn = task_arg->conn; @@ -408,13 +441,18 @@ void PikaReplServerConn::HandleBinlogSyncRequest(void* arg) { } if (master_db->GetISConsistency()) { Status s = master_db->AppendCandidateBinlog(node.ip(), node.port(), range_start); + if (!s.ok()) { + LOG(WARNING) << "Append Candidate Binlog failed " << slave_node.ToString() << " " << s.ToString(); + conn->NotifyClose(); + return; + } } else { Status s = master_db->ActivateSlaveBinlogSync(node.ip(), node.port(), range_start); - } - if (!s.ok()) { - LOG(WARNING) << "Activate Binlog Sync failed " << slave_node.ToString() << " " << s.ToString(); - conn->NotifyClose(); - return; + if (!s.ok()) { + LOG(WARNING) << "Activate Binlog Sync failed " << slave_node.ToString() << " " << s.ToString(); + conn->NotifyClose(); + return; + } } return; } @@ -422,8 +460,12 @@ void PikaReplServerConn::HandleBinlogSyncRequest(void* arg) { // not the first_send the range_ack cant be 0 // set this case as ping if (range_start.b_offset == BinlogOffset() && range_end.b_offset == BinlogOffset()) { + LOG(INFO) << "Received ping from slave: " << slave_node.ToString(); return; } + LOG(INFO) << "Received binlog ack from slave: " << slave_node.ToString() + << ", range_start: " << range_start.ToString() + << ", range_end: " << range_end.ToString(); s = g_pika_rm->UpdateSyncBinlogStatus(slave_node, range_start, range_end); if (!s.ok()) { LOG(WARNING) << "Update binlog ack failed " << db_name << " " << s.ToString(); @@ -435,6 +477,7 @@ void PikaReplServerConn::HandleBinlogSyncRequest(void* arg) { } void PikaReplServerConn::HandleRemoveSlaveNodeRequest(void* arg) { + //LOG(INFO) << "ReplServer BG thread handle RemoveSlaveNode Request"; std::unique_ptr task_arg(static_cast(arg)); const std::shared_ptr req = task_arg->req; std::shared_ptr conn = task_arg->conn; diff --git a/src/pika_rm.cc 
b/src/pika_rm.cc index 9c777339ab..138544c09f 100644 --- a/src/pika_rm.cc +++ b/src/pika_rm.cc @@ -12,6 +12,7 @@ #include #include +#include #include "net/include/net_cli.h" @@ -25,6 +26,7 @@ using pstd::Status; extern std::unique_ptr g_pika_rm; extern PikaServer* g_pika_server; +extern std::unique_ptr g_pika_conf; /* SyncDB */ @@ -53,11 +55,15 @@ Status SyncMasterDB::GetSlaveNodeSession(const std::string& ip, int port, int32_ return Status::NotFound("slave " + ip + ":" + std::to_string(port) + " not found"); } - slave_ptr->Lock(); - *session = slave_ptr->SessionId(); - slave_ptr->Unlock(); - - return Status::OK(); + if (slave_ptr->slave_mu.try_lock()) { + *session = slave_ptr->SessionId(); + slave_ptr->slave_mu.unlock(); + LOG(INFO) << "GetSlaveNodeSession: Successfully got session " << *session << " for " << ip << ":" << port; + return Status::OK(); + } else { + LOG(WARNING) << "GetSlaveNodeSession: Failed to acquire lock for " << ip << ":" << port << ", slave may be busy"; + return Status::Busy("Slave node is busy, try again later"); + } } Status SyncMasterDB::AddSlaveNode(const std::string& ip, int port, int session_id) { @@ -146,13 +152,12 @@ Status SyncMasterDB::ActivateSlaveDbSync(const std::string& ip, int port) { } Status SyncMasterDB::ReadBinlogFileToWq(const std::shared_ptr& slave_ptr) { - int cnt = slave_ptr->sync_win.Remaining(); std::shared_ptr reader = slave_ptr->binlog_reader; if (!reader) { return Status::OK(); } - std::vector tasks; - for (int i = 0; i < cnt; ++i) { + // Try to read binlogs as long as the sync window allows + while (slave_ptr->sync_win.Remaining() > 0) { std::string msg; uint32_t filenum; uint64_t offset; @@ -173,20 +178,19 @@ Status SyncMasterDB::ReadBinlogFileToWq(const std::shared_ptr& slave_ LOG(WARNING) << "Binlog item decode failed"; return Status::Corruption("Binlog item decode failed"); } - BinlogOffset sent_b_offset = BinlogOffset(filenum, offset); - LogicOffset sent_l_offset = LogicOffset(item.term_id(), 
item.logic_id()); + BinlogOffset sent_b_offset(filenum, offset); + LogicOffset sent_l_offset(item.term_id(), item.logic_id()); LogOffset sent_offset(sent_b_offset, sent_l_offset); - + // Update sync window and sent_offset immediately slave_ptr->sync_win.Push(SyncWinItem(sent_offset, msg.size())); - slave_ptr->SetLastSendTime(pstd::NowMicros()); + slave_ptr->sent_offset = sent_offset; + // Create a task and a vector to hold it RmNode rm_node(slave_ptr->Ip(), slave_ptr->Port(), slave_ptr->DBName(), slave_ptr->SessionId()); WriteTask task(rm_node, BinlogChip(sent_offset, msg), slave_ptr->sent_offset); - tasks.push_back(task); - slave_ptr->sent_offset = sent_offset; - } - - if (!tasks.empty()) { - g_pika_rm->ProduceWriteQueue(slave_ptr->Ip(), slave_ptr->Port(), db_info_.db_name_, tasks); + std::vector tasks_to_send; + tasks_to_send.push_back(task); + // Produce to write queue immediately, one binlog at a time + g_pika_rm->ProduceWriteQueue(slave_ptr->Ip(), slave_ptr->Port(), db_info_.db_name_, tasks_to_send); } return Status::OK(); } @@ -231,23 +235,86 @@ Status SyncMasterDB::GetSlaveState(const std::string& ip, int port, SlaveState* Status SyncMasterDB::WakeUpSlaveBinlogSync() { std::unordered_map> slaves = GetAllSlaveNodes(); std::vector> to_del; + if (slaves.empty()) { + // LOG(INFO) << "WakeUpSlaveBinlogSync: No slaves connected for DB " << db_info_.db_name_; + //cleanup(); + return Status::OK(); + } + // LOG(INFO) << "WakeUpSlaveBinlogSync: Found " << slaves.size() << " slaves for DB " << db_info_.db_name_; for (auto& slave_iter : slaves) { std::shared_ptr slave_ptr = slave_iter.second; - std::lock_guard l(slave_ptr->slave_mu); - if (slave_ptr->sent_offset == slave_ptr->acked_offset) { - Status s; - if (coordinator_.GetISConsistency()) { - if(slave_ptr->slave_state == SlaveState::kSlaveBinlogSync||slave_ptr->slave_state == SlaveState::KCandidate){ - s = coordinator_.SendBinlog(slave_ptr, db_info_.db_name_); + SlaveState current_state; + std::string slave_info; + { 
+ std::lock_guard l(slave_ptr->slave_mu); + current_state = slave_ptr->slave_state; + slave_info = slave_ptr->Ip() + ":" + std::to_string(slave_ptr->Port()); + } + LOG(INFO) << "WakeUpSlaveBinlogSync: Processing slave " << slave_info + << ", state: " << SlaveStateMsg[current_state] << " (" << current_state << ")"; + + Status s; + bool should_send_binlog = false; + bool should_promote_state = false; + + if (GetISConsistency()) { + if (current_state == SlaveState::kSlaveBinlogSync || current_state == SlaveState::KCandidate) { + should_send_binlog = true; + LOG(INFO) << "WakeUpSlaveBinlogSync: Will send binlog to slave " << slave_info; + } else if (current_state == SlaveState::kSlaveDbSync) { + should_promote_state = true; + should_send_binlog = true; + LOG(INFO) << "WakeUpSlaveBinlogSync: Will promote slave " << slave_info << " to BinlogSync state"; + } else if (current_state == SlaveState::kSlaveNotSync) { + // Slave node is not synced, need to promote it to BinlogSync state and send logs + should_promote_state = true; + should_send_binlog = true; + LOG(INFO) << "WakeUpSlaveBinlogSync: Will promote not-sync slave " << slave_info << " to BinlogSync state"; + } else { + LOG(INFO) << "WakeUpSlaveBinlogSync: Slave " << slave_info + << " not in supported sync state (" << SlaveStateMsg[current_state] << "), skipping"; + continue; + } + } else { + LOG(INFO) << "WakeUpSlaveBinlogSync: Non-consistency mode, will read binlog file for slave " << slave_info; + } + + if (should_promote_state) { + std::lock_guard l(slave_ptr->slave_mu); + if (current_state == SlaveState::kSlaveDbSync || current_state == SlaveState::kSlaveNotSync) { + slave_ptr->slave_state = SlaveState::kSlaveBinlogSync; + LOG(INFO) << "WakeUpSlaveBinlogSync: Promoted slave " << slave_info << " to kSlaveBinlogSync state"; } - } else { + } + + if (should_send_binlog) { + // In strong consistency mode, only call SendBinlog when there are logs to send + if (GetISConsistency()) { + LogOffset current_prepared_id = 
GetPreparedId(); + LogOffset current_committed_id = GetCommittedId(); + LOG(INFO) << "WakeUpSlaveBinlogSync: prepared_id=" << current_prepared_id.ToString() + << ", committed_id=" << current_committed_id.ToString(); + + // Only when prepared_id > committed_id are there new logs to send + if (current_prepared_id > current_committed_id) { + LOG(INFO) << "WakeUpSlaveBinlogSync: Has new logs to send, calling SendBinlog"; + s = coordinator_.SendBinlog(slave_ptr, db_info_.db_name_); + } else { + LOG(INFO) << "WakeUpSlaveBinlogSync: No new logs to send, skipping SendBinlog"; + s = Status::OK(); + } + } else { + s = coordinator_.SendBinlog(slave_ptr, db_info_.db_name_); + } + } else if (!GetISConsistency()) { s = ReadBinlogFileToWq(slave_ptr); - } - if (!s.ok()) { - to_del.push_back(slave_ptr); - LOG(WARNING) << "WakeUpSlaveBinlogSync failed, marking for deletion: " - << slave_ptr->ToStringStatus() << " - " << s.ToString(); - } + } + + if (!s.ok()) { + to_del.push_back(slave_ptr); + LOG(WARNING) << "WakeUpSlaveBinlogSync: Failed for slave " << slave_info << ": " << s.ToString(); + } else { + // LOG(INFO) << "WakeUpSlaveBinlogSync: Successfully triggered for slave " << slave_info; } } @@ -256,6 +323,14 @@ Status SyncMasterDB::WakeUpSlaveBinlogSync() { LOG(INFO) << "Removed slave: " << to_del_slave->ToStringStatus(); } + // Update committed_id + if (GetISConsistency() && !slaves.empty()) { + LOG(INFO) << "WakeUpSlaveBinlogSync: Updating committed_id for DB " << db_info_.db_name_; + Status s = UpdateCommittedID(); + if (!s.ok()) { + LOG(WARNING) << "WakeUpSlaveBinlogSync: Failed to update committed_id: " << s.ToString(); + } + } return Status::OK(); } @@ -334,7 +409,11 @@ Status SyncMasterDB::CheckSyncTimeout(uint64_t now) { std::lock_guard l(slave_ptr->slave_mu); if (slave_ptr->LastRecvTime() + kRecvKeepAliveTimeout < now) { to_del.emplace_back(slave_ptr->Ip(), slave_ptr->Port()); - } else if (slave_ptr->LastSendTime() + kSendKeepAliveTimeout < now && + LOG(WARNING) << 
SyncDBInfo().ToString() << " ACK timeout with slave " << slave_ptr->Ip() << ":" + << slave_ptr->Port(); + continue; + } + if (slave_ptr->LastSendTime() + kSendKeepAliveTimeout < now && slave_ptr->sent_offset == slave_ptr->acked_offset) { std::vector task; RmNode rm_node(slave_ptr->Ip(), slave_ptr->Port(), slave_ptr->DBName(), slave_ptr->SessionId()); @@ -424,11 +503,30 @@ LogOffset SyncMasterDB::GetCommittedId(){ Status SyncMasterDB::AppendSlaveEntries(const std::shared_ptr& cmd_ptr, const BinlogItem& attribute) { return coordinator_.AppendSlaveEntries(cmd_ptr, attribute); } + +Status SyncMasterDB::BatchAppendSlaveEntries(const std::vector>& cmd_ptrs, + const std::vector& attributes) { + for (size_t i = 0; i < cmd_ptrs.size() && i < attributes.size(); i++) { + Status s = coordinator_.AppendSlaveEntries(cmd_ptrs[i], attributes[i]); + if (!s.ok()) { + return s; + } + } + return Status::OK(); +} Status SyncMasterDB::ProcessCoordination(){ return coordinator_.ProcessCoordination(); } Status SyncMasterDB::UpdateCommittedID(){ - return coordinator_.UpdateCommittedID(); + Status s = coordinator_.UpdateCommittedID(); + if (s.ok()) { + // LOG(INFO) << "UpdateCommittedID success: " << coordinator_.GetCommittedId().ToString(); + int slave_count = GetNumberOfSlaveNode(); + // LOG(INFO) << "UpdateCommittedID: Success, master node has acknowledged, " << slave_count << " slave(s) connected"; + } else { + LOG(WARNING) << "UpdateCommittedID failed: " << s.ToString(); + } + return s; } Status SyncMasterDB::Truncate(const LogOffset& offset){ return coordinator_.Truncate(offset); @@ -471,33 +569,201 @@ Status SyncMasterDB::AppendCandidateBinlog(const std::string& ip, int port, cons return Status::OK(); } +pstd::Status SyncMasterDB::WaitForSlaveAcks(const LogOffset& target_offset, int timeout_ms) { + // Get slave count + int slave_count = GetNumberOfSlaveNode(); + LOG(INFO) << "WaitForSlaveAcks: Waiting for ACKs from master and " << slave_count << " slave(s) for target " << 
target_offset.ToString(); + + // If no slaves, return success immediately + if (slave_count == 0) { + LOG(INFO) << "WaitForSlaveAcks: No slaves connected, returning success immediately"; + // Update committed_id + coordinator_.SetCommittedId(target_offset); + return Status::OK(); + } + + g_pika_rm->WakeUpBinlogSync(); + + // Use efficient polling mechanism to avoid creating extra threads + auto start_time = std::chrono::steady_clock::now(); + auto timeout_duration = std::chrono::milliseconds(timeout_ms); + const int POLL_INTERVAL_MS = 10; // 10ms polling interval + + while (true) { + // Check if timeout + auto elapsed = std::chrono::steady_clock::now() - start_time; + if (elapsed >= timeout_duration) { + LOG(WARNING) << "WaitForSlaveAcks: Timeout after " << timeout_ms << "ms waiting for target " << target_offset.ToString(); + return Status::Timeout("Strong consistency replication timed out"); + } + + // Check acknowledgment status of each slave node + std::unordered_map> slaves = GetAllSlaveNodes(); + int ack_count = 1; // Master node is already acknowledged + + for (const auto& slave_pair : slaves) { + std::shared_ptr slave = slave_pair.second; + if (slave) { + slave->Lock(); + LogOffset acked_offset = slave->acked_offset; + slave->Unlock(); + + if (acked_offset >= target_offset) { + ack_count++; + } + } + } + + // Check if all nodes have acknowledged (1 master + N slaves) + int expected_acks = 1 + slave_count; + if (ack_count >= expected_acks) { + LOG(INFO) << "WaitForSlaveAcks: All " << expected_acks << " nodes have acknowledged target " << target_offset.ToString(); + // Update committed_id + coordinator_.SetCommittedId(target_offset); + return Status::OK(); + } + + // Sleep briefly then retry + std::this_thread::sleep_for(std::chrono::milliseconds(POLL_INTERVAL_MS)); + + // Periodically trigger binlog sync to ensure slave nodes receive data + if (elapsed.count() % 100 == 0) { // Trigger every 100ms + g_pika_rm->WakeUpBinlogSync(); + } + } +} + + + Status 
SyncMasterDB::ConsensusProposeLog(const std::shared_ptr& cmd_ptr) { // If consistency is not required, directly propose the log without waiting for consensus if (!coordinator_.GetISConsistency()) { return coordinator_.ProposeLog(cmd_ptr); } + + // Wait for consensus to be achieved within 10 seconds + // while (std::chrono::duration_cast(std::chrono::steady_clock::now() - start).count() < 10) { + // // Check if consensus has been achieved for the given log offset + // if (checkFinished(offset)) { + // return Status::OK(); + // } + // // TODO: 这里暂时注掉了sleep等待,50ms耗时过长,影响写入链路,后期需要改成条件变量唤醒方式 + // //std::this_thread::sleep_for(std::chrono::milliseconds(50)); + // } + + // For now, just propose the log and return OK + Status s = coordinator_.ProposeLog(cmd_ptr); + return s; +} - auto start = std::chrono::steady_clock::now(); - LogOffset offset; - Status s = coordinator_.AppendEntries(cmd_ptr, offset); // Append the log entry to the coordinator - +Status SyncMasterDB::ConsensusBatchProposeLog(const std::vector>& cmd_ptrs, std::vector* offsets) { + // If the batch is empty, nothing to do + if (cmd_ptrs.empty()) { + return Status::OK(); + } + + // For large batches, log batch size for monitoring + if (cmd_ptrs.size() > 10) { + LOG(INFO) << "ConsensusBatchProposeLog: Processing large batch of " << cmd_ptrs.size() << " commands"; + } + + // First, propose the log batch to the coordinator with optimized performance + auto batch_start = std::chrono::steady_clock::now(); + Status s = coordinator_.BatchProposeLog(cmd_ptrs, offsets); + auto batch_end = std::chrono::steady_clock::now(); + auto batch_time_ms = std::chrono::duration_cast(batch_end - batch_start).count(); + if (!s.ok()) { + LOG(WARNING) << "ConsensusBatchProposeLog: Failed to propose log batch: " << s.ToString(); return s; } - - // Wait for consensus to be achieved within 10 seconds - while (std::chrono::duration_cast(std::chrono::steady_clock::now() - start).count() < 10) { - // Check if consensus has been achieved 
for the given log offset - if (checkFinished(offset)) { - return Status::OK(); + + // Performance logging for large batches + if (cmd_ptrs.size() > 10) { + LOG(INFO) << "ConsensusBatchProposeLog: Successfully proposed batch of " << cmd_ptrs.size() + << " commands in " << batch_time_ms << "ms (" + << (cmd_ptrs.size() / (batch_time_ms ? batch_time_ms : 1)) << " commands/ms)"; + } + + // If consistency is not required, return immediately without waiting for replication + if (!coordinator_.GetISConsistency()) { + LOG(INFO) << "ConsensusBatchProposeLog: Consistency not required, returning immediately"; + return s; + } + + // Record the current committed_id and slave node count + LogOffset current_committed_id = GetCommittedId(); + int slave_count = GetNumberOfSlaveNode(); + + LOG(INFO) << "ConsensusBatchProposeLog: Before BatchProposeLog - Current committed_id: " << current_committed_id.ToString() + << ", expecting ACKs from 1 master and " << slave_count << " slave(s)"; + + // 在BatchProposeLog完成后,获取实际的prepared_id作为等待目标 + LogOffset actual_prepared_id = GetPreparedId(); + LogOffset last_cmd_offset; + if (!offsets->empty()) { + last_cmd_offset = offsets->back(); + LOG(INFO) << "ConsensusBatchProposeLog: Last command offset in batch: " << last_cmd_offset.ToString(); + LOG(INFO) << "ConsensusBatchProposeLog: After BatchProposeLog - actual prepared_id: " << actual_prepared_id.ToString(); + } + + // For strong consistency mode, set a batch-level timeout that applies to the entire batch + int batch_timeout_ms = g_pika_conf->replication_ack_timeout(); + // Adjust timeout based on batch size for large batches + if (cmd_ptrs.size() > 100) { + // Scale timeout logarithmically with batch size + batch_timeout_ms = static_cast(batch_timeout_ms * (1 + log10(cmd_ptrs.size() / 100.0))); + LOG(INFO) << "ConsensusBatchProposeLog: Adjusted batch timeout to " << batch_timeout_ms << "ms for large batch"; + } + + // For strong consistency mode, wait for the batch to be committed + LOG(INFO) << 
"ConsensusBatchProposeLog: Waiting for batch to be committed (target: " << actual_prepared_id.ToString() + << ") with timeout of " << batch_timeout_ms << "ms"; + + s = WaitForSlaveAcks(actual_prepared_id, batch_timeout_ms); + + // Process synchronization results + if (!s.ok()) { + if (s.IsTimeout()) { + LOG(WARNING) << "ConsensusBatchProposeLog: Batch timed out waiting for ACKs: " << s.ToString(); + } else if (s.IsIncomplete()) { + LOG(WARNING) << "ConsensusBatchProposeLog: Not all nodes acknowledged the batch: " << s.ToString(); + } else { + LOG(WARNING) << "ConsensusBatchProposeLog: Batch operation failed with status: " << s.ToString(); + } + LOG(WARNING) << "ConsensusBatchProposeLog: Batch operation could not be confirmed with strong consistency, " + << "batch size: " << cmd_ptrs.size(); + } else { + LogOffset new_committed_id = GetCommittedId(); + LOG(INFO) << "ConsensusBatchProposeLog: Successfully received ACKs for entire batch, " + << "new committed_id: " << new_committed_id.ToString(); + + // Verify that synchronization successfully included all offsets in this batch of commands + if (!offsets->empty() && new_committed_id < last_cmd_offset) { + LOG(WARNING) << "ConsensusBatchProposeLog: New committed_id " << new_committed_id.ToString() + << " is less than last command offset " << last_cmd_offset.ToString() + << ", some commands in batch may not be fully replicated"; + + // For strong consistency, we should ensure all commands are replicated + // But if there are no slaves, we don't need to worry about replication + if (slave_count > 0) { + LOG(WARNING) << "ConsensusBatchProposeLog: Some commands may not be fully replicated to all slaves, but proceeding"; + } else { + LOG(INFO) << "ConsensusBatchProposeLog: No slaves connected, no replication needed"; + } } - // TODO: 这里暂时注掉了sleep等待,50ms耗时过长,影响写入链路,后期需要改成条件变量唤醒方式 - //std::this_thread::sleep_for(std::chrono::milliseconds(50)); } - - return Status::Timeout("No consistency achieved within 10 seconds"); + return 
s; } +// Per-DB global batching window across threads +struct WindowState { + pstd::Mutex mu; + std::atomic start_us{0}; + std::atomic accepted{0}; +}; +static std::unordered_map g_db_windows; +static pstd::Mutex g_db_windows_mu; Status SyncMasterDB::ConsensusProcessLeaderLog(const std::shared_ptr& cmd_ptr, const BinlogItem& attribute) { return coordinator_.ProcessLeaderLog(cmd_ptr, attribute); @@ -515,6 +781,8 @@ std::unordered_map> SyncMasterDB::GetAll return coordinator_.SyncPros().GetAllSlaveNodes(); } +std::shared_ptr SyncMasterDB::GetCommandCollector() { return command_collector_; } + /* SyncSlaveDB */ SyncSlaveDB::SyncSlaveDB(const std::string& db_name) : SyncDB(db_name) { @@ -549,8 +817,10 @@ Status SyncSlaveDB::CheckSyncTimeout(uint64_t now) { return Status::OK(); } if (m_info_.LastRecvTime() + kRecvKeepAliveTimeout < now) { - // update slave state to kTryConnect, and try reconnect to master node - repl_state_ = ReplState::kTryConnect; + // 检测到超时,但不自动重连,设置为错误状态以便观察 + LOG(WARNING) << "SyncSlaveDB: Connection timeout detected for " << DBName() + << ", setting to kError state (no auto-reconnect)"; + repl_state_ = ReplState::kError; } return Status::OK(); } @@ -696,73 +966,111 @@ void PikaReplicaManager::InitDB() { void PikaReplicaManager::ProduceWriteQueue(const std::string& ip, int port, std::string db_name, const std::vector& tasks) { - std::lock_guard l(write_queue_mu_); - std::string index = ip + ":" + std::to_string(port); - for (auto& task : tasks) { - write_queues_[index][db_name].push(task); + { + std::lock_guard l(write_queue_mu_); + std::string index = ip + ":" + std::to_string(port); + for (auto& task : tasks) { + write_queues_[index][db_name].push(task); + } + // LOG(INFO) << "ProduceWriteQueue: Added " << tasks.size() << " tasks to queue for " << index << " db:" << db_name; } + // Release lock before calling ConsumeWriteQueue to avoid deadlock + ConsumeWriteQueue(); } int PikaReplicaManager::ConsumeWriteQueue() { - std::unordered_map>> 
to_send_map; - int counter = 0; + // LOG(INFO) << "ConsumeWriteQueue: Called, checking write queues"; + // Quick check if there are any tasks { std::lock_guard l(write_queue_mu_); - for (auto& iter : write_queues_) { - const std::string& ip_port = iter.first; - std::unordered_map>& p_map = iter.second; - for (auto& db_queue : p_map) { - std::queue& queue = db_queue.second; - for (int i = 0; i < kBinlogSendPacketNum; ++i) { - if (queue.empty()) { - break; - } - size_t batch_index = queue.size() > kBinlogSendBatchNum ? kBinlogSendBatchNum : queue.size(); - std::vector to_send; - size_t batch_size = 0; - for (size_t i = 0; i < batch_index; ++i) { - WriteTask& task = queue.front(); - batch_size += task.binlog_chip_.binlog_.size(); - // make sure SerializeToString will not over 2G - if (batch_size > PIKA_MAX_CONN_RBUF_HB) { - break; - } - to_send.push_back(task); - queue.pop(); - counter++; - } - if (!to_send.empty()) { - to_send_map[ip_port].push_back(std::move(to_send)); - } + if (write_queues_.empty()) { + static int empty_counter = 0; + // if (++empty_counter % 100 == 0) { // Log every 100 iterations + // LOG(INFO) << "ConsumeWriteQueue: write_queues_ is empty (count: " << empty_counter << "), this=" << this; + // } + // LOG(INFO) << "ConsumeWriteQueue: write_queues_ is empty, returning 0"; + return 0; + } + // LOG(INFO) << "ConsumeWriteQueue: write_queues_ size: " << write_queues_.size(); + } + // LOG(INFO) << "ConsumeWriteQueue: Starting to consume write queue, this=" << this; + if (g_pika_conf->command_batch_enabled()) { + for (auto& db_item : sync_master_dbs_) { + if (db_item.second) { + auto command_collector = db_item.second->GetCommandCollector(); + if (command_collector) { + command_collector->FlushCommands(); } } } } + // A list of sending jobs to be executed outside the lock. + // Each job is a tuple of (ip, port, tasks_to_send). 
+ std::vector>> all_sends; + int counter = 0; - std::vector to_delete; - for (auto& iter : to_send_map) { - std::string ip; - int port = 0; - if (!pstd::ParseIpPortString(iter.first, ip, port)) { - LOG(WARNING) << "Parse ip_port error " << iter.first; - continue; - } - for (auto& to_send : iter.second) { - Status s = pika_repl_server_->SendSlaveBinlogChips(ip, port, to_send); - if (!s.ok()) { - LOG(WARNING) << "send binlog to " << ip << ":" << port << " failed, " << s.ToString(); - to_delete.push_back(iter.first); + // === Start of Critical Section === + { + std::lock_guard l(write_queue_mu_); + LOG(INFO) << "ConsumeWriteQueue: write_queues_ size: " << write_queues_.size(); + auto slave_iter = write_queues_.begin(); + while (slave_iter != write_queues_.end()) { + std::string ip; + int port = 0; + if (!pstd::ParseIpPortString(slave_iter->first, ip, port)) { + LOG(WARNING) << "Parse ip_port error " << slave_iter->first; + slave_iter = write_queues_.erase(slave_iter); continue; } + + // Collect all tasks for this slave from all its dbs + std::vector tasks_for_this_slave; + auto& p_map = slave_iter->second; + auto db_iter = p_map.begin(); + while (db_iter != p_map.end()) { + auto& queue = db_iter->second; + while (!queue.empty()) { + tasks_for_this_slave.push_back(std::move(queue.front())); + queue.pop(); + } + // Since the queue is now empty, erase this db entry + db_iter = p_map.erase(db_iter); + } + + if (!tasks_for_this_slave.empty()) { + LOG(INFO) << "ConsumeWriteQueue: Found " << tasks_for_this_slave.size() << " tasks for slave " << ip << ":" << port; + all_sends.emplace_back(ip, port, std::move(tasks_for_this_slave)); + } else { + LOG(INFO) << "ConsumeWriteQueue: No tasks found for slave " << ip << ":" << port; + } + + // Since all db entries for this slave are processed and erased, + // erase the slave entry itself. 
+ slave_iter = write_queues_.erase(slave_iter); } } + // === End of Critical Section === - if (!to_delete.empty()) { - std::lock_guard l(write_queue_mu_); - for (auto& del_queue : to_delete) { - write_queues_.erase(del_queue); + // Now, execute all the prepared network IO jobs outside the lock. + LOG(INFO) << "ConsumeWriteQueue: Processing " << all_sends.size() << " send jobs"; + for (auto& send_job : all_sends) { + std::string& ip = std::get<0>(send_job); + int port = std::get<1>(send_job); + std::vector& to_send = std::get<2>(send_job); + + counter += to_send.size(); + LOG(INFO) << "ConsumeWriteQueue: Sending " << to_send.size() << " tasks to " << ip << ":" << port; + Status s = pika_repl_server_->SendSlaveBinlogChips(ip, port, to_send); + if (!s.ok()) { + LOG(WARNING) << "send binlog to " << ip << ":" << port << " failed, " << s.ToString(); + // On failure, drop the slave connection and any remaining items in its queue + DropItemInWriteQueue(ip, port); + } else { + LOG(INFO) << "ConsumeWriteQueue: Successfully sent " << to_send.size() << " tasks to " << ip << ":" << port; } } + + LOG(INFO) << "ConsumeWriteQueue: Completed, processed " << counter << " tasks"; return counter; } @@ -802,6 +1110,14 @@ void PikaReplicaManager::ScheduleWriteDBTask(const std::shared_ptr& cmd_ptr pika_repl_client_->ScheduleWriteDBTask(cmd_ptr, db_name); } +void PikaReplicaManager::SignalAsyncWriteDBTaskEnd(const std::string& db_name) { + pika_repl_client_->SignalAsyncWriteDBTaskEnd(db_name); +} + +void PikaReplicaManager::WaitForAsyncWriteDBTaskEnd(const std::string& db_name) { + pika_repl_client_->WaitForAsyncWriteDBTaskEnd(db_name); +} + void PikaReplicaManager::ReplServerRemoveClientConn(int fd) { pika_repl_server_->RemoveClientConn(fd); } void PikaReplicaManager::ReplServerUpdateClientConnMap(const std::string& ip_port, int fd) { @@ -815,20 +1131,40 @@ Status PikaReplicaManager::UpdateSyncBinlogStatus(const RmNode& slave, const Log return Status::NotFound(slave.ToString() + " not 
found"); } std::shared_ptr db = sync_master_dbs_[slave.NodeDBInfo()]; + auto slave_node = db->GetSlaveNode(slave.Ip(), slave.Port()); + if (!slave_node) { + return Status::NotFound("Slave node not found"); + } + + LOG(INFO) << "UpdateSyncBinlogStatus - slave: " << slave.ToString() + << ", start: " << offset_start.ToString() + << ", end: " << offset_end.ToString() + << ", current slave state: " << SlaveStateMsg[slave_node->slave_state]; + Status s = db->ConsensusUpdateSlave(slave.Ip(), slave.Port(), offset_start, offset_end); if (!s.ok()) { + LOG(WARNING) << "ConsensusUpdateSlave failed: " << s.ToString(); return s; } + + if (slave_node->sent_offset == slave_node->acked_offset) { + LOG(INFO) << "Slave has acknowledged all sent binlogs"; + } + if(db->GetISConsistency()){ + LOG(INFO) << "Updating CommittedID for consistency mode"; s = db->UpdateCommittedID(); if (!s.ok()) { + LOG(WARNING) << "UpdateCommittedID failed: " << s.ToString(); return s; } + LOG(INFO) << "CommittedID updated to: " << db->GetCommittedId().ToString(); } - s = db->SyncBinlogToWq(slave.Ip(), slave.Port()); - if (!s.ok()) { - return s; - } + + // s = db->SyncBinlogToWq(slave.Ip(), slave.Port()); + // if (!s.ok()) { + // return s; + // } return Status::OK(); } @@ -885,6 +1221,9 @@ Status PikaReplicaManager::WakeUpBinlogSync() { return s; } } + + // Set flag to force immediate send on next operation + immediate_send_once_.store(true); return Status::OK(); } @@ -918,14 +1257,21 @@ Status PikaReplicaManager::CheckDBRole(const std::string& db, int* role) { if (sync_slave_dbs_.find(p_info) == sync_slave_dbs_.end()) { return Status::NotFound(db + " not found"); } - if (sync_master_dbs_[p_info]->GetNumberOfSlaveNode() != 0 || - (sync_master_dbs_[p_info]->GetNumberOfSlaveNode() == 0 && - sync_slave_dbs_[p_info]->State() == kNoConnect)) { + int slave_count = sync_master_dbs_[p_info]->GetNumberOfSlaveNode(); + ReplState slave_state = sync_slave_dbs_[p_info]->State(); + + LOG(INFO) << "CheckDBRole: DB=" << db 
<< ", slave_count=" << slave_count + << ", slave_state=" << ReplStateMsg[slave_state] << "(" << slave_state << ")"; + + if (slave_state == ReplState::kNoConnect || slave_state == ReplState::kDBNoConnect) { *role |= PIKA_ROLE_MASTER; - } - if (sync_slave_dbs_[p_info]->State() != ReplState::kNoConnect) { + } else { *role |= PIKA_ROLE_SLAVE; } + + if (slave_count > 0) { + *role |= PIKA_ROLE_MASTER; + } // if role is not master or slave, the rest situations are all single return Status::OK(); } @@ -1105,11 +1451,23 @@ Status PikaReplicaManager::RunSyncSlaveDBStateMachine() { for (const auto& item : sync_slave_dbs_) { DBInfo p_info = item.first; std::shared_ptr s_db = item.second; + + // Add debug logging + static int state_debug_counter = 0; + if (++state_debug_counter % 100 == 0) { // Log every 100 iterations + LOG(INFO) << "RunSyncSlaveDBStateMachine: DB " << p_info.db_name_ + << " state: " << ReplStateMsg[s_db->State()]; + } + if (s_db->State() == ReplState::kTryConnect) { + LOG(INFO) << "RunSyncSlaveDBStateMachine: Sending TrySync for " << p_info.db_name_; SendTrySyncRequest(p_info.db_name_); } else if (s_db->State() == ReplState::kTryDBSync) { SendDBSyncRequest(p_info.db_name_); } else if (s_db->State() == ReplState::kWaitReply) { + // 移除自动重试机制,保持等待状态以观察真实的连接问题 + LOG(INFO) << "RunSyncSlaveDBStateMachine: DB " << p_info.db_name_ + << " in kWaitReply state, waiting for master response (no auto-retry)"; continue; } else if (s_db->State() == ReplState::kWaitDBSync) { Status s = s_db->ActivateRsync(); @@ -1158,6 +1516,16 @@ void PikaReplicaManager::FindCommonMaster(std::string* master) { } } +std::shared_ptr PikaReplicaManager::GetConsensusCoordinator(const std::string& db_name) { + std::shared_lock l(dbs_rw_); + DBInfo p_info(db_name); + if (sync_master_dbs_.find(p_info) == sync_master_dbs_.end()) { + return nullptr; + } + // Return a pointer to the existing coordinator instead of creating a copy + return sync_master_dbs_[p_info]->StableLogger()->coordinator(); +} + 
void PikaReplicaManager::RmStatus(std::string* info) { std::shared_lock l(dbs_rw_); std::stringstream tmp_stream; @@ -1181,3 +1549,4 @@ void PikaReplicaManager::BuildBinlogOffset(const LogOffset& offset, InnerMessage boffset->set_term(offset.l_offset.term); boffset->set_index(offset.l_offset.index); } + diff --git a/src/pika_server.cc b/src/pika_server.cc index a7d50b1e71..e97724b586 100644 --- a/src/pika_server.cc +++ b/src/pika_server.cc @@ -24,6 +24,7 @@ #include "include/pika_monotonic_time.h" #include "include/pika_rm.h" #include "include/pika_server.h" +#include "include/pika_command_collector.h" using pstd::Status; extern PikaServer* g_pika_server; @@ -169,6 +170,12 @@ void PikaServer::Start() { } */ + if (g_pika_conf->command_batch_enabled()) { + command_collector_ = std::make_shared( + g_pika_rm->GetConsensusCoordinator(g_pika_conf->default_db())); + LOG(INFO) << "Command collector created successfully"; + } + ret = pika_client_processor_->Start(); if (ret != net::kSuccess) { dbs_.clear(); @@ -213,6 +220,7 @@ void PikaServer::Start() { LOG(INFO) << "Pika Server going to start"; rsync_server_->Start(); while (!exit_) { + //LOG(INFO) << "Pika Server start a new round of timing tasks"; DoTimingTask(); // wake up every 5 seconds if (!exit_ && exit_mutex_.try_lock_for(std::chrono::seconds(5))) { @@ -583,15 +591,26 @@ void PikaServer::DeleteSlave(int fd) { } if (is_find) { + LOG(INFO) << "DeleteSlave: Processing slave deletion for " << ip << ":" << port << ", clearing write queues"; g_pika_rm->LostConnection(ip, port); g_pika_rm->DropItemInWriteQueue(ip, port); + LOG(INFO) << "DeleteSlave: Completed slave deletion for " << ip << ":" << port; } if (slave_num == 0) { - std::lock_guard l(state_protector_); - last_role_ = role_; - role_ &= ~PIKA_ROLE_MASTER; - leader_protected_mode_ = false; // explicitly cancel protected mode + // Check if slaveof is configured, if so, do not remove MASTER role + // Because a Pika configured as a slave is still the master node for 
its clients + std::string slaveof = g_pika_conf->slaveof(); + if (slaveof.empty()) { + // Only remove MASTER role when slaveof is not configured + std::lock_guard l(state_protector_); + last_role_ = role_; + role_ &= ~PIKA_ROLE_MASTER; + leader_protected_mode_ = false; // explicitly cancel protected mode + LOG(INFO) << "DeleteSlave: Removed MASTER role for standalone node"; + } else { + LOG(INFO) << "DeleteSlave: Kept MASTER role for slaveof-configured node (slaveof: " << slaveof << ")"; + } } } @@ -790,13 +809,16 @@ void PikaServer::SetFirstMetaSync(bool v) { void PikaServer::ScheduleClientPool(net::TaskFunc func, void* arg, bool is_slow_cmd, bool is_admin_cmd) { if (is_slow_cmd && g_pika_conf->slow_cmd_pool()) { + //LOG(INFO) << "Schedule task to slow cmd thread pool"; pika_slow_cmd_thread_pool_->Schedule(func, arg); return; } if (is_admin_cmd) { + //LOG(INFO) << "Schedule task to admin cmd thread pool"; pika_admin_cmd_thread_pool_->Schedule(func, arg); return; } + //LOG(INFO) << "Schedule task to client processor thread pool"; pika_client_processor_->SchedulePool(func, arg); } @@ -1098,7 +1120,13 @@ int PikaServer::SendToPeer() { return g_pika_rm->ConsumeWriteQueue(); } void PikaServer::SignalAuxiliary() { pika_auxiliary_thread_->cv_.notify_one(); } -Status PikaServer::TriggerSendBinlogSync() { return g_pika_rm->WakeUpBinlogSync(); } +Status PikaServer::TriggerSendBinlogSync() { + // Only execute on master nodes + if (!(role_ & PIKA_ROLE_MASTER)) { + return Status::OK(); + } + return g_pika_rm->WakeUpBinlogSync(); +} int PikaServer::PubSubNumPat() { return pika_pubsub_thread_->PubSubNumPat(); } diff --git a/src/pika_slot_command.cc b/src/pika_slot_command.cc index dc7f07e73d..1042098ebb 100644 --- a/src/pika_slot_command.cc +++ b/src/pika_slot_command.cc @@ -1539,7 +1539,7 @@ void SlotsReloadCmd::Do() { g_pika_server->Bgslotsreload(db_); const PikaServer::BGSlotsReload &info = g_pika_server->bgslots_reload(); char buf[256]; - snprintf(buf, sizeof(buf), "+%s : 
%lld", info.s_start_time.c_str(), g_pika_server->GetSlotsreloadingCursor()); + snprintf(buf, sizeof(buf), "+%s : %ld", info.s_start_time.c_str(), g_pika_server->GetSlotsreloadingCursor()); res_.AppendContent(buf); return; } diff --git a/src/pika_stable_log.cc b/src/pika_stable_log.cc index b1e9fc278a..dcc781354d 100644 --- a/src/pika_stable_log.cc +++ b/src/pika_stable_log.cc @@ -35,6 +35,21 @@ StableLog::StableLog(std::string db_name, std::string log_path) StableLog::~StableLog() = default; +std::shared_ptr StableLog::coordinator() { + // Get and return the coordinator pointer directly from SyncMasterDB + auto master_db = g_pika_rm->GetSyncMasterDBByName(DBInfo(db_name_)); + if (master_db) { + // Return a nullptr if GetCoordinator is not ready + try { + // Use aliasing constructor instead of dangerous custom deleter + return std::shared_ptr(master_db, &master_db->GetCoordinator()); + } catch (const std::exception& e) { + LOG(ERROR) << "Failed to get coordinator for " << db_name_ << ": " << e.what(); + return nullptr; + } + } + return nullptr; +} void StableLog::Leave() { Close(); RemoveStableLogDir(); diff --git a/src/pstd/include/lock_mgr.h b/src/pstd/include/lock_mgr.h index 978e9dd17a..fd50dad3ce 100644 --- a/src/pstd/include/lock_mgr.h +++ b/src/pstd/include/lock_mgr.h @@ -33,7 +33,7 @@ class LockMgr : public pstd::noncopyable { private: // Default number of lock map stripes - const size_t default_num_stripes_[[maybe_unused]]; + const size_t default_num_stripes_; // Limit on number of keys locked per column family const int64_t max_num_locks_; diff --git a/src/storage/src/redis.cc b/src/storage/src/redis.cc index 3066a62759..0c4aa52cae 100644 --- a/src/storage/src/redis.cc +++ b/src/storage/src/redis.cc @@ -14,6 +14,7 @@ Redis::Redis(Storage* const s, const DataType& type) lock_mgr_(std::make_shared(1000, 0, std::make_shared())), small_compaction_threshold_(5000), small_compaction_duration_threshold_(10000) { + default_write_options_.disableWAL = true; 
statistics_store_ = std::make_unique>(); scan_cursors_store_ = std::make_unique>(); scan_cursors_store_->SetCapacity(5000); diff --git a/tests/integration/clean_start.sh b/tests/integration/clean_start.sh new file mode 100644 index 0000000000..b5674425ec --- /dev/null +++ b/tests/integration/clean_start.sh @@ -0,0 +1,66 @@ +#!/bin/bash + +# 切换到项目根目录 +cd /home/pika/caiyu/pikiwidb || exit + +# 定义清理端口函数 +clean_ports() { + echo "Checking and cleaning ports..." + + sudo killall -9 pika + + # 等待端口完全释放 + sleep 1 +} + +# 编译项目 +echo "Building project..." +sudo ./build.sh +if [ $? -ne 0 ]; then + echo "Build failed!" + exit 1 +fi +echo "Build successful." + +# 清理测试目录 +echo "Cleaning up test directory..." +sudo rm -rf ./output/pacifica_test/ +echo "Cleanup completed." + +# 清理占用端口的进程 +clean_ports + +# 切换到输出目录 +cd output || exit + +# 启动主从服务器 +echo "Starting master and slave servers..." +# 不使用sudo运行脚本以避免权限问题 +sudo ../tests/integration/start_master_and_slave.sh +sleep 15 + +# 设置主从强一致性关系 +echo "Setting up strong consistency replication..." + +redis-cli -p 9302 slaveof 127.0.0.1 9301 strong +sleep 1 +echo "Replication setup successful." + +# 执行 benchmark +echo "Running benchmark..." + +redis-benchmark -p 9301 -t set -r 100000 -n 100000 -c 500 --threads 4 +echo "Benchmark finished." 
+ +# 打印日志信息 +echo -e "\n==== 主节点 INFO 日志 ====" +tail -n 150 ./pacifica_test/master/log/pika.INFO + +echo -e "\n==== 主节点 WARNING 日志 ====" +tail -n 150 ./pacifica_test/master/log/pika.WARNING + +echo -e "\n==== 从节点 INFO 日志 ====" +tail -n 150 ./pacifica_test/slave1/log/pika.INFO + +echo -e "\n==== 从节点 WARNING 日志 ====" +tail -n 150 ./pacifica_test/slave1/log/pika.WARNING \ No newline at end of file diff --git a/tests/integration/start_master_and_slave.sh b/tests/integration/start_master_and_slave.sh index d3d0f1257d..5517ade141 100755 --- a/tests/integration/start_master_and_slave.sh +++ b/tests/integration/start_master_and_slave.sh @@ -1,135 +1,135 @@ -#!/bin/bash -# This script is used by .github/workflows/pika.yml, Do not modify this file unless you know what you are doing. -# it's used to start pika master and slave, running path: build -cp ../conf/pika.conf ./pika_single.conf -cp ../conf/pika.conf ./pika_master.conf -cp ../conf/pika.conf ./pika_slave.conf -cp ../conf/pika.conf ./pika_rename.conf -cp ../conf/pika.conf ./pika_master_rename.conf -cp ../conf/pika.conf ./pika_slave_rename.conf -cp ../conf/pika.conf ./pika_acl_both_password.conf -cp ../conf/pika.conf ./pika_acl_only_admin_password.conf -cp ../conf/pika.conf ./pika_has_other_acl_user.conf -# Create folders for storing data on the primary and secondary nodes -mkdir master_data -mkdir slave_data -# Example Change the location for storing data on primary and secondary nodes in the configuration file -sed -i.bak \ - -e 's|databases : 1|databases : 2|' \ - -e 's|#daemonize : yes|daemonize : yes|' \ - -e 's|timeout : 60|timeout : 500|' ./pika_single.conf - -sed -i.bak \ - -e 's|databases : 1|databases : 2|' \ - -e 's|port : 9221|port : 9241|' \ - -e 's|log-path : ./log/|log-path : ./master_data/log/|' \ - -e 's|db-path : ./db/|db-path : ./master_data/db/|' \ - -e 's|dump-path : ./dump/|dump-path : ./master_data/dump/|' \ - -e 's|pidfile : ./pika.pid|pidfile : ./master_data/pika.pid|' \ - -e 's|db-sync-path : 
./dbsync/|db-sync-path : ./master_data/dbsync/|' \ - -e 's|#daemonize : yes|daemonize : yes|' \ - -e 's|timeout : 60|timeout : 500|' ./pika_master.conf - -sed -i.bak \ - -e 's|databases : 1|databases : 2|' \ - -e 's|port : 9221|port : 9231|' \ - -e 's|log-path : ./log/|log-path : ./slave_data/log/|' \ - -e 's|db-path : ./db/|db-path : ./slave_data/db/|' \ - -e 's|dump-path : ./dump/|dump-path : ./slave_data/dump/|' \ - -e 's|pidfile : ./pika.pid|pidfile : ./slave_data/pika.pid|' \ - -e 's|db-sync-path : ./dbsync/|db-sync-path : ./slave_data/dbsync/|' \ - -e 's|#daemonize : yes|daemonize : yes|' \ - -e 's|timeout : 60|timeout : 500|' ./pika_slave.conf - -sed -i.bak \ - -e 's|# rename-command : FLUSHALL 360flushall|rename-command : FLUSHALL 360flushall|' \ - -e 's|# rename-command : FLUSHDB 360flushdb|rename-command : FLUSHDB 360flushdb|' \ - -e 's|databases : 1|databases : 2|' \ - -e 's|port : 9221|port : 9251|' \ - -e 's|log-path : ./log/|log-path : ./rename_data/log/|' \ - -e 's|db-path : ./db/|db-path : ./rename_data/db/|' \ - -e 's|dump-path : ./dump/|dump-path : ./rename_data/dump/|' \ - -e 's|pidfile : ./pika.pid|pidfile : ./rename_data/pika.pid|' \ - -e 's|db-sync-path : ./dbsync/|db-sync-path : ./rename_data/dbsync/|' \ - -e 's|#daemonize : yes|daemonize : yes|' \ - -e 's|timeout : 60|timeout : 500|' ./pika_rename.conf - -sed -i.bak \ - -e 's|requirepass :|requirepass : requirepass|' \ - -e 's|masterauth :|masterauth : requirepass|' \ - -e 's|# userpass :|userpass : userpass|' \ - -e 's|# userblacklist :|userblacklist : flushall,flushdb|' \ - -e 's|port : 9221|port : 9261|' \ - -e 's|log-path : ./log/|log-path : ./acl1_data/log/|' \ - -e 's|db-path : ./db/|db-path : ./acl1_data/db/|' \ - -e 's|dump-path : ./dump/|dump-path : ./acl1_data/dump/|' \ - -e 's|pidfile : ./pika.pid|pidfile : ./acl1_data/pika.pid|' \ - -e 's|db-sync-path : ./dbsync/|db-sync-path : ./acl1_data/dbsync/|' \ - -e 's|#daemonize : yes|daemonize : yes|' \ - -e 's|timeout : 60|timeout : 
500|' ./pika_acl_both_password.conf - -sed -i.bak \ - -e 's|requirepass :|requirepass : requirepass|' \ - -e 's|masterauth :|masterauth : requirepass|' \ - -e 's|# userblacklist :|userblacklist : flushall,flushdb|' \ - -e 's|port : 9221|port : 9271|' \ - -e 's|log-path : ./log/|log-path : ./acl2_data/log/|' \ - -e 's|db-path : ./db/|db-path : ./acl2_data/db/|' \ - -e 's|dump-path : ./dump/|dump-path : ./acl2_data/dump/|' \ - -e 's|pidfile : ./pika.pid|pidfile : ./acl2_data/pika.pid|' \ - -e 's|db-sync-path : ./dbsync/|db-sync-path : ./acl2_data/dbsync/|' \ - -e 's|#daemonize : yes|daemonize : yes|' \ - -e 's|timeout : 60|timeout : 500|' ./pika_acl_only_admin_password.conf - -sed -i.bak \ - -e 's|requirepass :|requirepass : requirepass|' \ - -e 's|masterauth :|masterauth : requirepass|' \ - -e 's|# userpass :|userpass : userpass|' \ - -e 's|# userblacklist :|userblacklist : flushall,flushdb|' \ - -e 's|port : 9221|port : 9281|' \ - -e 's|log-path : ./log/|log-path : ./acl3_data/log/|' \ - -e 's|db-path : ./db/|db-path : ./acl3_data/db/|' \ - -e 's|dump-path : ./dump/|dump-path : ./acl3_data/dump/|' \ - -e 's|pidfile : ./pika.pid|pidfile : ./acl3_data/pika.pid|' \ - -e 's|db-sync-path : ./dbsync/|db-sync-path : ./acl3_data/dbsync/|' \ - -e 's|#daemonize : yes|daemonize : yes|' \ - -e 's|timeout : 60|timeout : 500|' ./pika_has_other_acl_user.conf -echo -e '\nuser : limit on >limitpass ~* +@all &*' >> ./pika_has_other_acl_user.conf - -sed -i '' \ - -e 's|# rename-command : FLUSHDB 360flushdb|rename-command : FLUSHDB 360flushdb|' \ - -e 's|port : 9221|port : 9291|' \ - -e 's|log-path : ./log/|log-path : ./master_rename_data/log/|' \ - -e 's|db-path : ./db/|db-path : ./master_rename_data/db/|' \ - -e 's|dump-path : ./dump/|dump-path : ./master_rename_data/dump/|' \ - -e 's|pidfile : ./pika.pid|pidfile : ./master_rename_data/pika.pid|' \ - -e 's|db-sync-path : ./dbsync/|db-sync-path : ./master_rename_data/dbsync/|' \ - -e 's|#daemonize : yes|daemonize : yes|' \ - -e 
's|timeout : 60|timeout : 500|' ./pika_master_rename.conf - -sed -i '' \ - -e 's|# rename-command : FLUSHDB 360flushdb|rename-command : FLUSHDB 360flushdb|' \ - -e 's|port : 9221|port : 9301|' \ - -e 's|log-path : ./log/|log-path : ./slave_rename_data/log/|' \ - -e 's|db-path : ./db/|db-path : ./slave_rename_data/db/|' \ - -e 's|dump-path : ./dump/|dump-path : ./slave_rename_data/dump/|' \ - -e 's|pidfile : ./pika.pid|pidfile : ./slave_rename_data/pika.pid|' \ - -e 's|db-sync-path : ./dbsync/|db-sync-path : ./slave_rename_data/dbsync/|' \ - -e 's|#daemonize : yes|daemonize : yes|' \ - -e 's|timeout : 60|timeout : 500|' ./pika_slave_rename.conf - -# Start three nodes -./pika -c ./pika_single.conf -./pika -c ./pika_master.conf -./pika -c ./pika_slave.conf -./pika -c ./pika_rename.conf -./pika -c ./pika_acl_both_password.conf -./pika -c ./pika_acl_only_admin_password.conf -./pika -c ./pika_has_other_acl_user.conf -./pika -c ./pika_master_rename.conf -./pika -c ./pika_slave_rename.conf -#ensure both master and slave are ready -sleep 10 +# #!/bin/bash +# # This script is used by .github/workflows/pika.yml, Do not modify this file unless you know what you are doing. 
+# # it's used to start pika master and slave, running path: build +# cp ../conf/pika.conf ./pika_single.conf +# cp ../conf/pika.conf ./pika_master.conf +# cp ../conf/pika.conf ./pika_slave.conf +# cp ../conf/pika.conf ./pika_rename.conf +# cp ../conf/pika.conf ./pika_master_rename.conf +# cp ../conf/pika.conf ./pika_slave_rename.conf +# cp ../conf/pika.conf ./pika_acl_both_password.conf +# cp ../conf/pika.conf ./pika_acl_only_admin_password.conf +# cp ../conf/pika.conf ./pika_has_other_acl_user.conf +# # Create folders for storing data on the primary and secondary nodes +# mkdir master_data +# mkdir slave_data +# # Example Change the location for storing data on primary and secondary nodes in the configuration file +# sed -i.bak \ +# -e 's|databases : 1|databases : 2|' \ +# -e 's|#daemonize : yes|daemonize : yes|' \ +# -e 's|timeout : 60|timeout : 500|' ./pika_single.conf + +# sed -i.bak \ +# -e 's|databases : 1|databases : 2|' \ +# -e 's|port : 9221|port : 9241|' \ +# -e 's|log-path : ./log/|log-path : ./master_data/log/|' \ +# -e 's|db-path : ./db/|db-path : ./master_data/db/|' \ +# -e 's|dump-path : ./dump/|dump-path : ./master_data/dump/|' \ +# -e 's|pidfile : ./pika.pid|pidfile : ./master_data/pika.pid|' \ +# -e 's|db-sync-path : ./dbsync/|db-sync-path : ./master_data/dbsync/|' \ +# -e 's|#daemonize : yes|daemonize : yes|' \ +# -e 's|timeout : 60|timeout : 500|' ./pika_master.conf + +# sed -i.bak \ +# -e 's|databases : 1|databases : 2|' \ +# -e 's|port : 9221|port : 9231|' \ +# -e 's|log-path : ./log/|log-path : ./slave_data/log/|' \ +# -e 's|db-path : ./db/|db-path : ./slave_data/db/|' \ +# -e 's|dump-path : ./dump/|dump-path : ./slave_data/dump/|' \ +# -e 's|pidfile : ./pika.pid|pidfile : ./slave_data/pika.pid|' \ +# -e 's|db-sync-path : ./dbsync/|db-sync-path : ./slave_data/dbsync/|' \ +# -e 's|#daemonize : yes|daemonize : yes|' \ +# -e 's|timeout : 60|timeout : 500|' ./pika_slave.conf + +# sed -i.bak \ +# -e 's|# rename-command : FLUSHALL 
360flushall|rename-command : FLUSHALL 360flushall|' \ +# -e 's|# rename-command : FLUSHDB 360flushdb|rename-command : FLUSHDB 360flushdb|' \ +# -e 's|databases : 1|databases : 2|' \ +# -e 's|port : 9221|port : 9251|' \ +# -e 's|log-path : ./log/|log-path : ./rename_data/log/|' \ +# -e 's|db-path : ./db/|db-path : ./rename_data/db/|' \ +# -e 's|dump-path : ./dump/|dump-path : ./rename_data/dump/|' \ +# -e 's|pidfile : ./pika.pid|pidfile : ./rename_data/pika.pid|' \ +# -e 's|db-sync-path : ./dbsync/|db-sync-path : ./rename_data/dbsync/|' \ +# -e 's|#daemonize : yes|daemonize : yes|' \ +# -e 's|timeout : 60|timeout : 500|' ./pika_rename.conf + +# sed -i.bak \ +# -e 's|requirepass :|requirepass : requirepass|' \ +# -e 's|masterauth :|masterauth : requirepass|' \ +# -e 's|# userpass :|userpass : userpass|' \ +# -e 's|# userblacklist :|userblacklist : flushall,flushdb|' \ +# -e 's|port : 9221|port : 9261|' \ +# -e 's|log-path : ./log/|log-path : ./acl1_data/log/|' \ +# -e 's|db-path : ./db/|db-path : ./acl1_data/db/|' \ +# -e 's|dump-path : ./dump/|dump-path : ./acl1_data/dump/|' \ +# -e 's|pidfile : ./pika.pid|pidfile : ./acl1_data/pika.pid|' \ +# -e 's|db-sync-path : ./dbsync/|db-sync-path : ./acl1_data/dbsync/|' \ +# -e 's|#daemonize : yes|daemonize : yes|' \ +# -e 's|timeout : 60|timeout : 500|' ./pika_acl_both_password.conf + +# sed -i.bak \ +# -e 's|requirepass :|requirepass : requirepass|' \ +# -e 's|masterauth :|masterauth : requirepass|' \ +# -e 's|# userblacklist :|userblacklist : flushall,flushdb|' \ +# -e 's|port : 9221|port : 9271|' \ +# -e 's|log-path : ./log/|log-path : ./acl2_data/log/|' \ +# -e 's|db-path : ./db/|db-path : ./acl2_data/db/|' \ +# -e 's|dump-path : ./dump/|dump-path : ./acl2_data/dump/|' \ +# -e 's|pidfile : ./pika.pid|pidfile : ./acl2_data/pika.pid|' \ +# -e 's|db-sync-path : ./dbsync/|db-sync-path : ./acl2_data/dbsync/|' \ +# -e 's|#daemonize : yes|daemonize : yes|' \ +# -e 's|timeout : 60|timeout : 500|' 
./pika_acl_only_admin_password.conf + +# sed -i.bak \ +# -e 's|requirepass :|requirepass : requirepass|' \ +# -e 's|masterauth :|masterauth : requirepass|' \ +# -e 's|# userpass :|userpass : userpass|' \ +# -e 's|# userblacklist :|userblacklist : flushall,flushdb|' \ +# -e 's|port : 9221|port : 9281|' \ +# -e 's|log-path : ./log/|log-path : ./acl3_data/log/|' \ +# -e 's|db-path : ./db/|db-path : ./acl3_data/db/|' \ +# -e 's|dump-path : ./dump/|dump-path : ./acl3_data/dump/|' \ +# -e 's|pidfile : ./pika.pid|pidfile : ./acl3_data/pika.pid|' \ +# -e 's|db-sync-path : ./dbsync/|db-sync-path : ./acl3_data/dbsync/|' \ +# -e 's|#daemonize : yes|daemonize : yes|' \ +# -e 's|timeout : 60|timeout : 500|' ./pika_has_other_acl_user.conf +# echo -e '\nuser : limit on >limitpass ~* +@all &*' >> ./pika_has_other_acl_user.conf + +# sed -i '' \ +# -e 's|# rename-command : FLUSHDB 360flushdb|rename-command : FLUSHDB 360flushdb|' \ +# -e 's|port : 9221|port : 9291|' \ +# -e 's|log-path : ./log/|log-path : ./master_rename_data/log/|' \ +# -e 's|db-path : ./db/|db-path : ./master_rename_data/db/|' \ +# -e 's|dump-path : ./dump/|dump-path : ./master_rename_data/dump/|' \ +# -e 's|pidfile : ./pika.pid|pidfile : ./master_rename_data/pika.pid|' \ +# -e 's|db-sync-path : ./dbsync/|db-sync-path : ./master_rename_data/dbsync/|' \ +# -e 's|#daemonize : yes|daemonize : yes|' \ +# -e 's|timeout : 60|timeout : 500|' ./pika_master_rename.conf + +# sed -i '' \ +# -e 's|# rename-command : FLUSHDB 360flushdb|rename-command : FLUSHDB 360flushdb|' \ +# -e 's|port : 9221|port : 9301|' \ +# -e 's|log-path : ./log/|log-path : ./slave_rename_data/log/|' \ +# -e 's|db-path : ./db/|db-path : ./slave_rename_data/db/|' \ +# -e 's|dump-path : ./dump/|dump-path : ./slave_rename_data/dump/|' \ +# -e 's|pidfile : ./pika.pid|pidfile : ./slave_rename_data/pika.pid|' \ +# -e 's|db-sync-path : ./dbsync/|db-sync-path : ./slave_rename_data/dbsync/|' \ +# -e 's|#daemonize : yes|daemonize : yes|' \ +# -e 's|timeout : 
60|timeout : 500|' ./pika_slave_rename.conf + +# # Start three nodes +# ./pika -c ./pika_single.conf +# ./pika -c ./pika_master.conf +# ./pika -c ./pika_slave.conf +# ./pika -c ./pika_rename.conf +# ./pika -c ./pika_acl_both_password.conf +# ./pika -c ./pika_acl_only_admin_password.conf +# ./pika -c ./pika_has_other_acl_user.conf +# ./pika -c ./pika_master_rename.conf +# ./pika -c ./pika_slave_rename.conf +# #ensure both master and slave are ready +# sleep 10 # 创建PacificA一致性测试的数据目录 mkdir -p pacifica_test/master