Skip to content

Commit b44ce50

Browse files
committed
Fix the low qps bug
1 parent 3a3760a commit b44ce50

12 files changed

Lines changed: 167 additions & 153 deletions

conf/pika.conf

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -153,13 +153,6 @@ replication-num : 0
153153
# The default value of consensus-level is 0, which means this feature is not enabled.
154154
consensus-level : 0
155155

156-
# consensus-batch-size: The maximum number of items in a consensus batch.
157-
consensus-batch-size : 1000
158-
159-
# consensus-timeout: The timeout in milliseconds for waiting for a batch ACK from a slave.
160-
# Default: 1500
161-
consensus-timeout : 1500
162-
163156
# The Prefix of dump file's name.
164157
# All the files that generated by command "bgsave" will be name with this prefix.
165158
dump-prefix :
@@ -320,6 +313,11 @@ write-binlog : yes
320313
# Supported Units [K|M|G], binlog-file-size default unit is in [bytes] and the default value is 100M.
321314
binlog-file-size : 104857600
322315

316+
# The interval (in number of logs) for forcing a disk flush of the binlog.
317+
# A value of 1 means fsync for every log. A higher value improves performance at the cost of durability.
318+
# Default: 100
319+
binlog-fsync-interval : 100000
320+
323321
# Automatically triggers a small compaction according to statistics
324322
# Use the cache to store up to 'max-cache-statistic-keys' keys
325323
# If 'max-cache-statistic-keys' set to '0', that means turn off the statistics function

include/pika_client_conn.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88

99
#include <bitset>
1010
#include <utility>
11+
#include <future>
1112

1213
#include "acl.h"
1314
#include "include/pika_command.h"
@@ -52,6 +53,13 @@ class PikaClientConn : public net::RedisConn {
5253
bool cache_miss_in_rtc_;
5354
};
5455

56+
struct ParallelTask {
57+
std::vector<std::shared_ptr<std::string>> resps;
58+
std::vector<std::future<void>> futures;
59+
std::atomic<size_t> completed_count{0};
60+
size_t total_tasks{0};
61+
};
62+
5563
struct TxnStateBitMask {
5664
public:
5765
static constexpr uint8_t Start = 0;
@@ -72,6 +80,7 @@ class PikaClientConn : public net::RedisConn {
7280
void BatchExecRedisCmd(const std::vector<net::RedisCmdArgsType>& argvs, bool cache_miss_in_rtc);
7381
int DealMessage(const net::RedisCmdArgsType& argv, std::string* response) override { return 0; }
7482
static void DoBackgroundTask(void* arg);
83+
static void ParallelExecRedisCmd(void* arg);
7584

7685
bool IsPubSub() { return is_pubsub_; }
7786
void SetIsPubSub(bool is_pubsub) { is_pubsub_ = is_pubsub; }

include/pika_conf.h

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -749,6 +749,8 @@ class PikaConf : public pstd::BaseConf {
749749
rsync_timeout_ms_.store(value);
750750
}
751751

752+
int binlog_fsync_interval() const;
753+
752754
void SetProtoMaxBulkLen(const int64_t value) {
753755
std::lock_guard l(rwlock_);
754756
TryPushDiffCommands("proto-max-bulk-len", std::to_string(value));
@@ -888,9 +890,6 @@ class PikaConf : public pstd::BaseConf {
888890
int ConfigRewriteSlaveOf();
889891
int ConfigRewriteReplicationID();
890892

891-
int consensus_batch_size() const;
892-
int consensus_timeout() const;
893-
894893
private:
895894
int port_ = 0;
896895
int slave_priority_ = 100;
@@ -1069,9 +1068,8 @@ class PikaConf : public pstd::BaseConf {
10691068
//Internal used metrics Persisted by pika.conf
10701069
std::unordered_set<std::string> internal_used_unfinished_full_sync_;
10711070

1072-
// Consensus configuration
1073-
int consensus_batch_size_;
1074-
int consensus_timeout_;
1071+
// Binlog fsync interval
1072+
int binlog_fsync_interval_;
10751073
};
10761074

10771075
#endif

include/pika_consensus.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -294,6 +294,7 @@ class ConsensusCoordinator {
294294
std::shared_mutex prepared_id__rwlock_;
295295
LogOffset prepared_id_ = LogOffset();
296296
std::shared_ptr<Log> logs_;
297+
int binlog_fsync_counter_ = 0;
297298
};
298299

299300
#endif // INCLUDE_PIKA_CONSENSUS_H_

include/pika_define.h

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -354,14 +354,6 @@ const int64_t kPoolSize = 1073741824;
354354
const std::string kBinlogPrefix = "write2file";
355355
const size_t kBinlogPrefixLen = 10;
356356

357-
/*
358-
* PIKA_BATCH_MAGIC: Core identifier for binlog batch processing.
359-
* - Master: Prefixes batched binlogs with this magic in SendBinlog
360-
* - Slave: Detects this magic in HandleBGWorkerWriteBinlog
361-
* to switch between batch and single-binlog parsing modes.
362-
*/
363-
const uint32_t PIKA_BATCH_MAGIC = 0x42544348; // "BTCH" in ASCII
364-
365357
const std::string kPikaMeta = "meta";
366358
const std::string kManifest = "manifest";
367359
const std::string kContext = "context";

include/pika_repl_bgworker.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ class PikaReplBgWorker {
4242
net::RedisParser redis_parser_;
4343
std::string ip_port_;
4444
std::string db_name_;
45+
int binlog_fsync_counter_ = 0;
4546

4647
private:
4748
net::BGThread bg_thread_;

include/pika_rm.h

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -245,8 +245,15 @@ class PikaReplicaManager {
245245

246246
pstd::Mutex write_queue_mu_;
247247

248+
// db_name -> a queue of write task
249+
using DBWriteTaskQueue = std::map<std::string, std::queue<WriteTask>>;
250+
// ip:port -> a map of DBWriteTaskQueue
251+
using SlaveWriteTaskQueue = std::map<std::string, DBWriteTaskQueue>;
252+
248253
// every host owns a queue, the key is "ip + port"
249-
std::unordered_map<std::string, std::unordered_map<std::string, std::queue<std::pair<WriteTask, uint64_t>>>> write_queues_;
254+
SlaveWriteTaskQueue write_queues_;
255+
256+
// client for replica
250257
std::unique_ptr<PikaReplClient> pika_repl_client_;
251258
std::unique_ptr<PikaReplServer> pika_repl_server_;
252259
};

src/pika_client_conn.cc

Lines changed: 58 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
#include "net/src/dispatch_thread.h"
2020
#include "net/src/worker_thread.h"
2121
#include "src/pstd/include/scope_record_lock.h"
22+
#include <future>
2223

2324
#include "rocksdb/perf_context.h"
2425
#include "rocksdb/iostats_context.h"
@@ -357,14 +358,64 @@ void PikaClientConn::DoBackgroundTask(void* arg) {
357358
}
358359

359360
void PikaClientConn::BatchExecRedisCmd(const std::vector<net::RedisCmdArgsType>& argvs, bool cache_miss_in_rtc) {
360-
resp_num.store(static_cast<int32_t>(argvs.size()));
361-
for (const auto& argv : argvs) {
362-
std::shared_ptr<std::string> resp_ptr = std::make_shared<std::string>();
363-
resp_array.push_back(resp_ptr);
364-
ExecRedisCmd(argv, resp_ptr, cache_miss_in_rtc);
361+
if (argvs.empty()) {
362+
return;
365363
}
366-
time_stat_->process_done_ts_ = pstd::NowMicros();
367-
TryWriteResp();
364+
if (argvs.size() > 1) {
365+
auto task = std::make_shared<ParallelTask>();
366+
task->total_tasks = argvs.size();
367+
task->resps.resize(argvs.size());
368+
369+
for (size_t i = 0; i < argvs.size(); ++i) {
370+
task->resps[i] = std::make_shared<std::string>();
371+
std::promise<void> promise;
372+
task->futures.push_back(promise.get_future());
373+
374+
g_pika_server->ScheduleClientPool(&PikaClientConn::ParallelExecRedisCmd, new std::tuple(shared_from_this(), argvs[i], task, i, std::move(promise), cache_miss_in_rtc), false, false);
375+
}
376+
377+
for (auto& f : task->futures) {
378+
f.get();
379+
}
380+
381+
for (const auto& resp : task->resps) {
382+
WriteResp(*resp);
383+
}
384+
if (write_completed_cb_) {
385+
write_completed_cb_();
386+
write_completed_cb_ = nullptr;
387+
}
388+
NotifyEpoll(true);
389+
} else {
390+
resp_num.store(static_cast<int32_t>(argvs.size()));
391+
for (const auto& argv : argvs) {
392+
std::shared_ptr<std::string> resp_ptr = std::make_shared<std::string>();
393+
resp_array.push_back(resp_ptr);
394+
ExecRedisCmd(argv, resp_ptr, cache_miss_in_rtc);
395+
}
396+
time_stat_->process_done_ts_ = pstd::NowMicros();
397+
TryWriteResp();
398+
}
399+
}
400+
401+
void PikaClientConn::ParallelExecRedisCmd(void* arg) {
402+
auto* task_args = static_cast<std::tuple<std::shared_ptr<PikaClientConn>, net::RedisCmdArgsType, std::shared_ptr<ParallelTask>, size_t, std::promise<void>, bool>*>(arg);
403+
auto [conn, argv, task, index, promise, cache_miss_in_rtc] = std::move(*task_args);
404+
delete task_args;
405+
406+
std::string opt = argv[0];
407+
pstd::StringToLower(opt);
408+
if (opt == kClusterPrefix) {
409+
if (argv.size() >= 2) {
410+
opt += argv[1];
411+
pstd::StringToLower(opt);
412+
}
413+
}
414+
415+
std::shared_ptr<Cmd> cmd_ptr = conn->DoCmd(argv, opt, task->resps[index], cache_miss_in_rtc);
416+
*(task->resps[index]) = std::move(cmd_ptr->res().message());
417+
418+
promise.set_value();
368419
}
369420

370421
bool PikaClientConn::ReadCmdInCache(const net::RedisCmdArgsType& argv, const std::string& opt) {

src/pika_conf.cc

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -528,6 +528,10 @@ int PikaConf::Load() {
528528
if (binlog_file_size_ < 1024 || static_cast<int64_t>(binlog_file_size_) > (1024LL * 1024 * 1024)) {
529529
binlog_file_size_ = 100 * 1024 * 1024; // 100M
530530
}
531+
GetConfInt("binlog-fsync-interval", &binlog_fsync_interval_);
532+
if (binlog_fsync_interval_ < 0) {
533+
binlog_fsync_interval_ = 0;
534+
}
531535
GetConfStr("pidfile", &pidfile_);
532536

533537
// db sync
@@ -707,11 +711,6 @@ int PikaConf::Load() {
707711
rsync_timeout_ms_.store(tmp_rsync_timeout_ms);
708712
}
709713

710-
consensus_batch_size_ = 100;
711-
consensus_timeout_ = 1000; // 1s
712-
713-
GetConfInt("consensus-batch-size", &consensus_batch_size_);
714-
GetConfInt("consensus-timeout", &consensus_timeout_);
715714
return ret;
716715
}
717716

@@ -918,6 +917,4 @@ std::vector<rocksdb::CompressionType> PikaConf::compression_per_level() {
918917
return types;
919918
}
920919

921-
int PikaConf::consensus_batch_size() const { return consensus_batch_size_; }
922-
923-
int PikaConf::consensus_timeout() const { return consensus_timeout_; }
920+
int PikaConf::binlog_fsync_interval() const { return binlog_fsync_interval_; }

src/pika_consensus.cc

Lines changed: 35 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -842,11 +842,12 @@ Status ConsensusCoordinator::PersistAppendBinlog(const std::shared_ptr<Cmd>& cmd
842842
return s;
843843
}
844844

845-
// Force flush to ensure data persistence on master
846-
s = stable_logger_->Logger()->Sync();
847-
if (!s.ok()) {
848-
LOG(WARNING) << "Failed to sync binlog to disk on master: " << s.ToString();
849-
return s;
845+
if (++binlog_fsync_counter_ % g_pika_conf->binlog_fsync_interval() == 0) {
846+
s = stable_logger_->Logger()->Sync();
847+
if (!s.ok()) {
848+
LOG(WARNING) << "Failed to sync binlog to disk on master: " << s.ToString();
849+
return s;
850+
}
850851
}
851852

852853
// If successful, append the log entry to the logs
@@ -998,33 +999,39 @@ Status ConsensusCoordinator::ApplyBinlog(const std::vector<Log::LogItem>& logs)
998999

9991000
Status ConsensusCoordinator::SendBinlog(std::shared_ptr<SlaveNode> slave_ptr, std::string db_name) {
10001001
std::vector<WriteTask> tasks;
1001-
LogOffset prev_offset = slave_ptr->sent_offset;
1002-
LOG(INFO) << "SendBinlog: logs_->LastOffset()=" << logs_->LastOffset().ToString()
1003-
<< ", slave_ptr->sent_offset=" << slave_ptr->sent_offset.ToString();
1004-
// Check if there are new log entries that need to be sent to the slave
1005-
if (logs_->LastOffset() >= slave_ptr->acked_offset) {
1006-
LOG(INFO) << "SendBinlog: logs_->Size()=" << logs_->Size();
1007-
// Find the index of the log entry corresponding to the slave's acknowledged offset
1008-
int index = logs_->FindOffset(slave_ptr->acked_offset);
1009-
LOG(INFO) << "SendBinlog: index=" << index;
1010-
if (index < logs_->Size()) {
1011-
for (int i = index; i < logs_->Size(); ++i) {
1012-
const Log::LogItem& item = logs_->At(i);
1013-
1014-
slave_ptr->SetLastSendTime(pstd::NowMicros());
1015-
1016-
RmNode rm_node(slave_ptr->Ip(), slave_ptr->Port(), slave_ptr->DBName(), slave_ptr->SessionId());
1017-
WriteTask task(rm_node, BinlogChip(item.offset, item.binlog_), prev_offset, GetCommittedId());
1018-
tasks.emplace_back(std::move(task));
1019-
1020-
prev_offset = item.offset;
1021-
slave_ptr->sent_offset = item.offset;
1022-
}
1002+
if (!g_pika_server->IsConsistency()) {
1003+
return Status::OK();
1004+
}
1005+
std::string ip = slave_ptr->Ip();
1006+
int port = slave_ptr->Port();
1007+
int32_t session_id = slave_ptr->SessionId();
1008+
1009+
LogOffset last_sent = slave_ptr->sent_offset;
1010+
if (logs_->LastOffset() > last_sent) {
1011+
int send_start_index = logs_->FindOffset(last_sent);
1012+
if (send_start_index < 0) {
1013+
LOG(WARNING) << "Binlog offset not found, maybe purged. last_sent: " << last_sent.ToString();
1014+
return Status::Corruption("cant find the file_num");
1015+
}
1016+
1017+
if (send_start_index < logs_->Size() && logs_->At(send_start_index).offset == last_sent) {
1018+
send_start_index++;
1019+
}
1020+
1021+
if (send_start_index < logs_->Size()) {
1022+
LogOffset prev_offset = send_start_index > 0 ? logs_->At(send_start_index - 1).offset : LogOffset();
1023+
const auto& item = logs_->At(send_start_index);
1024+
RmNode rm_node(ip, port, db_name, session_id);
1025+
WriteTask task(rm_node, BinlogChip(item.offset, item.binlog_), prev_offset, GetCommittedId());
1026+
tasks.emplace_back(std::move(task));
10231027
}
10241028
}
10251029

10261030
if (!tasks.empty()) {
1027-
g_pika_rm->ProduceWriteQueue(slave_ptr->Ip(), slave_ptr->Port(), db_name, tasks);
1031+
g_pika_rm->ProduceWriteQueue(ip, port, db_name, tasks);
1032+
slave_ptr->sent_offset = tasks.back().binlog_chip_.offset_;
1033+
LOG(INFO) << "SendBinlog tasks to queue, slave: " << ip << ":" << port << " tasks: " << tasks.size()
1034+
<< " new sent_offset: " << slave_ptr->sent_offset.ToString();
10281035
}
10291036
return Status::OK();
10301037
}

0 commit comments

Comments
 (0)