Skip to content

Commit 274b1b7

Browse files
Mixficsolwuxianrong
andauthored
feat: Add p99 and slow command count (OpenAtomFoundation#3042)
* Added time statistics for P99 and slow commands --------- Co-authored-by: wuxianrong <[email protected]>
1 parent 6c271c7 commit 274b1b7

13 files changed

Lines changed: 289 additions & 5 deletions

File tree

CMakeLists.txt

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -630,6 +630,24 @@ ExternalProject_Add(rocksdb
630630
make -j${CPU_CORE}
631631
)
632632

633+
ExternalProject_Add(prometheus_cpp
634+
URL
635+
https://github.com/jupp0r/prometheus-cpp/releases/download/v1.2.4/prometheus-cpp-with-submodules.tar.gz
636+
CMAKE_ARGS
637+
-DBUILD_SHARED_LIBS=OFF
638+
-DENABLE_PUSH=OFF
639+
-DENABLE_COMPRESSION=OFF
640+
-DCMAKE_INSTALL_LIBDIR=${INSTALL_LIBDIR}
641+
-DCMAKE_INSTALL_INCLUDEDIR=${INSTALL_INCLUDEDIR}
642+
BUILD_ALWAYS
643+
1
644+
BUILD_COMMAND
645+
make -j${CPU_CORE}
646+
)
647+
648+
set(PROMETHEUS_CPP_CORE_LIB ${INSTALL_LIBDIR}/libprometheus-cpp-core.a)
649+
set(PROMETHEUS_CPP_PULL_LIB ${INSTALL_LIBDIR}/libprometheus-cpp-pull.a)
650+
633651
ExternalProject_Add(rediscache
634652
URL
635653
https://github.com/pikiwidb/rediscache/archive/refs/tags/v1.0.7.tar.gz
@@ -822,6 +840,8 @@ target_link_libraries(${PROJECT_NAME}
822840
${LIB_PROTOBUF}
823841
${LIB_GFLAGS}
824842
${LIB_FMT}
843+
${PROMETHEUS_CPP_PULL_LIB}
844+
${PROMETHEUS_CPP_CORE_LIB}
825845
libsnappy.a
826846
libzstd.a
827847
liblz4.a

include/pika_admin.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -267,6 +267,8 @@ class InfoCmd : public Cmd {
267267
kInfoAll,
268268
kInfoDebug,
269269
kInfoCommandStats,
270+
kInfoSlowCommand,
271+
kInfoCommandP99,
270272
kInfoCache
271273
};
272274
InfoCmd(const std::string& name, int arity, uint32_t flag) : Cmd(name, arity, flag) {}
@@ -294,6 +296,8 @@ class InfoCmd : public Cmd {
294296
const static std::string kRocksDBSection;
295297
const static std::string kDebugSection;
296298
const static std::string kCommandStatsSection;
299+
const static std::string kCommandP99Section;
300+
const static std::string kSlowCommandSection;
297301
const static std::string kCacheSection;
298302

299303
void DoInitial() override;
@@ -314,6 +318,8 @@ class InfoCmd : public Cmd {
314318
void InfoRocksDB(std::string& info);
315319
void InfoDebug(std::string& info);
316320
void InfoCommandStats(std::string& info);
321+
void InfoCommandP99(std::string& info);
322+
void InfoSlowCommand(std::string& info);
317323
void InfoCache(std::string& info, std::shared_ptr<DB> db);
318324

319325
std::string CacheStatusToString(int status);

include/pika_client_conn.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -130,7 +130,7 @@ class PikaClientConn : public net::RedisConn {
130130
std::shared_ptr<Cmd> DoCmd(const PikaCmdArgsType& argv, const std::string& opt,
131131
const std::shared_ptr<std::string>& resp_ptr, bool cache_miss_in_rtc);
132132

133-
void ProcessSlowlog(const PikaCmdArgsType& argv, std::shared_ptr<Cmd> c_ptr);
133+
void ProcessSlowlog(const PikaCmdArgsType& argv, std::shared_ptr<Cmd> c_ptr, const std::string& opt);
134134
void ProcessMonitor(const PikaCmdArgsType& argv);
135135

136136
void ExecRedisCmd(const PikaCmdArgsType& argv, std::shared_ptr<std::string>& resp_ptr, bool cache_miss_in_rtc);

include/pika_cmd_table_manager.h

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,17 @@
88

99
#include <shared_mutex>
1010
#include <thread>
11+
#include <prometheus/exposer.h>
12+
#include <prometheus/registry.h>
13+
#include <prometheus/counter.h>
14+
#include <prometheus/histogram.h>
1115

1216
#include "include/acl.h"
1317
#include "include/pika_command.h"
1418
#include "include/pika_data_distribution.h"
1519

20+
using namespace prometheus;
21+
1622
struct CommandStatistics {
1723
CommandStatistics() = default;
1824
CommandStatistics(const CommandStatistics& other) {
@@ -30,6 +36,7 @@ class PikaCmdTableManager {
3036
PikaCmdTableManager();
3137
virtual ~PikaCmdTableManager() = default;
3238
void InitCmdTable(void);
39+
void InitHistograms();
3340
void RenameCommand(const std::string before, const std::string after);
3441
std::shared_ptr<Cmd> GetCmd(const std::string& opt);
3542
bool CmdExist(const std::string& cmd) const;
@@ -42,6 +49,11 @@ class PikaCmdTableManager {
4249
* Info Commandstats used
4350
*/
4451
std::unordered_map<std::string, CommandStatistics>* GetCommandStatMap();
52+
std::unordered_map<std::string, CommandStatistics> GetSlowCommandCount();
53+
void UpdateSlowCommandCount(const std::string& opt);
54+
void ResetCommandCount();
55+
prometheus::Histogram& GetHistogram(const std::string& opt);
56+
prometheus::Family<prometheus::Histogram>* GetHistograms();
4557

4658
private:
4759
std::shared_ptr<Cmd> NewCommand(const std::string& opt);
@@ -60,5 +72,13 @@ class PikaCmdTableManager {
6072
* Info Commandstats used
6173
*/
6274
std::unordered_map<std::string, CommandStatistics> cmdstat_map_;
75+
std::unordered_map<std::string, CommandStatistics> slow_command_count_;
76+
std::thread reset_thread_;
77+
std::mutex command_mutex_;
78+
std::shared_mutex histograms_mutex_;
79+
std::shared_mutex slow_command_mutex_;
80+
std::shared_ptr<prometheus::Registry> prometheus_registry_;
81+
prometheus::Family<prometheus::Histogram>* histogram_family_;
82+
std::unordered_map<std::string, prometheus::Histogram*> histograms_;
6383
};
6484
#endif

src/pika_admin.cc

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -882,6 +882,8 @@ const std::string InfoCmd::kKeyspaceSection = "keyspace";
882882
const std::string InfoCmd::kDataSection = "data";
883883
const std::string InfoCmd::kRocksDBSection = "rocksdb";
884884
const std::string InfoCmd::kDebugSection = "debug";
885+
const std::string InfoCmd::kCommandP99Section = "commandp99";
886+
const std::string InfoCmd::kSlowCommandSection = "slowcommand";
885887
const std::string InfoCmd::kCommandStatsSection = "commandstats";
886888
const std::string InfoCmd::kCacheSection = "cache";
887889

@@ -967,6 +969,10 @@ void InfoCmd::DoInitial() {
967969
info_section_ = kInfoDebug;
968970
} else if (strcasecmp(argv_[1].data(), kCommandStatsSection.data()) == 0) {
969971
info_section_ = kInfoCommandStats;
972+
} else if (strcasecmp(argv_[1].data(), kCommandP99Section.data()) == 0) {
973+
info_section_ = kInfoCommandP99;
974+
} else if (strcasecmp(argv_[1].data(), kSlowCommandSection.data()) == 0) {
975+
info_section_ = kInfoSlowCommand;
970976
} else if (strcasecmp(argv_[1].data(), kCacheSection.data()) == 0) {
971977
info_section_ = kInfoCache;
972978
} else {
@@ -1008,6 +1014,10 @@ void InfoCmd::Do() {
10081014
info.append("\r\n");
10091015
InfoCommandStats(info);
10101016
info.append("\r\n");
1017+
InfoCommandP99(info);
1018+
info.append("\r\n");
1019+
InfoSlowCommand(info);
1020+
info.append("\r\n");
10111021
InfoCache(info, db_);
10121022
info.append("\r\n");
10131023
InfoCPU(info);
@@ -1051,6 +1061,12 @@ void InfoCmd::Do() {
10511061
case kInfoCommandStats:
10521062
InfoCommandStats(info);
10531063
break;
1064+
case kInfoCommandP99:
1065+
InfoCommandP99(info);
1066+
break;
1067+
case kInfoSlowCommand:
1068+
InfoSlowCommand(info);
1069+
break;
10541070
case kInfoCache:
10551071
InfoCache(info, db_);
10561072
break;
@@ -1481,6 +1497,73 @@ void InfoCmd::InfoDebug(std::string& info) {
14811497
g_pika_server->ServerStatus(&info);
14821498
}
14831499

1500+
void InfoCmd::InfoCommandP99(std::string& info) {
1501+
std::stringstream tmp_stream;
1502+
tmp_stream.precision(2);
1503+
tmp_stream.setf(std::ios::fixed);
1504+
tmp_stream << "# Commands P99" << "\r\n";
1505+
auto histogram_family = g_pika_cmd_table_manager->GetHistograms();
1506+
1507+
for (const auto& metric_family : histogram_family->Collect()) {
1508+
for (const auto& metric : metric_family.metric) {
1509+
std::string command_name;
1510+
1511+
for (const auto& label : metric.label) {
1512+
if (label.name == "command") {
1513+
command_name = label.value;
1514+
break;
1515+
}
1516+
}
1517+
1518+
double total_count = metric.histogram.sample_count;
1519+
1520+
if (command_name.empty()) {
1521+
tmp_stream << "Command: UNKNOWN\r\n";
1522+
} else {
1523+
tmp_stream << "Command: " << command_name << "\r\n";
1524+
}
1525+
1526+
double tp99_threshold = total_count * 0.99;
1527+
double tp999_threshold = total_count * 0.999;
1528+
double tp9999_threshold = total_count * 0.9999;
1529+
double tp99 = 0, tp999 = 0, tp9999 = 0;
1530+
1531+
for (const auto& bucket : metric.histogram.bucket) {
1532+
if (bucket.cumulative_count >= tp99_threshold && tp99 == 0) {
1533+
tp99 = bucket.upper_bound;
1534+
}
1535+
if (bucket.cumulative_count >= tp999_threshold && tp999 == 0) {
1536+
tp999 = bucket.upper_bound;
1537+
}
1538+
if (bucket.cumulative_count >= tp9999_threshold && tp9999 == 0) {
1539+
tp9999 = bucket.upper_bound;
1540+
break;
1541+
}
1542+
}
1543+
tmp_stream << "TP99 ms: " << tp99 << "\r\n";
1544+
tmp_stream << "TP999 ms: " << tp999 << "\r\n";
1545+
tmp_stream << "TP9999 ms: " << tp9999 << "\r\n";
1546+
tmp_stream << "----------------------\r\n";
1547+
}
1548+
}
1549+
1550+
info.append(tmp_stream.str());
1551+
}
1552+
1553+
void InfoCmd::InfoSlowCommand(std::string& info) {
1554+
std::stringstream tmp_stream;
1555+
tmp_stream.precision(2);
1556+
tmp_stream.setf(std::ios::fixed);
1557+
auto stats = g_pika_cmd_table_manager->GetSlowCommandCount();
1558+
tmp_stream << "# SlowCommand Count" << "\r\n";
1559+
for (auto iter : stats) {
1560+
if (iter.second.cmd_count != 0) {
1561+
tmp_stream << iter.first << ":slow_count=" << iter.second.cmd_count << "\r\n";
1562+
}
1563+
}
1564+
info.append(tmp_stream.str());
1565+
}
1566+
14841567
void InfoCmd::InfoCommandStats(std::string& info) {
14851568
std::stringstream tmp_stream;
14861569
tmp_stream.precision(2);

src/pika_client_conn.cc

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,10 @@
77
#include <glog/logging.h>
88
#include <utility>
99
#include <vector>
10+
#include <prometheus/exposer.h>
11+
#include <prometheus/registry.h>
12+
#include <prometheus/counter.h>
13+
#include <prometheus/histogram.h>
1014

1115
#include "include/pika_admin.h"
1216
#include "include/pika_client_conn.h"
@@ -221,19 +225,21 @@ std::shared_ptr<Cmd> PikaClientConn::DoCmd(const PikaCmdArgsType& argv, const st
221225
c_ptr->Execute();
222226

223227
time_stat_->process_done_ts_ = pstd::NowMicros();
228+
g_pika_cmd_table_manager->GetHistogram(opt).Observe(time_stat_->total_time() / 1000);
224229
auto cmdstat_map = g_pika_cmd_table_manager->GetCommandStatMap();
225230
(*cmdstat_map)[opt].cmd_count.fetch_add(1);
226231
(*cmdstat_map)[opt].cmd_time_consuming.fetch_add(time_stat_->total_time());
227232

228233
if (g_pika_conf->slowlog_slower_than() >= 0) {
229-
ProcessSlowlog(argv, c_ptr);
234+
ProcessSlowlog(argv, c_ptr, opt);
230235
}
231236

232237
return c_ptr;
233238
}
234239

235-
void PikaClientConn::ProcessSlowlog(const PikaCmdArgsType& argv, std::shared_ptr<Cmd> c_ptr) {
240+
void PikaClientConn::ProcessSlowlog(const PikaCmdArgsType& argv, std::shared_ptr<Cmd> c_ptr, const std::string& opt) {
236241
if (time_stat_->total_time() > g_pika_conf->slowlog_slower_than()) {
242+
g_pika_cmd_table_manager->UpdateSlowCommandCount(opt);
237243
g_pika_server->SlowlogPushEntry(argv, time_stat_->start_ts() / 1000000, time_stat_->total_time());
238244
if (g_pika_conf->slowlog_write_errorlog()) {
239245
bool trim = false;

src/pika_cmd_table_manager.cc

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,31 @@
1414

1515
extern std::unique_ptr<PikaConf> g_pika_conf;
1616

17+
void PikaCmdTableManager::ResetCommandCount() {
18+
while (true) {
19+
std::this_thread::sleep_for(std::chrono::minutes(1));
20+
{
21+
std::lock_guard<std::mutex> lock(command_mutex_);
22+
slow_command_count_.clear();
23+
InitHistograms();
24+
}
25+
}
26+
}
27+
28+
void PikaCmdTableManager::InitHistograms() {
29+
histograms_.clear();
30+
prometheus_registry_ = std::make_shared<prometheus::Registry>();
31+
histogram_family_ = &prometheus::BuildHistogram()
32+
.Name("pika_command_duration_seconds")
33+
.Help("Execution time of Pika commands in seconds")
34+
.Register(*prometheus_registry_);
35+
}
36+
1737
PikaCmdTableManager::PikaCmdTableManager() {
1838
cmds_ = std::make_unique<CmdTable>();
1939
cmds_->reserve(300);
40+
InitHistograms();
41+
reset_thread_ = std::thread(&PikaCmdTableManager::ResetCommandCount, this);
2042
}
2143

2244
void PikaCmdTableManager::InitCmdTable(void) {
@@ -63,6 +85,49 @@ void PikaCmdTableManager::RenameCommand(const std::string before, const std::str
6385
}
6486
}
6587

88+
prometheus::Histogram& PikaCmdTableManager::GetHistogram(const std::string& opt) {
89+
{
90+
std::shared_lock<std::shared_mutex> read_lock(histograms_mutex_);
91+
auto it = histograms_.find(opt);
92+
if (it != histograms_.end()) {
93+
return *(it->second);
94+
}
95+
}
96+
std::unique_lock<std::shared_mutex> write_lock(histograms_mutex_);
97+
auto& new_histogram = histogram_family_->Add(
98+
{{"command", opt}},
99+
prometheus::Histogram::BucketBoundaries{0.5, 1, 2, 3, 5, 7, 10, 15, 20, 30, 40, 50, 65, 75, 85, 100, 125, 140, 150, 160, 175, 185, 200, 300, 400, 500, 750, 1000, 2000, 5000, 10000}
100+
);
101+
histograms_[opt] = &new_histogram;
102+
return new_histogram;
103+
}
104+
105+
prometheus::Family<prometheus::Histogram>* PikaCmdTableManager::GetHistograms() {
106+
return histogram_family_;
107+
}
108+
109+
void PikaCmdTableManager::UpdateSlowCommandCount(const std::string& opt) {
110+
{
111+
std::shared_lock<std::shared_mutex> read_lock(slow_command_mutex_);
112+
if (slow_command_count_.find(opt) != slow_command_count_.end()) {
113+
slow_command_count_[opt].cmd_count.fetch_add(1);
114+
return;
115+
}
116+
}
117+
118+
{
119+
std::unique_lock<std::shared_mutex> write_lock(slow_command_mutex_);
120+
slow_command_count_[opt];
121+
}
122+
123+
slow_command_count_[opt].cmd_count.fetch_add(1);
124+
}
125+
126+
std::unordered_map<std::string, CommandStatistics> PikaCmdTableManager::GetSlowCommandCount() {
127+
std::shared_lock<std::shared_mutex> lock(slow_command_mutex_);
128+
return slow_command_count_;
129+
}
130+
66131
std::unordered_map<std::string, CommandStatistics>* PikaCmdTableManager::GetCommandStatMap() {
67132
return &cmdstat_map_;
68133
}

tools/pika_exporter/config/info.toml

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,9 @@ stats = true
55
cpu = true
66
replication = true
77
keyspace = true
8-
cache = true
9-
8+
cache = false
9+
commandp99 = true
10+
slowcommand = true
1011
execcount = false
1112
commandstats = false
1213
rocksdb = false

tools/pika_exporter/exporter/client.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,8 @@ func (c *client) InfoNoneCommandList() (string, error) {
127127
"COMMAND_EXEC_COUNT": InfoConf.Execcount,
128128
"COMMANDSTATS": InfoConf.Commandstats,
129129
"ROCKSDB": InfoConf.Rocksdb,
130+
"CommandP99": InfoConf.CommandP99,
131+
"SlowCommand": InfoConf.SlowCommand,
130132
}
131133
for section, flag := range sectionsMap {
132134
if flag {
@@ -155,6 +157,8 @@ func (c *client) InfoAllCommandList() (string, error) {
155157
"COMMAND_EXEC_COUNT": InfoConf.Execcount,
156158
"COMMANDSTATS": InfoConf.Commandstats,
157159
"ROCKSDB": InfoConf.Rocksdb,
160+
"CommandP99": InfoConf.CommandP99,
161+
"SlowCommand": InfoConf.SlowCommand,
158162
}
159163
for section, flag := range sectionsMap {
160164
if flag {

tools/pika_exporter/exporter/conf.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@ type InfoConfig struct {
2121
Keyspace bool `toml:"keyspace"`
2222
Execcount bool `toml:"execcount"`
2323
Commandstats bool `toml:"commandstats"`
24+
CommandP99 bool `toml:"commandp99"`
25+
SlowCommand bool `toml:"slowcommand"`
2426
Rocksdb bool `toml:"rocksdb"`
2527
Cache bool `toml:"cache"`
2628

0 commit comments

Comments
 (0)