Skip to content

Commit 33ef7e2

Browse files
committed
feat:add big_key
1 parent fdc4e5d commit 33ef7e2

2 files changed

Lines changed: 95 additions & 46 deletions

File tree

tools/bigkey_analyzer/README.md

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,10 @@
11
# Big Key Analyzer
22

3-
大key分析工具,用于分析PikiwiDB实例中的大key情况。本工具适用于unstable分支新的存储结构,支持单实例和多DB实例(db/0, db/1, db/2...)。
3+
大key分析工具,用于分析PikiwiDB实例中的大key情况。本工具适用于unstable分支新的存储结构,支持多种目录结构:
4+
- 单实例 RocksDB
5+
- 多DB实例 (db/0, db/1, db/2...)
6+
- 直接分区目录 (0/, 1/, 2/...)
7+
- **新增**: dbN/M 嵌套结构,即 `<db_path>/dbN/M` 三层目录 (db0/0, db0/1, db1/0...)
48

59
## 功能特点
610

@@ -92,10 +96,10 @@ Options:
9296

9397
```
9498
===== Big Key Analysis =====
95-
Type Size Key TTL
96-
hash 1048576 user:profile:1001 -1
97-
zset 524288 ranking:global 3600
98-
string 262144 config:settings -1
99+
DB Partition Type Size Key TTL
100+
db0 1 hash 1048576 user:profile:1001 -1
101+
db0 2 zset 524288 ranking:global 3600
102+
db1 0 string 262144 config:settings -1
99103
...
100104
101105
===== Key Prefix Statistics =====

tools/bigkey_analyzer/bigkey_analyzer.cc

Lines changed: 86 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -72,20 +72,26 @@ struct KeyInfo {
7272
std::string key;
7373
int64_t size;
7474
int64_t ttl;
75+
std::string db_name;
76+
std::string partition;
7577

76-
KeyInfo() : type(""), key(""), size(0), ttl(-1) {}
78+
KeyInfo() : type(""), key(""), size(0), ttl(-1), db_name(""), partition("") {}
7779

78-
KeyInfo(const std::string& t, const std::string& k, int64_t s, int64_t tt)
79-
: type(t), key(k), size(s), ttl(tt) {}
80+
KeyInfo(const std::string& t, const std::string& k, int64_t s, int64_t tt,
81+
const std::string& db = "", const std::string& part = "")
82+
: type(t), key(k), size(s), ttl(tt), db_name(db), partition(part) {}
8083

81-
KeyInfo(std::string&& t, std::string&& k, int64_t s, int64_t tt)
82-
: type(std::move(t)), key(std::move(k)), size(s), ttl(tt) {}
84+
KeyInfo(std::string&& t, std::string&& k, int64_t s, int64_t tt,
85+
const std::string& db = "", const std::string& part = "")
86+
: type(std::move(t)), key(std::move(k)), size(s), ttl(tt), db_name(db), partition(part) {}
8387

84-
KeyInfo(const char* t, const std::string& k, int64_t s, int64_t tt)
85-
: type(t), key(k), size(s), ttl(tt) {}
88+
KeyInfo(const char* t, const std::string& k, int64_t s, int64_t tt,
89+
const std::string& db = "", const std::string& part = "")
90+
: type(t), key(k), size(s), ttl(tt), db_name(db), partition(part) {}
8691

87-
KeyInfo(const char* t, std::string&& k, int64_t s, int64_t tt)
88-
: type(t), key(std::move(k)), size(s), ttl(tt) {}
92+
KeyInfo(const char* t, std::string&& k, int64_t s, int64_t tt,
93+
const std::string& db = "", const std::string& part = "")
94+
: type(t), key(std::move(k)), size(s), ttl(tt), db_name(db), partition(part) {}
8995

9096
bool operator<(const KeyInfo& other) const {
9197
return size > other.size; // Sort in descending order by size
@@ -188,7 +194,8 @@ bool ParseArgs(int argc, char* argv[], Config& config) {
188194

189195
// Analyze strings in MetaCF
190196
void AnalyzeStrings(rocksdb::DB* db, rocksdb::ColumnFamilyHandle* meta_handle,
191-
std::vector<KeyInfo>& key_infos, const Config& config) {
197+
std::vector<KeyInfo>& key_infos, const Config& config,
198+
const std::string& db_name, const std::string& partition) {
192199
std::cout << "Analyzing strings..." << std::endl;
193200

194201
int64_t curtime;
@@ -236,7 +243,7 @@ void AnalyzeStrings(rocksdb::DB* db, rocksdb::ColumnFamilyHandle* meta_handle,
236243
if (size >= config.min_size) {
237244
std::string display_key = ReplaceAll(user_key, "\n", "\\n");
238245
display_key = ReplaceAll(display_key, " ", "\\x20");
239-
key_infos.emplace_back("string", std::move(display_key), size, ttl);
246+
key_infos.emplace_back("string", std::move(display_key), size, ttl, db_name, partition);
240247
}
241248
}
242249

@@ -248,7 +255,8 @@ void AnalyzeStrings(rocksdb::DB* db, rocksdb::ColumnFamilyHandle* meta_handle,
248255
// Analyze hashes
249256
void AnalyzeHashes(rocksdb::DB* db, rocksdb::ColumnFamilyHandle* meta_handle,
250257
rocksdb::ColumnFamilyHandle* data_handle,
251-
std::vector<KeyInfo>& key_infos, const Config& config) {
258+
std::vector<KeyInfo>& key_infos, const Config& config,
259+
const std::string& db_name, const std::string& partition) {
252260
std::cout << "Analyzing hashes..." << std::endl;
253261

254262
int64_t curtime;
@@ -323,15 +331,16 @@ void AnalyzeHashes(rocksdb::DB* db, rocksdb::ColumnFamilyHandle* meta_handle,
323331
std::string user_key = DecodeUserKey(entry.first);
324332
std::string display_key = ReplaceAll(user_key, "\n", "\\n");
325333
display_key = ReplaceAll(display_key, " ", "\\x20");
326-
key_infos.emplace_back("hash", std::move(display_key), size, std::get<1>(entry.second));
334+
key_infos.emplace_back("hash", std::move(display_key), size, std::get<1>(entry.second), db_name, partition);
327335
}
328336
}
329337
}
330338

331339
// Analyze sets
332340
void AnalyzeSets(rocksdb::DB* db, rocksdb::ColumnFamilyHandle* meta_handle,
333341
rocksdb::ColumnFamilyHandle* data_handle,
334-
std::vector<KeyInfo>& key_infos, const Config& config) {
342+
std::vector<KeyInfo>& key_infos, const Config& config,
343+
const std::string& db_name, const std::string& partition) {
335344
std::cout << "Analyzing sets..." << std::endl;
336345

337346
int64_t curtime;
@@ -398,7 +407,7 @@ void AnalyzeSets(rocksdb::DB* db, rocksdb::ColumnFamilyHandle* meta_handle,
398407
std::string user_key = DecodeUserKey(entry.first);
399408
std::string display_key = ReplaceAll(user_key, "\n", "\\n");
400409
display_key = ReplaceAll(display_key, " ", "\\x20");
401-
key_infos.emplace_back("set", std::move(display_key), size, std::get<1>(entry.second));
410+
key_infos.emplace_back("set", std::move(display_key), size, std::get<1>(entry.second), db_name, partition);
402411
}
403412
}
404413
}
@@ -407,7 +416,8 @@ void AnalyzeSets(rocksdb::DB* db, rocksdb::ColumnFamilyHandle* meta_handle,
407416
void AnalyzeZsets(rocksdb::DB* db, rocksdb::ColumnFamilyHandle* meta_handle,
408417
rocksdb::ColumnFamilyHandle* data_handle,
409418
rocksdb::ColumnFamilyHandle* score_handle,
410-
std::vector<KeyInfo>& key_infos, const Config& config) {
419+
std::vector<KeyInfo>& key_infos, const Config& config,
420+
const std::string& db_name, const std::string& partition) {
411421
std::cout << "Analyzing zsets..." << std::endl;
412422

413423
int64_t curtime;
@@ -497,15 +507,16 @@ void AnalyzeZsets(rocksdb::DB* db, rocksdb::ColumnFamilyHandle* meta_handle,
497507
std::string user_key = DecodeUserKey(entry.first);
498508
std::string display_key = ReplaceAll(user_key, "\n", "\\n");
499509
display_key = ReplaceAll(display_key, " ", "\\x20");
500-
key_infos.emplace_back("zset", std::move(display_key), size, std::get<1>(entry.second));
510+
key_infos.emplace_back("zset", std::move(display_key), size, std::get<1>(entry.second), db_name, partition);
501511
}
502512
}
503513
}
504514

505515
// Analyze lists
506516
void AnalyzeLists(rocksdb::DB* db, rocksdb::ColumnFamilyHandle* meta_handle,
507517
rocksdb::ColumnFamilyHandle* data_handle,
508-
std::vector<KeyInfo>& key_infos, const Config& config) {
518+
std::vector<KeyInfo>& key_infos, const Config& config,
519+
const std::string& db_name, const std::string& partition) {
509520
std::cout << "Analyzing lists..." << std::endl;
510521

511522
int64_t curtime;
@@ -574,7 +585,7 @@ void AnalyzeLists(rocksdb::DB* db, rocksdb::ColumnFamilyHandle* meta_handle,
574585
std::string user_key = DecodeUserKey(entry.first);
575586
std::string display_key = ReplaceAll(user_key, "\n", "\\n");
576587
display_key = ReplaceAll(display_key, " ", "\\x20");
577-
key_infos.emplace_back("list", std::move(display_key), size, std::get<1>(entry.second));
588+
key_infos.emplace_back("list", std::move(display_key), size, std::get<1>(entry.second), db_name, partition);
578589
}
579590
}
580591
}
@@ -620,7 +631,8 @@ void GeneratePrefixStats(const std::vector<KeyInfo>& key_infos, const std::strin
620631
}
621632

622633
// Analyze a single database instance
623-
void AnalyzeSingleDB(const std::string& db_path, std::vector<KeyInfo>& key_infos, const Config& config) {
634+
void AnalyzeSingleDB(const std::string& db_path, std::vector<KeyInfo>& key_infos, const Config& config,
635+
const std::string& db_name, const std::string& partition) {
624636
rocksdb::DBOptions db_options;
625637
db_options.create_if_missing = false;
626638

@@ -654,24 +666,24 @@ void AnalyzeSingleDB(const std::string& db_path, std::vector<KeyInfo>& key_infos
654666

655667
// Analyze each type
656668
if (config.type_filter == "all" || config.type_filter == "strings") {
657-
AnalyzeStrings(db, handles[storage::kMetaCF], key_infos, config);
669+
AnalyzeStrings(db, handles[storage::kMetaCF], key_infos, config, db_name, partition);
658670
}
659671

660672
if (config.type_filter == "all" || config.type_filter == "hashes") {
661-
AnalyzeHashes(db, handles[storage::kMetaCF], handles[storage::kHashesDataCF], key_infos, config);
673+
AnalyzeHashes(db, handles[storage::kMetaCF], handles[storage::kHashesDataCF], key_infos, config, db_name, partition);
662674
}
663675

664676
if (config.type_filter == "all" || config.type_filter == "sets") {
665-
AnalyzeSets(db, handles[storage::kMetaCF], handles[storage::kSetsDataCF], key_infos, config);
677+
AnalyzeSets(db, handles[storage::kMetaCF], handles[storage::kSetsDataCF], key_infos, config, db_name, partition);
666678
}
667679

668680
if (config.type_filter == "all" || config.type_filter == "zsets") {
669681
AnalyzeZsets(db, handles[storage::kMetaCF], handles[storage::kZsetsDataCF],
670-
handles[storage::kZsetsScoreCF], key_infos, config);
682+
handles[storage::kZsetsScoreCF], key_infos, config, db_name, partition);
671683
}
672684

673685
if (config.type_filter == "all" || config.type_filter == "lists") {
674-
AnalyzeLists(db, handles[storage::kMetaCF], handles[storage::kListsDataCF], key_infos, config);
686+
AnalyzeLists(db, handles[storage::kMetaCF], handles[storage::kListsDataCF], key_infos, config, db_name, partition);
675687
}
676688

677689
// Cleanup
@@ -702,33 +714,61 @@ int main(int argc, char *argv[]){
702714
}
703715

704716
// Check if this is a single DB or multiple DB instances
705-
std::vector<std::string> db_paths;
717+
std::vector<std::tuple<std::string, std::string, std::string>> db_paths; // (path, db_name, partition)
706718

707719
// First, check if db_path itself is a valid RocksDB
708720
std::string test_path = config.db_path;
709721
if (DirectoryExists(test_path + "/CURRENT")) {
710722
// This is a single database instance
711-
db_paths.push_back(test_path);
723+
db_paths.push_back(std::make_tuple(test_path, "", ""));
712724
std::cout << "Detected single database instance" << std::endl;
713725
} else {
714-
// Check if each subdirectory is a valid RocksDB (direct subdirectories like 0/, 1/, 2/)
715-
for (int db_index = 0; db_index < 1000; db_index++) { // 防止无限循环,设置上限
716-
std::string db_inst_path = config.db_path + "/" + std::to_string(db_index);
717-
if (DirectoryExists(db_inst_path) && DirectoryExists(db_inst_path + "/CURRENT")) {
718-
db_paths.push_back(db_inst_path);
719-
} else if (db_index > 0 && !DirectoryExists(db_inst_path)) {
720-
// 如果目录不存在且已找到至少一个DB,则认为已到达末尾
726+
// 尝试检测dbN/M/格式 (如 db0/1, db0/2, db1/0 等)
727+
bool found_dbn_format = false;
728+
for (int db_index = 0; db_index < 1000; db_index++) {
729+
std::string db_name = "db" + std::to_string(db_index);
730+
std::string db_dir = config.db_path + "/" + db_name;
731+
732+
if (DirectoryExists(db_dir)) {
733+
// 检查这个db下的所有分区子目录
734+
bool found_partitions = false;
735+
for (int partition = 0; partition < 1000; partition++) {
736+
std::string partition_path = db_dir + "/" + std::to_string(partition);
737+
if (DirectoryExists(partition_path) && DirectoryExists(partition_path + "/CURRENT")) {
738+
db_paths.push_back(std::make_tuple(partition_path, db_name, std::to_string(partition)));
739+
found_partitions = true;
740+
found_dbn_format = true;
741+
} else if (partition > 0 && !DirectoryExists(partition_path) && found_partitions) {
742+
// 当前partition不存在且已找到至少一个partition,认为已到达该db的末尾
743+
break;
744+
}
745+
}
746+
} else if (db_index > 0 && found_dbn_format) {
747+
// 当前db不存在且已找到至少一个db,认为已到达末尾
721748
break;
722749
}
723750
}
724751

725-
// 如果上面的检测失败,尝试经典的db/N格式
752+
// 如果没有找到dbN/M格式,尝试检测直接的分区目录格式 (如 0/, 1/, 2/)
753+
if (db_paths.empty()) {
754+
for (int db_index = 0; db_index < 1000; db_index++) {
755+
std::string db_inst_path = config.db_path + "/" + std::to_string(db_index);
756+
if (DirectoryExists(db_inst_path) && DirectoryExists(db_inst_path + "/CURRENT")) {
757+
db_paths.push_back(std::make_tuple(db_inst_path, "", std::to_string(db_index)));
758+
} else if (db_index > 0 && !DirectoryExists(db_inst_path) && !db_paths.empty()) {
759+
// 如果目录不存在且已找到至少一个DB,则认为已到达末尾
760+
break;
761+
}
762+
}
763+
}
764+
765+
// 尝试经典的db/N格式
726766
if (db_paths.empty()) {
727767
int db_index = 0;
728768
while (true) {
729769
std::string db_inst_path = config.db_path + "/db/" + std::to_string(db_index);
730770
if (DirectoryExists(db_inst_path) && DirectoryExists(db_inst_path + "/CURRENT")) {
731-
db_paths.push_back(db_inst_path);
771+
db_paths.push_back(std::make_tuple(db_inst_path, "db", std::to_string(db_index)));
732772
db_index++;
733773
} else {
734774
break;
@@ -738,16 +778,19 @@ int main(int argc, char *argv[]){
738778

739779
if (db_paths.empty()) {
740780
std::cerr << "Error: No valid database found at " << config.db_path << std::endl;
741-
std::cerr << "Checked for single instance, direct subdirectories (0, 1, 2...), and db/0, db/1, ... directories" << std::endl;
781+
std::cerr << "Checked for single instance, dbN/M format, direct subdirectories (0, 1, 2...), and db/0, db/1, ... directories" << std::endl;
742782
return 1;
743783
}
744784

745785
std::cout << "Detected " << db_paths.size() << " database instances" << std::endl;
746786
}
747787

748788
// Analyze each database instance
749-
for (const auto& db_path : db_paths) {
750-
AnalyzeSingleDB(db_path, key_infos, config);
789+
for (const auto& db_info : db_paths) {
790+
const std::string& db_path = std::get<0>(db_info);
791+
const std::string& db_name = std::get<1>(db_info);
792+
const std::string& partition = std::get<2>(db_info);
793+
AnalyzeSingleDB(db_path, key_infos, config, db_name, partition);
751794
}
752795

753796
// Sort keys by size
@@ -760,10 +803,12 @@ int main(int argc, char *argv[]){
760803

761804
// Output results
762805
*out << "===== Big Key Analysis =====\n";
763-
*out << "Type\tSize\tKey\tTTL\n";
806+
*out << "DB\tPartition\tType\tSize\tKey\tTTL\n";
764807

765808
for (const auto& info : key_infos) {
766-
*out << info.type << "\t" << info.size << "\t" << info.key << "\t" << info.ttl << "\n";
809+
*out << info.db_name << "\t" << info.partition << "\t"
810+
<< info.type << "\t" << info.size << "\t"
811+
<< info.key << "\t" << info.ttl << "\n";
767812
}
768813

769814
// Generate prefix statistics if requested

0 commit comments

Comments
 (0)