From b7a0f38cb96740b71c379ca2b61df270512d6f85 Mon Sep 17 00:00:00 2001 From: chejinge Date: Wed, 24 Dec 2025 17:35:47 +0800 Subject: [PATCH 01/17] feat:add bigkey --- tools/CMakeLists.txt | 3 +- tools/bigkey_analyzer/CMakeLists.txt | 46 ++ tools/bigkey_analyzer/README.md | 122 ++++ tools/bigkey_analyzer/bigkey_analyzer.cc | 690 +++++++++++++++++++++++ 4 files changed, 860 insertions(+), 1 deletion(-) create mode 100644 tools/bigkey_analyzer/CMakeLists.txt create mode 100644 tools/bigkey_analyzer/README.md create mode 100644 tools/bigkey_analyzer/bigkey_analyzer.cc diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt index 4c90e9745c..bb944b0179 100644 --- a/tools/CMakeLists.txt +++ b/tools/CMakeLists.txt @@ -1,8 +1,9 @@ add_subdirectory(./aof_to_pika) add_subdirectory(./benchmark_client) +add_subdirectory(./bigkey_analyzer) add_subdirectory(./binlog_sender) add_subdirectory(./manifest_generator) add_subdirectory(./rdb_to_pika) add_subdirectory(./pika_to_txt) add_subdirectory(./txt_to_pika) -add_subdirectory(./pika-port/pika_port_3) \ No newline at end of file +add_subdirectory(./pika-port/pika_port_3) diff --git a/tools/bigkey_analyzer/CMakeLists.txt b/tools/bigkey_analyzer/CMakeLists.txt new file mode 100644 index 0000000000..df4b672064 --- /dev/null +++ b/tools/bigkey_analyzer/CMakeLists.txt @@ -0,0 +1,46 @@ +cmake_minimum_required(VERSION 3.18) + +project(bigkey_analyzer) + +set(CMAKE_CXX_STANDARD 17) + +# Add the bigkey_analyzer executable +add_executable(bigkey_analyzer + bigkey_analyzer.cc +) + +# Include directories +target_include_directories(bigkey_analyzer PRIVATE + ${PROJECT_SOURCE_DIR}/../../include + ${PROJECT_SOURCE_DIR}/../../src + ${ROCKSDB_INCLUDE_DIR} + ${INSTALL_INCLUDEDIR} +) + +# Link libraries +target_link_libraries(bigkey_analyzer + storage + ${ROCKSDB_LIBRARY} + ${GLOG_LIBRARY} + ${LIB_GFLAGS} + ${LIB_FMT} + libsnappy.a + libzstd.a + liblz4.a + libz.a + ${LIBUNWIND_LIBRARY} + ${JEMALLOC_LIBRARY} +) + +# Add dependencies +add_dependencies(bigkey_analyzer + rocksdb + glog + gflags + storage +) + +# Installation +install(TARGETS bigkey_analyzer + RUNTIME DESTINATION bin +) diff --git a/tools/bigkey_analyzer/README.md b/tools/bigkey_analyzer/README.md new file mode 100644 index 0000000000..9b7341aa6d --- /dev/null +++ b/tools/bigkey_analyzer/README.md @@ -0,0 +1,122 @@ +# BigKey Analyzer + +BigKey Analyzer 是一个离线分析工具,用于检测和分析 PikiwiDB 数据库中的大键(Big Keys)。该工具直接读取 RocksDB 数据文件,无需启动数据库服务,可以帮助运维人员识别可能影响系统性能的大键。 + +## 功能特性 + +- 直接读取 RocksDB 数据文件,无需服务端运行 +- 支持所有 PikiwiDB 数据类型:strings, hashes, sets, zsets, lists +- 按键大小排序显示结果 +- 支持设置最小键大小阈值,只显示超过阈值的键 +- 支持限制显示结果数量(Top N) +- 按前缀分析键分布情况 +- 可输出到文件 + +## 编译 + +```bash +# 确保在 PikiwiDB 根目录下 +mkdir -p build && cd build + +# 带上工具编译参数 +cmake .. -DUSE_PIKA_TOOLS=ON +make bigkey_analyzer +``` + +## 使用方法 + +```bash +./bigkey_analyzer [OPTIONS] +``` + +### 选项 + +- `--min-size=SIZE`:只显示大于 SIZE 字节的键 +- `--top=N`:只显示前 N 个最大的键 +- `--prefix-stat`:显示按前缀分组的统计信息 +- `--prefix-delimiter=C`:设置前缀分隔符(默认为 ":") +- `--type=TYPE`:只分析指定类型,可选值:strings|hashes|lists|sets|zsets|all +- `--output=FILE`:输出结果到文件 +- `--help`:显示帮助信息 + +### 示例 + +分析数据库中所有键: +```bash +./bigkey_analyzer /path/to/db_directory +``` + +只显示大于 1MB 的键: +```bash +./bigkey_analyzer --min-size=1048576 /path/to/db_directory +``` + +只显示前 10 个最大的键: +```bash +./bigkey_analyzer --top=10 /path/to/db_directory +``` + +按前缀统计键分布: +```bash +./bigkey_analyzer --prefix-stat /path/to/db_directory +``` + +使用自定义前缀分隔符: +```bash +./bigkey_analyzer --prefix-stat --prefix-delimiter="." /path/to/db_directory +``` + +只分析字符串类型: +```bash +./bigkey_analyzer --type=strings /path/to/db_directory +``` + +输出到文件: +```bash +./bigkey_analyzer --output=result.txt /path/to/db_directory +``` + +## 输出格式 + +基本输出格式: +``` +Type Size Key TTL +``` + +- `Type`:键类型(string, hash, list, set, zset) +- `Size`:键占用的总字节数(包括元数据) +- `Key`:键名称 +- `TTL`:剩余生存时间(秒),-1 表示无过期时间 + +前缀统计输出格式: +``` +Prefix Count TotalSize AvgSize +``` + +- `Prefix`:键前缀 +- `Count`:该前缀下的键数量 +- `TotalSize`:该前缀下所有键的总大小(字节) +- `AvgSize`:该前缀下键的平均大小(字节) + +## 提示 + +1. 对于非常大的数据库,建议先使用 `--min-size` 设置一个较大的阈值(如 1MB)来过滤小键。 +2. 使用 `--prefix-stat` 可以帮助识别特定前缀下的键分布情况,有助于发现问题模块。 +3. 大键可能导致性能问题,可以考虑以下解决方案: + - 拆分大的 hash, set, zset 为多个小的 + - 使用适当的过期策略 + - 使用压缩算法减小值的大小 + +## 常见问题 + +1. **"Error: Database directory does not exist"** + - 确保提供了正确的数据库路径 + - 数据库路径通常包含 strings, hashes, sets, zsets, lists 子目录 + +2. **"Error opening X database"** + - 确保数据库文件未被锁定(如数据库正在运行) + - 检查是否有足够的文件访问权限 + +3. **显示的键大小与内存使用不匹配** + - 此工具计算的是键在存储引擎中的总大小,包括元数据 + - 实际内存使用可能因内存分配和 RocksDB 缓存策略而有所不同 diff --git a/tools/bigkey_analyzer/bigkey_analyzer.cc b/tools/bigkey_analyzer/bigkey_analyzer.cc new file mode 100644 index 0000000000..689510702f --- /dev/null +++ b/tools/bigkey_analyzer/bigkey_analyzer.cc @@ -0,0 +1,690 @@ +// Copyright (c) 2025-present, PikiwiDB Project +// Licensed under the BSD-style license found in the LICENSE file in the root directory of this source tree. +// This source code is also available under the terms of the GNU General Public License, version 3. + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "storage/storage.h" +#include "rocksdb/options.h" +#include "rocksdb/db.h" +#include "rocksdb/env.h" +#include "rocksdb/iterator.h" +#include "rocksdb/slice.h" +#include "rocksdb/status.h" + +// Utility function to check if a directory exists +bool DirectoryExists(const std::string& path) { + struct stat st; + return stat(path.c_str(), &st) == 0 && S_ISDIR(st.st_mode); +} + +// Replace special characters for consistent display +std::string ReplaceAll(std::string str, const std::string& from, const std::string& to) { + size_t start_pos = 0; + while ((start_pos = str.find(from, start_pos)) != std::string::npos) { + str.replace(start_pos, from.length(), to); + start_pos += to.length(); // Handles case where 'to' is a substring of 'from' + } + return str; +} + +// Print usage information +void PrintUsage() { + std::cout << "Usage: bigkey_analyzer [OPTIONS] " << std::endl; + std::cout << "Options:" << std::endl; + std::cout << " --min-size=SIZE Only show keys larger than SIZE bytes" << std::endl; + std::cout << " --top=N Only show top N largest keys" << std::endl; + std::cout << " --prefix-stat Show statistics by key prefix" << std::endl; + std::cout << " --prefix-delimiter=C Character used to delimit prefix (default: ':')" << std::endl; + std::cout << " --type=TYPE Only analyze specific type (strings|hashes|lists|sets|zsets|all)" << std::endl; + std::cout << " --output=FILE Write output to file instead of stdout" << std::endl; + std::cout << " --help Display this help message" << std::endl; +} + +// Data structure to hold key information +struct KeyInfo { + std::string type; + std::string key; + int64_t size; + int64_t ttl; + + // Default constructor + KeyInfo() : type(""), key(""), size(0), ttl(-1) {} + + // Constructor with const references + KeyInfo(const std::string& t, const std::string& k, int64_t s, int64_t tt) + : type(t), key(k), size(s), ttl(tt) {} + + // Constructor with rvalue references for better move semantics + KeyInfo(std::string&& t, std::string&& k, int64_t s, int64_t tt) + : type(std::move(t)), key(std::move(k)), size(s), ttl(tt) {} + + // Mixed constructor (const char* literals are common) + KeyInfo(const char* t, const std::string& k, int64_t s, int64_t tt) + : type(t), key(k), size(s), ttl(tt) {} + + KeyInfo(const char* t, std::string&& k, int64_t s, int64_t tt) + : type(t), key(std::move(k)), size(s), ttl(tt) {} + + bool operator<(const KeyInfo& other) const { + return size > other.size; // Sort in descending order by size + } +}; + +// Data structure for prefix statistics +struct PrefixStat { + size_t count = 0; + int64_t total_size = 0; + + void Add(int64_t size) { + count++; + total_size += size; + } +}; + +// Configuration for the analyzer +struct Config { + std::string db_path; + int64_t min_size = 0; + int top_n = -1; + bool prefix_stat = false; + std::string prefix_delimiter = ":"; + std::string type_filter = "all"; + std::string output_file; +}; + +// Parse command line arguments +bool ParseArgs(int argc, char* argv[], Config& config) { + if (argc < 2) { + PrintUsage(); + return false; + } + + static struct option long_options[] = { + {"min-size", required_argument, 0, 'm'}, + {"top", required_argument, 0, 't'}, + {"prefix-stat", no_argument, 0, 'p'}, + {"prefix-delimiter", required_argument, 0, 'd'}, + {"type", required_argument, 0, 'y'}, + {"output", required_argument, 0, 'o'}, + {"help", no_argument, 0, 'h'}, + {0, 0, 0, 0} + }; + + int opt; + int option_index = 0; + + // Set default values + config.min_size = 0; + config.top_n = -1; + config.prefix_stat = false; + config.prefix_delimiter = ":"; + config.type_filter = "all"; + + while ((opt = getopt_long(argc, argv, "m:t:pd:y:o:h", long_options, &option_index)) != -1) { + switch (opt) { + case 'm': + config.min_size = std::stoll(optarg); + break; + case 't': + config.top_n = std::stoi(optarg); + break; + case 'p': + config.prefix_stat = true; + break; + case 'd': + config.prefix_delimiter = optarg; + break; + case 'y': + config.type_filter = optarg; + break; + case 'o': + config.output_file = optarg; + break; + case 'h': + PrintUsage(); + return false; + default: + PrintUsage(); + return false; + } + } + + if (optind >= argc) { + std::cerr << "Error: Missing database path" << std::endl; + PrintUsage(); + return false; + } + + config.db_path = argv[optind]; + + // Validate the database path + if (!DirectoryExists(config.db_path)) { + std::cerr << "Error: Database directory does not exist: " << config.db_path << std::endl; + return false; + } + + return true; +} + +// Analyze strings database +void AnalyzeStrings(const std::string& path, std::vector& key_infos, const Config& config) { + if (!DirectoryExists(path)) { + std::cerr << "Skipping strings: directory not found: " << path << std::endl; + return; + } + + std::cout << "Analyzing strings database at " << path << "..." << std::endl; + + rocksdb::Options options; + rocksdb::DB* db; + rocksdb::Status status = rocksdb::DB::OpenForReadOnly(options, path, &db); + + if (!status.ok()) { + std::cerr << "Error opening strings database: " << status.ToString() << std::endl; + return; + } + + int64_t curtime; + db->GetEnv()->GetCurrentTime(&curtime).ok(); + + rocksdb::ReadOptions read_options; + auto iter = db->NewIterator(read_options); + + for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { + const std::string& key = iter->key().ToString(); + const std::string& value = iter->value().ToString(); + + // Extract timestamp from value (if exists) + int32_t ttl = -1; + int64_t ts = 0; + + if (value.size() >= 12) { // At least has timestamp + // The format in storage engine may be different from the old one + // We're simplifying here - real implementation would use ParsedStringsValue + ts = *reinterpret_cast(value.data() + value.size() - 12); + if (ts != 0) { + int64_t diff = ts - curtime; + ttl = diff > 0 ? diff : -1; + } + } + + int64_t size = key.size() + value.size(); + + if (size >= config.min_size) { + std::string display_key = ReplaceAll(key, "\n", "\\n"); + display_key = ReplaceAll(display_key, " ", "\\x20"); + key_infos.emplace_back("string", display_key, size, ttl); + } + } + + delete iter; + delete db; +} + +// Analyze hashes database +void AnalyzeHashes(const std::string& path, std::vector& key_infos, const Config& config) { + if (!DirectoryExists(path)) { + std::cerr << "Skipping hashes: directory not found: " << path << std::endl; + return; + } + + std::cout << "Analyzing hashes database at " << path << "..." << std::endl; + + rocksdb::Options options; + rocksdb::DB* db; + rocksdb::Status status = rocksdb::DB::OpenForReadOnly(options, path, &db); + + if (!status.ok()) { + std::cerr << "Error opening hashes database: " << status.ToString() << std::endl; + return; + } + + int64_t curtime; + db->GetEnv()->GetCurrentTime(&curtime).ok(); + + rocksdb::ReadOptions read_options; + auto iter = db->NewIterator(read_options); + + // Using an unordered_map to group hash fields by key + std::unordered_map> hash_sizes; // key -> (size, ttl) + + for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { + const std::string& key = iter->key().ToString(); + const std::string& value = iter->value().ToString(); + + // Check if this is a metadata key or field + if (key.find('m') == 0) { + // This is a metadata key (handle ttl, etc) + std::string hash_key = key.substr(1); // Strip the 'm' prefix + int64_t ttl = -1; + + // Extract ttl from metadata - simplified, real implementation would use ParsedHashesMetaValue + if (value.size() >= 12) { + int64_t ts = *reinterpret_cast(value.data() + value.size() - 12); + if (ts != 0) { + ttl = ts - curtime; + if (ttl <= 0) ttl = -1; + } + } + + // Initialize size to metadata size + hash_sizes[hash_key] = std::make_pair(key.size() + value.size(), ttl); + } else if (key.find('f') == 0) { + // This is a field key + size_t separator = key.find('|'); + if (separator != std::string::npos) { + std::string hash_key = key.substr(1, separator - 1); // Extract the hash key + std::string field = key.substr(separator + 1); // Extract the field name + + // Add field size to hash size + auto it = hash_sizes.find(hash_key); + if (it != hash_sizes.end()) { + it->second.first += key.size() + value.size(); + } else { + // If we encounter a field before metadata, initialize with default ttl + hash_sizes[hash_key] = std::make_pair(key.size() + value.size(), -1); + } + } + } + } + + // Add hash keys to the result + for (const auto& entry : hash_sizes) { + if (entry.second.first >= config.min_size) { + std::string display_key = ReplaceAll(entry.first, "\n", "\\n"); + display_key = ReplaceAll(display_key, " ", "\\x20"); + key_infos.emplace_back("hash", display_key, entry.second.first, entry.second.second); + } + } + + delete iter; + delete db; +} + +// Analyze sets database +void AnalyzeSets(const std::string& path, std::vector& key_infos, const Config& config) { + if (!DirectoryExists(path)) { + std::cerr << "Skipping sets: directory not found: " << path << std::endl; + return; + } + + std::cout << "Analyzing sets database at " << path << "..." << std::endl; + + rocksdb::Options options; + rocksdb::DB* db; + rocksdb::Status status = rocksdb::DB::OpenForReadOnly(options, path, &db); + + if (!status.ok()) { + std::cerr << "Error opening sets database: " << status.ToString() << std::endl; + return; + } + + int64_t curtime; + db->GetEnv()->GetCurrentTime(&curtime).ok(); + + rocksdb::ReadOptions read_options; + auto iter = db->NewIterator(read_options); + + // Using an unordered_map to group set members by key + std::unordered_map> set_sizes; // key -> (size, ttl) + + for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { + const std::string& key = iter->key().ToString(); + const std::string& value = iter->value().ToString(); + + // Check if this is a metadata key or member + if (key.find('m') == 0) { + // This is a metadata key + std::string set_key = key.substr(1); // Strip the 'm' prefix + int64_t ttl = -1; + + // Extract ttl from metadata - simplified, real implementation would use ParsedSetsMetaValue + if (value.size() >= 12) { + int64_t ts = *reinterpret_cast(value.data() + value.size() - 12); + if (ts != 0) { + ttl = ts - curtime; + if (ttl <= 0) ttl = -1; + } + } + + // Initialize size to metadata size + set_sizes[set_key] = std::make_pair(key.size() + value.size(), ttl); + } else if (key.find('s') == 0) { + // This is a member key + size_t separator = key.find('|'); + if (separator != std::string::npos) { + std::string set_key = key.substr(1, separator - 1); // Extract the set key + std::string member = key.substr(separator + 1); // Extract the member name + + // Add member size to set size + auto it = set_sizes.find(set_key); + if (it != set_sizes.end()) { + it->second.first += key.size() + value.size(); + } else { + // If we encounter a member before metadata, initialize with default ttl + set_sizes[set_key] = std::make_pair(key.size() + value.size(), -1); + } + } + } + } + + // Add set keys to the result + for (const auto& entry : set_sizes) { + if (entry.second.first >= config.min_size) { + std::string display_key = ReplaceAll(entry.first, "\n", "\\n"); + display_key = ReplaceAll(display_key, " ", "\\x20"); + key_infos.emplace_back("set", display_key, entry.second.first, entry.second.second); + } + } + + delete iter; + delete db; +} + +// Analyze zsets database +void AnalyzeZsets(const std::string& path, std::vector& key_infos, const Config& config) { + if (!DirectoryExists(path)) { + std::cerr << "Skipping zsets: directory not found: " << path << std::endl; + return; + } + + std::cout << "Analyzing zsets database at " << path << "..." << std::endl; + + rocksdb::Options options; + rocksdb::DB* db; + rocksdb::Status status = rocksdb::DB::OpenForReadOnly(options, path, &db); + + if (!status.ok()) { + std::cerr << "Error opening zsets database: " << status.ToString() << std::endl; + return; + } + + int64_t curtime; + db->GetEnv()->GetCurrentTime(&curtime).ok(); + + rocksdb::ReadOptions read_options; + auto iter = db->NewIterator(read_options); + + // Using an unordered_map to group zset members by key + std::unordered_map> zset_sizes; // key -> (size, ttl) + + for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { + const std::string& key = iter->key().ToString(); + const std::string& value = iter->value().ToString(); + + // Check if this is a metadata key, score key or data key + if (key.find('m') == 0) { + // This is a metadata key + std::string zset_key = key.substr(1); // Strip the 'm' prefix + int64_t ttl = -1; + + // Extract ttl from metadata - simplified, real implementation would use ParsedZSetsMetaValue + if (value.size() >= 12) { + int64_t ts = *reinterpret_cast(value.data() + value.size() - 12); + if (ts != 0) { + ttl = ts - curtime; + if (ttl <= 0) ttl = -1; + } + } + + // Initialize size to metadata size + zset_sizes[zset_key] = std::make_pair(key.size() + value.size(), ttl); + } else if (key.find('z') == 0 || key.find('s') == 0) { + // This is a score key or data key + size_t separator = key.find('|'); + if (separator != std::string::npos) { + std::string zset_key = key.substr(1, separator - 1); // Extract the zset key + + // Add member size to zset size + auto it = zset_sizes.find(zset_key); + if (it != zset_sizes.end()) { + it->second.first += key.size() + value.size(); + } else { + // If we encounter a member before metadata, initialize with default ttl + zset_sizes[zset_key] = std::make_pair(key.size() + value.size(), -1); + } + } + } + } + + // Add zset keys to the result + for (const auto& entry : zset_sizes) { + if (entry.second.first >= config.min_size) { + std::string display_key = ReplaceAll(entry.first, "\n", "\\n"); + display_key = ReplaceAll(display_key, " ", "\\x20"); + key_infos.emplace_back("zset", display_key, entry.second.first, entry.second.second); + } + } + + delete iter; + delete db; +} + +// Analyze lists database +void AnalyzeLists(const std::string& path, std::vector& key_infos, const Config& config) { + if (!DirectoryExists(path)) { + std::cerr << "Skipping lists: directory not found: " << path << std::endl; + return; + } + + std::cout << "Analyzing lists database at " << path << "..." << std::endl; + + rocksdb::Options options; + rocksdb::DB* db; + rocksdb::Status status = rocksdb::DB::OpenForReadOnly(options, path, &db); + + if (!status.ok()) { + std::cerr << "Error opening lists database: " << status.ToString() << std::endl; + return; + } + + int64_t curtime; + db->GetEnv()->GetCurrentTime(&curtime).ok(); + + rocksdb::ReadOptions read_options; + auto iter = db->NewIterator(read_options); + + // Using an unordered_map to group list items by key + std::unordered_map> list_sizes; // key -> (size, ttl) + + for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { + const std::string& key = iter->key().ToString(); + const std::string& value = iter->value().ToString(); + + // Check if this is a metadata key or item key + if (key.find('m') == 0) { + // This is a metadata key + std::string list_key = key.substr(1); // Strip the 'm' prefix + int64_t ttl = -1; + + // Extract ttl from metadata - simplified, real implementation would use ParsedListsMetaValue + if (value.size() >= 12) { + int64_t ts = *reinterpret_cast(value.data() + value.size() - 12); + if (ts != 0) { + ttl = ts - curtime; + if (ttl <= 0) ttl = -1; + } + } + + // Initialize size to metadata size + list_sizes[list_key] = std::make_pair(key.size() + value.size(), ttl); + } else if (key.find('l') == 0) { + // This is an item key + size_t separator = key.find('|'); + if (separator != std::string::npos) { + std::string list_key = key.substr(1, separator - 1); // Extract the list key + + // Add item size to list size + auto it = list_sizes.find(list_key); + if (it != list_sizes.end()) { + it->second.first += key.size() + value.size(); + } else { + // If we encounter an item before metadata, initialize with default ttl + list_sizes[list_key] = std::make_pair(key.size() + value.size(), -1); + } + } + } + } + + // Add list keys to the result + for (const auto& entry : list_sizes) { + if (entry.second.first >= config.min_size) { + std::string display_key = ReplaceAll(entry.first, "\n", "\\n"); + display_key = ReplaceAll(display_key, " ", "\\x20"); + key_infos.emplace_back("list", display_key, entry.second.first, entry.second.second); + } + } + + delete iter; + delete db; +} + +// Get the prefix of a key +std::string GetKeyPrefix(const std::string& key, const std::string& delimiter) { + size_t pos = key.find(delimiter); + if (pos != std::string::npos) { + return key.substr(0, pos); + } + return key; // Return the entire key if no delimiter found +} + +// Generate prefix statistics +void GeneratePrefixStats(const std::vector& key_infos, const std::string& delimiter, std::ostream& out) { + std::unordered_map prefix_stats; + + for (const auto& info : key_infos) { + std::string prefix = GetKeyPrefix(info.key, delimiter); + prefix_stats[prefix].Add(info.size); + } + + // Convert to vector for sorting + std::vector> sorted_stats; + for (const auto& entry : prefix_stats) { + sorted_stats.emplace_back(entry); + } + + // Sort by total size in descending order + std::sort(sorted_stats.begin(), sorted_stats.end(), + [](const auto& a, const auto& b) { + return a.second.total_size > b.second.total_size; + }); + + // Output header + out << "\n===== Key Prefix Statistics =====\n"; + out << "Prefix\tCount\tTotal Size\tAvg Size\n"; + + // Output stats + for (const auto& entry : sorted_stats) { + double avg_size = static_cast(entry.second.total_size) / entry.second.count; + out << entry.first << "\t" + << entry.second.count << "\t" + << entry.second.total_size << "\t" + << avg_size << "\n"; + } +} + +int main(int argc, char* argv[]) { + // Parse command line arguments + Config config; + if (!ParseArgs(argc, argv, config)) { + return 1; + } + + // Vector to store key information + std::vector key_infos; + + // Create output stream + std::unique_ptr file_out; + std::ostream* out = &std::cout; + + if (!config.output_file.empty()) { + file_out = std::make_unique(config.output_file); + if (!file_out->is_open()) { + std::cerr << "Error opening output file: " << config.output_file << std::endl; + return 1; + } + out = file_out.get(); + } + + // Analyze each database type + if (config.type_filter == "all" || config.type_filter == "strings") { + std::string path = config.db_path + "/strings"; + AnalyzeStrings(path, key_infos, config); + } + + if (config.type_filter == "all" || config.type_filter == "hashes") { + std::string path = config.db_path + "/hashes"; + AnalyzeHashes(path, key_infos, config); + } + + if (config.type_filter == "all" || config.type_filter == "sets") { + std::string path = config.db_path + "/sets"; + AnalyzeSets(path, key_infos, config); + } + + if (config.type_filter == "all" || config.type_filter == "zsets") { + std::string path = config.db_path + "/zsets"; + AnalyzeZsets(path, key_infos, config); + } + + if (config.type_filter == "all" || config.type_filter == "lists") { + std::string path = config.db_path + "/lists"; + AnalyzeLists(path, key_infos, config); + } + + // Sort keys by size (largest first) + std::sort(key_infos.begin(), key_infos.end()); + + // Limit to top N if requested + if (config.top_n > 0 && config.top_n < static_cast(key_infos.size())) { + key_infos.resize(config.top_n); + } + + // Output results + *out << "===== Big Key Analysis =====\n"; + *out << "Type\tSize\tKey\tTTL\n"; + + for (const auto& info : key_infos) { + *out << info.type << " " << info.size << " " << info.key << " " << info.ttl << "\n"; + } + + // Generate prefix statistics if requested + if (config.prefix_stat) { + GeneratePrefixStats(key_infos, config.prefix_delimiter, *out); + } + + // Output summary + *out << "\n===== Summary =====\n"; + *out << "Total keys analyzed: " << key_infos.size() << "\n"; + + // Count by type + std::unordered_map type_counts; + std::unordered_map type_sizes; + + for (const auto& info : key_infos) { + type_counts[info.type]++; + type_sizes[info.type] += info.size; + } + + *out << "Keys by type:\n"; + for (const auto& entry : type_counts) { + double avg_size = static_cast(type_sizes[entry.first]) / entry.second; + double mb_size = static_cast(type_sizes[entry.first]) / (1024 * 1024); + + *out << " " << entry.first << ": " << entry.second << " keys, " + << mb_size << " MB total, " + << avg_size << " bytes avg\n"; + } + + return 0; +} From 419a5935f116f286717412d058aa7e8407acd005 Mon Sep 17 00:00:00 2001 From: chejinge Date: Wed, 24 Dec 2025 19:33:09 +0800 Subject: [PATCH 02/17] fix:cmakelist --- tools/bigkey_analyzer/CMakeLists.txt | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tools/bigkey_analyzer/CMakeLists.txt b/tools/bigkey_analyzer/CMakeLists.txt index df4b672064..424dfb004b 100644 --- a/tools/bigkey_analyzer/CMakeLists.txt +++ b/tools/bigkey_analyzer/CMakeLists.txt @@ -17,6 +17,12 @@ target_include_directories(bigkey_analyzer PRIVATE ${INSTALL_INCLUDEDIR} ) +# Link directories +target_link_directories(bigkey_analyzer PRIVATE + ${INSTALL_LIBDIR_64} + ${INSTALL_LIBDIR} +) + # Link libraries target_link_libraries(bigkey_analyzer storage From 145a3d80883e3113a8411eef0f121e5169e0be6d Mon Sep 17 00:00:00 2001 From: chejinge Date: Wed, 24 Dec 2025 19:54:43 +0800 Subject: [PATCH 03/17] fix:cmakelist --- tools/bigkey_analyzer/bigkey_analyzer.cc | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/tools/bigkey_analyzer/bigkey_analyzer.cc b/tools/bigkey_analyzer/bigkey_analyzer.cc index 689510702f..f64e09b4e6 100644 --- a/tools/bigkey_analyzer/bigkey_analyzer.cc +++ b/tools/bigkey_analyzer/bigkey_analyzer.cc @@ -30,7 +30,7 @@ bool DirectoryExists(const std::string& path) { // Replace special characters for consistent display std::string ReplaceAll(std::string str, const std::string& from, const std::string& to) { size_t start_pos = 0; - while ((start_pos = str.find(from, start_pos)) != std::string::npos) { + while((start_pos = str.find(from, start_pos)) != std::string::npos) { str.replace(start_pos, from.length(), to); start_pos += to.length(); // Handles case where 'to' is a substring of 'from' } @@ -284,7 +284,6 @@ void AnalyzeHashes(const std::string& path, std::vector& key_infos, con size_t separator = key.find('|'); if (separator != std::string::npos) { std::string hash_key = key.substr(1, separator - 1); // Extract the hash key - std::string field = key.substr(separator + 1); // Extract the field name // Add field size to hash size auto it = hash_sizes.find(hash_key); @@ -364,7 +363,6 @@ void AnalyzeSets(const std::string& path, std::vector& key_infos, const size_t separator = key.find('|'); if (separator != std::string::npos) { std::string set_key = key.substr(1, separator - 1); // Extract the set key - std::string member = key.substr(separator + 1); // Extract the member name // Add member size to set size auto it = set_sizes.find(set_key); @@ -593,7 +591,7 @@ void GeneratePrefixStats(const std::vector& key_infos, const std::strin } } -int main(int argc, char* argv[]) { +int main(int argc, char *argv[]){ // Parse command line arguments Config config; if (!ParseArgs(argc, argv, config)) { From 08921352ad21126320d2c77cda9d71d758875a50 Mon Sep 17 00:00:00 2001 From: chejinge Date: Wed, 24 Dec 2025 20:14:03 +0800 Subject: [PATCH 04/17] fix:file bug --- src/pstd/src/env.cc | 54 ++++++++++++++++++++++++++++++++++----------- 1 file changed, 41 insertions(+), 13 deletions(-) diff --git a/src/pstd/src/env.cc b/src/pstd/src/env.cc index 7dadf924ea..ccedd00065 100644 --- a/src/pstd/src/env.cc +++ b/src/pstd/src/env.cc @@ -188,35 +188,63 @@ bool DeleteDirIfExist(const std::string& path) { return !(IsDir(path) == 0 && DeleteDir(path) != 0); } -uint64_t Du(const std::string& path) { +uuint64_t Du(const std::string& path) { uint64_t sum = 0; try { if (!filesystem::exists(path)) { return 0; } - if (filesystem::is_symlink(path)) { - filesystem::path symlink_path = filesystem::read_symlink(path); - sum = Du(symlink_path); - } else if (filesystem::is_directory(path)) { + + // 用 error_code 避免异常在老实现上导致问题 + std::error_code ec; + + if (filesystem::is_symlink(path, ec) && !ec) { + filesystem::path symlink_path = filesystem::read_symlink(path, ec); + if (!ec) { + sum = Du(symlink_path.string()); + } + } else if (filesystem::is_directory(path, ec) && !ec) { for (const auto& entry : filesystem::directory_iterator(path)) { - if (entry.is_symlink()) { - sum += Du(filesystem::read_symlink(entry.path())); - } else if (entry.is_directory()) { - sum += Du(entry.path()); - } else if (entry.is_regular_file()) { - sum += entry.file_size(); + auto st = entry.symlink_status(ec); + if (ec) { + ec.clear(); + continue; } + + if (filesystem::is_symlink(st)) { + auto p = filesystem::read_symlink(entry.path(), ec); + if (!ec) { + sum += Du(p.string()); + } else { + ec.clear(); + } + } else if (filesystem::is_directory(st)) { + sum += Du(entry.path().string()); + } else if (filesystem::is_regular_file(st)) { + auto sz = filesystem::file_size(entry.path(), ec); + if (!ec) { + sum += sz; + } else { + ec.clear(); + } + } + } + } else if (filesystem::is_regular_file(path, ec) && !ec) { + auto sz = filesystem::file_size(path, ec); + if (!ec) { + sum = sz; } - } else if (filesystem::is_regular_file(path)) { - sum = filesystem::file_size(path); } } catch (const filesystem::filesystem_error& ex) { LOG(WARNING) << "Error accessing path: " << ex.what(); + } catch (const std::exception& ex) { + LOG(WARNING) << "Error accessing path: " << ex.what(); } return sum; } + uint64_t NowMicros() { auto now = std::chrono::system_clock::now(); return std::chrono::duration_cast(now.time_since_epoch()).count(); From 516d1b854b5fd3b350790e65206e2871f186e47c Mon Sep 17 00:00:00 2001 From: chejinge Date: Wed, 24 Dec 2025 20:24:22 +0800 Subject: [PATCH 05/17] fix --- src/pstd/src/env.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pstd/src/env.cc b/src/pstd/src/env.cc index ccedd00065..ea4aa5db98 100644 --- a/src/pstd/src/env.cc +++ b/src/pstd/src/env.cc @@ -188,7 +188,7 @@ bool DeleteDirIfExist(const std::string& path) { return !(IsDir(path) == 0 && DeleteDir(path) != 0); } -uuint64_t Du(const std::string& path) { +uint64_t Du(const std::string& path) { uint64_t sum = 0; try { if (!filesystem::exists(path)) { From 6a7979531b7913b30fc29b0538e03e8de28295b2 Mon Sep 17 00:00:00 2001 From: chejinge Date: Thu, 25 Dec 2025 10:09:38 +0800 Subject: [PATCH 06/17] fix --- src/pstd/src/env.cc | 1 - 1 file changed, 1 deletion(-) diff --git a/src/pstd/src/env.cc b/src/pstd/src/env.cc index ea4aa5db98..b406276f40 100644 --- a/src/pstd/src/env.cc +++ b/src/pstd/src/env.cc @@ -195,7 +195,6 @@ uint64_t Du(const std::string& path) { return 0; } - // 用 error_code 避免异常在老实现上导致问题 std::error_code ec; if (filesystem::is_symlink(path, ec) && !ec) { From 24689d7dc4e47e5d6f1f97acb0339934d8c432eb Mon Sep 17 00:00:00 2001 From: chejinge Date: Thu, 25 Dec 2025 18:29:29 +0800 Subject: [PATCH 07/17] fix --- tools/bigkey_analyzer/bigkey_analyzer.cc | 391 ++++++++++++++--------- 1 file changed, 247 insertions(+), 144 deletions(-) diff --git a/tools/bigkey_analyzer/bigkey_analyzer.cc b/tools/bigkey_analyzer/bigkey_analyzer.cc index f64e09b4e6..7be27e3cc4 100644 --- a/tools/bigkey_analyzer/bigkey_analyzer.cc +++ b/tools/bigkey_analyzer/bigkey_analyzer.cc @@ -20,6 +20,8 @@ #include "rocksdb/iterator.h" #include "rocksdb/slice.h" #include "rocksdb/status.h" +#include "storage/src/base_data_key_format.h" +#include "storage/src/base_meta_value_format.h" // Utility function to check if a directory exists bool DirectoryExists(const std::string& path) { @@ -240,9 +242,15 @@ void AnalyzeHashes(const std::string& path, std::vector& key_infos, con std::cout << "Analyzing hashes database at " << path << "..." << std::endl; - rocksdb::Options options; + // Open database with column families + rocksdb::DBOptions db_options; + std::vector column_families; + column_families.emplace_back(rocksdb::kDefaultColumnFamilyName, rocksdb::ColumnFamilyOptions()); + column_families.emplace_back("data_cf", rocksdb::ColumnFamilyOptions()); + + std::vector handles; rocksdb::DB* db; - rocksdb::Status status = rocksdb::DB::OpenForReadOnly(options, path, &db); + rocksdb::Status status = rocksdb::DB::OpenForReadOnly(db_options, path, column_families, &handles, &db); if (!status.ok()) { std::cerr << "Error opening hashes database: " << status.ToString() << std::endl; @@ -253,49 +261,59 @@ void AnalyzeHashes(const std::string& path, std::vector& key_infos, con db->GetEnv()->GetCurrentTime(&curtime).ok(); rocksdb::ReadOptions read_options; - auto iter = db->NewIterator(read_options); // Using an unordered_map to group hash fields by key std::unordered_map> hash_sizes; // key -> (size, ttl) - for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { - const std::string& key = iter->key().ToString(); - const std::string& value = iter->value().ToString(); + // Read metadata from default column family (handles[0]) + auto meta_iter = db->NewIterator(read_options, handles[0]); + for (meta_iter->SeekToFirst(); meta_iter->Valid(); meta_iter->Next()) { + rocksdb::Slice key_slice = meta_iter->key(); + rocksdb::Slice value_slice = meta_iter->value(); + std::string key = key_slice.ToString(); - // Check if this is a metadata key or field - if (key.find('m') == 0) { - // This is a metadata key (handle ttl, etc) - std::string hash_key = key.substr(1); // Strip the 'm' prefix - int64_t ttl = -1; - - // Extract ttl from metadata - simplified, real implementation would use ParsedHashesMetaValue - if (value.size() >= 12) { - int64_t ts = *reinterpret_cast(value.data() + value.size() - 12); - if (ts != 0) { - ttl = ts - curtime; - if (ttl <= 0) ttl = -1; - } + int64_t ttl = -1; + + // Parse metadata value to get TTL + if (value_slice.size() >= storage::ParsedBaseMetaValue::kBaseMetaValueSuffixLength) { + storage::ParsedHashesMetaValue parsed_meta(value_slice); + int32_t timestamp = parsed_meta.timestamp(); + if (timestamp > 0) { + int64_t diff = timestamp - curtime; + ttl = diff > 0 ? diff : -1; } + } + + // Initialize with metadata size + hash_sizes[key] = std::make_pair(key_slice.size() + value_slice.size(), ttl); + } + delete meta_iter; + + // Read data fields from data column family (handles[1]) + auto data_iter = db->NewIterator(read_options, handles[1]); + for (data_iter->SeekToFirst(); data_iter->Valid(); data_iter->Next()) { + rocksdb::Slice encoded_key_slice = data_iter->key(); + rocksdb::Slice value_slice = data_iter->value(); + + // Parse the data key to extract the hash key + try { + storage::ParsedHashesDataKey parsed_key(encoded_key_slice); + std::string hash_key = parsed_key.key().ToString(); - // Initialize size to metadata size - hash_sizes[hash_key] = std::make_pair(key.size() + value.size(), ttl); - } else if (key.find('f') == 0) { - // This is a field key - size_t separator = key.find('|'); - if (separator != std::string::npos) { - std::string hash_key = key.substr(1, separator - 1); // Extract the hash key - - // Add field size to hash size - auto it = hash_sizes.find(hash_key); - if (it != hash_sizes.end()) { - it->second.first += key.size() + value.size(); - } else { - // If we encounter a field before metadata, initialize with default ttl - hash_sizes[hash_key] = std::make_pair(key.size() + value.size(), -1); - } + // Add field size to the corresponding hash + auto it = hash_sizes.find(hash_key); + if (it != hash_sizes.end()) { + it->second.first += encoded_key_slice.size() + value_slice.size(); + } else { + // If metadata not found, initialize with default ttl + hash_sizes[hash_key] = std::make_pair(encoded_key_slice.size() + value_slice.size(), -1); } + } catch (...) { + // Skip malformed keys + continue; } } + delete data_iter; // Add hash keys to the result for (const auto& entry : hash_sizes) { @@ -306,7 +324,10 @@ void AnalyzeHashes(const std::string& path, std::vector& key_infos, con } } - delete iter; + // Cleanup + for (auto handle : handles) { + delete handle; + } delete db; } @@ -319,9 +340,15 @@ void AnalyzeSets(const std::string& path, std::vector& key_infos, const std::cout << "Analyzing sets database at " << path << "..." << std::endl; - rocksdb::Options options; + // Open database with column families + rocksdb::DBOptions db_options; + std::vector column_families; + column_families.emplace_back(rocksdb::kDefaultColumnFamilyName, rocksdb::ColumnFamilyOptions()); + column_families.emplace_back("data_cf", rocksdb::ColumnFamilyOptions()); + + std::vector handles; rocksdb::DB* db; - rocksdb::Status status = rocksdb::DB::OpenForReadOnly(options, path, &db); + rocksdb::Status status = rocksdb::DB::OpenForReadOnly(db_options, path, column_families, &handles, &db); if (!status.ok()) { std::cerr << "Error opening sets database: " << status.ToString() << std::endl; @@ -332,49 +359,59 @@ void AnalyzeSets(const std::string& path, std::vector& key_infos, const db->GetEnv()->GetCurrentTime(&curtime).ok(); rocksdb::ReadOptions read_options; - auto iter = db->NewIterator(read_options); // Using an unordered_map to group set members by key std::unordered_map> set_sizes; // key -> (size, ttl) - for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { - const std::string& key = iter->key().ToString(); - const std::string& value = iter->value().ToString(); + // Read metadata from default column family (handles[0]) + auto meta_iter = db->NewIterator(read_options, handles[0]); + for (meta_iter->SeekToFirst(); meta_iter->Valid(); meta_iter->Next()) { + rocksdb::Slice key_slice = meta_iter->key(); + rocksdb::Slice value_slice = meta_iter->value(); + std::string key = key_slice.ToString(); - // Check if this is a metadata key or member - if (key.find('m') == 0) { - // This is a metadata key - std::string set_key = key.substr(1); // Strip the 'm' prefix - int64_t ttl = -1; - - // Extract ttl from metadata - simplified, real implementation would use ParsedSetsMetaValue - if (value.size() >= 12) { - int64_t ts = *reinterpret_cast(value.data() + value.size() - 12); - if (ts != 0) { - ttl = ts - curtime; - if (ttl <= 0) ttl = -1; - } + int64_t ttl = -1; + + // Parse metadata value to get TTL + if (value_slice.size() >= storage::ParsedBaseMetaValue::kBaseMetaValueSuffixLength) { + storage::ParsedSetsMetaValue parsed_meta(value_slice); + int32_t timestamp = parsed_meta.timestamp(); + if (timestamp > 0) { + int64_t diff = timestamp - curtime; + ttl = diff > 0 ? diff : -1; } + } + + // Initialize with metadata size + set_sizes[key] = std::make_pair(key_slice.size() + value_slice.size(), ttl); + } + delete meta_iter; + + // Read data members from data column family (handles[1]) + auto data_iter = db->NewIterator(read_options, handles[1]); + for (data_iter->SeekToFirst(); data_iter->Valid(); data_iter->Next()) { + rocksdb::Slice encoded_key_slice = data_iter->key(); + rocksdb::Slice value_slice = data_iter->value(); + + // Parse the data key to extract the set key + try { + storage::ParsedSetsMemberKey parsed_key(encoded_key_slice); + std::string set_key = parsed_key.key().ToString(); - // Initialize size to metadata size - set_sizes[set_key] = std::make_pair(key.size() + value.size(), ttl); - } else if (key.find('s') == 0) { - // This is a member key - size_t separator = key.find('|'); - if (separator != std::string::npos) { - std::string set_key = key.substr(1, separator - 1); // Extract the set key - - // Add member size to set size - auto it = set_sizes.find(set_key); - if (it != set_sizes.end()) { - it->second.first += key.size() + value.size(); - } else { - // If we encounter a member before metadata, initialize with default ttl - set_sizes[set_key] = std::make_pair(key.size() + value.size(), -1); - } + // Add member size to the corresponding set + auto it = set_sizes.find(set_key); + if (it != set_sizes.end()) { + it->second.first += encoded_key_slice.size() + value_slice.size(); + } else { + // If metadata not found, initialize with default ttl + set_sizes[set_key] = std::make_pair(encoded_key_slice.size() + value_slice.size(), -1); } + } catch (...) { + // Skip malformed keys + continue; } } + delete data_iter; // Add set keys to the result for (const auto& entry : set_sizes) { @@ -385,7 +422,10 @@ void AnalyzeSets(const std::string& path, std::vector& key_infos, const } } - delete iter; + // Cleanup + for (auto handle : handles) { + delete handle; + } delete db; } @@ -398,9 +438,16 @@ void AnalyzeZsets(const std::string& path, std::vector& key_infos, cons std::cout << "Analyzing zsets database at " << path << "..." << std::endl; - rocksdb::Options options; + // ZSets use 3 column families: default for meta, data_cf for member data, and score_cf for scores + rocksdb::DBOptions db_options; + std::vector column_families; + column_families.emplace_back(rocksdb::kDefaultColumnFamilyName, rocksdb::ColumnFamilyOptions()); + column_families.emplace_back("data_cf", rocksdb::ColumnFamilyOptions()); + column_families.emplace_back("score_cf", rocksdb::ColumnFamilyOptions()); + + std::vector handles; rocksdb::DB* db; - rocksdb::Status status = rocksdb::DB::OpenForReadOnly(options, path, &db); + rocksdb::Status status = rocksdb::DB::OpenForReadOnly(db_options, path, column_families, &handles, &db); if (!status.ok()) { std::cerr << "Error opening zsets database: " << status.ToString() << std::endl; @@ -411,49 +458,83 @@ void AnalyzeZsets(const std::string& path, std::vector& key_infos, cons db->GetEnv()->GetCurrentTime(&curtime).ok(); rocksdb::ReadOptions read_options; - auto iter = db->NewIterator(read_options); // Using an unordered_map to group zset members by key std::unordered_map> zset_sizes; // key -> (size, ttl) - for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { - const std::string& key = iter->key().ToString(); - const std::string& value = iter->value().ToString(); + // Read metadata from default column family (handles[0]) + auto meta_iter = db->NewIterator(read_options, handles[0]); + for (meta_iter->SeekToFirst(); meta_iter->Valid(); meta_iter->Next()) { + rocksdb::Slice key_slice = meta_iter->key(); + rocksdb::Slice value_slice = meta_iter->value(); + std::string key = key_slice.ToString(); - // Check if this is a metadata key, score key or data key - if (key.find('m') == 0) { - // This is a metadata key - std::string zset_key = key.substr(1); // Strip the 'm' prefix - int64_t ttl = -1; - - // Extract ttl from metadata - simplified, real implementation would use ParsedZSetsMetaValue - if (value.size() >= 12) { - int64_t ts = *reinterpret_cast(value.data() + value.size() - 12); - if (ts != 0) { - ttl = ts - curtime; - if (ttl <= 0) ttl = -1; - } + int64_t ttl = -1; + + // Parse metadata value to get TTL + if (value_slice.size() >= storage::ParsedBaseMetaValue::kBaseMetaValueSuffixLength) { + storage::ParsedZSetsMetaValue parsed_meta(value_slice); + int32_t timestamp = parsed_meta.timestamp(); + if (timestamp > 0) { + int64_t diff = timestamp - curtime; + ttl = diff > 0 ? diff : -1; } + } + + // Initialize with metadata size + zset_sizes[key] = std::make_pair(key_slice.size() + value_slice.size(), ttl); + } + delete meta_iter; + + // Read member data from data column family (handles[1]) + auto data_iter = db->NewIterator(read_options, handles[1]); + for (data_iter->SeekToFirst(); data_iter->Valid(); data_iter->Next()) { + rocksdb::Slice encoded_key_slice = data_iter->key(); + rocksdb::Slice value_slice = data_iter->value(); + + // Parse the data key to extract the zset key + try { + storage::ParsedZSetsMemberKey parsed_key(encoded_key_slice); + std::string zset_key = parsed_key.key().ToString(); - // Initialize size to metadata size - zset_sizes[zset_key] = std::make_pair(key.size() + value.size(), ttl); - } else if (key.find('z') == 0 || key.find('s') == 0) { - // This is a score key or data key - size_t separator = key.find('|'); - if (separator != std::string::npos) { - std::string zset_key = key.substr(1, separator - 1); // Extract the zset key - - // Add member size to zset size - auto it = zset_sizes.find(zset_key); - if (it != zset_sizes.end()) { - it->second.first += key.size() + value.size(); - } else { - // If we encounter a member before metadata, initialize with default ttl - zset_sizes[zset_key] = std::make_pair(key.size() + value.size(), -1); - } + // Add member size to the corresponding zset + auto it = zset_sizes.find(zset_key); + if (it != zset_sizes.end()) { + it->second.first += encoded_key_slice.size() + value_slice.size(); + } else { + // If metadata not found, initialize with default ttl + zset_sizes[zset_key] = std::make_pair(encoded_key_slice.size() + value_slice.size(), -1); } + } catch (...) { + // Skip malformed keys + continue; } } + delete data_iter; + + // Read score data from score column family (handles[2]) + auto score_iter = db->NewIterator(read_options, handles[2]); + for (score_iter->SeekToFirst(); score_iter->Valid(); score_iter->Next()) { + rocksdb::Slice encoded_key_slice = score_iter->key(); + rocksdb::Slice value_slice = score_iter->value(); + + // ZSetsScoreKey has the same structure as other data keys + try { + storage::ParsedZSetsMemberKey parsed_key(encoded_key_slice); // We can use the same parser for score keys + std::string zset_key = parsed_key.key().ToString(); + + // Add score entry size to the corresponding zset + auto it = zset_sizes.find(zset_key); + if (it != zset_sizes.end()) { + it->second.first += encoded_key_slice.size() + value_slice.size(); + } + // No else case here because we should have seen the metadata or data entry first + } catch (...) { + // Skip malformed keys + continue; + } + } + delete score_iter; // Add zset keys to the result for (const auto& entry : zset_sizes) { @@ -464,7 +545,10 @@ void AnalyzeZsets(const std::string& path, std::vector& key_infos, cons } } - delete iter; + // Cleanup + for (auto handle : handles) { + delete handle; + } delete db; } @@ -477,9 +561,15 @@ void AnalyzeLists(const std::string& path, std::vector& key_infos, cons std::cout << "Analyzing lists database at " << path << "..." << std::endl; - rocksdb::Options options; + // Open database with column families + rocksdb::DBOptions db_options; + std::vector column_families; + column_families.emplace_back(rocksdb::kDefaultColumnFamilyName, rocksdb::ColumnFamilyOptions()); + column_families.emplace_back("data_cf", rocksdb::ColumnFamilyOptions()); + + std::vector handles; rocksdb::DB* db; - rocksdb::Status status = rocksdb::DB::OpenForReadOnly(options, path, &db); + rocksdb::Status status = rocksdb::DB::OpenForReadOnly(db_options, path, column_families, &handles, &db); if (!status.ok()) { std::cerr << "Error opening lists database: " << status.ToString() << std::endl; @@ -490,49 +580,59 @@ void AnalyzeLists(const std::string& path, std::vector& key_infos, cons db->GetEnv()->GetCurrentTime(&curtime).ok(); rocksdb::ReadOptions read_options; - auto iter = db->NewIterator(read_options); // Using an unordered_map to group list items by key std::unordered_map> list_sizes; // key -> (size, ttl) - for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { - const std::string& key = iter->key().ToString(); - const std::string& value = iter->value().ToString(); + // Read metadata from default column family (handles[0]) + auto meta_iter = db->NewIterator(read_options, handles[0]); + for (meta_iter->SeekToFirst(); meta_iter->Valid(); meta_iter->Next()) { + rocksdb::Slice key_slice = meta_iter->key(); + rocksdb::Slice value_slice = meta_iter->value(); + std::string key = key_slice.ToString(); - // Check if this is a metadata key or item key - if (key.find('m') == 0) { - // This is a metadata key - std::string list_key = key.substr(1); // Strip the 'm' prefix - int64_t ttl = -1; - - // Extract ttl from metadata - simplified, real implementation would use ParsedListsMetaValue - if (value.size() >= 12) { - int64_t ts = *reinterpret_cast(value.data() + value.size() - 12); - if (ts != 0) { - ttl = ts - curtime; - if (ttl <= 0) ttl = -1; - } + int64_t ttl = -1; + + // Parse metadata value to get TTL + if (value_slice.size() >= storage::ParsedBaseMetaValue::kBaseMetaValueSuffixLength) { + storage::ParsedBaseMetaValue parsed_meta(value_slice); // Lists use BaseMetaValue too + int32_t timestamp = parsed_meta.timestamp(); + if (timestamp > 0) { + int64_t diff = timestamp - curtime; + ttl = diff > 0 ? diff : -1; } + } + + // Initialize with metadata size + list_sizes[key] = std::make_pair(key_slice.size() + value_slice.size(), ttl); + } + delete meta_iter; + + // Read data items from data column family (handles[1]) + auto data_iter = db->NewIterator(read_options, handles[1]); + for (data_iter->SeekToFirst(); data_iter->Valid(); data_iter->Next()) { + rocksdb::Slice encoded_key_slice = data_iter->key(); + rocksdb::Slice value_slice = data_iter->value(); + + // Parse the data key to extract the list key + try { + storage::ParsedBaseDataKey parsed_key(encoded_key_slice); // Lists use BaseDataKey directly + std::string list_key = parsed_key.key().ToString(); - // Initialize size to metadata size - list_sizes[list_key] = std::make_pair(key.size() + value.size(), ttl); - } else if (key.find('l') == 0) { - // This is an item key - size_t separator = key.find('|'); - if (separator != std::string::npos) { - std::string list_key = key.substr(1, separator - 1); // Extract the list key - - // Add item size to list size - auto it = list_sizes.find(list_key); - if (it != list_sizes.end()) { - it->second.first += key.size() + value.size(); - } else { - // If we encounter an item before metadata, initialize with default ttl - list_sizes[list_key] = std::make_pair(key.size() + value.size(), -1); - } + // Add item size to the corresponding list + auto it = list_sizes.find(list_key); + if (it != list_sizes.end()) { + it->second.first += encoded_key_slice.size() + value_slice.size(); + } else { + // If metadata not found, initialize with default ttl + list_sizes[list_key] = std::make_pair(encoded_key_slice.size() + value_slice.size(), -1); } + } catch (...) { + // Skip malformed keys + continue; } } + delete data_iter; // Add list keys to the result for (const auto& entry : list_sizes) { @@ -543,7 +643,10 @@ void AnalyzeLists(const std::string& path, std::vector& key_infos, cons } } - delete iter; + // Cleanup + for (auto handle : handles) { + delete handle; + } delete db; } From 94f5a1eaa8365262a446c3f5a759e94efd31474d Mon Sep 17 00:00:00 2001 From: chejinge Date: Thu, 25 Dec 2025 19:29:18 +0800 Subject: [PATCH 08/17] fix --- tools/bigkey_analyzer/bigkey_analyzer.cc | 315 ++++++++++++++--------- 1 file changed, 194 insertions(+), 121 deletions(-) diff --git a/tools/bigkey_analyzer/bigkey_analyzer.cc b/tools/bigkey_analyzer/bigkey_analyzer.cc index 7be27e3cc4..91dd983547 100644 --- a/tools/bigkey_analyzer/bigkey_analyzer.cc +++ b/tools/bigkey_analyzer/bigkey_analyzer.cc @@ -274,18 +274,25 @@ void AnalyzeHashes(const std::string& path, std::vector& key_infos, con int64_t ttl = -1; - // Parse metadata value to get TTL + // Parse metadata value to get TTL and check if stale if (value_slice.size() >= storage::ParsedBaseMetaValue::kBaseMetaValueSuffixLength) { storage::ParsedHashesMetaValue parsed_meta(value_slice); + + // Skip stale or empty hashes + if (parsed_meta.IsStale() || parsed_meta.count() == 0) { + continue; + } + int32_t timestamp = parsed_meta.timestamp(); - if (timestamp > 0) { + if (timestamp > 0 && !parsed_meta.IsPermanentSurvival()) { int64_t diff = timestamp - curtime; - ttl = diff > 0 ? diff : -1; + ttl = diff > 0 ? diff : -2; } } - // Initialize with metadata size - hash_sizes[key] = std::make_pair(key_slice.size() + value_slice.size(), ttl); + // Initialize with base metadata size (key + 12 bytes overhead) + int64_t sum = key.size() + 12; + hash_sizes[key] = std::make_pair(sum, ttl); } delete meta_iter; @@ -295,18 +302,22 @@ void AnalyzeHashes(const std::string& path, std::vector& key_infos, con rocksdb::Slice encoded_key_slice = data_iter->key(); rocksdb::Slice value_slice = data_iter->value(); - // Parse the data key to extract the hash key + // Parse the data key to extract the hash key and field try { storage::ParsedHashesDataKey parsed_key(encoded_key_slice); std::string hash_key = parsed_key.key().ToString(); + std::string field = parsed_key.field().ToString(); + + // Calculate field size: 4 (size prefix) + key + 4 (size prefix) + field + value + int64_t field_size = 4 + hash_key.size() + 4 + field.size() + value_slice.size(); // Add field size to the corresponding hash auto it = hash_sizes.find(hash_key); if (it != hash_sizes.end()) { - it->second.first += encoded_key_slice.size() + value_slice.size(); + it->second.first += field_size; } else { - // If metadata not found, initialize with default ttl - hash_sizes[hash_key] = std::make_pair(encoded_key_slice.size() + value_slice.size(), -1); + // If metadata not found, initialize with field size and default ttl + hash_sizes[hash_key] = std::make_pair(hash_key.size() + 12 + field_size, -1); } } catch (...) { // Skip malformed keys @@ -341,14 +352,27 @@ void AnalyzeSets(const std::string& path, std::vector& key_infos, const std::cout << "Analyzing sets database at " << path << "..." << std::endl; // Open database with column families + std::vector column_families; + rocksdb::Options options; + // 先列出所有可用的列族 + rocksdb::Status s = rocksdb::DB::ListColumnFamilies(options, path, &column_families); + + if (!s.ok()) { + std::cerr << "Error listing column families for sets: " << s.ToString() << std::endl; + return; + } + rocksdb::DBOptions db_options; - std::vector column_families; - column_families.emplace_back(rocksdb::kDefaultColumnFamilyName, rocksdb::ColumnFamilyOptions()); - column_families.emplace_back("data_cf", rocksdb::ColumnFamilyOptions()); + std::vector cf_descriptors; + + // 添加所有列族到描述符 + for (const auto& cf_name : column_families) { + cf_descriptors.emplace_back(cf_name, rocksdb::ColumnFamilyOptions()); + } std::vector handles; rocksdb::DB* db; - rocksdb::Status status = rocksdb::DB::OpenForReadOnly(db_options, path, column_families, &handles, &db); + rocksdb::Status status = rocksdb::DB::OpenForReadOnly(db_options, path, cf_descriptors, &handles, &db); if (!status.ok()) { std::cerr << "Error opening sets database: " << status.ToString() << std::endl; @@ -363,55 +387,71 @@ void AnalyzeSets(const std::string& path, std::vector& key_infos, const // Using an unordered_map to group set members by key std::unordered_map> set_sizes; // key -> (size, ttl) - // Read metadata from default column family (handles[0]) - auto meta_iter = db->NewIterator(read_options, handles[0]); - for (meta_iter->SeekToFirst(); meta_iter->Valid(); meta_iter->Next()) { - rocksdb::Slice key_slice = meta_iter->key(); - rocksdb::Slice value_slice = meta_iter->value(); - std::string key = key_slice.ToString(); - - int64_t ttl = -1; - - // Parse metadata value to get TTL - if (value_slice.size() >= storage::ParsedBaseMetaValue::kBaseMetaValueSuffixLength) { - storage::ParsedSetsMetaValue parsed_meta(value_slice); - int32_t timestamp = parsed_meta.timestamp(); - if (timestamp > 0) { - int64_t diff = timestamp - curtime; - ttl = diff > 0 ? diff : -1; + // 找到default和data_cf的索引 + int default_cf_index = -1; + int data_cf_index = -1; + + for (size_t i = 0; i < column_families.size(); i++) { + if (column_families[i] == "default") { + default_cf_index = i; + } else if (column_families[i] == "data_cf") { + data_cf_index = i; + } + } + + // 处理元数据 (default 列族) + if (default_cf_index != -1) { + auto meta_iter = db->NewIterator(read_options, handles[default_cf_index]); + for (meta_iter->SeekToFirst(); meta_iter->Valid(); meta_iter->Next()) { + rocksdb::Slice key_slice = meta_iter->key(); + rocksdb::Slice value_slice = meta_iter->value(); + std::string key = key_slice.ToString(); + + int64_t ttl = -1; + + // Parse metadata value to get TTL + if (value_slice.size() >= storage::ParsedBaseMetaValue::kBaseMetaValueSuffixLength) { + storage::ParsedSetsMetaValue parsed_meta(value_slice); + int32_t timestamp = parsed_meta.timestamp(); + if (timestamp > 0) { + int64_t diff = timestamp - curtime; + ttl = diff > 0 ? diff : -1; + } } + + // Initialize with metadata size + set_sizes[key] = std::make_pair(key_slice.size() + value_slice.size(), ttl); } - - // Initialize with metadata size - set_sizes[key] = std::make_pair(key_slice.size() + value_slice.size(), ttl); + delete meta_iter; } - delete meta_iter; - // Read data members from data column family (handles[1]) - auto data_iter = db->NewIterator(read_options, handles[1]); - for (data_iter->SeekToFirst(); data_iter->Valid(); data_iter->Next()) { - rocksdb::Slice encoded_key_slice = data_iter->key(); - rocksdb::Slice value_slice = data_iter->value(); - - // Parse the data key to extract the set key - try { - storage::ParsedSetsMemberKey parsed_key(encoded_key_slice); - std::string set_key = parsed_key.key().ToString(); + // 处理数据 (data_cf 列族) + if (data_cf_index != -1) { + auto data_iter = db->NewIterator(read_options, handles[data_cf_index]); + for (data_iter->SeekToFirst(); data_iter->Valid(); data_iter->Next()) { + rocksdb::Slice encoded_key_slice = data_iter->key(); + rocksdb::Slice value_slice = data_iter->value(); - // Add member size to the corresponding set - auto it = set_sizes.find(set_key); - if (it != set_sizes.end()) { - it->second.first += encoded_key_slice.size() + value_slice.size(); - } else { - // If metadata not found, initialize with default ttl - set_sizes[set_key] = std::make_pair(encoded_key_slice.size() + value_slice.size(), -1); + // Parse the data key to extract the set key + try { + storage::ParsedSetsMemberKey parsed_key(encoded_key_slice); + std::string set_key = parsed_key.key().ToString(); + + // Add member size to the corresponding set + auto it = set_sizes.find(set_key); + if (it != set_sizes.end()) { + it->second.first += encoded_key_slice.size() + value_slice.size(); + } else { + // If metadata not found, initialize with default ttl + set_sizes[set_key] = std::make_pair(encoded_key_slice.size() + value_slice.size(), -1); + } + } catch (...) { + // Skip malformed keys + continue; } - } catch (...) { - // Skip malformed keys - continue; } + delete data_iter; } - delete data_iter; // Add set keys to the result for (const auto& entry : set_sizes) { @@ -438,16 +478,28 @@ void AnalyzeZsets(const std::string& path, std::vector& key_infos, cons std::cout << "Analyzing zsets database at " << path << "..." << std::endl; - // ZSets use 3 column families: default for meta, data_cf for member data, and score_cf for scores + // Open database with column families + std::vector column_families; + rocksdb::Options options; + // 先列出所有可用的列族 + rocksdb::Status s = rocksdb::DB::ListColumnFamilies(options, path, &column_families); + + if (!s.ok()) { + std::cerr << "Error listing column families for zsets: " << s.ToString() << std::endl; + return; + } + rocksdb::DBOptions db_options; - std::vector column_families; - column_families.emplace_back(rocksdb::kDefaultColumnFamilyName, rocksdb::ColumnFamilyOptions()); - column_families.emplace_back("data_cf", rocksdb::ColumnFamilyOptions()); - column_families.emplace_back("score_cf", rocksdb::ColumnFamilyOptions()); + std::vector cf_descriptors; + + // 添加所有列族到描述符 + for (const auto& cf_name : column_families) { + cf_descriptors.emplace_back(cf_name, rocksdb::ColumnFamilyOptions()); + } std::vector handles; rocksdb::DB* db; - rocksdb::Status status = rocksdb::DB::OpenForReadOnly(db_options, path, column_families, &handles, &db); + rocksdb::Status status = rocksdb::DB::OpenForReadOnly(db_options, path, cf_descriptors, &handles, &db); if (!status.ok()) { std::cerr << "Error opening zsets database: " << status.ToString() << std::endl; @@ -462,79 +514,100 @@ void AnalyzeZsets(const std::string& path, std::vector& key_infos, cons // Using an unordered_map to group zset members by key std::unordered_map> zset_sizes; // key -> (size, ttl) - // Read metadata from default column family (handles[0]) - auto meta_iter = db->NewIterator(read_options, handles[0]); - for (meta_iter->SeekToFirst(); meta_iter->Valid(); meta_iter->Next()) { - rocksdb::Slice key_slice = meta_iter->key(); - rocksdb::Slice value_slice = meta_iter->value(); - std::string key = key_slice.ToString(); - - int64_t ttl = -1; - - // Parse metadata value to get TTL - if (value_slice.size() >= storage::ParsedBaseMetaValue::kBaseMetaValueSuffixLength) { - storage::ParsedZSetsMetaValue parsed_meta(value_slice); - int32_t timestamp = parsed_meta.timestamp(); - if (timestamp > 0) { - int64_t diff = timestamp - curtime; - ttl = diff > 0 ? diff : -1; + // 找到default、data_cf和score_cf的索引 + int default_cf_index = -1; + int data_cf_index = -1; + int score_cf_index = -1; + + for (size_t i = 0; i < column_families.size(); i++) { + if (column_families[i] == "default") { + default_cf_index = i; + } else if (column_families[i] == "data_cf") { + data_cf_index = i; + } else if (column_families[i] == "score_cf") { + score_cf_index = i; + } + } + + // 处理元数据 (default 列族) + if (default_cf_index != -1) { + auto meta_iter = db->NewIterator(read_options, handles[default_cf_index]); + for (meta_iter->SeekToFirst(); meta_iter->Valid(); meta_iter->Next()) { + rocksdb::Slice key_slice = meta_iter->key(); + rocksdb::Slice value_slice = meta_iter->value(); + std::string key = key_slice.ToString(); + + int64_t ttl = -1; + + // Parse metadata value to get TTL + if (value_slice.size() >= storage::ParsedBaseMetaValue::kBaseMetaValueSuffixLength) { + storage::ParsedZSetsMetaValue parsed_meta(value_slice); + int32_t timestamp = parsed_meta.timestamp(); + if (timestamp > 0) { + int64_t diff = timestamp - curtime; + ttl = diff > 0 ? diff : -1; + } } + + // Initialize with metadata size + zset_sizes[key] = std::make_pair(key_slice.size() + value_slice.size(), ttl); } - - // Initialize with metadata size - zset_sizes[key] = std::make_pair(key_slice.size() + value_slice.size(), ttl); + delete meta_iter; } - delete meta_iter; - // Read member data from data column family (handles[1]) - auto data_iter = db->NewIterator(read_options, handles[1]); - for (data_iter->SeekToFirst(); data_iter->Valid(); data_iter->Next()) { - rocksdb::Slice encoded_key_slice = data_iter->key(); - rocksdb::Slice value_slice = data_iter->value(); - - // Parse the data key to extract the zset key - try { - storage::ParsedZSetsMemberKey parsed_key(encoded_key_slice); - std::string zset_key = parsed_key.key().ToString(); + // 处理成员数据 (data_cf 列族) + if (data_cf_index != -1) { + auto data_iter = db->NewIterator(read_options, handles[data_cf_index]); + for (data_iter->SeekToFirst(); data_iter->Valid(); data_iter->Next()) { + rocksdb::Slice encoded_key_slice = data_iter->key(); + rocksdb::Slice value_slice = data_iter->value(); - // Add member size to the corresponding zset - auto it = zset_sizes.find(zset_key); - if (it != zset_sizes.end()) { - it->second.first += encoded_key_slice.size() + value_slice.size(); - } else { - // If metadata not found, initialize with default ttl - zset_sizes[zset_key] = std::make_pair(encoded_key_slice.size() + value_slice.size(), -1); + // Parse the data key to extract the zset key + try { + storage::ParsedZSetsMemberKey parsed_key(encoded_key_slice); + std::string zset_key = parsed_key.key().ToString(); + + // Add member size to the corresponding zset + auto it = zset_sizes.find(zset_key); + if (it != zset_sizes.end()) { + it->second.first += encoded_key_slice.size() + value_slice.size(); + } else { + // If metadata not found, initialize with default ttl + zset_sizes[zset_key] = std::make_pair(encoded_key_slice.size() + value_slice.size(), -1); + } + } catch (...) { + // Skip malformed keys + continue; } - } catch (...) { - // Skip malformed keys - continue; } + delete data_iter; } - delete data_iter; - // Read score data from score column family (handles[2]) - auto score_iter = db->NewIterator(read_options, handles[2]); - for (score_iter->SeekToFirst(); score_iter->Valid(); score_iter->Next()) { - rocksdb::Slice encoded_key_slice = score_iter->key(); - rocksdb::Slice value_slice = score_iter->value(); - - // ZSetsScoreKey has the same structure as other data keys - try { - storage::ParsedZSetsMemberKey parsed_key(encoded_key_slice); // We can use the same parser for score keys - std::string zset_key = parsed_key.key().ToString(); + // 处理分数数据 (score_cf 列族) + if (score_cf_index != -1) { + auto score_iter = db->NewIterator(read_options, handles[score_cf_index]); + for (score_iter->SeekToFirst(); score_iter->Valid(); score_iter->Next()) { + rocksdb::Slice encoded_key_slice = score_iter->key(); + rocksdb::Slice value_slice = score_iter->value(); - // Add score entry size to the corresponding zset - auto it = zset_sizes.find(zset_key); - if (it != zset_sizes.end()) { - it->second.first += encoded_key_slice.size() + value_slice.size(); - } - // No else case here because we should have seen the metadata or data entry first - } catch (...) { - // Skip malformed keys - continue; + // ZSetsScoreKey has the same structure as other data keys + try { + storage::ParsedZSetsMemberKey parsed_key(encoded_key_slice); // We can use the same parser for score keys + std::string zset_key = parsed_key.key().ToString(); + + // Add score entry size to the corresponding zset + auto it = zset_sizes.find(zset_key); + if (it != zset_sizes.end()) { + it->second.first += encoded_key_slice.size() + value_slice.size(); + } + // No else case here because we should have seen the metadata or data entry first + } catch (...) { + // Skip malformed keys + continue; + } } + delete score_iter; } - delete score_iter; // Add zset keys to the result for (const auto& entry : zset_sizes) { From 08bcb3f98fe811cafa889b5320a06178050632e1 Mon Sep 17 00:00:00 2001 From: chejinge Date: Thu, 25 Dec 2025 19:54:49 +0800 Subject: [PATCH 09/17] fix --- tools/bigkey_analyzer/bigkey_analyzer.cc | 334 ++++++++++------------- 1 file changed, 141 insertions(+), 193 deletions(-) diff --git a/tools/bigkey_analyzer/bigkey_analyzer.cc b/tools/bigkey_analyzer/bigkey_analyzer.cc index 91dd983547..377c0b054d 100644 --- a/tools/bigkey_analyzer/bigkey_analyzer.cc +++ b/tools/bigkey_analyzer/bigkey_analyzer.cc @@ -351,122 +351,65 @@ void AnalyzeSets(const std::string& path, std::vector& key_infos, const std::cout << "Analyzing sets database at " << path << "..." << std::endl; - // Open database with column families - std::vector column_families; - rocksdb::Options options; - // 先列出所有可用的列族 - rocksdb::Status s = rocksdb::DB::ListColumnFamilies(options, path, &column_families); + // 初始化存储选项 + storage::StorageOptions storage_options; + storage::Storage storage; + rocksdb::Status s = storage.Open(storage_options, path); if (!s.ok()) { - std::cerr << "Error listing column families for sets: " << s.ToString() << std::endl; - return; - } - - rocksdb::DBOptions db_options; - std::vector cf_descriptors; - - // 添加所有列族到描述符 - for (const auto& cf_name : column_families) { - cf_descriptors.emplace_back(cf_name, rocksdb::ColumnFamilyOptions()); - } - - std::vector handles; - rocksdb::DB* db; - rocksdb::Status status = rocksdb::DB::OpenForReadOnly(db_options, path, cf_descriptors, &handles, &db); - - if (!status.ok()) { - std::cerr << "Error opening sets database: " << status.ToString() << std::endl; + std::cerr << "Error opening sets database: " << s.ToString() << std::endl; return; } - - int64_t curtime; - db->GetEnv()->GetCurrentTime(&curtime).ok(); - - rocksdb::ReadOptions read_options; - - // Using an unordered_map to group set members by key - std::unordered_map> set_sizes; // key -> (size, ttl) - - // 找到default和data_cf的索引 - int default_cf_index = -1; - int data_cf_index = -1; - - for (size_t i = 0; i < column_families.size(); i++) { - if (column_families[i] == "default") { - default_cf_index = i; - } else if (column_families[i] == "data_cf") { - data_cf_index = i; + + // 使用Scan API遍历所有set keys + std::string start_key; + const std::string pattern("*"); + const int64_t count = 1000; + std::string next_key; + bool scan_finished = false; + + while (!scan_finished) { + std::vector keys; + s = storage.Scanx(storage::DataType::kSets, start_key, pattern, count, &keys, &next_key); + if (!s.ok()) { + std::cerr << "Error scanning sets: " << s.ToString() << std::endl; + break; } - } - - // 处理元数据 (default 列族) - if (default_cf_index != -1) { - auto meta_iter = db->NewIterator(read_options, handles[default_cf_index]); - for (meta_iter->SeekToFirst(); meta_iter->Valid(); meta_iter->Next()) { - rocksdb::Slice key_slice = meta_iter->key(); - rocksdb::Slice value_slice = meta_iter->value(); - std::string key = key_slice.ToString(); + + // 如果next_key为空,或者没有找到更多键,则结束扫描 + if (next_key.empty() || keys.empty()) { + scan_finished = true; + } + + start_key = next_key; + + // 处理每个集合键 + for (const auto& key : keys) { + int64_t sum = 0; + sum = sum + key.size() + 12; // 基础元数据大小 + // 获取set的所有成员 + std::vector members; int64_t ttl = -1; - // Parse metadata value to get TTL - if (value_slice.size() >= storage::ParsedBaseMetaValue::kBaseMetaValueSuffixLength) { - storage::ParsedSetsMetaValue parsed_meta(value_slice); - int32_t timestamp = parsed_meta.timestamp(); - if (timestamp > 0) { - int64_t diff = timestamp - curtime; - ttl = diff > 0 ? diff : -1; - } + s = storage.SMembersWithTTL(key, &members, &ttl); + if (!s.ok()) { + continue; } - // Initialize with metadata size - set_sizes[key] = std::make_pair(key_slice.size() + value_slice.size(), ttl); - } - delete meta_iter; - } - - // 处理数据 (data_cf 列族) - if (data_cf_index != -1) { - auto data_iter = db->NewIterator(read_options, handles[data_cf_index]); - for (data_iter->SeekToFirst(); data_iter->Valid(); data_iter->Next()) { - rocksdb::Slice encoded_key_slice = data_iter->key(); - rocksdb::Slice value_slice = data_iter->value(); + // 计算每个成员的大小并加总 + for (const auto& member : members) { + sum = sum + 4 + key.size() + 4 + member.size(); + } - // Parse the data key to extract the set key - try { - storage::ParsedSetsMemberKey parsed_key(encoded_key_slice); - std::string set_key = parsed_key.key().ToString(); - - // Add member size to the corresponding set - auto it = set_sizes.find(set_key); - if (it != set_sizes.end()) { - it->second.first += encoded_key_slice.size() + value_slice.size(); - } else { - // If metadata not found, initialize with default ttl - set_sizes[set_key] = std::make_pair(encoded_key_slice.size() + value_slice.size(), -1); - } - } catch (...) { - // Skip malformed keys - continue; + // 如果key大小超过阈值,添加到结果集 + if (sum >= config.min_size) { + std::string display_key = ReplaceAll(key, "\n", "\\n"); + display_key = ReplaceAll(display_key, " ", "\\x20"); + key_infos.emplace_back("set", display_key, sum, ttl); } } - delete data_iter; - } - - // Add set keys to the result - for (const auto& entry : set_sizes) { - if (entry.second.first >= config.min_size) { - std::string display_key = ReplaceAll(entry.first, "\n", "\\n"); - display_key = ReplaceAll(display_key, " ", "\\x20"); - key_infos.emplace_back("set", display_key, entry.second.first, entry.second.second); - } } - - // Cleanup - for (auto handle : handles) { - delete handle; - } - delete db; } // Analyze zsets database @@ -542,15 +485,22 @@ void AnalyzeZsets(const std::string& path, std::vector& key_infos, cons // Parse metadata value to get TTL if (value_slice.size() >= storage::ParsedBaseMetaValue::kBaseMetaValueSuffixLength) { storage::ParsedZSetsMetaValue parsed_meta(value_slice); + + // Skip stale or empty zsets + if (parsed_meta.IsStale() || parsed_meta.count() == 0) { + continue; + } + int32_t timestamp = parsed_meta.timestamp(); - if (timestamp > 0) { + if (timestamp > 0 && !parsed_meta.IsPermanentSurvival()) { int64_t diff = timestamp - curtime; - ttl = diff > 0 ? diff : -1; + ttl = diff > 0 ? diff : -2; } } - // Initialize with metadata size - zset_sizes[key] = std::make_pair(key_slice.size() + value_slice.size(), ttl); + // Initialize with base metadata size (key + 12 bytes overhead) + int64_t sum = key.size() + 12; + zset_sizes[key] = std::make_pair(sum, ttl); } delete meta_iter; } @@ -562,18 +512,22 @@ void AnalyzeZsets(const std::string& path, std::vector& key_infos, cons rocksdb::Slice encoded_key_slice = data_iter->key(); rocksdb::Slice value_slice = data_iter->value(); - // Parse the data key to extract the zset key + // Parse the data key to extract the zset key and member try { storage::ParsedZSetsMemberKey parsed_key(encoded_key_slice); std::string zset_key = parsed_key.key().ToString(); + std::string member = parsed_key.member().ToString(); + + // Calculate member size: 4 + key + 4 + member + value (score) + int64_t member_size = 4 + zset_key.size() + 4 + member.size() + value_slice.size(); // Add member size to the corresponding zset auto it = zset_sizes.find(zset_key); if (it != zset_sizes.end()) { - it->second.first += encoded_key_slice.size() + value_slice.size(); + it->second.first += member_size; } else { - // If metadata not found, initialize with default ttl - zset_sizes[zset_key] = std::make_pair(encoded_key_slice.size() + value_slice.size(), -1); + // If metadata not found, initialize with member size and default ttl + zset_sizes[zset_key] = std::make_pair(zset_key.size() + 12 + member_size, -1); } } catch (...) { // Skip malformed keys @@ -592,17 +546,36 @@ void AnalyzeZsets(const std::string& path, std::vector& key_infos, cons // ZSetsScoreKey has the same structure as other data keys try { - storage::ParsedZSetsMemberKey parsed_key(encoded_key_slice); // We can use the same parser for score keys - std::string zset_key = parsed_key.key().ToString(); + // 这里是分数数据,需要计算额外的开销 + std::string zset_key; + std::string member; + + // 尝试解析,这里可能不是完全准确 + // 但我们至少要计算正确的大小 + try { + storage::ParsedZSetsMemberKey parsed_key(encoded_key_slice); + zset_key = parsed_key.key().ToString(); + member = parsed_key.member().ToString(); + } catch (...) { + // 如果上面的解析失败,使用通用方法提取 + size_t pos = encoded_key_slice.ToString().find_first_of('\0'); + if (pos != std::string::npos && pos > 0) { + zset_key = encoded_key_slice.ToString().substr(0, pos); + } else { + continue; // 跳过无法解析的键 + } + } - // Add score entry size to the corresponding zset + // 计算score entry大小:4 + key + 8 (score) + 4 + member + int64_t score_size = 4 + zset_key.size() + 8 + 4 + (member.empty() ? 8 : member.size()); + + // 添加分数条目大小到对应的zset auto it = zset_sizes.find(zset_key); if (it != zset_sizes.end()) { - it->second.first += encoded_key_slice.size() + value_slice.size(); - } - // No else case here because we should have seen the metadata or data entry first + it->second.first += score_size; + } } catch (...) { - // Skip malformed keys + // 跳过畸形键 continue; } } @@ -634,93 +607,68 @@ void AnalyzeLists(const std::string& path, std::vector& key_infos, cons std::cout << "Analyzing lists database at " << path << "..." << std::endl; - // Open database with column families - rocksdb::DBOptions db_options; - std::vector column_families; - column_families.emplace_back(rocksdb::kDefaultColumnFamilyName, rocksdb::ColumnFamilyOptions()); - column_families.emplace_back("data_cf", rocksdb::ColumnFamilyOptions()); - - std::vector handles; - rocksdb::DB* db; - rocksdb::Status status = rocksdb::DB::OpenForReadOnly(db_options, path, column_families, &handles, &db); + // 初始化存储选项 + storage::StorageOptions storage_options; + storage::Storage storage; + rocksdb::Status s = storage.Open(storage_options, path); - if (!status.ok()) { - std::cerr << "Error opening lists database: " << status.ToString() << std::endl; + if (!s.ok()) { + std::cerr << "Error opening lists database: " << s.ToString() << std::endl; return; } - - int64_t curtime; - db->GetEnv()->GetCurrentTime(&curtime).ok(); - - rocksdb::ReadOptions read_options; - - // Using an unordered_map to group list items by key - std::unordered_map> list_sizes; // key -> (size, ttl) - - // Read metadata from default column family (handles[0]) - auto meta_iter = db->NewIterator(read_options, handles[0]); - for (meta_iter->SeekToFirst(); meta_iter->Valid(); meta_iter->Next()) { - rocksdb::Slice key_slice = meta_iter->key(); - rocksdb::Slice value_slice = meta_iter->value(); - std::string key = key_slice.ToString(); - - int64_t ttl = -1; + + // 使用Scan API遍历所有list keys + std::string start_key; + const std::string pattern("*"); + const int64_t count = 1000; + const int64_t batch_count = 1000; + std::string next_key; + bool scan_finished = false; + + while (!scan_finished) { + std::vector keys; + s = storage.Scanx(storage::DataType::kLists, start_key, pattern, count, &keys, &next_key); + if (!s.ok()) { + std::cerr << "Error scanning lists: " << s.ToString() << std::endl; + break; + } - // Parse metadata value to get TTL - if (value_slice.size() >= storage::ParsedBaseMetaValue::kBaseMetaValueSuffixLength) { - storage::ParsedBaseMetaValue parsed_meta(value_slice); // Lists use BaseMetaValue too - int32_t timestamp = parsed_meta.timestamp(); - if (timestamp > 0) { - int64_t diff = timestamp - curtime; - ttl = diff > 0 ? diff : -1; - } + // 如果next_key为空,或者没有找到更多键,则结束扫描 + if (next_key.empty() || keys.empty()) { + scan_finished = true; } - // Initialize with metadata size - list_sizes[key] = std::make_pair(key_slice.size() + value_slice.size(), ttl); - } - delete meta_iter; - - // Read data items from data column family (handles[1]) - auto data_iter = db->NewIterator(read_options, handles[1]); - for (data_iter->SeekToFirst(); data_iter->Valid(); data_iter->Next()) { - rocksdb::Slice encoded_key_slice = data_iter->key(); - rocksdb::Slice value_slice = data_iter->value(); + start_key = next_key; - // Parse the data key to extract the list key - try { - storage::ParsedBaseDataKey parsed_key(encoded_key_slice); // Lists use BaseDataKey directly - std::string list_key = parsed_key.key().ToString(); + // 处理每个列表键 + for (const auto& key : keys) { + int64_t sum = 0; + sum = sum + key.size() + 12 + 16; // 基础元数据大小 - // Add item size to the corresponding list - auto it = list_sizes.find(list_key); - if (it != list_sizes.end()) { - it->second.first += encoded_key_slice.size() + value_slice.size(); - } else { - // If metadata not found, initialize with default ttl - list_sizes[list_key] = std::make_pair(encoded_key_slice.size() + value_slice.size(), -1); + // 获取list的所有元素,分批获取 + int64_t pos = 0; + std::vector list_items; + int64_t ttl = -1; + + s = storage.LRangeWithTTL(key, pos, pos + batch_count - 1, &list_items, &ttl); + while (s.ok() && !list_items.empty()) { + for (const auto& item : list_items) { + sum = sum + 4 + key.size() + 4 + 8 + item.size(); + } + + pos += batch_count; + list_items.clear(); + s = storage.LRange(key, pos, pos + batch_count - 1, &list_items); + } + + // 如果key大小超过阈值,添加到结果集 + if (sum >= config.min_size) { + std::string display_key = ReplaceAll(key, "\n", "\\n"); + display_key = ReplaceAll(display_key, " ", "\\x20"); + key_infos.emplace_back("list", display_key, sum, ttl); } - } catch (...) { - // Skip malformed keys - continue; - } - } - delete data_iter; - - // Add list keys to the result - for (const auto& entry : list_sizes) { - if (entry.second.first >= config.min_size) { - std::string display_key = ReplaceAll(entry.first, "\n", "\\n"); - display_key = ReplaceAll(display_key, " ", "\\x20"); - key_infos.emplace_back("list", display_key, entry.second.first, entry.second.second); } } - - // Cleanup - for (auto handle : handles) { - delete handle; - } - delete db; } // Get the prefix of a key From ef52284cef23264e4f4c7fbfcb3c494600adb342 Mon Sep 17 00:00:00 2001 From: chejinge Date: Thu, 25 Dec 2025 20:00:42 +0800 Subject: [PATCH 10/17] fix --- tools/bigkey_analyzer/bigkey_analyzer.cc | 264 +++++++++++++++-------- 1 file changed, 169 insertions(+), 95 deletions(-) diff --git a/tools/bigkey_analyzer/bigkey_analyzer.cc b/tools/bigkey_analyzer/bigkey_analyzer.cc index 377c0b054d..95f4bafb21 100644 --- a/tools/bigkey_analyzer/bigkey_analyzer.cc +++ b/tools/bigkey_analyzer/bigkey_analyzer.cc @@ -351,65 +351,104 @@ void AnalyzeSets(const std::string& path, std::vector& key_infos, const std::cout << "Analyzing sets database at " << path << "..." << std::endl; - // 初始化存储选项 - storage::StorageOptions storage_options; - storage::Storage storage; - rocksdb::Status s = storage.Open(storage_options, path); + // Open database with column families + rocksdb::DBOptions db_options; + std::vector column_families; + column_families.emplace_back(rocksdb::kDefaultColumnFamilyName, rocksdb::ColumnFamilyOptions()); + column_families.emplace_back("data_cf", rocksdb::ColumnFamilyOptions()); - if (!s.ok()) { - std::cerr << "Error opening sets database: " << s.ToString() << std::endl; + std::vector handles; + rocksdb::DB* db; + rocksdb::Status status = rocksdb::DB::OpenForReadOnly(db_options, path, column_families, &handles, &db); + + if (!status.ok()) { + std::cerr << "Error opening sets database: " << status.ToString() << std::endl; return; } - - // 使用Scan API遍历所有set keys - std::string start_key; - const std::string pattern("*"); - const int64_t count = 1000; - std::string next_key; - bool scan_finished = false; - - while (!scan_finished) { - std::vector keys; - s = storage.Scanx(storage::DataType::kSets, start_key, pattern, count, &keys, &next_key); - if (!s.ok()) { - std::cerr << "Error scanning sets: " << s.ToString() << std::endl; - break; - } - - // 如果next_key为空,或者没有找到更多键,则结束扫描 - if (next_key.empty() || keys.empty()) { - scan_finished = true; - } + + int64_t curtime; + db->GetEnv()->GetCurrentTime(&curtime).ok(); + + rocksdb::ReadOptions read_options; + + // Using an unordered_map to group set members by key + std::unordered_map> set_sizes; // key -> (size, ttl) + + // Read metadata from default column family (handles[0]) + auto meta_iter = db->NewIterator(read_options, handles[0]); + for (meta_iter->SeekToFirst(); meta_iter->Valid(); meta_iter->Next()) { + rocksdb::Slice key_slice = meta_iter->key(); + rocksdb::Slice value_slice = meta_iter->value(); + std::string key = key_slice.ToString(); - start_key = next_key; + int64_t ttl = -1; - // 处理每个集合键 - for (const auto& key : keys) { - int64_t sum = 0; - sum = sum + key.size() + 12; // 基础元数据大小 - - // 获取set的所有成员 - std::vector members; - int64_t ttl = -1; + // Parse metadata value to get TTL + if (value_slice.size() >= storage::ParsedBaseMetaValue::kBaseMetaValueSuffixLength) { + storage::ParsedSetsMetaValue parsed_meta(value_slice); - s = storage.SMembersWithTTL(key, &members, &ttl); - if (!s.ok()) { + // Skip stale or empty sets + if (parsed_meta.IsStale() || parsed_meta.count() == 0) { continue; } - // 计算每个成员的大小并加总 - for (const auto& member : members) { - sum = sum + 4 + key.size() + 4 + member.size(); + int32_t timestamp = parsed_meta.timestamp(); + if (timestamp > 0 && !parsed_meta.IsPermanentSurvival()) { + int64_t diff = timestamp - curtime; + ttl = diff > 0 ? diff : -2; } + } + + // Initialize with base metadata size (key + 12 bytes overhead) + int64_t sum = key.size() + 12; + set_sizes[key] = std::make_pair(sum, ttl); + } + delete meta_iter; + + // Read data members from data column family (handles[1]) + auto data_iter = db->NewIterator(read_options, handles[1]); + for (data_iter->SeekToFirst(); data_iter->Valid(); data_iter->Next()) { + rocksdb::Slice encoded_key_slice = data_iter->key(); + rocksdb::Slice value_slice = data_iter->value(); + + // Parse the data key to extract the set key and member + try { + storage::ParsedSetsMemberKey parsed_key(encoded_key_slice); + std::string set_key = parsed_key.key().ToString(); + std::string member = parsed_key.member().ToString(); - // 如果key大小超过阈值,添加到结果集 - if (sum >= config.min_size) { - std::string display_key = ReplaceAll(key, "\n", "\\n"); - display_key = ReplaceAll(display_key, " ", "\\x20"); - key_infos.emplace_back("set", display_key, sum, ttl); + // Calculate member size: 4 (size prefix) + key + 4 (size prefix) + member + int64_t member_size = 4 + set_key.size() + 4 + member.size(); + + // Add member size to the corresponding set + auto it = set_sizes.find(set_key); + if (it != set_sizes.end()) { + it->second.first += member_size; + } else { + // If metadata not found, initialize with member size and default ttl + set_sizes[set_key] = std::make_pair(set_key.size() + 12 + member_size, -1); } + } catch (...) { + // Skip malformed keys + continue; } } + delete data_iter; + + // Add set keys to the result + for (const auto& entry : set_sizes) { + if (entry.second.first >= config.min_size) { + std::string display_key = ReplaceAll(entry.first, "\n", "\\n"); + display_key = ReplaceAll(display_key, " ", "\\x20"); + key_infos.emplace_back("set", display_key, entry.second.first, entry.second.second); + } + } + + // Cleanup + for (auto handle : handles) { + delete handle; + } + delete db; } // Analyze zsets database @@ -607,68 +646,103 @@ void AnalyzeLists(const std::string& path, std::vector& key_infos, cons std::cout << "Analyzing lists database at " << path << "..." << std::endl; - // 初始化存储选项 - storage::StorageOptions storage_options; - storage::Storage storage; - rocksdb::Status s = storage.Open(storage_options, path); + // Open database with column families + rocksdb::DBOptions db_options; + std::vector column_families; + column_families.emplace_back(rocksdb::kDefaultColumnFamilyName, rocksdb::ColumnFamilyOptions()); + column_families.emplace_back("data_cf", rocksdb::ColumnFamilyOptions()); - if (!s.ok()) { - std::cerr << "Error opening lists database: " << s.ToString() << std::endl; + std::vector handles; + rocksdb::DB* db; + rocksdb::Status status = rocksdb::DB::OpenForReadOnly(db_options, path, column_families, &handles, &db); + + if (!status.ok()) { + std::cerr << "Error opening lists database: " << status.ToString() << std::endl; return; } - - // 使用Scan API遍历所有list keys - std::string start_key; - const std::string pattern("*"); - const int64_t count = 1000; - const int64_t batch_count = 1000; - std::string next_key; - bool scan_finished = false; - - while (!scan_finished) { - std::vector keys; - s = storage.Scanx(storage::DataType::kLists, start_key, pattern, count, &keys, &next_key); - if (!s.ok()) { - std::cerr << "Error scanning lists: " << s.ToString() << std::endl; - break; - } - - // 如果next_key为空,或者没有找到更多键,则结束扫描 - if (next_key.empty() || keys.empty()) { - scan_finished = true; - } + + int64_t curtime; + db->GetEnv()->GetCurrentTime(&curtime).ok(); + + rocksdb::ReadOptions read_options; + + // Using an unordered_map to group list items by key + std::unordered_map> list_sizes; // key -> (size, ttl) + + // Read metadata from default column family (handles[0]) + auto meta_iter = db->NewIterator(read_options, handles[0]); + for (meta_iter->SeekToFirst(); meta_iter->Valid(); meta_iter->Next()) { + rocksdb::Slice key_slice = meta_iter->key(); + rocksdb::Slice value_slice = meta_iter->value(); + std::string key = key_slice.ToString(); - start_key = next_key; + int64_t ttl = -1; - // 处理每个列表键 - for (const auto& key : keys) { - int64_t sum = 0; - sum = sum + key.size() + 12 + 16; // 基础元数据大小 + // Parse metadata value to get TTL + if (value_slice.size() >= storage::ParsedBaseMetaValue::kBaseMetaValueSuffixLength) { + storage::ParsedListsMetaValue parsed_meta(value_slice); - // 获取list的所有元素,分批获取 - int64_t pos = 0; - std::vector list_items; - int64_t ttl = -1; + // Skip stale or empty lists + if (parsed_meta.IsStale() || parsed_meta.count() == 0) { + continue; + } - s = storage.LRangeWithTTL(key, pos, pos + batch_count - 1, &list_items, &ttl); - while (s.ok() && !list_items.empty()) { - for (const auto& item : list_items) { - sum = sum + 4 + key.size() + 4 + 8 + item.size(); - } - - pos += batch_count; - list_items.clear(); - s = storage.LRange(key, pos, pos + batch_count - 1, &list_items); + int32_t timestamp = parsed_meta.timestamp(); + if (timestamp > 0 && !parsed_meta.IsPermanentSurvival()) { + int64_t diff = timestamp - curtime; + ttl = diff > 0 ? diff : -2; } + } + + // Initialize with base metadata size (key + 12 + 16 bytes overhead) + int64_t sum = key.size() + 12 + 16; + list_sizes[key] = std::make_pair(sum, ttl); + } + delete meta_iter; + + // Read data items from data column family (handles[1]) + auto data_iter = db->NewIterator(read_options, handles[1]); + for (data_iter->SeekToFirst(); data_iter->Valid(); data_iter->Next()) { + rocksdb::Slice encoded_key_slice = data_iter->key(); + rocksdb::Slice value_slice = data_iter->value(); + + // Parse the data key to extract the list key + try { + storage::ParsedBaseDataKey parsed_key(encoded_key_slice); // Lists use BaseDataKey directly + std::string list_key = parsed_key.key().ToString(); + + // Calculate element size: 4 + key + 4 + 8 (index) + element + int64_t element_size = 4 + list_key.size() + 4 + 8 + value_slice.size(); - // 如果key大小超过阈值,添加到结果集 - if (sum >= config.min_size) { - std::string display_key = ReplaceAll(key, "\n", "\\n"); - display_key = ReplaceAll(display_key, " ", "\\x20"); - key_infos.emplace_back("list", display_key, sum, ttl); + // Add element size to the corresponding list + auto it = list_sizes.find(list_key); + if (it != list_sizes.end()) { + it->second.first += element_size; + } else { + // If metadata not found, initialize with element size and default ttl + list_sizes[list_key] = std::make_pair(list_key.size() + 12 + 16 + element_size, -1); } + } catch (...) { + // Skip malformed keys + continue; } } + delete data_iter; + + // Add list keys to the result + for (const auto& entry : list_sizes) { + if (entry.second.first >= config.min_size) { + std::string display_key = ReplaceAll(entry.first, "\n", "\\n"); + display_key = ReplaceAll(display_key, " ", "\\x20"); + key_infos.emplace_back("list", display_key, entry.second.first, entry.second.second); + } + } + + // Cleanup + for (auto handle : handles) { + delete handle; + } + delete db; } // Get the prefix of a key From 1e6252c164156d7f44ef86196c50fae4372ec468 Mon Sep 17 00:00:00 2001 From: chejinge Date: Thu, 25 Dec 2025 20:04:54 +0800 Subject: [PATCH 11/17] fix --- tools/bigkey_analyzer/bigkey_analyzer.cc | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tools/bigkey_analyzer/bigkey_analyzer.cc b/tools/bigkey_analyzer/bigkey_analyzer.cc index 95f4bafb21..7c911ba90b 100644 --- a/tools/bigkey_analyzer/bigkey_analyzer.cc +++ b/tools/bigkey_analyzer/bigkey_analyzer.cc @@ -22,6 +22,7 @@ #include "rocksdb/status.h" #include "storage/src/base_data_key_format.h" #include "storage/src/base_meta_value_format.h" +#include "storage/src/lists_meta_value_format.h" // Utility function to check if a directory exists bool DirectoryExists(const std::string& path) { @@ -682,13 +683,13 @@ void AnalyzeLists(const std::string& path, std::vector& key_infos, cons if (value_slice.size() >= storage::ParsedBaseMetaValue::kBaseMetaValueSuffixLength) { storage::ParsedListsMetaValue parsed_meta(value_slice); - // Skip stale or empty lists - if (parsed_meta.IsStale() || parsed_meta.count() == 0) { + // Skip empty lists + if (parsed_meta.count() == 0) { continue; } int32_t timestamp = parsed_meta.timestamp(); - if (timestamp > 0 && !parsed_meta.IsPermanentSurvival()) { + if (timestamp > 0) { int64_t diff = timestamp - curtime; ttl = diff > 0 ? diff : -2; } From 8e03b119ab73ab3f01d894936ea36954fdd122f8 Mon Sep 17 00:00:00 2001 From: chejinge Date: Thu, 25 Dec 2025 20:18:10 +0800 Subject: [PATCH 12/17] fix --- tools/bigkey_analyzer/bigkey_analyzer.cc | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tools/bigkey_analyzer/bigkey_analyzer.cc b/tools/bigkey_analyzer/bigkey_analyzer.cc index 7c911ba90b..a6b83cd495 100644 --- a/tools/bigkey_analyzer/bigkey_analyzer.cc +++ b/tools/bigkey_analyzer/bigkey_analyzer.cc @@ -23,6 +23,7 @@ #include "storage/src/base_data_key_format.h" #include "storage/src/base_meta_value_format.h" #include "storage/src/lists_meta_value_format.h" +#include "storage/src/custom_comparator.h" // Utility function to check if a directory exists bool DirectoryExists(const std::string& path) { @@ -356,7 +357,7 @@ void AnalyzeSets(const std::string& path, std::vector& key_infos, const rocksdb::DBOptions db_options; std::vector column_families; column_families.emplace_back(rocksdb::kDefaultColumnFamilyName, rocksdb::ColumnFamilyOptions()); - column_families.emplace_back("data_cf", rocksdb::ColumnFamilyOptions()); + column_families.emplace_back("member_cf", rocksdb::ColumnFamilyOptions()); std::vector handles; rocksdb::DB* db; @@ -651,7 +652,10 @@ void AnalyzeLists(const std::string& path, std::vector& key_infos, cons rocksdb::DBOptions db_options; std::vector column_families; column_families.emplace_back(rocksdb::kDefaultColumnFamilyName, rocksdb::ColumnFamilyOptions()); - column_families.emplace_back("data_cf", rocksdb::ColumnFamilyOptions()); + + rocksdb::ColumnFamilyOptions data_cf_ops; + data_cf_ops.comparator = storage::ListsDataKeyComparator(); + column_families.emplace_back("data_cf", data_cf_ops); std::vector handles; rocksdb::DB* db; From 1733c24419d0f2fa885cd90d335b6dd4b699fd78 Mon Sep 17 00:00:00 2001 From: chejinge Date: Thu, 25 Dec 2025 20:22:03 +0800 Subject: [PATCH 13/17] fix --- tools/bigkey_analyzer/bigkey_analyzer.cc | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/tools/bigkey_analyzer/bigkey_analyzer.cc b/tools/bigkey_analyzer/bigkey_analyzer.cc index a6b83cd495..93e21ee5d3 100644 --- a/tools/bigkey_analyzer/bigkey_analyzer.cc +++ b/tools/bigkey_analyzer/bigkey_analyzer.cc @@ -13,7 +13,7 @@ #include #include -#include "storage/storage.h" +#include "storage/include/storage/storage.h" #include "rocksdb/options.h" #include "rocksdb/db.h" #include "rocksdb/env.h" @@ -652,10 +652,7 @@ void AnalyzeLists(const std::string& path, std::vector& key_infos, cons rocksdb::DBOptions db_options; std::vector column_families; column_families.emplace_back(rocksdb::kDefaultColumnFamilyName, rocksdb::ColumnFamilyOptions()); - - rocksdb::ColumnFamilyOptions data_cf_ops; - data_cf_ops.comparator = storage::ListsDataKeyComparator(); - column_families.emplace_back("data_cf", data_cf_ops); + column_families.emplace_back("data_cf", rocksdb::ColumnFamilyOptions()); std::vector handles; rocksdb::DB* db; From e3c1133592f2cec8907da59c7127674dcdced6a3 Mon Sep 17 00:00:00 2001 From: chejinge Date: Thu, 25 Dec 2025 20:26:46 +0800 Subject: [PATCH 14/17] fix --- tools/bigkey_analyzer/bigkey_analyzer.cc | 25 ++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/tools/bigkey_analyzer/bigkey_analyzer.cc b/tools/bigkey_analyzer/bigkey_analyzer.cc index 93e21ee5d3..84832ea5c3 100644 --- a/tools/bigkey_analyzer/bigkey_analyzer.cc +++ b/tools/bigkey_analyzer/bigkey_analyzer.cc @@ -25,6 +25,17 @@ #include "storage/src/lists_meta_value_format.h" #include "storage/src/custom_comparator.h" +// Comparator instances +const rocksdb::Comparator* ListsDataKeyComparator() { + static storage::ListsDataKeyComparatorImpl ldkc; + return &ldkc; +} + +const rocksdb::Comparator* ZSetsScoreKeyComparator() { + static storage::ZSetsScoreKeyComparatorImpl zskc; + return &zskc; +} + // Utility function to check if a directory exists bool DirectoryExists(const std::string& path) { struct stat st; @@ -478,7 +489,13 @@ void AnalyzeZsets(const std::string& path, std::vector& key_infos, cons // 添加所有列族到描述符 for (const auto& cf_name : column_families) { - cf_descriptors.emplace_back(cf_name, rocksdb::ColumnFamilyOptions()); + if (cf_name == "score_cf") { + rocksdb::ColumnFamilyOptions score_cf_ops; + score_cf_ops.comparator = ZSetsScoreKeyComparator(); + cf_descriptors.emplace_back(cf_name, score_cf_ops); + } else { + cf_descriptors.emplace_back(cf_name, rocksdb::ColumnFamilyOptions()); + } } std::vector handles; @@ -652,7 +669,11 @@ void AnalyzeLists(const std::string& path, std::vector& key_infos, cons rocksdb::DBOptions db_options; std::vector column_families; column_families.emplace_back(rocksdb::kDefaultColumnFamilyName, rocksdb::ColumnFamilyOptions()); - column_families.emplace_back("data_cf", rocksdb::ColumnFamilyOptions()); + + // 使用自定义比较器 + rocksdb::ColumnFamilyOptions data_cf_ops; + data_cf_ops.comparator = ListsDataKeyComparator(); + column_families.emplace_back("data_cf", data_cf_ops); std::vector handles; rocksdb::DB* db; From 1a7a69dc54889a000b9ed2c4866755b0f6383546 Mon Sep 17 00:00:00 2001 From: chejinge Date: Thu, 25 Dec 2025 20:29:39 +0800 Subject: [PATCH 15/17] fix --- tools/bigkey_analyzer/CMakeLists.txt | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/tools/bigkey_analyzer/CMakeLists.txt b/tools/bigkey_analyzer/CMakeLists.txt index 424dfb004b..08721376d1 100644 --- a/tools/bigkey_analyzer/CMakeLists.txt +++ b/tools/bigkey_analyzer/CMakeLists.txt @@ -38,13 +38,10 @@ target_link_libraries(bigkey_analyzer ${JEMALLOC_LIBRARY} ) -# Add dependencies -add_dependencies(bigkey_analyzer - rocksdb - glog - gflags - storage -) +# Add dependencies (only if these targets exist in the parent CMake) +if(TARGET storage) + add_dependencies(bigkey_analyzer storage) +endif() # Installation install(TARGETS bigkey_analyzer From 7e1c52e02cb8c5d2ecc019964cf0da88e6019ef1 Mon Sep 17 00:00:00 2001 From: chejinge Date: Thu, 25 Dec 2025 20:48:19 +0800 Subject: [PATCH 16/17] fix --- tools/bigkey_analyzer/bigkey_analyzer.cc | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/tools/bigkey_analyzer/bigkey_analyzer.cc b/tools/bigkey_analyzer/bigkey_analyzer.cc index 84832ea5c3..bcd2c5106e 100644 --- a/tools/bigkey_analyzer/bigkey_analyzer.cc +++ b/tools/bigkey_analyzer/bigkey_analyzer.cc @@ -13,16 +13,25 @@ #include #include -#include "storage/include/storage/storage.h" +// RocksDB headers #include "rocksdb/options.h" #include "rocksdb/db.h" #include "rocksdb/env.h" #include "rocksdb/iterator.h" #include "rocksdb/slice.h" #include "rocksdb/status.h" +#include "rocksdb/comparator.h" + +// Storage headers - include storage.h first to get Slice typedef +#include "storage/include/storage/storage.h" + +// Storage format headers +#include "storage/src/coding.h" #include "storage/src/base_data_key_format.h" #include "storage/src/base_meta_value_format.h" #include "storage/src/lists_meta_value_format.h" +#include "storage/src/lists_data_key_format.h" +#include "storage/src/zsets_data_key_format.h" #include "storage/src/custom_comparator.h" // Comparator instances From ed23685881bfbcbf57150ce33b705e42a6dbd6cd Mon Sep 17 00:00:00 2001 From: chejinge Date: Wed, 31 Dec 2025 09:59:30 +0800 Subject: [PATCH 17/17] fix --- .github/workflows/clean-cache.yml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/.github/workflows/clean-cache.yml b/.github/workflows/clean-cache.yml index 698bb5cdf1..61b660885f 100644 --- a/.github/workflows/clean-cache.yml +++ b/.github/workflows/clean-cache.yml @@ -4,6 +4,10 @@ on: types: - closed +permissions: + actions: write + contents: read + jobs: cleanup: runs-on: ubuntu-latest @@ -18,4 +22,4 @@ jobs: env: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} REPO: ${{ github.repository }} - BRANCH: refs/pull/${{ github.event.pull_request.number }}/merge \ No newline at end of file + BRANCH: refs/pull/${{ github.event.pull_request.number }}/merge