Skip to content

Commit 4b65ebb

Browse files
feat:Big key new (#3209)
* feat:add bigkey --------- Co-authored-by: chejinge <[email protected]>
1 parent 8afe87f commit 4b65ebb

6 files changed

Lines changed: 1135 additions & 14 deletions

File tree

.github/workflows/clean-cache.yml

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,10 @@ on:
44
types:
55
- closed
66

7+
permissions:
8+
actions: write
9+
contents: read
10+
711
jobs:
812
cleanup:
913
runs-on: ubuntu-latest
@@ -18,4 +22,4 @@ jobs:
1822
env:
1923
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
2024
REPO: ${{ github.repository }}
21-
BRANCH: refs/pull/${{ github.event.pull_request.number }}/merge
25+
BRANCH: refs/pull/${{ github.event.pull_request.number }}/merge

src/pstd/src/env.cc

Lines changed: 39 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -194,29 +194,56 @@ uint64_t Du(const std::string& path) {
194194
if (!filesystem::exists(path)) {
195195
return 0;
196196
}
197-
if (filesystem::is_symlink(path)) {
198-
filesystem::path symlink_path = filesystem::read_symlink(path);
199-
sum = Du(symlink_path);
200-
} else if (filesystem::is_directory(path)) {
197+
198+
std::error_code ec;
199+
200+
if (filesystem::is_symlink(path, ec) && !ec) {
201+
filesystem::path symlink_path = filesystem::read_symlink(path, ec);
202+
if (!ec) {
203+
sum = Du(symlink_path.string());
204+
}
205+
} else if (filesystem::is_directory(path, ec) && !ec) {
201206
for (const auto& entry : filesystem::directory_iterator(path)) {
202-
if (entry.is_symlink()) {
203-
sum += Du(filesystem::read_symlink(entry.path()));
204-
} else if (entry.is_directory()) {
205-
sum += Du(entry.path());
206-
} else if (entry.is_regular_file()) {
207-
sum += entry.file_size();
207+
auto st = entry.symlink_status(ec);
208+
if (ec) {
209+
ec.clear();
210+
continue;
208211
}
212+
213+
if (filesystem::is_symlink(st)) {
214+
auto p = filesystem::read_symlink(entry.path(), ec);
215+
if (!ec) {
216+
sum += Du(p.string());
217+
} else {
218+
ec.clear();
219+
}
220+
} else if (filesystem::is_directory(st)) {
221+
sum += Du(entry.path().string());
222+
} else if (filesystem::is_regular_file(st)) {
223+
auto sz = filesystem::file_size(entry.path(), ec);
224+
if (!ec) {
225+
sum += sz;
226+
} else {
227+
ec.clear();
228+
}
229+
}
230+
}
231+
} else if (filesystem::is_regular_file(path, ec) && !ec) {
232+
auto sz = filesystem::file_size(path, ec);
233+
if (!ec) {
234+
sum = sz;
209235
}
210-
} else if (filesystem::is_regular_file(path)) {
211-
sum = filesystem::file_size(path);
212236
}
213237
} catch (const filesystem::filesystem_error& ex) {
214238
LOG(WARNING) << "Error accessing path: " << ex.what();
239+
} catch (const std::exception& ex) {
240+
LOG(WARNING) << "Error accessing path: " << ex.what();
215241
}
216242

217243
return sum;
218244
}
219245

246+
220247
uint64_t NowMicros() {
221248
auto now = std::chrono::system_clock::now();
222249
return std::chrono::duration_cast<std::chrono::microseconds>(now.time_since_epoch()).count();

tools/CMakeLists.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
11
add_subdirectory(./aof_to_pika)
22
add_subdirectory(./benchmark_client)
3+
add_subdirectory(./bigkey_analyzer)
34
add_subdirectory(./binlog_sender)
45
add_subdirectory(./manifest_generator)
56
add_subdirectory(./rdb_to_pika)
67
add_subdirectory(./pika_to_txt)
78
add_subdirectory(./txt_to_pika)
8-
add_subdirectory(./pika-port/pika_port_3)
9+
add_subdirectory(./pika-port/pika_port_3)
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
cmake_minimum_required(VERSION 3.18)
2+
3+
project(bigkey_analyzer)
4+
5+
set(CMAKE_CXX_STANDARD 17)
6+
7+
# Add the bigkey_analyzer executable
8+
add_executable(bigkey_analyzer
9+
bigkey_analyzer.cc
10+
)
11+
12+
# Include directories
13+
target_include_directories(bigkey_analyzer PRIVATE
14+
${PROJECT_SOURCE_DIR}/../../include
15+
${PROJECT_SOURCE_DIR}/../../src
16+
${ROCKSDB_INCLUDE_DIR}
17+
${INSTALL_INCLUDEDIR}
18+
)
19+
20+
# Link directories
21+
target_link_directories(bigkey_analyzer PRIVATE
22+
${INSTALL_LIBDIR_64}
23+
${INSTALL_LIBDIR}
24+
)
25+
26+
# Link libraries
27+
target_link_libraries(bigkey_analyzer
28+
storage
29+
${ROCKSDB_LIBRARY}
30+
${GLOG_LIBRARY}
31+
${LIB_GFLAGS}
32+
${LIB_FMT}
33+
libsnappy.a
34+
libzstd.a
35+
liblz4.a
36+
libz.a
37+
${LIBUNWIND_LIBRARY}
38+
${JEMALLOC_LIBRARY}
39+
)
40+
41+
# Add dependencies (only if these targets exist in the parent CMake)
42+
if(TARGET storage)
43+
add_dependencies(bigkey_analyzer storage)
44+
endif()
45+
46+
# Installation
47+
install(TARGETS bigkey_analyzer
48+
RUNTIME DESTINATION bin
49+
)

tools/bigkey_analyzer/README.md

Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,122 @@
1+
# BigKey Analyzer
2+
3+
BigKey Analyzer 是一个离线分析工具,用于检测和分析 PikiwiDB 数据库中的大键(Big Keys)。该工具直接读取 RocksDB 数据文件,无需启动数据库服务,可以帮助运维人员识别可能影响系统性能的大键。
4+
5+
## 功能特性
6+
7+
- 直接读取 RocksDB 数据文件,无需服务端运行
8+
- 支持所有 PikiwiDB 数据类型:strings, hashes, sets, zsets, lists
9+
- 按键大小排序显示结果
10+
- 支持设置最小键大小阈值,只显示超过阈值的键
11+
- 支持限制显示结果数量(Top N)
12+
- 按前缀分析键分布情况
13+
- 可输出到文件
14+
15+
## 编译
16+
17+
```bash
18+
# 确保在 PikiwiDB 根目录下
19+
mkdir -p build && cd build
20+
21+
# 带上工具编译参数
22+
cmake .. -DUSE_PIKA_TOOLS=ON
23+
make bigkey_analyzer
24+
```
25+
26+
## 使用方法
27+
28+
```bash
29+
./bigkey_analyzer [OPTIONS] <db_path>
30+
```
31+
32+
### 选项
33+
34+
- `--min-size=SIZE`:只显示大于 SIZE 字节的键
35+
- `--top=N`:只显示前 N 个最大的键
36+
- `--prefix-stat`:显示按前缀分组的统计信息
37+
- `--prefix-delimiter=C`:设置前缀分隔符(默认为 ":")
38+
- `--type=TYPE`:只分析指定类型,可选值:strings|hashes|lists|sets|zsets|all
39+
- `--output=FILE`:输出结果到文件
40+
- `--help`:显示帮助信息
41+
42+
### 示例
43+
44+
分析数据库中所有键:
45+
```bash
46+
./bigkey_analyzer /path/to/db_directory
47+
```
48+
49+
只显示大于 1MB 的键:
50+
```bash
51+
./bigkey_analyzer --min-size=1048576 /path/to/db_directory
52+
```
53+
54+
只显示前 10 个最大的键:
55+
```bash
56+
./bigkey_analyzer --top=10 /path/to/db_directory
57+
```
58+
59+
按前缀统计键分布:
60+
```bash
61+
./bigkey_analyzer --prefix-stat /path/to/db_directory
62+
```
63+
64+
使用自定义前缀分隔符:
65+
```bash
66+
./bigkey_analyzer --prefix-stat --prefix-delimiter="." /path/to/db_directory
67+
```
68+
69+
只分析字符串类型:
70+
```bash
71+
./bigkey_analyzer --type=strings /path/to/db_directory
72+
```
73+
74+
输出到文件:
75+
```bash
76+
./bigkey_analyzer --output=result.txt /path/to/db_directory
77+
```
78+
79+
## 输出格式
80+
81+
基本输出格式:
82+
```
83+
Type Size Key TTL
84+
```
85+
86+
- `Type`:键类型(string, hash, list, set, zset)
87+
- `Size`:键占用的总字节数(包括元数据)
88+
- `Key`:键名称
89+
- `TTL`:剩余生存时间(秒),-1 表示无过期时间
90+
91+
前缀统计输出格式:
92+
```
93+
Prefix Count TotalSize AvgSize
94+
```
95+
96+
- `Prefix`:键前缀
97+
- `Count`:该前缀下的键数量
98+
- `TotalSize`:该前缀下所有键的总大小(字节)
99+
- `AvgSize`:该前缀下键的平均大小(字节)
100+
101+
## 提示
102+
103+
1. 对于非常大的数据库,建议先使用 `--min-size` 设置一个较大的阈值(单位为字节,如 1MB 即 `--min-size=1048576`)来过滤小键。
104+
2. 使用 `--prefix-stat` 可以帮助识别特定前缀下的键分布情况,有助于发现问题模块。
105+
3. 大键可能导致性能问题,可以考虑以下解决方案:
106+
- 拆分大的 hash, set, zset 为多个小的
107+
- 使用适当的过期策略
108+
- 使用压缩算法减小值的大小
109+
110+
## 常见问题
111+
112+
1. **"Error: Database directory does not exist"**
113+
- 确保提供了正确的数据库路径
114+
- 数据库路径通常包含 strings, hashes, sets, zsets, lists 子目录
115+
116+
2. **"Error opening X database"**
117+
- 确保数据库文件未被其他进程锁定(例如数据库服务正在运行并占用该目录时)
118+
- 检查是否有足够的文件访问权限
119+
120+
3. **显示的键大小与内存使用不匹配**
121+
- 此工具计算的是键在存储引擎中的总大小,包括元数据
122+
- 实际内存使用可能因内存分配和 RocksDB 缓存策略而有所不同

0 commit comments

Comments
 (0)