Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion .github/workflows/clean-cache.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@ on:
types:
- closed

permissions:
actions: write
contents: read

jobs:
cleanup:
runs-on: ubuntu-latest
Expand All @@ -18,4 +22,4 @@ jobs:
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
REPO: ${{ github.repository }}
BRANCH: refs/pull/${{ github.event.pull_request.number }}/merge
BRANCH: refs/pull/${{ github.event.pull_request.number }}/merge
51 changes: 39 additions & 12 deletions src/pstd/src/env.cc
Original file line number Diff line number Diff line change
Expand Up @@ -194,29 +194,56 @@ uint64_t Du(const std::string& path) {
if (!filesystem::exists(path)) {
return 0;
}
if (filesystem::is_symlink(path)) {
filesystem::path symlink_path = filesystem::read_symlink(path);
sum = Du(symlink_path);
} else if (filesystem::is_directory(path)) {

std::error_code ec;

if (filesystem::is_symlink(path, ec) && !ec) {
filesystem::path symlink_path = filesystem::read_symlink(path, ec);
if (!ec) {
sum = Du(symlink_path.string());
}
} else if (filesystem::is_directory(path, ec) && !ec) {
for (const auto& entry : filesystem::directory_iterator(path)) {
if (entry.is_symlink()) {
sum += Du(filesystem::read_symlink(entry.path()));
} else if (entry.is_directory()) {
sum += Du(entry.path());
} else if (entry.is_regular_file()) {
sum += entry.file_size();
auto st = entry.symlink_status(ec);
if (ec) {
ec.clear();
continue;
}

if (filesystem::is_symlink(st)) {
auto p = filesystem::read_symlink(entry.path(), ec);
if (!ec) {
sum += Du(p.string());
} else {
ec.clear();
}
} else if (filesystem::is_directory(st)) {
sum += Du(entry.path().string());
} else if (filesystem::is_regular_file(st)) {
auto sz = filesystem::file_size(entry.path(), ec);
if (!ec) {
sum += sz;
} else {
ec.clear();
}
}
}
} else if (filesystem::is_regular_file(path, ec) && !ec) {
auto sz = filesystem::file_size(path, ec);
if (!ec) {
sum = sz;
}
} else if (filesystem::is_regular_file(path)) {
sum = filesystem::file_size(path);
}
} catch (const filesystem::filesystem_error& ex) {
LOG(WARNING) << "Error accessing path: " << ex.what();
} catch (const std::exception& ex) {
LOG(WARNING) << "Error accessing path: " << ex.what();
}

return sum;
}


uint64_t NowMicros() {
auto now = std::chrono::system_clock::now();
return std::chrono::duration_cast<std::chrono::microseconds>(now.time_since_epoch()).count();
Expand Down
3 changes: 2 additions & 1 deletion tools/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
# Build every standalone Pika tool. Each entry must appear exactly once:
# add_subdirectory() errors out if the same source directory is added twice.
add_subdirectory(./aof_to_pika)
add_subdirectory(./benchmark_client)
add_subdirectory(./bigkey_analyzer)
add_subdirectory(./binlog_sender)
add_subdirectory(./manifest_generator)
add_subdirectory(./rdb_to_pika)
add_subdirectory(./pika_to_txt)
add_subdirectory(./txt_to_pika)
add_subdirectory(./pika-port/pika_port_3)
49 changes: 49 additions & 0 deletions tools/bigkey_analyzer/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
cmake_minimum_required(VERSION 3.18)

project(bigkey_analyzer)

# Require C++17 to match the rest of the codebase. Without
# CMAKE_CXX_STANDARD_REQUIRED, CMake silently falls back to an older
# standard on compilers that lack C++17 support; fail configure instead.
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

# Add the bigkey_analyzer executable
add_executable(bigkey_analyzer
  bigkey_analyzer.cc
)

# Include directories: project headers plus the RocksDB headers and the
# install tree populated by the top-level build.
target_include_directories(bigkey_analyzer PRIVATE
  ${PROJECT_SOURCE_DIR}/../../include
  ${PROJECT_SOURCE_DIR}/../../src
  ${ROCKSDB_INCLUDE_DIR}
  ${INSTALL_INCLUDEDIR}
)

# Link directories (both lib and lib64 layouts of the install tree).
target_link_directories(bigkey_analyzer PRIVATE
  ${INSTALL_LIBDIR_64}
  ${INSTALL_LIBDIR}
)

# Link libraries. Compression libraries are linked as static archives,
# matching how the parent build ships them.
target_link_libraries(bigkey_analyzer
  storage
  ${ROCKSDB_LIBRARY}
  ${GLOG_LIBRARY}
  ${LIB_GFLAGS}
  ${LIB_FMT}
  libsnappy.a
  libzstd.a
  liblz4.a
  libz.a
  ${LIBUNWIND_LIBRARY}
  ${JEMALLOC_LIBRARY}
)

# Add dependencies (only if these targets exist in the parent CMake),
# so this file also works when configured standalone.
if(TARGET storage)
  add_dependencies(bigkey_analyzer storage)
endif()

# Installation
install(TARGETS bigkey_analyzer
  RUNTIME DESTINATION bin
)
122 changes: 122 additions & 0 deletions tools/bigkey_analyzer/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
# BigKey Analyzer

BigKey Analyzer 是一个离线分析工具,用于检测和分析 PikiwiDB 数据库中的大键(Big Keys)。该工具直接读取 RocksDB 数据文件,无需启动数据库服务,可以帮助运维人员识别可能影响系统性能的大键。

## 功能特性

- 直接读取 RocksDB 数据文件,无需服务端运行
- 支持所有 PikiwiDB 数据类型:strings, hashes, sets, zsets, lists
- 按键大小排序显示结果
- 支持设置最小键大小阈值,只显示超过阈值的键
- 支持限制显示结果数量(Top N)
- 按前缀分析键分布情况
- 可输出到文件

## 编译

```bash
# 确保在 PikiwiDB 根目录下
mkdir -p build && cd build

# 带上工具编译参数
cmake .. -DUSE_PIKA_TOOLS=ON
make bigkey_analyzer
```

## 使用方法

```bash
./bigkey_analyzer [OPTIONS] <db_path>
```

### 选项

- `--min-size=SIZE`:只显示大于 SIZE 字节的键
- `--top=N`:只显示前 N 个最大的键
- `--prefix-stat`:显示按前缀分组的统计信息
- `--prefix-delimiter=C`:设置前缀分隔符(默认为 ":")
- `--type=TYPE`:只分析指定类型,可选值:strings|hashes|lists|sets|zsets|all
- `--output=FILE`:输出结果到文件
- `--help`:显示帮助信息

### 示例

分析数据库中所有键:
```bash
./bigkey_analyzer /path/to/db_directory
```

只显示大于 1MB 的键:
```bash
./bigkey_analyzer --min-size=1048576 /path/to/db_directory
```

只显示前 10 个最大的键:
```bash
./bigkey_analyzer --top=10 /path/to/db_directory
```

按前缀统计键分布:
```bash
./bigkey_analyzer --prefix-stat /path/to/db_directory
```

使用自定义前缀分隔符:
```bash
./bigkey_analyzer --prefix-stat --prefix-delimiter="." /path/to/db_directory
```

只分析字符串类型:
```bash
./bigkey_analyzer --type=strings /path/to/db_directory
```

输出到文件:
```bash
./bigkey_analyzer --output=result.txt /path/to/db_directory
```

## 输出格式

基本输出格式:
```
Type Size Key TTL
```

- `Type`:键类型(string, hash, list, set, zset)
- `Size`:键占用的总字节数(包括元数据)
- `Key`:键名称
- `TTL`:剩余生存时间(秒),-1 表示无过期时间

前缀统计输出格式:
```
Prefix Count TotalSize AvgSize
```

- `Prefix`:键前缀
- `Count`:该前缀下的键数量
- `TotalSize`:该前缀下所有键的总大小(字节)
- `AvgSize`:该前缀下键的平均大小(字节)

## 提示

1. 对于非常大的数据库,建议先使用 `--min-size` 设置一个较大的阈值(如 1MB)来过滤小键。
2. 使用 `--prefix-stat` 可以帮助识别特定前缀下的键分布情况,有助于发现问题模块。
3. 大键可能导致性能问题,可以考虑以下解决方案:
- 拆分大的 hash, set, zset 为多个小的
- 使用适当的过期策略
- 使用压缩算法减小值的大小

## 常见问题

1. **"Error: Database directory does not exist"**
- 确保提供了正确的数据库路径
- 数据库路径通常包含 strings, hashes, sets, zsets, lists 子目录

2. **"Error opening X database"**
- 确保数据库文件未被锁定(如数据库正在运行)
- 检查是否有足够的文件访问权限

3. **显示的键大小与内存使用不匹配**
- 此工具计算的是键在存储引擎中的总大小,包括元数据
- 实际内存使用可能因内存分配和 RocksDB 缓存策略而有所不同
Loading
Loading