Skip to content

Commit 8103862

Browse files
committed
feat: 添加渐进式 compact (incremental-compact)
- 新增 compaction-strategy: incremental-compact - 使用 CompactFiles 每次处理少量最老的 SST 文件 - 可配置单次处理文件数、执行时间、压缩率阈值 - 与 dump 硬连接方案分离,独立测试
1 parent f2970d4 commit 8103862

17 files changed

Lines changed: 1240 additions & 5 deletions

.claude/settings.local.json

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
{
2+
"permissions": {
3+
"allow": [
4+
"Bash(claude doctor:*)",
5+
"WebFetch(domain:github.com)",
6+
"Bash(ls -la:*)",
7+
"Bash(git stash push:*)",
8+
"Bash(git fetch:*)",
9+
"Bash(git checkout:*)",
10+
"Bash(git stash pop:*)",
11+
"Bash(git add:*)"
12+
]
13+
}
14+
}

.gitignore

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,3 +74,7 @@ pkg
7474
!codis/cmd/fe/assets/**
7575

7676
tests/tmp
77+
/.claude
78+
CLAUDE.md
79+
/.claude
80+
CLAUDE.md

check_delayed_cleanup.sh

Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
#!/bin/bash
# Analyze a log and list the files that were scheduled for delayed cleanup
# but have no matching "deleted" record.
#
# Usage: check_delayed_cleanup.sh [LOG_FILE]
#   LOG_FILE  log to analyze (default: /app/pika4/pika-9454/log/pika.INFO)
#
# Exit status: 0 when the analysis completed, 1 when LOG_FILE is unreadable
# or the scratch directory could not be created.

# Scratch directory of the run in progress (empty when none exists).
WORK_DIR=""

# Remove the scratch directory, if any. Also installed as the EXIT trap so an
# early exit does not leave the intermediate lists behind.
cleanup() {
  if [ -n "$WORK_DIR" ]; then
    rm -rf -- "$WORK_DIR"
    WORK_DIR=""
  fi
}
trap cleanup EXIT

# extract_paths MARKER PREFIX LOG
# Print the sorted, de-duplicated absolute paths found on the lines of LOG
# that contain MARKER. Everything up to and including PREFIX is stripped and
# the first whitespace-delimited word of the remainder is taken as the path.
# PREFIX is spliced into a sed expression, so it must be free of regex
# metacharacters (both callers pass fixed literals).
extract_paths() {
  local marker=$1 prefix=$2 log=$3
  grep -F -- "$marker" "$log" 2>/dev/null \
    | sed "s/.*${prefix}//" \
    | awk '{print $1}' \
    | grep '^/' \
    | LC_ALL=C sort -u
}

# count_lines FILE
# Print the number of lines in FILE without the padding some wc builds add.
count_lines() {
  wc -l < "$1" 2>/dev/null | tr -d ' '
}

# stat_field FILE GNU_FORMAT BSD_FORMAT
# Print one stat field, trying the GNU syntax first and the BSD syntax second.
stat_field() {
  stat -c "$2" "$1" 2>/dev/null || stat -f "$3" "$1" 2>/dev/null
}

# format_size BYTES
# Print BYTES in IEC units when numfmt is available and accepts the value,
# otherwise as "<BYTES> bytes".
format_size() {
  local bytes=$1 pretty
  if command -v numfmt >/dev/null 2>&1 \
    && pretty=$(numfmt --to=iec-i --suffix=B "$bytes" 2>/dev/null); then
    printf '%s\n' "$pretty"
  else
    printf '%s bytes\n' "$bytes"
  fi
}

# report_missing_file LOG PATH
# Describe one file that was scheduled but never logged as deleted: whether
# it still exists on disk and, if so, its size, hard-link count and the
# first field of the last log line that scheduled it.
report_missing_file() {
  local log=$1 filepath=$2
  local size nlink scheduled_time

  if [ ! -f "$filepath" ]; then
    echo "  [已消失] $filepath (可能已被其他方式删除)"
    return 0
  fi

  size=$(stat_field "$filepath" %s %z)
  nlink=$(stat_field "$filepath" %h %l)

  echo "  [仍存在] $filepath"
  echo "    大小: $(format_size "$size"), 硬链接数: $nlink"

  # Fixed-string matching: the path must not be interpreted as a regex,
  # otherwise "." in a file name would match unrelated paths.
  scheduled_time=$(grep -F -- "Scheduled file for delayed cleanup" "$log" \
    | grep -F -- "$filepath" \
    | tail -1 \
    | awk '{print $1}')
  if [ -n "$scheduled_time" ]; then
    echo "    调度时间: $scheduled_time"
  fi
  echo ""
}

# main [LOG_FILE]
# Run the whole analysis and print the report on stdout.
main() {
  local log_file="${1:-/app/pika4/pika-9454/log/pika.INFO}"
  local scheduled_list deleted_list missing_list
  local scheduled_count deleted_count missing_count filepath

  echo "=========================================="
  echo "分析延迟清理队列执行情况"
  echo "日志文件: $log_file"
  echo "=========================================="

  # Fail loudly instead of reporting "nothing found" for a log that could
  # not even be opened.
  if [ ! -r "$log_file" ]; then
    echo "错误: 无法读取日志文件: $log_file" >&2
    return 1
  fi

  # Private scratch directory instead of fixed names under /tmp.
  WORK_DIR=$(mktemp -d) || {
    echo "错误: 无法创建临时目录" >&2
    return 1
  }
  scheduled_list="$WORK_DIR/scheduled_files.txt"
  deleted_list="$WORK_DIR/deleted_files.txt"
  missing_list="$WORK_DIR/missing_files.txt"

  # Step 1: every file that was scheduled for delayed deletion.
  echo ""
  echo "步骤 1: 提取所有被调度的文件..."
  extract_paths "Scheduled file for delayed cleanup" "cleanup: " \
    "$log_file" > "$scheduled_list"
  scheduled_count=$(count_lines "$scheduled_list")
  echo "  被调度的文件数: $scheduled_count"

  # Step 2: every file whose deletion was actually logged.
  echo ""
  echo "步骤 2: 提取所有实际删除的文件..."
  extract_paths "Deleted delayed cleanup file" \
    "Deleted delayed cleanup file: " "$log_file" > "$deleted_list"
  deleted_count=$(count_lines "$deleted_list")
  echo "  实际删除的文件数: $deleted_count"

  # Nothing relevant in the log at all: stop here.
  if [ "$scheduled_count" -eq 0 ] && [ "$deleted_count" -eq 0 ]; then
    echo ""
    echo "未找到任何延迟清理相关日志"
    cleanup
    return 0
  fi

  # Step 3: scheduled minus deleted. comm needs both inputs sorted with the
  # collation it uses itself, hence LC_ALL=C here and in extract_paths.
  echo ""
  echo "步骤 3: 找出被调度但未删除的文件..."
  LC_ALL=C comm -23 "$scheduled_list" "$deleted_list" > "$missing_list"
  missing_count=$(count_lines "$missing_list")
  echo "  被调度但未删除的文件数: $missing_count"

  if [ "$missing_count" -gt 0 ]; then
    echo ""
    echo "=========================================="
    echo "被调度但未删除的文件列表:"
    echo "=========================================="

    while IFS= read -r filepath; do
      [ -n "$filepath" ] || continue
      report_missing_file "$log_file" "$filepath"
    done < "$missing_list"
  fi

  cleanup

  echo ""
  echo "=========================================="
  echo "总结:"
  echo "  调度文件: $scheduled_count"
  echo "  删除文件: $deleted_count"
  echo "  未删除文件: $missing_count"
  if [ "$missing_count" -gt 0 ]; then
    echo ""
    echo "⚠️  发现 $missing_count 个文件被调度但未删除!"
    echo "   可能原因:"
    echo "   1. 延迟时间未到 (600秒)"
    echo "   2. 文件在删除时 nlink != 1 (不再是孤儿文件)"
    echo "   3. ProcessPendingCleanupFiles 未执行或执行失败"
  fi
  echo "=========================================="
  return 0
}

main "$@"

check_orphan_files.sh

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
#!/bin/bash
# List the orphan SST files (hard-link count 1) under every dump-*
# sub-directory of a dump directory, then print how many there are and their
# combined size.
#
# Usage: check_orphan_files.sh [DUMP_DIR]
#   DUMP_DIR  directory holding the dump-* sub-directories
#             (default: /app/pika4/pika-9454/dump)

# stat_field FILE GNU_FORMAT BSD_FORMAT
# Print one stat field, trying the GNU syntax first and the BSD syntax second.
stat_field() {
  stat -c "$2" "$1" 2>/dev/null || stat -f "$3" "$1" 2>/dev/null
}

# format_size BYTES
# Print BYTES in IEC units when numfmt is available and accepts the value,
# otherwise as "<BYTES> bytes".
format_size() {
  local bytes=$1 pretty
  if command -v numfmt >/dev/null 2>&1 \
    && pretty=$(numfmt --to=iec-i --suffix=B "$bytes" 2>/dev/null); then
    printf '%s\n' "$pretty"
  else
    printf '%s bytes\n' "$bytes"
  fi
}

# main [DUMP_DIR]
# Scan DUMP_DIR once, printing each orphan file as it is found and the
# totals at the end.
main() {
  local dump_dir="${1:-/app/pika4/pika-9454/dump}"
  local orphan_files=0 total_orphan_size=0
  local dump_subdir file nlink size

  echo "=========================================="
  echo "扫描 Dump 目录下的孤儿文件 (nlink=1)"
  echo "目录: $dump_dir"
  echo "=========================================="

  for dump_subdir in "$dump_dir"/dump-*/; do
    # An unmatched glob stays literal; the -d test filters that case out.
    [ -d "$dump_subdir" ] || continue

    echo ""
    echo "检查目录: $dump_subdir"
    echo "----------------------------------------"

    # Feed the loop through process substitution rather than a pipe so the
    # counters are updated in this shell, and use NUL-delimited names so
    # unusual file names survive intact.
    while IFS= read -r -d '' file; do
      nlink=$(stat_field "$file" %h %l)
      # String comparison: an empty value from a failed stat is simply
      # "not an orphan" instead of a test error.
      [ "$nlink" = "1" ] || continue

      size=$(stat_field "$file" %s %z)
      # Keep the arithmetic below safe if stat returned nothing usable.
      case "$size" in
        '' | *[!0-9]*) size=0 ;;
      esac

      orphan_files=$((orphan_files + 1))
      total_orphan_size=$((total_orphan_size + size))
      echo "[孤儿文件] $file (大小: $(format_size "$size"))"
    done < <(find "$dump_subdir" -name "*.sst" -type f -print0 2>/dev/null)
  done

  echo ""
  echo "=========================================="
  echo "正在统计总数..."
  echo "=========================================="

  echo "统计结果:"
  echo "  孤儿文件数: $orphan_files"
  # The total is always a number here (0 when nothing was found), so numfmt
  # is never invoked without an operand and cannot fall back to reading
  # stdin.
  if command -v numfmt >/dev/null 2>&1; then
    echo "  孤儿文件总大小: $(numfmt --to=iec-i --suffix=B "$total_orphan_size" 2>/dev/null || echo "${total_orphan_size}bytes")"
  else
    echo "  孤儿文件总大小: $total_orphan_size bytes"
  fi

  echo "=========================================="
}

main "$@"

conf/pika.conf

Lines changed: 26 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -677,8 +677,9 @@ wash-data: true
677677

678678
# Pika automatic compaction strategy, a complement to rocksdb compact.
679679
# Trigger the compact background task periodically according to `compact-interval`
680-
# Can choose `full-compact` or `obd-compact`.
680+
# Can choose `full-compact`, `obd-compact` or `incremental-compact`.
681681
# obd-compact https://github.com/OpenAtomFoundation/pika/issues/2255
682+
# incremental-compact: incremental compact using CompactFiles, processes a small number of oldest SST files each time
682683
compaction-strategy : obd-compact
683684

684685
# For OBD_Compact
@@ -704,7 +705,28 @@ force-compact-min-delete-ratio : 10
704705
# compact every `compact-every-num-of-files` file.
705706
dont-compact-sst-created-in-seconds : 20
706707

707-
# For OBD_Compact
708-
# According to the number of sst files in rocksdb,
708+
# For OBD_Compact
709+
# According to the number of sst files in rocksdb,
709710
# compact every `compact-every-num-of-files` file.
710-
best-delete-min-ratio : 10
711+
best-delete-min-ratio : 10
712+
713+
# ============================================
714+
# For incremental-compact (when compaction-strategy = incremental-compact)
715+
# ============================================
716+
# Execution interval in seconds
717+
incremental-compact-interval : 60
718+
719+
# Maximum number of files to compact per run
720+
incremental-compact-max-files : 1
721+
722+
# Maximum execution time per run in milliseconds
723+
incremental-compact-max-time-ms : 1000
724+
725+
# Compression rate threshold (%), continue processing if rate is below this value
726+
incremental-compact-min-rate : 70
727+
728+
# Target level for compact files (-1 means current level + 1)
729+
incremental-compact-target-level : -1
730+
731+
# Minimum file age in seconds to be considered for compaction
732+
incremental-compact-min-file-age : 60

0 commit comments

Comments
 (0)