Skip to content

Commit 6970887

Browse files
author
wuxianrong
committed
The data backup and recovery functions have been added
1 parent 3e19283 commit 6970887

27 files changed

Lines changed: 2218 additions & 35 deletions

.github/workflows/pika.yml

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -12,11 +12,12 @@ env:
1212
ARTIFACT_PIKA_NAME: artifact-pika
1313

1414
jobs:
15+
1516
build_on_ubuntu:
1617
# The CMake configure and build commands are platform-agnostic and should work equally well on Windows or Mac.
1718
# You can convert this to a matrix build if you need cross-platform coverage.
1819
# See: https://docs.github.com/en/free-pro-team@latest/actions/learn-github-actions/managing-complex-workflows#using-a-build-matrix
19-
runs-on: ubuntu-latest
20+
runs-on: ubuntu-22.04
2021

2122
steps:
2223
- name: Free Disk Space (Ubuntu Host)
@@ -41,15 +42,21 @@ jobs:
4142
- name: Install Deps
4243
run: |
4344
sudo apt-get update
44-
sudo apt-get install -y autoconf libprotobuf-dev protobuf-compiler clang-tidy
45+
sudo apt-get install -y autoconf libprotobuf-dev protobuf-compiler clang-tidy gcc-9 g++-9
4546
4647
- name: Configure CMake
4748
# Configure CMake in a 'build' subdirectory. `CMAKE_BUILD_TYPE` is only required if you are using a single-configuration generator such as make.
4849
# See https://cmake.org/cmake/help/latest/variable/CMAKE_BUILD_TYPE.html?highlight=cmake_build_type
50+
env:
51+
CC: gcc-9
52+
CXX: g++-9
4953
run: cmake -B build -DCMAKE_BUILD_TYPE=${{ env.BUILD_TYPE }} -DUSE_PIKA_TOOLS=ON -DCMAKE_CXX_FLAGS_DEBUG=-fsanitize=address -D CMAKE_C_COMPILER_LAUNCHER=ccache -D CMAKE_CXX_COMPILER_LAUNCHER=ccache
5054

5155
- name: Build
5256
# Build your program with the given configuration
57+
env:
58+
CC: gcc-9
59+
CXX: g++-9
5360
run: cmake --build build --config ${{ env.BUILD_TYPE }}
5461

5562
- name: Cleanup
@@ -90,7 +97,7 @@ jobs:
9097
sh integrate_test.sh
9198
9299
build_on_rocky:
93-
runs-on: ubuntu-latest
100+
runs-on: ubuntu-22.04
94101
container:
95102
image: rockylinux:9
96103

@@ -166,7 +173,7 @@ jobs:
166173
- name: Install deps
167174
run: |
168175
dnf update -y
169-
dnf install -y bash cmake wget git autoconf gcc perl-Digest-SHA tcl which tar g++ tar epel-release gcc-c++ libstdc++-devel gcc-toolset-13 binutils
176+
# Rocky/RHEL ship the OpenSSL development headers as `openssl-devel`; `libssl-dev` is the Debian/Ubuntu name and cannot be resolved by dnf.
dnf install -y bash cmake wget git autoconf gcc perl-Digest-SHA tcl which tar g++ tar epel-release gcc-c++ libstdc++-devel gcc-toolset-9 binutils openssl-devel
170177
dnf clean all
171178
rm -rf /var/cache/dnf
172179
@@ -182,7 +189,7 @@ jobs:
182189

183190
- name: Configure CMake
184191
run: |
185-
source /opt/rh/gcc-toolset-13/enable
192+
source /opt/rh/gcc-toolset-9/enable
186193
cmake -B build -DCMAKE_BUILD_TYPE=${{ env.BUILD_TYPE }} -DUSE_PIKA_TOOLS=ON -DCMAKE_CXX_FLAGS_DEBUG=-fsanitize=address .
187194
188195
- uses: actions/cache@v3
@@ -197,7 +204,7 @@ jobs:
197204

198205
- name: Build
199206
run: |
200-
source /opt/rh/gcc-toolset-13/enable
207+
source /opt/rh/gcc-toolset-9/enable
201208
cmake --build build --config ${{ env.BUILD_TYPE }}
202209
203210
- name: Cleanup
@@ -272,6 +279,7 @@ jobs:
272279
chmod +x integrate_test.sh
273280
sh integrate_test.sh
274281
282+
275283
build_on_macos:
276284

277285
runs-on: macos-14

CMakeLists.txt

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,18 @@ set(INSTALL_LIBDIR ${STAGED_INSTALL_PREFIX}/lib)
8989
set(INSTALL_LIBDIR_64 ${STAGED_INSTALL_PREFIX}/lib64)
9090
set(CMAKE_PREFIX_PATH ${CMAKE_PREFIX_PATH} ${STAGED_INSTALL_PREFIX})
9191

92+
# Fix __const__ macro conflict with GCC 13+/glibc 2.38+ in existing brpc headers
93+
# This must run during CMake configuration to fix cached/existing headers
94+
if(EXISTS "${INSTALL_INCLUDEDIR}/butil/errno.h")
95+
file(READ "${INSTALL_INCLUDEDIR}/butil/errno.h" ERRNO_H_CONTENT)
96+
string(FIND "${ERRNO_H_CONTENT}" "#ifndef __const__" HAS_CONST_MACRO)
97+
if(NOT HAS_CONST_MACRO EQUAL -1)
98+
message(STATUS "Patching existing brpc errno.h for GCC 13+ compatibility")
99+
string(REPLACE "#ifndef __const__" "#if 0 /* disabled for GCC 13+ compatibility */" ERRNO_H_CONTENT "${ERRNO_H_CONTENT}")
100+
file(WRITE "${INSTALL_INCLUDEDIR}/butil/errno.h" "${ERRNO_H_CONTENT}")
101+
endif()
102+
endif()
103+
92104
execute_process(COMMAND sh ${CMAKE_UTILS_DIR}/Get_OS_Version.sh
93105
OUTPUT_VARIABLE OS_VERSION)
94106

@@ -664,6 +676,12 @@ ExternalProject_Add(brpc
664676
1
665677
UPDATE_COMMAND
666678
""
679+
# Patch to fix __const__ macro conflict with GCC 13+/glibc 2.38+
680+
# The issue is that brpc defines __const__ as __unused__ which conflicts with
681+
# glibc's __glibc_has_attribute(__const__) macro in sys/cdefs.h
682+
# We replace '#ifndef __const__' with '#if 0' to disable the problematic macro
683+
PATCH_COMMAND
684+
sh -c "sed -i.bak 's/#ifndef __const__/#if 0 /' src/butil/errno.h && cat src/butil/errno.h | head -35"
667685
LOG_CONFIGURE
668686
1
669687
LOG_BUILD
@@ -677,16 +695,29 @@ ExternalProject_Add(brpc
677695
-DCMAKE_INSTALL_PREFIX=${STAGED_INSTALL_PREFIX}
678696
-DCMAKE_BUILD_TYPE=${LIB_BUILD_TYPE}
679697
-DCMAKE_PREFIX_PATH=${CMAKE_PREFIX_PATH}
698+
-DCMAKE_FIND_ROOT_PATH=${STAGED_INSTALL_PREFIX}
699+
-DCMAKE_INCLUDE_PATH=${INSTALL_INCLUDEDIR}
700+
-DCMAKE_LIBRARY_PATH=${INSTALL_LIBDIR}
680701
-DWITH_GLOG=ON
681702
-DWITH_SNAPPY=ON
682703
-DBUILD_SHARED_LIBS=OFF
683704
-DDOWNLOAD_GTEST=OFF
705+
-DCMAKE_EXE_LINKER_FLAGS=-L${INSTALL_LIBDIR}\ -L${INSTALL_LIBDIR_64}
684706
BUILD_ALWAYS
685707
1
686708
BUILD_COMMAND
687709
make -j${CPU_CORE}
688710
)
689711

712+
# Fix the installed header file after brpc installation
713+
# This is needed because braft uses the installed headers, not the patched source
714+
# The __const__ macro in butil/errno.h conflicts with glibc 2.38+ / GCC 13+
715+
ExternalProject_Add_Step(brpc fix_installed_headers
716+
COMMAND sh -c "sed -i.bak 's/#ifndef __const__/#if 0 /' ${INSTALL_INCLUDEDIR}/butil/errno.h && cat ${INSTALL_INCLUDEDIR}/butil/errno.h | head -35"
717+
DEPENDEES install
718+
COMMENT "Patching installed brpc headers for GCC 13+ compatibility"
719+
)
720+
690721
if(${OS_VERSION} MATCHES "Rocky" OR ${OS_VERSION} MATCHES "CentOS")
691722
set(BRPC_LIBRARY ${INSTALL_LIBDIR_64}/libbrpc.a)
692723
else()

ci/Dockerfile

Lines changed: 35 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,43 @@
1-
FROM ubuntu:22.04
1+
FROM ubuntu:20.04
22

3+
ENV DEBIAN_FRONTEND=noninteractive
4+
ENV TZ=Asia/Shanghai
5+
6+
# 设置 TMPDIR 到工作目录,避免 /tmp 空间不足
7+
ENV TMPDIR=/work/tmp
8+
RUN mkdir -p $TMPDIR
9+
10+
# 安装基础工具 + gcc-9 + protobuf + cmake
311
RUN apt-get update && apt-get install -y \
12+
build-essential \
13+
gcc-9 g++-9 \
14+
git \
15+
autoconf \
16+
libssl-dev \
17+
libprotobuf-dev \
18+
libgflags-dev \
19+
protobuf-compiler \
20+
curl \
421
ca-certificates \
5-
rsync && \
6-
apt-get clean && \
7-
rm -rf /var/lib/apt/lists /var/cache/apt/archives
8-
9-
ENV PIKA=/pika \
10-
PATH=${PIKA}:${PIKA}/bin:${PATH}
22+
python3 \
23+
&& rm -rf /var/lib/apt/lists/*
1124

12-
WORKDIR ${PIKA}
25+
# 安装新版 CMake >= 3.18
26+
RUN curl -fsSL https://github.com/Kitware/CMake/releases/download/v3.27.9/cmake-3.27.9-linux-x86_64.tar.gz \
27+
| tar -xz -C /opt && \
28+
ln -s /opt/cmake-3.27.9-linux-x86_64/bin/cmake /usr/local/bin/cmake
1329

14-
COPY artifact/pika ${PIKA}/bin/pika
15-
COPY entrypoint.sh /entrypoint.sh
16-
COPY conf/pika.conf ${PIKA}/conf/pika.conf
30+
# Go 1.19
31+
RUN curl -fsSL https://go.dev/dl/go1.19.linux-amd64.tar.gz | tar -C /usr/local -xz
32+
ENV PATH=/usr/local/go/bin:$PATH
1733

18-
ENTRYPOINT ["/entrypoint.sh"]
34+
ENV CC=gcc-9
35+
ENV CXX=g++-9
1936

20-
EXPOSE 9221
37+
WORKDIR /work
38+
COPY . /work
2139

22-
CMD ["/pika/bin/pika", "-c", "/pika/conf/pika.conf"]
40+
# 构建 Pika + braft
41+
RUN mkdir -p build && cd build && \
42+
cmake .. -DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_FLAGS="-g0" && \
43+
make -j2

docs/raft_implementation_review.md

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
# Pika Raft 模式实现 Review 文档
2+
3+
## 1. 概述
4+
5+
Pika 的 Raft 模式基于 [braft](https://github.com/baidu/braft) 实现分布式一致性,使用 brpc 进行节点间通信。
6+
7+
**核心特性**
8+
- 强一致性保证
9+
- 自动 Leader 选举
10+
- 基于 RocksDB Checkpoint 的快照机制
11+
12+
## 2. 架构概览
13+
14+
```
15+
┌─────────────────────────────────────────────────────┐
16+
│ Pika Server │
17+
├─────────────────────────────────────────────────────┤
18+
│ Client Request → RaftManager → PikaRaftNode │
19+
│ ↓ │
20+
│ braft::Node │
21+
│ ↓ │
22+
│ PikaStateMachine │
23+
│ ↓ │
24+
│ Storage::OnBinlogWrite() │
25+
│ ↓ │
26+
│ RocksDB │
27+
└─────────────────────────────────────────────────────┘
28+
```
29+
30+
## 3. 核心组件
31+
32+
| 组件 | 文件 | 职责 |
33+
|------|------|------|
34+
| `RaftManager` | [praft.h](../src/praft/include/praft/praft.h) | 管理多个 Raft 节点(每个 DB 一个) |
35+
| `PikaRaftNode` | [praft.h](../src/praft/include/praft/praft.h) | 封装 braft::Node,提供日志追加接口 |
36+
| `PikaStateMachine` | [praft.cc](../src/praft/src/praft.cc) | 实现状态机,处理日志应用和快照 |
37+
| `PPosixFileSystemAdaptor` | [psnapshot.cc](../src/praft/src/psnapshot.cc) | 快照文件系统适配器 |
38+
39+
## 4. 数据流
40+
41+
### 写入流程
42+
1. 客户端发送写命令
43+
2. Storage 层构建 Binlog(Protobuf 格式)
44+
3. 通过 `PikaRaftNode::AppendLog()` 提交到 Raft
45+
4. braft 复制日志到多数节点
46+
5. 提交后 `PikaStateMachine::on_apply()` 被调用
47+
6. 调用 `Storage::OnBinlogWrite()` 写入 RocksDB
48+
49+
### Binlog 格式
50+
51+
定义在 [binlog.proto](../src/praft/src/binlog.proto)
52+
- 支持数据类型:Strings, Hashes, Lists, Sets, ZSets, Streams
53+
- 操作类型:Put, Delete
54+
55+
## 5. 快照机制
56+
57+
- 使用 RocksDB Checkpoint 创建快照
58+
- 快照恢复点基于 `GetSmallestFlushedLogIndex()`
59+
- 通过 `LogIndexOfColumnFamilies` 追踪各 CF 的日志应用进度
60+
61+
## 6. 配置选项
62+
63+
| 配置项 | 默认值 | 说明 |
64+
|--------|--------|------|
65+
| `raft_enabled` | false | 是否启用 Raft |
66+
| `raft_group_id` | "" | Raft Group ID |
67+
| `raft_election_timeout_ms` | 1000 | 选举超时(ms) |
68+
| `raft_snapshot_interval_s` | 3600 | 快照间隔(s) |
69+
70+
## 7. Review 重点
71+
72+
### 正确性
73+
- **日志幂等性**: `IsApplied()` 检查是否已应用,防止重复应用
74+
- **快照一致性**: 使用最小 flushed_log_index 确定快照点
75+
- **WAL 禁用**: Raft 日志提供持久性保证,RocksDB WAL 被禁用
76+
77+
### 性能
78+
- 同步写入等待 Raft 提交(可优化为批量提交)
79+
- Binlog 使用 Protobuf 序列化
80+
81+
### 线程安全
82+
- `LogIndexOfColumnFamilies` 使用 mutex 保护
83+
- braft 保证 `on_apply()` 顺序调用
84+
85+
## 8. 关键代码位置
86+
87+
| 功能 | 文件 |
88+
|------|------|
89+
| 日志应用 | [praft.cc](../src/praft/src/praft.cc) - `on_apply()` |
90+
| Binlog 处理 | [storage.cc](../src/storage/src/storage.cc) - `OnBinlogWrite()` |
91+
| Log Index 追踪 | [log_index.cc](../src/storage/src/log_index.cc) |
92+
| 快照创建 | [psnapshot.cc](../src/praft/src/psnapshot.cc) |
93+
| Raft 命令 | [pika_raft.cc](../src/pika_raft.cc) |

include/pika_db.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,8 @@ class DB : public std::enable_shared_from_this<DB>, public pstd::noncopyable {
9494
std::shared_ptr<storage::Storage> storage() const;
9595
void GetBgSaveMetaData(std::vector<std::string>* fileNames, std::string* snapshot_uuid);
9696
void BgSaveDB();
97+
pstd::Status CreateCheckpoint(const std::string& checkpoint_dir);
98+
pstd::Status LoadDBFromCheckpoint(const std::string& checkpoint_dir);
9799
void SetBinlogIoError();
98100
void SetBinlogIoErrorrelieve();
99101
bool IsBinlogIoError();

include/pika_server.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,8 @@ enum TaskType {
7171
kCompactRangeSets,
7272
kCompactRangeZSets,
7373
kCompactRangeList,
74+
kLoadDBFromCheckpoint,
75+
kCreateCheckpoint,
7476
};
7577

7678
struct TaskArg {

src/pika_db.cc

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,75 @@ void DB::BgSaveDB() {
6767
g_pika_server->BGSaveTaskSchedule(&DoBgSave, static_cast<void*>(bg_task_arg));
6868
}
6969

70+
// Creates a checkpoint of this DB's storage under "<checkpoint_dir>/<db_name_>".
//
// @param checkpoint_dir  Parent directory; a trailing '/' is optional. The per-DB
//                        sub-directory is created (mode 0755) when it does not exist.
// @return Status::OK() when every checkpoint task succeeded;
//         Status::IOError when the sub-directory cannot be created;
//         Status::Corruption when storage is unavailable or any task failed
//         (the message carries the first failing task's status).
//
// dbs_rw_ is held in shared mode for the whole storage operation so that
// LoadDBFromCheckpoint(), which takes it exclusively, cannot swap storage_ underneath us.
pstd::Status DB::CreateCheckpoint(const std::string& checkpoint_dir) {
  std::string checkpoint_sub_path = checkpoint_dir;
  if (!checkpoint_sub_path.empty() && checkpoint_sub_path.back() != '/') {
    checkpoint_sub_path.push_back('/');
  }
  checkpoint_sub_path += db_name_;

  if (!pstd::FileExists(checkpoint_sub_path)) {
    if (pstd::CreatePath(checkpoint_sub_path, 0755) != 0) {
      return Status::IOError("Failed to create checkpoint path", checkpoint_sub_path);
    }
  }

  std::shared_lock guard(dbs_rw_);

  // A failed LoadDBFromCheckpoint() leaves storage_ null; report that instead of crashing.
  if (!storage_) {
    return Status::Corruption("Create checkpoint failed: storage is not available");
  }

  auto tasks = storage_->CreateCheckpoint(checkpoint_sub_path);

  // Wait for every task before returning so that no checkpoint work is still in flight
  // once the lock is released; only the first failure is reported.
  bool failed = false;
  std::string first_error;
  for (auto& task : tasks) {
    auto status = task.get();
    if (!status.ok() && !failed) {
      failed = true;
      first_error = status.ToString();
    }
  }
  if (failed) {
    return Status::Corruption("Create checkpoint failed: " + first_error);
  }
  return Status::OK();
}
93+
94+
// Replaces this DB's storage with the contents of "<checkpoint_dir>/<db_name_>".
//
// @param checkpoint_dir  Parent directory that holds the per-DB checkpoint; a trailing
//                        '/' is optional.
// @return Status::OK() on success;
//         Status::NotFound when the per-DB checkpoint directory does not exist;
//         Status::Corruption when loading the checkpoint or re-opening storage fails.
//
// Takes dbs_rw_ exclusively for the whole operation. The current storage is closed
// before the checkpoint is loaded into db_path_, so on any failure after that point the
// DB is left closed: opened_ stays false and storage_ is null.
pstd::Status DB::LoadDBFromCheckpoint(const std::string& checkpoint_dir) {
  std::string checkpoint_sub_path = checkpoint_dir;
  if (!checkpoint_sub_path.empty() && checkpoint_sub_path.back() != '/') {
    checkpoint_sub_path.push_back('/');
  }
  checkpoint_sub_path += db_name_;

  if (!pstd::FileExists(checkpoint_sub_path)) {
    return Status::NotFound("Checkpoint dir does not exist: " + checkpoint_sub_path);
  }

  std::lock_guard<std::shared_mutex> guard(dbs_rw_);
  opened_ = false;

  // Detach and close the current storage before its files under db_path_ are replaced.
  auto old_storage = storage_;
  storage_.reset();
  if (old_storage) {
    old_storage->Close();
  }

  storage_ = std::make_shared<storage::Storage>();
  auto checkpoint_tasks = storage_->LoadCheckpoint(checkpoint_sub_path, db_path_);

  // Drain every task before touching storage_ again: resetting storage_ on the first
  // failure would destroy the Storage object while the remaining load tasks may still
  // be running against it. Only the first failure is reported.
  bool load_failed = false;
  std::string first_error;
  for (auto& task : checkpoint_tasks) {
    auto status = task.get();
    if (!status.ok() && !load_failed) {
      load_failed = true;
      first_error = status.ToString();
    }
  }
  if (load_failed) {
    storage_.reset();
    return Status::Corruption("Load checkpoint failed: " + first_error);
  }

  storage::StorageOptions storage_options = g_pika_server->storage_options();
  auto open_status = storage_->Open(storage_options, db_path_);
  if (!open_status.ok()) {
    storage_.reset();
    return Status::Corruption("Storage open failed: " + open_status.ToString());
  }

  // The DisableWal(false) call is applied only when Raft is not enabled.
  if (!g_pika_conf->raft_enabled()) {
    storage_->DisableWal(false);
  }

  opened_ = true;
  return Status::OK();
}
138+
70139
void DB::SetBinlogIoError() { return binlog_io_error_.store(true); }
71140
void DB::SetBinlogIoErrorrelieve() { return binlog_io_error_.store(false); }
72141
bool DB::IsBinlogIoError() { return binlog_io_error_.load(); }

0 commit comments

Comments
 (0)