33// LICENSE file in the root directory of this source tree. An additional grant
44// of patent rights can be found in the PATENTS file in the same directory.
55
6+ #include < sys/stat.h>
67#include < fstream>
78#include < utility>
89
@@ -300,8 +301,9 @@ bool DB::RunBgsaveEngine() {
300301 LOG (INFO) << db_name_ << " bgsave_info: path=" << info.path << " , filenum=" << info.offset .b_offset .filenum
301302 << " , offset=" << info.offset .b_offset .offset ;
302303
303- // Backup to tmp dir
304- rocksdb::Status s = bgsave_engine_->CreateNewBackup (info.path );
304+ // Use SetBackupContentAndCreate to minimize time window between GetLiveFiles and CreateCheckpoint
305+ // This reduces the chance of compaction occurring and creating orphan files
306+ rocksdb::Status s = bgsave_engine_->SetBackupContentAndCreate (info.path );
305307
306308 if (!s.ok ()) {
307309 LOG (WARNING) << db_name_ << " create new backup failed :" << s.ToString ();
@@ -324,29 +326,61 @@ void DB::FinishBgsave() {
324326}
325327
326328// Prepare engine, need bgsave_protector protect
329+ // Scheme A: Each slave has exclusive dump, so we need unique dump directories
327330bool DB::InitBgsaveEnv () {
328331 std::lock_guard l (bgsave_protector_);
329332 // Prepare for bgsave dir
330333 bgsave_info_.start_time = time (nullptr );
331334 char s_time[32 ];
332335 int len = static_cast <int32_t >(strftime (s_time, sizeof (s_time), " %Y%m%d%H%M%S" , localtime (&bgsave_info_.start_time )));
333336 bgsave_info_.s_start_time .assign (s_time, len);
334- std::string time_sub_path = g_pika_conf->bgsave_prefix () + std::string (s_time, 8 );
335- bgsave_info_.path = g_pika_conf->bgsave_path () + time_sub_path + " /" + bgsave_sub_path_;
336- if (!pstd::DeleteDirIfExist (bgsave_info_.path )) {
337- LOG (WARNING) << db_name_ << " remove exist bgsave dir failed" ;
337+
338+ // Scheme A: Use unique directory name with sequence number
339+ // Format: dump-YYYYMMDD-NN/db_name where NN is sequence number
340+ std::string base_path = g_pika_conf->bgsave_path ();
341+ std::string date_str (s_time, 8 );
342+ std::string prefix = g_pika_conf->bgsave_prefix () + date_str;
343+
344+ // Find first available sequence number
345+ int seq = 0 ;
346+ std::string time_sub_path;
347+ std::string full_path;
348+ do {
349+ time_sub_path = prefix + " -" + std::to_string (seq);
350+ full_path = base_path + time_sub_path + " /" + bgsave_sub_path_;
351+ seq++;
352+ } while (pstd::FileExists (full_path) && seq < 1000 ); // Max 1000 dumps per day
353+
354+ if (seq >= 1000 ) {
355+ LOG (ERROR) << db_name_ << " too many dump directories for today" ;
338356 return false ;
339357 }
340- pstd::CreatePath (bgsave_info_.path , 0755 );
341- // Prepare for failed dir
342- if (!pstd::DeleteDirIfExist (bgsave_info_.path + " _FAILED" )) {
343- LOG (WARNING) << db_name_ << " remove exist fail bgsave dir failed :" ;
358+
359+ bgsave_info_.path = full_path;
360+ LOG (INFO) << db_name_ << " preparing bgsave dir: " << bgsave_info_.path ;
361+
362+ // Note: In Scheme A, we don't delete existing directories
363+ // because other slaves may be using them
364+ // Just create the new path
365+ if (!PikaServer::EnsureDirExists (bgsave_info_.path , 0755 )) {
366+ LOG (WARNING) << db_name_ << " create bgsave dir failed: " << bgsave_info_.path
367+ << " , errno=" << errno << " , error=" << strerror (errno);
368+ // Clear the path on failure to avoid using invalid path in GetDumpMeta
369+ bgsave_info_.path .clear ();
344370 return false ;
345371 }
372+
373+ // Prepare for failed dir
374+ std::string failed_dir = bgsave_info_.path + " _FAILED" ;
375+ if (pstd::FileExists (failed_dir)) {
376+ pstd::DeleteDirIfExist (failed_dir);
377+ }
346378 return true ;
347379}
348380
349381// Prepare bgsave env, need bgsave_protector protect
382+ // Note: SetBackupContent is now done in RunBgsaveEngine using SetBackupContentAndCreate
383+ // to minimize time window between GetLiveFiles and CreateCheckpoint
350384bool DB::InitBgsaveEngine () {
351385 bgsave_engine_.reset ();
352386 rocksdb::Status s = storage::BackupEngine::Open (storage ().get (), bgsave_engine_, g_pika_conf->db_instance_num ());
@@ -371,11 +405,7 @@ bool DB::InitBgsaveEngine() {
371405 std::lock_guard l (bgsave_protector_);
372406 bgsave_info_.offset = bgsave_offset;
373407 }
374- s = bgsave_engine_->SetBackupContent ();
375- if (!s.ok ()) {
376- LOG (WARNING) << db_name_ << " set backup content failed " << s.ToString ();
377- return false ;
378- }
408+ // SetBackupContent is now done in RunBgsaveEngine to minimize time window
379409 }
380410 return true ;
381411}
@@ -390,25 +420,73 @@ void DB::Init() {
390420
391421void DB::GetBgSaveMetaData (std::vector<std::string>* fileNames, std::string* snapshot_uuid) {
392422 const std::string dbPath = bgsave_info ().path ;
423+ size_t total_sst_files = 0 ;
424+ size_t orphan_sst_files = 0 ;
425+
426+ LOG (INFO) << " [GetBgSaveMetaData] Starting scan, dbPath=" << dbPath;
427+
428+ // dbPath is already the specific DB path (e.g., .../dump/dump-9454-20260302/db0)
429+ // We need to scan its subdirectories (0, 1, 2 for rocksdb instances)
430+ std::vector<std::string> subDirs;
431+ int ret = pstd::GetChildren (dbPath, subDirs);
432+ LOG (INFO) << " [GetBgSaveMetaData] GetChildren for dbPath returned " << ret
433+ << " , subDirs count=" << subDirs.size ();
434+ if (ret) {
435+ LOG (WARNING) << " [GetBgSaveMetaData] Failed to read dbPath: " << dbPath;
436+ return ;
437+ }
393438
394- int db_instance_num = g_pika_conf->db_instance_num ();
395- for (int index = 0 ; index < db_instance_num; index++) {
396- std::string instPath = dbPath + ((dbPath.back () != ' /' ) ? " /" : " " ) + std::to_string (index);
397- if (!pstd::FileExists (instPath)) {
398- continue ;
439+ for (const std::string& subDir : subDirs) {
440+ std::string instPath = dbPath + " /" + subDir;
441+ // Skip if not exists or is a file (not directory)
442+ // Note: IsDir returns 0 for directory, 1 for file, -1 for error
443+ if (!pstd::FileExists (instPath) || pstd::IsDir (instPath) != 0 ) {
444+ continue ;
399445 }
400446
401447 std::vector<std::string> tmpFileNames;
402- int ret = pstd::GetChildren (instPath, tmpFileNames);
448+ ret = pstd::GetChildren (instPath, tmpFileNames);
403449 if (ret) {
404- LOG (WARNING) << dbPath << " read dump meta files failed, path " << instPath;
405- return ;
450+ LOG (WARNING) << " [GetBgSaveMetaData] Failed to read instPath: " << instPath;
451+ continue ;
406452 }
407453
408- for (const std::string fileName : tmpFileNames) {
409- fileNames -> push_back (std::to_string (index) + " /" + fileName);
454+ for (const std::string& fileName : tmpFileNames) {
455+ std::string fullPath = instPath + " /" + fileName;
456+ struct stat st;
457+ // Check if file exists and get its stat
458+ if (stat (fullPath.c_str (), &st) != 0 ) {
459+ // File doesn't exist, skip it
460+ LOG (WARNING) << " [GetBgSaveMetaData] File does not exist: " << fullPath;
461+ continue ;
462+ }
463+
464+ // Check if it's an SST file and if it's an orphan (Links=1)
465+ if (fileName.size () > 4 && fileName.substr (fileName.size () - 4 ) == " .sst" ) {
466+ total_sst_files++;
467+ if (st.st_nlink == 1 ) {
468+ // This is an orphan file, but we need to include it in the meta
469+ // to ensure data consistency. The file will be cleaned up after
470+ // a delay to allow for retries.
471+ orphan_sst_files++;
472+ LOG (INFO) << " [GetBgSaveMetaData] Including orphan SST file: " << fullPath
473+ << " , size=" << st.st_size ;
474+ // NOTE: We no longer skip orphan files here. They will be included
475+ // in the file list and cleaned up with a delay after transfer.
476+ }
477+ }
478+ // Construct relative path like "0/xxx.sst" or "1/xxx.sst"
479+ fileNames->push_back (subDir + " /" + fileName);
410480 }
411481 }
482+
483+ if (orphan_sst_files > 0 ) {
484+ LOG (INFO) << " [GetBgSaveMetaData] Summary for " << dbPath
485+ << " : total_sst=" << total_sst_files
486+ << " , orphan_included=" << orphan_sst_files
487+ << " , returned=" << fileNames->size ();
488+ }
489+
412490 fileNames->push_back (kBgsaveInfoFile );
413491 pstd::Status s = GetBgSaveUUID (snapshot_uuid);
414492 if (!s.ok ()) {
0 commit comments