From 22c41641c35db73e85772a4163a37fde2a33895a Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 13 Feb 2026 09:17:17 +0000 Subject: [PATCH 01/13] Initial plan From e7bc3fe06c9327d4c51091563a2c329f2929ecfc Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 13 Feb 2026 09:27:21 +0000 Subject: [PATCH 02/13] Add test_add_node_with_corrupted_ledger to reproduce issue #6612 Co-authored-by: achamayou <4016369+achamayou@users.noreply.github.com> --- tests/reconfiguration.py | 75 +++++++++++++++++++++++++++++++++++++++ tests/suite/test_suite.py | 1 + 2 files changed, 76 insertions(+) diff --git a/tests/reconfiguration.py b/tests/reconfiguration.py index 0768241008bf..18455d3beb26 100644 --- a/tests/reconfiguration.py +++ b/tests/reconfiguration.py @@ -148,6 +148,81 @@ def test_add_node(network, args, from_snapshot=True): return network +@reqs.description("Adding a node with corrupted ledger file") +def test_add_node_with_corrupted_ledger(network, args): + # Reproduce issue #6612: a node joining with a corrupted (truncated) ledger + # file should fail to start rather than crash unexpectedly. + new_node = network.create_node() + + # Set up the join node (copies ledger, snapshots, etc.) but do not start it yet + network.setup_join_node( + new_node, + args.package, + args, + from_snapshot=True, + fetch_recent_snapshot=True, + ) + + # Find the latest uncommitted ledger file in the node's working directory + ledger_dir = new_node.remote.get_main_ledger_dir() + ledger_files = sorted( + [ + f + for f in os.listdir(ledger_dir) + if f.startswith("ledger_") and not f.endswith(".committed") + ] + ) + + if not ledger_files: + LOG.warning("No uncommitted ledger files found, skipping corruption test") + new_node.stop() + network.nodes.remove(new_node) + return network + + # Corrupt the latest uncommitted ledger file by truncating it in the middle + # of a transaction, so the transaction size does not match the number of + # bytes available left to read in the file (as described in issue #6612) + ledger = ccf.ledger.Ledger([ledger_dir], committed_only=False) + chunk_filename = None + truncate_offset = None + for chunk in ledger: + for tx in chunk: + offset, next_offset = tx.get_offsets() + chunk_filename = chunk.filename() + truncate_offset = offset + (next_offset - offset) // 2 + + if truncate_offset is None: + LOG.warning("Could not find a transaction to corrupt, skipping") + new_node.stop() + network.nodes.remove(new_node) + return network + + LOG.info( + f"Corrupting ledger file {chunk_filename} by truncating at offset {truncate_offset}" + ) + with open(chunk_filename, "r+", encoding="utf-8") as f: + f.truncate(truncate_offset) + + # Attempt to start the node - it should fail due to the corrupted ledger + try: + network.run_join_node(new_node, timeout=3) + except (RuntimeError, TimeoutError) as e: + LOG.info( + f"Node {new_node.local_node_id} with corrupted ledger failed to start, as expected: {e}" + ) + # Cleanup: run_join_node may have already stopped and removed the node + # on TimeoutError, but not on RuntimeError + new_node.stop() + if new_node in network.nodes: + network.nodes.remove(new_node) + else: + assert ( + False + ), f"Node {new_node.local_node_id} with corrupted ledger unexpectedly started" + + return network + + @reqs.description("Test ignore_first_sigterm") def test_ignore_first_sigterm(network, args): # Note: host is supplied explicitly to avoid having differently diff --git a/tests/suite/test_suite.py b/tests/suite/test_suite.py index a2fddd4839eb..e540e6830ca9 100644 --- a/tests/suite/test_suite.py +++ b/tests/suite/test_suite.py @@ -61,6 +61,7 @@ reconfiguration.test_retire_primary, e2e_logging.test_rekey, reconfiguration.test_add_node, + reconfiguration.test_add_node_with_corrupted_ledger, nodes.test_kill_primary, nodes.test_commit_view_history, reconfiguration.test_add_node, From 960f87f15af7df452e1a01b776eb130c34eb07cf Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 13 Feb 2026 09:35:40 +0000 Subject: [PATCH 03/13] Replace skip-on-missing with assertions for ledger corruption preconditions Co-authored-by: achamayou <4016369+achamayou@users.noreply.github.com> --- tests/reconfiguration.py | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/tests/reconfiguration.py b/tests/reconfiguration.py index 18455d3beb26..2e91fad2e41e 100644 --- a/tests/reconfiguration.py +++ b/tests/reconfiguration.py @@ -173,11 +173,7 @@ def test_add_node_with_corrupted_ledger(network, args): ] ) - if not ledger_files: - LOG.warning("No uncommitted ledger files found, skipping corruption test") - new_node.stop() - network.nodes.remove(new_node) - return network + assert ledger_files, "Expected uncommitted ledger files to corrupt" # Corrupt the latest uncommitted ledger file by truncating it in the middle # of a transaction, so the transaction size does not match the number of @@ -191,11 +187,7 @@ def test_add_node_with_corrupted_ledger(network, args): chunk_filename = chunk.filename() truncate_offset = offset + (next_offset - offset) // 2 - if truncate_offset is None: - LOG.warning("Could not find a transaction to corrupt, skipping") - new_node.stop() - network.nodes.remove(new_node) - return network + assert truncate_offset is not None, "Should always find a transaction to corrupt" LOG.info( f"Corrupting ledger file {chunk_filename} by truncating at offset {truncate_offset}" From 9b5c9806c544b686dada38de6a42573e74704ec1 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 17 Feb 2026 14:56:26 +0000 Subject: [PATCH 04/13] Add test_add_node_with_corrupted_ledger to run_all in reconfiguration.py Co-authored-by: eddyashton <6000239+eddyashton@users.noreply.github.com> --- tests/reconfiguration.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/reconfiguration.py b/tests/reconfiguration.py index 2e91fad2e41e..880460dc69cd 100644 --- a/tests/reconfiguration.py +++ b/tests/reconfiguration.py @@ -856,6 +856,7 @@ def run_all(args): test_add_node_invalid_service_cert(network, args) test_add_node(network, args, from_snapshot=False) test_add_node_with_read_only_ledger(network, args) + test_add_node_with_corrupted_ledger(network, args) test_join_straddling_primary_replacement(network, args) test_node_replacement(network, args) test_add_node_from_backup(network, args) From 6b2822392cd065e459870ca2fdc933d4ca954d68 Mon Sep 17 00:00:00 2001 From: Amaury Chamayou Date: Wed, 3 Jun 2026 09:29:40 +0100 Subject: [PATCH 05/13] Apply suggestion from @achamayou --- tests/reconfiguration.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/reconfiguration.py b/tests/reconfiguration.py index 70b5567c2608..c38e84df9855 100644 --- a/tests/reconfiguration.py +++ b/tests/reconfiguration.py @@ -166,7 +166,7 @@ def test_add_node_with_corrupted_ledger(network, args): new_node, args.package, args, - from_snapshot=True, + from_snapshot=False, fetch_recent_snapshot=True, ) From 07848859daa8d322682d66d1b6fb37c2b6abd8b8 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 4 Jun 2026 12:45:23 +0000 Subject: [PATCH 06/13] Fix corrupted ledger reconfiguration test --- tests/reconfiguration.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/reconfiguration.py b/tests/reconfiguration.py index 141b778e32f4..cfb3584a5e1e 100644 --- a/tests/reconfiguration.py +++ b/tests/reconfiguration.py @@ -168,7 +168,9 @@ def test_add_node_with_corrupted_ledger(network, args): args.package, args, from_snapshot=False, - fetch_recent_snapshot=True, + # This regression test must replay the copied ledger, rather than + # fetching a newer snapshot which could bypass the corrupted chunk. + fetch_recent_snapshot=False, ) # Find the latest uncommitted ledger file in the node's working directory @@ -200,7 +202,7 @@ def test_add_node_with_corrupted_ledger(network, args): LOG.info( f"Corrupting ledger file {chunk_filename} by truncating at offset {truncate_offset}" ) - with open(chunk_filename, "r+", encoding="utf-8") as f: + with open(chunk_filename, "rb+") as f: f.truncate(truncate_offset) # Attempt to start the node - it should fail due to the corrupted ledger From dc0dbb63172e567d07fa6d4c7b527372be64caa8 Mon Sep 17 00:00:00 2001 From: Copilot <198982749+Copilot@users.noreply.github.com> Date: Fri, 5 Jun 2026 14:17:48 +0100 Subject: [PATCH 07/13] Align corrupted-ledger join regression with snapshot-start behavior (#7925) Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com> --- tests/reconfiguration.py | 99 +++++++++++++++++++++++++++++----------- 1 file changed, 72 insertions(+), 27 deletions(-) diff --git a/tests/reconfiguration.py b/tests/reconfiguration.py index cfb3584a5e1e..c16f6107de87 100644 --- a/tests/reconfiguration.py +++ b/tests/reconfiguration.py @@ -158,44 +158,78 @@ def test_add_node(network, args, copy_snapshot=False, fetch_recent_snapshot=True @reqs.description("Adding a node with corrupted ledger file") def test_add_node_with_corrupted_ledger(network, args): - # Reproduce issue #6612: a node joining with a corrupted (truncated) ledger - # file should fail to start rather than crash unexpectedly. + # Reproduce issue #6612: when joining from a recent snapshot, an older + # corrupted/truncated uncommitted ledger file should not prevent startup. + primary, _ = network.find_primary() + snapshot_trigger_txid = primary.trigger_snapshot() + snapshots_dir = network.get_committed_snapshots( + primary, + target_seqno=snapshot_trigger_txid.seqno, + wait_for_target_seqno=True, + ) + new_node = network.create_node() - # Set up the join node (copies ledger, snapshots, etc.) but do not start it yet + # Set up the join node (copies ledger and snapshots) but do not start it yet network.setup_join_node( new_node, args.package, args, - from_snapshot=False, - # This regression test must replay the copied ledger, rather than - # fetching a newer snapshot which could bypass the corrupted chunk. + from_snapshot=True, + snapshots_dir=snapshots_dir, fetch_recent_snapshot=False, ) - # Find the latest uncommitted ledger file in the node's working directory + # Find an uncommitted ledger file in the node's main ledger directory ledger_dir = new_node.remote.get_main_ledger_dir() ledger_files = sorted( [ f for f in os.listdir(ledger_dir) if f.startswith("ledger_") and not f.endswith(".committed") - ] + ], + key=lambda f: ccf.ledger.range_from_filename(f)[0], ) + assert ( + ledger_files + ), "Expected to find uncommitted ledger files for corruption test" + ledger_ranges = { + ledger_file: ccf.ledger.range_from_filename(ledger_file) + for ledger_file in ledger_files + } - assert ledger_files, "Expected uncommitted ledger files to corrupt" + # Prefer a chunk whose range is older than the snapshot we are joining from. + corrupted_ledger_file = next( + ( + f + for f in ledger_files + if ( + ledger_ranges[f][1] is not None + and ledger_ranges[f][1] < snapshot_trigger_txid.seqno + ) + ), + ledger_files[-1], + ) - # Corrupt the latest uncommitted ledger file by truncating it in the middle - # of a transaction, so the transaction size does not match the number of - # bytes available left to read in the file (as described in issue #6612) + # Corrupt the chosen uncommitted ledger file by truncating it in the middle + # of a transaction. ledger = ccf.ledger.Ledger([ledger_dir], committed_only=False) chunk_filename = None truncate_offset = None for chunk in ledger: + if os.path.basename(chunk.filename()) != corrupted_ledger_file: + continue + for tx in chunk: offset, next_offset = tx.get_offsets() chunk_filename = chunk.filename() truncate_offset = offset + (next_offset - offset) // 2 + # Corrupting a single transaction in the selected chunk is + # sufficient to make the file malformed. + break + + if truncate_offset is not None: + break assert truncate_offset is not None, "Should always find a transaction to corrupt" @@ -205,23 +239,34 @@ def test_add_node_with_corrupted_ledger(network, args): with open(chunk_filename, "rb+") as f: f.truncate(truncate_offset) - # Attempt to start the node - it should fail due to the corrupted ledger - try: - network.run_join_node(new_node, timeout=3) - except (RuntimeError, TimeoutError) as e: - LOG.info( - f"Node {new_node.local_node_id} with corrupted ledger failed to start, as expected: {e}" + network.run_join_node(new_node) + network.trust_node(new_node, args) + + with new_node.client() as c: + r = c.get("/node/state") + assert r.body.json()["startup_seqno"] != 0, ( + f"Node {new_node.local_node_id} should have started from snapshot" ) - # Cleanup: run_join_node may have already stopped and removed the node - # on TimeoutError, but not on RuntimeError - new_node.stop() - if new_node in network.nodes: - network.nodes.remove(new_node) - else: - assert ( - False - ), f"Node {new_node.local_node_id} with corrupted ledger unexpectedly started" + out_path, err_path = new_node.get_logs() + if out_path is not None and err_path is not None: + with open(out_path, encoding="utf-8") as out: + out_logs = out.read() + with open(err_path, encoding="utf-8") as err: + err_logs = err.read() + # Depending on where recovery skips/truncates the stale chunk, this + # malformed-ledger line may or may not be emitted. + combined_logs = (out_logs + err_logs).lower() + if "malformed" in combined_logs and "ledger file" in combined_logs: + LOG.info("Observed malformed ledger handling while joining from snapshot") + else: + LOG.info( + "Did not observe malformed ledger log line; join success remains the test invariant" + ) + + primary, _ = network.find_primary() + network.retire_node(primary, new_node) + new_node.stop() return network From ce68a85db0e52d9c5477ebe603fb5003e8f52302 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 9 Jun 2026 10:46:04 +0000 Subject: [PATCH 08/13] Require corrupted-ledger txid log assertion --- src/host/ledger.h | 61 ++++++++++++++++++++++++++++++++++------ tests/reconfiguration.py | 53 ++++++++++++++++++++++------------ 2 files changed, 88 insertions(+), 26 deletions(-) diff --git a/src/host/ledger.h b/src/host/ledger.h index 73381117e154..c9268b57a8f5 100644 --- a/src/host/ledger.h +++ b/src/host/ledger.h @@ -2,8 +2,10 @@ // Licensed under the Apache 2.0 License. #pragma once +#include "ccf/crypto/symmetric_key.h" #include "ccf/ds/nonstd.h" #include "ccf/pal/locking.h" +#include "ccf/tx_id.h" #include "consensus/ledger_enclave_types.h" #include "ds/files.h" #include "ds/internal_logger.h" @@ -81,6 +83,36 @@ namespace asynchost using positions_offset_header_t = size_t; static constexpr auto file_name_prefix = "ledger"; + static std::optional get_entry_tx_id(FILE* file, size_t size) + { + if (size < ccf::crypto::StandardGcmHeader::serialised_size()) + { + return std::nullopt; + } + + auto header_data = + std::vector(ccf::crypto::StandardGcmHeader::serialised_size()); + if (fread(header_data.data(), header_data.size(), 1, file) != 1) + { + return std::nullopt; + } + + const uint8_t* data = header_data.data(); + size_t data_size = header_data.size(); + ccf::crypto::StandardGcmHeader gcm_header; + gcm_header.deserialise(data, data_size); + + auto iv_data = gcm_header.get_iv().data(); + auto iv_size = gcm_header.get_iv().size(); + + auto tx_id = ccf::TxID{}; + tx_id.seqno = serialized::read(iv_data, iv_size); + tx_id.view = + serialized::read(iv_data, iv_size) & 0x7FFFFFFF; + + return tx_id; + } + const fs::path dir; fs::path file_name; @@ -283,14 +315,27 @@ namespace asynchost const auto& entry_size = entry_header.size; if (len < entry_size) { - LOG_FAIL_FMT( - "Malformed incomplete ledger file {} at seqno {} (expecting " - "entry of size " - "{}, remaining {})", - file_path, - current_idx, - entry_size, - len); + const auto tx_id = get_entry_tx_id(file, len); + if (tx_id.has_value()) + { + LOG_FAIL_FMT( + "Malformed incomplete ledger file {} at txid {} (expecting " + "entry of size {}, remaining {})", + file_path, + tx_id->to_str(), + entry_size, + len); + } + else + { + LOG_FAIL_FMT( + "Malformed incomplete ledger file {} at seqno {} (expecting " + "entry of size {}, remaining {})", + file_path, + current_idx, + entry_size, + len); + } return; } diff --git a/tests/reconfiguration.py b/tests/reconfiguration.py index c16f6107de87..184df5f27b24 100644 --- a/tests/reconfiguration.py +++ b/tests/reconfiguration.py @@ -215,23 +215,36 @@ def test_add_node_with_corrupted_ledger(network, args): # of a transaction. ledger = ccf.ledger.Ledger([ledger_dir], committed_only=False) chunk_filename = None + corrupted_txid = None truncate_offset = None + minimum_truncated_tx_size = ( + ccf.ledger.TransactionHeader.get_size() + ccf.ledger.GcmHeader.size() + 1 + ) for chunk in ledger: if os.path.basename(chunk.filename()) != corrupted_ledger_file: continue for tx in chunk: - offset, next_offset = tx.get_offsets() + offset, tx_len = tx.get_offsets() + if tx_len <= minimum_truncated_tx_size: + continue + chunk_filename = chunk.filename() - truncate_offset = offset + (next_offset - offset) // 2 - # Corrupting a single transaction in the selected chunk is - # sufficient to make the file malformed. + corrupted_txid = tx.get_txid() + truncate_offset = offset + max( + tx_len // 2, + minimum_truncated_tx_size, + ) + # Corrupting a single transaction in the selected chunk after the + # GCM header is sufficient to make the file malformed while keeping + # the txid observable in the node logs. break if truncate_offset is not None: break assert truncate_offset is not None, "Should always find a transaction to corrupt" + assert corrupted_txid is not None LOG.info( f"Corrupting ledger file {chunk_filename} by truncating at offset {truncate_offset}" @@ -249,20 +262,24 @@ def test_add_node_with_corrupted_ledger(network, args): ) out_path, err_path = new_node.get_logs() - if out_path is not None and err_path is not None: - with open(out_path, encoding="utf-8") as out: - out_logs = out.read() - with open(err_path, encoding="utf-8") as err: - err_logs = err.read() - # Depending on where recovery skips/truncates the stale chunk, this - # malformed-ledger line may or may not be emitted. - combined_logs = (out_logs + err_logs).lower() - if "malformed" in combined_logs and "ledger file" in combined_logs: - LOG.info("Observed malformed ledger handling while joining from snapshot") - else: - LOG.info( - "Did not observe malformed ledger log line; join success remains the test invariant" - ) + assert out_path is not None and err_path is not None + with open(out_path, encoding="utf-8") as out: + out_logs = out.read() + with open(err_path, encoding="utf-8") as err: + err_logs = err.read() + + combined_logs = out_logs + err_logs + matching_lines = [ + line + for line in combined_logs.splitlines() + if "Malformed incomplete ledger file" in line + and os.path.basename(chunk_filename) in line + and f"at txid {corrupted_txid}" in line + ] + assert ( + matching_lines + ), f"Expected malformed ledger log line for txid {corrupted_txid}\n{combined_logs}" + LOG.info(f"Observed malformed ledger handling: {matching_lines[0]}") primary, _ = network.find_primary() network.retire_node(primary, new_node) From a03d7e3e4a88b87c6e155723d6549ad24bb2bf34 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 9 Jun 2026 10:47:41 +0000 Subject: [PATCH 09/13] Polish corrupted-ledger txid checks --- src/host/ledger.h | 11 +++++++++++ tests/reconfiguration.py | 7 ++++--- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/src/host/ledger.h b/src/host/ledger.h index c9268b57a8f5..a9591357056c 100644 --- a/src/host/ledger.h +++ b/src/host/ledger.h @@ -90,10 +90,17 @@ namespace asynchost return std::nullopt; } + const auto current_pos = ftello(file); + if (current_pos == -1) + { + return std::nullopt; + } + auto header_data = std::vector(ccf::crypto::StandardGcmHeader::serialised_size()); if (fread(header_data.data(), header_data.size(), 1, file) != 1) { + std::ignore = fseeko(file, current_pos, SEEK_SET); return std::nullopt; } @@ -107,9 +114,13 @@ namespace asynchost auto tx_id = ccf::TxID{}; tx_id.seqno = serialized::read(iv_data, iv_size); + // The top bit of the IV's 32-bit term field is reserved to indicate + // snapshots, so mask it off when reconstructing the ledger txid view. tx_id.view = serialized::read(iv_data, iv_size) & 0x7FFFFFFF; + std::ignore = fseeko(file, current_pos, SEEK_SET); + return tx_id; } diff --git a/tests/reconfiguration.py b/tests/reconfiguration.py index 184df5f27b24..dd02c58353d7 100644 --- a/tests/reconfiguration.py +++ b/tests/reconfiguration.py @@ -225,14 +225,15 @@ def test_add_node_with_corrupted_ledger(network, args): continue for tx in chunk: - offset, tx_len = tx.get_offsets() - if tx_len <= minimum_truncated_tx_size: + offset, _ = tx.get_offsets() + tx_size = tx.get_len() + if tx_size <= minimum_truncated_tx_size: continue chunk_filename = chunk.filename() corrupted_txid = tx.get_txid() truncate_offset = offset + max( - tx_len // 2, + tx_size // 2, minimum_truncated_tx_size, ) # Corrupting a single transaction in the selected chunk after the From 397c51022fbbbc83389ecda92c0d9cf2c862d26e Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 9 Jun 2026 10:48:49 +0000 Subject: [PATCH 10/13] Rename targeted corrupted txid helper var --- tests/reconfiguration.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/reconfiguration.py b/tests/reconfiguration.py index dd02c58353d7..3a3a93d39246 100644 --- a/tests/reconfiguration.py +++ b/tests/reconfiguration.py @@ -215,7 +215,7 @@ def test_add_node_with_corrupted_ledger(network, args): # of a transaction. ledger = ccf.ledger.Ledger([ledger_dir], committed_only=False) chunk_filename = None - corrupted_txid = None + target_txid = None truncate_offset = None minimum_truncated_tx_size = ( ccf.ledger.TransactionHeader.get_size() + ccf.ledger.GcmHeader.size() + 1 @@ -231,7 +231,7 @@ def test_add_node_with_corrupted_ledger(network, args): continue chunk_filename = chunk.filename() - corrupted_txid = tx.get_txid() + target_txid = tx.get_txid() truncate_offset = offset + max( tx_size // 2, minimum_truncated_tx_size, @@ -245,7 +245,7 @@ def test_add_node_with_corrupted_ledger(network, args): break assert truncate_offset is not None, "Should always find a transaction to corrupt" - assert corrupted_txid is not None + assert target_txid is not None LOG.info( f"Corrupting ledger file {chunk_filename} by truncating at offset {truncate_offset}" @@ -275,11 +275,11 @@ def test_add_node_with_corrupted_ledger(network, args): for line in combined_logs.splitlines() if "Malformed incomplete ledger file" in line and os.path.basename(chunk_filename) in line - and f"at txid {corrupted_txid}" in line + and f"at txid {target_txid}" in line ] assert ( matching_lines - ), f"Expected malformed ledger log line for txid {corrupted_txid}\n{combined_logs}" + ), f"Expected malformed ledger log line for txid {target_txid}\n{combined_logs}" LOG.info(f"Observed malformed ledger handling: {matching_lines[0]}") primary, _ = network.find_primary() From 9e30cebd87a54b9703a784ecfed37b4ecaabbb8e Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 9 Jun 2026 20:03:18 +0000 Subject: [PATCH 11/13] Fix formatting in reconfiguration repro test --- tests/reconfiguration.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/tests/reconfiguration.py b/tests/reconfiguration.py index 3a3a93d39246..ce30deca26d9 100644 --- a/tests/reconfiguration.py +++ b/tests/reconfiguration.py @@ -190,9 +190,7 @@ def test_add_node_with_corrupted_ledger(network, args): ], key=lambda f: ccf.ledger.range_from_filename(f)[0], ) - assert ( - ledger_files - ), "Expected to find uncommitted ledger files for corruption test" + assert ledger_files, "Expected to find uncommitted ledger files for corruption test" ledger_ranges = { ledger_file: ccf.ledger.range_from_filename(ledger_file) for ledger_file in ledger_files @@ -258,9 +256,9 @@ def test_add_node_with_corrupted_ledger(network, args): with new_node.client() as c: r = c.get("/node/state") - assert r.body.json()["startup_seqno"] != 0, ( - f"Node {new_node.local_node_id} should have started from snapshot" - ) + assert ( + r.body.json()["startup_seqno"] != 0 + ), f"Node {new_node.local_node_id} should have started from snapshot" out_path, err_path = new_node.get_logs() assert out_path is not None and err_path is not None From 3b0971d2499f482659aebbd1d4ddf975ae70c51a Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 10 Jun 2026 12:14:17 +0000 Subject: [PATCH 12/13] Simplify malformed ledger seqno logging --- src/host/ledger.h | 71 ++++------------------------------------ tests/reconfiguration.py | 15 ++++----- 2 files changed, 14 insertions(+), 72 deletions(-) diff --git a/src/host/ledger.h b/src/host/ledger.h index 4a47a64d35d6..562d1c1af513 100644 --- a/src/host/ledger.h +++ b/src/host/ledger.h @@ -5,7 +5,6 @@ #include "ccf/crypto/symmetric_key.h" #include "ccf/ds/nonstd.h" #include "ccf/pal/locking.h" -#include "ccf/tx_id.h" #include "consensus/ledger_enclave_types.h" #include "ds/files.h" #include "ds/internal_logger.h" @@ -82,47 +81,6 @@ namespace asynchost using positions_offset_header_t = size_t; static constexpr auto file_name_prefix = "ledger"; - static std::optional get_entry_tx_id(FILE* file, size_t size) - { - if (size < ccf::crypto::StandardGcmHeader::serialised_size()) - { - return std::nullopt; - } - - const auto current_pos = ftello(file); - if (current_pos == -1) - { - return std::nullopt; - } - - auto header_data = - std::vector(ccf::crypto::StandardGcmHeader::serialised_size()); - if (fread(header_data.data(), header_data.size(), 1, file) != 1) - { - std::ignore = fseeko(file, current_pos, SEEK_SET); - return std::nullopt; - } - - const uint8_t* data = header_data.data(); - size_t data_size = header_data.size(); - ccf::crypto::StandardGcmHeader gcm_header; - gcm_header.deserialise(data, data_size); - - auto iv_data = gcm_header.get_iv().data(); - auto iv_size = gcm_header.get_iv().size(); - - auto tx_id = ccf::TxID{}; - tx_id.seqno = serialized::read(iv_data, iv_size); - // The top bit of the IV's 32-bit term field is reserved to indicate - // snapshots, so mask it off when reconstructing the ledger txid view. - tx_id.view = - serialized::read(iv_data, iv_size) & 0x7FFFFFFF; - - std::ignore = fseeko(file, current_pos, SEEK_SET); - - return tx_id; - } - const fs::path dir; fs::path file_name; @@ -324,28 +282,13 @@ namespace asynchost const auto& entry_size = entry_header.size; if (len < entry_size) { - const auto tx_id = get_entry_tx_id(file, len); - if (tx_id.has_value()) - { - LOG_FAIL_FMT( - "Malformed incomplete ledger file {} at txid {} (expecting " - "entry of size {}, remaining {})", - file_path, - tx_id->to_str(), - entry_size, - len); - } - else - { - LOG_FAIL_FMT( - "Malformed incomplete ledger file {} at seqno {} (expecting " - "entry of size {}, remaining {})", - file_path, - current_idx, - entry_size, - len); - } - + LOG_FAIL_FMT( + "Malformed incomplete ledger file {} at seqno {} (expecting " + "entry of size {}, remaining {})", + file_path, + current_idx, + entry_size, + len); return; } diff --git a/tests/reconfiguration.py b/tests/reconfiguration.py index ce30deca26d9..2b9c4230b38c 100644 --- a/tests/reconfiguration.py +++ b/tests/reconfiguration.py @@ -213,7 +213,7 @@ def test_add_node_with_corrupted_ledger(network, args): # of a transaction. ledger = ccf.ledger.Ledger([ledger_dir], committed_only=False) chunk_filename = None - target_txid = None + target_seqno = None truncate_offset = None minimum_truncated_tx_size = ( ccf.ledger.TransactionHeader.get_size() + ccf.ledger.GcmHeader.size() + 1 @@ -229,21 +229,20 @@ def test_add_node_with_corrupted_ledger(network, args): continue chunk_filename = chunk.filename() - target_txid = tx.get_txid() + target_seqno = tx.get_txid().seqno truncate_offset = offset + max( tx_size // 2, minimum_truncated_tx_size, ) - # Corrupting a single transaction in the selected chunk after the - # GCM header is sufficient to make the file malformed while keeping - # the txid observable in the node logs. + # Corrupting a single transaction in the selected chunk is + # sufficient to make the file malformed at this seqno. break if truncate_offset is not None: break assert truncate_offset is not None, "Should always find a transaction to corrupt" - assert target_txid is not None + assert target_seqno is not None LOG.info( f"Corrupting ledger file {chunk_filename} by truncating at offset {truncate_offset}" @@ -273,11 +272,11 @@ def test_add_node_with_corrupted_ledger(network, args): for line in combined_logs.splitlines() if "Malformed incomplete ledger file" in line and os.path.basename(chunk_filename) in line - and f"at txid {target_txid}" in line + and f"at seqno {target_seqno}" in line ] assert ( matching_lines - ), f"Expected malformed ledger log line for txid {target_txid}\n{combined_logs}" + ), f"Expected malformed ledger log line for seqno {target_seqno}\n{combined_logs}" LOG.info(f"Observed malformed ledger handling: {matching_lines[0]}") primary, _ = network.find_primary() From 542a1d1ee3612f3a024199748e1e5ba2cf93c354 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 10 Jun 2026 20:18:53 +0000 Subject: [PATCH 13/13] Fix get_txid() to return TxID object instead of string Co-authored-by: achamayou <4016369+achamayou@users.noreply.github.com> --- python/src/ccf/ledger.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/src/ccf/ledger.py b/python/src/ccf/ledger.py index a8b162afd4e5..21e488a10fdd 100644 --- a/python/src/ccf/ledger.py +++ b/python/src/ccf/ledger.py @@ -905,9 +905,9 @@ def _read_header(self): return entry_start_pos - def get_txid(self) -> str: + def get_txid(self) -> TxID: assert self.gcm_header is not None - return f"{self.gcm_header.view}.{self.gcm_header.seqno}" + return TxID(self.gcm_header.view, self.gcm_header.seqno) def get_public_domain(self) -> PublicDomain: """