Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
22c4164
Initial plan
Copilot Feb 13, 2026
e7bc3fe
Add test_add_node_with_corrupted_ledger to reproduce issue #6612
Copilot Feb 13, 2026
960f87f
Replace skip-on-missing with assertions for ledger corruption precond…
Copilot Feb 13, 2026
569aa40
Merge branch 'main' into copilot/add-testcase-for-issue-6612
eddyashton Feb 17, 2026
9b5c980
Add test_add_node_with_corrupted_ledger to run_all in reconfiguration.py
Copilot Feb 17, 2026
2703d8c
Merge branch 'main' into copilot/add-testcase-for-issue-6612
eddyashton Feb 18, 2026
3314d02
Merge branch 'main' into copilot/add-testcase-for-issue-6612
achamayou Mar 6, 2026
a262e69
Merge branch 'main' into copilot/add-testcase-for-issue-6612
achamayou Apr 29, 2026
703c70b
Merge branch 'main' into copilot/add-testcase-for-issue-6612
achamayou Jun 2, 2026
6b28223
Apply suggestion from @achamayou
achamayou Jun 3, 2026
c1fb5f8
Merge branch 'main' into copilot/add-testcase-for-issue-6612
achamayou Jun 4, 2026
0784885
Fix corrupted ledger reconfiguration test
Copilot Jun 4, 2026
b53354d
Merge branch 'main' into copilot/add-testcase-for-issue-6612
achamayou Jun 4, 2026
3f1a8c1
Merge branch 'main' into copilot/add-testcase-for-issue-6612
achamayou Jun 5, 2026
dc0dbb6
Align corrupted-ledger join regression with snapshot-start behavior (…
Copilot Jun 5, 2026
12823f8
Merge branch 'main' into copilot/add-testcase-for-issue-6612
achamayou Jun 5, 2026
ce68a85
Require corrupted-ledger txid log assertion
Copilot Jun 9, 2026
a03d7e3
Polish corrupted-ledger txid checks
Copilot Jun 9, 2026
397c510
Rename targeted corrupted txid helper var
Copilot Jun 9, 2026
c300769
Merge branch 'main' into copilot/add-testcase-for-issue-6612
achamayou Jun 9, 2026
7939c6a
Merge branch 'main' into copilot/add-testcase-for-issue-6612
achamayou Jun 9, 2026
9e30ceb
Fix formatting in reconfiguration repro test
Copilot Jun 9, 2026
3b0971d
Simplify malformed ledger seqno logging
Copilot Jun 10, 2026
57d108c
Merge branch 'main' into copilot/add-testcase-for-issue-6612
achamayou Jun 10, 2026
542a1d1
Fix get_txid() to return TxID object instead of string
Copilot Jun 10, 2026
5cca7c9
Merge branch 'main' into copilot/add-testcase-for-issue-6612
achamayou Jun 11, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions python/src/ccf/ledger.py
Original file line number Diff line number Diff line change
Expand Up @@ -905,9 +905,9 @@ def _read_header(self):

return entry_start_pos

def get_txid(self) -> TxID:
    """
    Return the transaction ID of this entry.

    The ID is built from the view and seqno stored in the entry's GCM
    header, and is returned as a TxID object (not a formatted string) so
    callers can read its fields directly.

    :return: TxID constructed from ``gcm_header.view`` and ``gcm_header.seqno``.
    """
    # The GCM header must have been populated before the ID can be derived.
    assert self.gcm_header is not None
    return TxID(self.gcm_header.view, self.gcm_header.seqno)

def get_public_domain(self) -> PublicDomain:
"""
Expand Down
5 changes: 2 additions & 3 deletions src/host/ledger.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
// Licensed under the Apache 2.0 License.
#pragma once

#include "ccf/crypto/symmetric_key.h"
#include "ccf/ds/nonstd.h"
#include "ccf/pal/locking.h"
#include "consensus/ledger_enclave_types.h"
Expand Down Expand Up @@ -283,13 +284,11 @@ namespace asynchost
{
LOG_FAIL_FMT(
"Malformed incomplete ledger file {} at seqno {} (expecting "
"entry of size "
"{}, remaining {})",
"entry of size {}, remaining {})",
file_path,
current_idx,
entry_size,
len);

return;
}

Expand Down
130 changes: 130 additions & 0 deletions tests/reconfiguration.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,135 @@ def test_add_node(network, args, copy_snapshot=False, fetch_recent_snapshot=True
return network


@reqs.description("Adding a node with corrupted ledger file")
def test_add_node_with_corrupted_ledger(network, args):
    """
    Regression test for issue #6612.

    A node joining from a recent snapshot should still start up when an older
    uncommitted ledger file it was given is truncated part-way through a
    transaction. The test truncates such a file before the node is started,
    then checks that the node reports a non-zero startup seqno and that its
    logs contain the "Malformed incomplete ledger file" line for the
    truncated file at the targeted seqno.

    :param network: Running network to which the new node is added.
    :param args: Test arguments; ``args.package`` is passed to the join setup.
    :return: The same ``network`` object.
    """
    primary, _ = network.find_primary()
    snapshot_txid = primary.trigger_snapshot()
    snapshots_dir = network.get_committed_snapshots(
        primary,
        target_seqno=snapshot_txid.seqno,
        wait_for_target_seqno=True,
    )

    new_node = network.create_node()

    # Prepare the joining node (ledger and snapshots are copied) without
    # starting it, so that its ledger can be tampered with first.
    network.setup_join_node(
        new_node,
        args.package,
        args,
        from_snapshot=True,
        snapshots_dir=snapshots_dir,
        fetch_recent_snapshot=False,
    )

    # Collect the uncommitted ledger files of the new node, ordered by the
    # first seqno encoded in their file name.
    ledger_dir = new_node.remote.get_main_ledger_dir()
    uncommitted_names = [
        name
        for name in os.listdir(ledger_dir)
        if name.startswith("ledger_") and not name.endswith(".committed")
    ]
    seqno_ranges = {
        name: ccf.ledger.range_from_filename(name) for name in uncommitted_names
    }
    uncommitted_names.sort(key=lambda name: seqno_ranges[name][0])
    assert (
        uncommitted_names
    ), "Expected to find uncommitted ledger files for corruption test"

    # Pick the first file whose range ends before the snapshot seqno; if there
    # is none, fall back to the last uncommitted file.
    for candidate in uncommitted_names:
        range_end = seqno_ranges[candidate][1]
        if range_end is not None and range_end < snapshot_txid.seqno:
            victim_name = candidate
            break
    else:
        victim_name = uncommitted_names[-1]

    # Locate a transaction in the chosen file and compute an offset that cuts
    # it in the middle.
    ledger = ccf.ledger.Ledger([ledger_dir], committed_only=False)
    # Only transactions longer than both headers plus one byte are considered.
    min_cut_size = (
        ccf.ledger.TransactionHeader.get_size() + ccf.ledger.GcmHeader.size() + 1
    )
    victim_path = None
    victim_seqno = None
    cut_offset = None
    for chunk in ledger:
        chunk_path = chunk.filename()
        if os.path.basename(chunk_path) != victim_name:
            continue

        for tx in chunk:
            tx_start, _ = tx.get_offsets()
            tx_len = tx.get_len()
            if tx_len > min_cut_size:
                # One truncated transaction is enough to make the file
                # malformed at this seqno.
                victim_path = chunk_path
                victim_seqno = tx.get_txid().seqno
                cut_offset = tx_start + max(tx_len // 2, min_cut_size)
                break

        if cut_offset is not None:
            break

    assert cut_offset is not None, "Should always find a transaction to corrupt"
    assert victim_seqno is not None

    LOG.info(
        f"Corrupting ledger file {victim_path} by truncating at offset {cut_offset}"
    )
    with open(victim_path, "rb+") as ledger_file:
        ledger_file.truncate(cut_offset)

    # Start the node against the damaged ledger and have it trusted.
    network.run_join_node(new_node)
    network.trust_node(new_node, args)

    with new_node.client() as c:
        state = c.get("/node/state")
        assert (
            state.body.json()["startup_seqno"] != 0
        ), f"Node {new_node.local_node_id} should have started from snapshot"

    # The node's output must mention the malformed file at the targeted seqno.
    out_path, err_path = new_node.get_logs()
    assert out_path is not None and err_path is not None
    log_texts = []
    for log_path in (out_path, err_path):
        with open(log_path, encoding="utf-8") as log_file:
            log_texts.append(log_file.read())
    combined_logs = "".join(log_texts)

    victim_basename = os.path.basename(victim_path)
    seqno_marker = f"at seqno {victim_seqno}"
    matching_lines = [
        line
        for line in combined_logs.splitlines()
        if "Malformed incomplete ledger file" in line
        and victim_basename in line
        and seqno_marker in line
    ]
    assert (
        matching_lines
    ), f"Expected malformed ledger log line for seqno {victim_seqno}\n{combined_logs}"
    LOG.info(f"Observed malformed ledger handling: {matching_lines[0]}")

    # Remove the extra node again so the network is left as it was found.
    primary, _ = network.find_primary()
    network.retire_node(primary, new_node)
    new_node.stop()
    return network


@reqs.description("Test ignore_first_sigterm")
def test_ignore_first_sigterm(network, args):
# Note: host is supplied explicitly to avoid having differently
Expand Down Expand Up @@ -1008,6 +1137,7 @@ def run_all(args):
test_add_node(network, args, copy_snapshot=True, fetch_recent_snapshot=False)
test_add_node(network, args, copy_snapshot=True, fetch_recent_snapshot=True)
test_add_node_with_read_only_ledger(network, args)
test_add_node_with_corrupted_ledger(network, args)
test_join_straddling_primary_replacement(network, args)
test_node_replacement(network, args)
test_add_node_from_backup(network, args)
Expand Down
1 change: 1 addition & 0 deletions tests/suite/test_suite.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@
reconfiguration.test_retire_primary,
e2e_logging.test_rekey,
reconfiguration.test_add_node,
reconfiguration.test_add_node_with_corrupted_ledger,
nodes.test_kill_primary,
nodes.test_commit_view_history,
reconfiguration.test_add_node,
Expand Down