Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
41 commits
Select commit Hold shift + click to select a range
4ebfa74
feat(nodectl): add audit log event types, config, and JSON serde tests
mrnkslv May 27, 2026
e79063c
Merge branch 'release/nodectl/v0.6.0' into feature/sma-100-audit-log-…
mrnkslv May 28, 2026
edb0b94
fix: review comments
mrnkslv May 28, 2026
459c74b
feat(nodectl): wire AuditLog trait and factory into composition root
mrnkslv May 28, 2026
f17d9bd
Merge branch 'feature/sma-99-audit-log-architecture' into feature/sma…
mrnkslv May 28, 2026
a799486
fix: copilot comments
mrnkslv May 28, 2026
e3501cc
feat(nodectl): implement JsonlAuditLog with background writer and gra…
mrnkslv Jun 1, 2026
35417a5
fix
mrnkslv Jun 1, 2026
f0582e3
fix:test
mrnkslv Jun 1, 2026
e73b5c7
Merge branch 'feature/sma-99-audit-log-architecture' into feature/sma…
mrnkslv Jun 1, 2026
c3588fb
fix(version): thiserror = "2"
mrnkslv Jun 1, 2026
bb77b09
fix: copilot fixes
mrnkslv Jun 1, 2026
9cf4d0e
feat(nodectl): emit elections audit events from ElectionRunner (SMA-103)
mrnkslv Jun 3, 2026
4b8e8d5
refactor(nodectl): redesign audit event format and file envelope
mrnkslv Jun 3, 2026
327b0c0
fix: copilot comments
mrnkslv Jun 4, 2026
de21c29
merge: integrate feature/sma-99-audit-log-architecture
mrnkslv Jun 4, 2026
705480b
fix:fmt
mrnkslv Jun 4, 2026
a9d6557
fix: delete unused
mrnkslv Jun 4, 2026
9f43584
fix: delete scheduler as an actor
mrnkslv Jun 4, 2026
ccea7b1
feat(nodectl): emit REST API audit events with AuditActorBuilder
mrnkslv Jun 4, 2026
de54752
fix: review comments
mrnkslv Jun 8, 2026
fd6c66e
feat(audit): ring buffer, dedup stake_skipped, rename payload fields
mrnkslv Jun 8, 2026
2d3bea6
merge: integrate feature/sma-104 REST audit producers + add elections…
mrnkslv Jun 8, 2026
7f5ad77
fix:fmt
mrnkslv Jun 8, 2026
c9936ea
Merge remote-tracking branch 'origin/feature/sma-99-audit-log-archite…
mrnkslv Jun 9, 2026
b65c3b5
fix: resolve post-merge compilation errors after integrating SMA-103
mrnkslv Jun 9, 2026
a2d19bf
fix:fmt
mrnkslv Jun 9, 2026
7d6edf7
remove: drop unused ElectionsTickFailed variant from AuditEventPayload
mrnkslv Jun 9, 2026
c8085e0
fix: imports
mrnkslv Jun 9, 2026
4808fdc
Merge remote-tracking branch 'origin/feature/sma-104-audit-log-rest-p…
mrnkslv Jun 9, 2026
f6a28f0
fix: post-merge fixups after integrating remote sma-104
mrnkslv Jun 9, 2026
7f09c0a
refactor(adaptive): flatten AdaptiveStakeZero into AdaptiveStakeResult
mrnkslv Jun 9, 2026
9305242
fix:fmt
mrnkslv Jun 9, 2026
be88956
feat(audit): project elections audit events into GET /v1/elections
mrnkslv Jun 9, 2026
068fcbf
merge: integrate origin/feature/sma-99-audit-log-architecture into sm…
mrnkslv Jun 11, 2026
92f33a3
merge: integrate sma-105 into sma-106
mrnkslv Jun 11, 2026
4805e28
fix(audit): address Copilot review comments on ring buffer and projec…
mrnkslv Jun 11, 2026
7cd7d32
docs(audit): add operator-facing audit-log.md and README section
mrnkslv Jun 11, 2026
b110bd4
Merge branch 'feature/sma-99-audit-log-architecture' into sma-106
mrnkslv Jun 15, 2026
b875a92
merge: integrate sma-106 into sma-107
mrnkslv Jun 15, 2026
3b73355
fix: bug found after testing
mrnkslv Jun 15, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions src/node-control/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
- [Voting Commands](#voting-commands)
- [REST API Endpoints](#rest-api-endpoints)
- [Configuration](#configuration)
- [Audit log](#audit-log)
- [Config Structure](#config-structure)
- [Section Descriptions](#section-descriptions)
- [Default Config Example](#default-config-example)
Expand Down Expand Up @@ -2521,9 +2522,18 @@ curl -X POST http://127.0.0.1:8080/v1/task/elections \

---

## Audit log

nodectl writes a structured audit log of domain events (elections, config
mutations, auth) to `./logs/audit.jsonl` by default. See [docs/audit-log.md](docs/audit-log.md)
for configuration, retention, PII handling, and log analysis.

---

## Related Setup Guides

- [Hashicorp Vault Dedicated Setup](./docs/hcp-vault-setup.md)
- [Node Control Service Setup](./docs/nodectl-setup.md)
- [Contracts automation (auto-deploy / auto-topup)](./docs/contracts-automation.md) — `automation` config, REST and CLI
- [Security Guide](./docs/nodectl-security.md) — roles, token lifecycle, rate limiting, monitoring
- [Audit Log](./docs/audit-log.md) — configuration, durability, PII, log analysis
Original file line number Diff line number Diff line change
Expand Up @@ -673,18 +673,19 @@ fn print_elections_table(body: &str) -> anyhow::Result<()> {

println!("\n {} ({})\n", "Our Participants".cyan().bold(), participants.len());
println!(
" {} {} {} {} {} {} {} {} {}",
" {} {} {} {} {} {} {} {} {} {}",
format!("{:<14}", "Node").cyan().bold(),
format!("{:<13}", "Status").cyan().bold(),
format!("{:<5}", "Pos").cyan().bold(),
format!("{:<15}", "Submitted TON").cyan().bold(),
format!("{:<15}", "Accepted TON").cyan().bold(),
format!("{:<24}", "Submitted At").cyan().bold(),
format!("{:<6}", "MaxF").cyan().bold(),
format!("{:<30}", "Last error").cyan().bold(),
format!("{:<44}", "Pubkey").cyan().bold(),
"ADNL".cyan().bold(),
);
println!(" {}", "-".repeat(148).dimmed());
println!(" {}", "-".repeat(180).dimmed());

for p in participants {
let node = binding_str(p, "node_id");
Expand Down Expand Up @@ -736,26 +737,28 @@ fn print_elections_table(body: &str) -> anyhow::Result<()> {
.unwrap_or_else(|| "-".to_string());

let accepted_stake = binding_str(p, "accepted_stake");
let last_error = binding_str(p, "last_error");
let last_error_display =
if last_error == "-" { "-".to_string() } else { last_error.yellow().to_string() };
let pubkey = binding_str(p, "pubkey");
let adnl = binding_str(p, "adnl");

println!(
" {:<14} {} {:<5} {:<15} {:<15} {:<24} {:<6} {:<44} {}",
" {:<14} {} {:<5} {:<15} {:<15} {:<24} {:<6} {:<30} {:<44} {}",
node,
status,
position,
display_tons_from_str(&submitted_stake),
display_tons_from_str(&accepted_stake),
submitted_at,
max_factor,
last_error_display,
pubkey,
adnl,
);
}
println!();

print_recent_events_table(&value);

Ok(())
}

Expand Down
177 changes: 177 additions & 0 deletions src/node-control/docs/audit-log.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,177 @@
# Audit Log

## What it is

nodectl writes a structured, append-only log of domain events — elections, config
mutations, authentication, vault operations — to a newline-delimited JSON file
(`audit.jsonl`).

The audit log is **separate from the `tracing` service log** (stderr / journald).
Use the table below to decide where to look.

| Use case | Where |
|---|---|
| Debugging service internals, stack traces | tracing logs (`RUST_LOG`) |
| HTTP access / request logs | *(not implemented; would be tracing spans)* |
| Metrics / counters | Prometheus *(future)* |
| Domain events: who did what and when | **audit log** |

## Out of scope

- Per-RPC / per-request logging
- Metrics / dashboards
- Debug noise (heartbeats, cache refreshes, routine polls)
- High-frequency sources (> ~10 events/sec)
- Tamper-evidence (hash chain, signed events) — see RFC 9162 for future work

## Event types

Events are grouped by source:

| Prefix | Events |
|---|---|
| `elections.*` | Key generated, stake submitted/accepted/skipped/failed/recovered, withdraw processed/failed |
| `rest_api.*` | Config updated, auth login succeeded/rejected, token rejected |
| `vault.*` | Key created / removed *(producers not wired yet)* |
| `rewards.*` | Distribution started/completed/failed, recipient skipped *(producers not wired yet)* |
| `system.*` | Service started/stopped, audit events dropped |

Each event contains:

- `id` — UUID v7 (sortable by creation time)
- `ts` — RFC3339 timestamp with millisecond precision (`2026-05-22T12:10:30.123Z`)
- `outcome` — `success`, `failure`, or `skipped`
- `event_type` — dotted string (e.g. `elections.stake_submitted`)
- `data` — event-specific payload (omitted when `include_payload = false`)
- `actor` — who triggered the action (`system`, a `service` task, or a `user` identity)
- `target` — what the action was applied to (node, config, vault key, …)

## File layout

```
./logs/audit.jsonl ← current file (first line is a system.service_started event)
./logs/audit.jsonl.1 ← most-recent rotation
./logs/audit.jsonl.2
…
./logs/audit.jsonl.9
```

The first line of every (rotated) file is a regular `system.service_started`
event whose `data` carries the service `version` and `host`. There is no
special header format — every line is a uniform JSONL event:

```json
{"id":"019ecb64-...","ts":"2026-05-22T12:00:00.000Z","outcome":"success","event_type":"system.service_started","data":{"version":"0.7.0","host":"validator-1"},"actor":{"kind":"system"},"target":{"kind":"system"}}
```

Defaults: 100 MiB per file, 10 files → ~1 GiB total history.

## Configuration

All fields live under the `audit_log` key in the nodectl config file.
The values are read once at startup, so any change takes effect only after a service restart.

| Field | Default | Description |
|---|---|---|
| `enabled` | `true` | Set to `false` to disable the audit log entirely |
| `path` | `./logs/audit.jsonl` | Path to the active log file; rotated files get `.1`…`.N` suffixes |
| `max_size_bytes` | `104857600` (100 MiB) | Rotate when the live file exceeds this size |
| `max_files` | `10` | Number of rotated files to keep (oldest is deleted on overflow) |
| `batch_interval_ms` | `1000` | How often (ms) the writer flushes a batch to disk |
| `batch_max_events` | `100` | Flush early when a batch reaches this many events |
| `queue_capacity` | `10000` | In-memory channel capacity between `record()` callers and the writer task |
| `queue_full_timeout_ms` | `250` | How long (ms) `record()` waits before dropping an event when the queue is full |
| `fsync_on_batch` | `false` | Call `fsync` after every batch — see [Durability](#durability) |
| `include_payload` | `true` | Write `data` fields; set to `false` to log event metadata only |
| `record_client_ip` | `false` | Include client IP in `rest_api.*` events — see [PII](#pii-and-retention) |
| `ip_anonymize` | `false` | Mask last IPv4 octet / last two IPv6 groups when recording IP |
| `ring_buffer_capacity` | `100` | In-memory ring for the REST read-path (see [Where it's consumed](#where-its-consumed)) |

Example minimal override (all other fields keep their defaults):

```json
{
"audit_log": {
"path": "/var/log/nodectl/audit.jsonl",
"max_files": 30,
"fsync_on_batch": true
}
}
```

## Durability

With `fsync_on_batch = false` (default), the kernel page cache is flushed on
the OS's own schedule. On a hard kill (`SIGKILL`) or power loss, up to
`batch_interval_ms` (~1 s) of events may be lost.

Set `fsync_on_batch = true` for strict durability at higher disk cost (one
`fsync` per second by default; one per `batch_max_events` events at high
throughput).

Events dropped because the writer queue is full are counted in the
`system.audit_events_dropped` event emitted on the next flush.

## PII and retention

Audit events may contain operator usernames, optionally client IP addresses,
and config change details. In GDPR-style regimes, IP addresses and usernames
are personal data.

- `record_client_ip = false` (default): no IP is ever written.
- `record_client_ip = true`, `ip_anonymize = false`: full IP written.
- `record_client_ip = true`, `ip_anonymize = true`: last IPv4 octet zeroed,
last two IPv6 groups masked (`::0:0`).

Retention is bounded by `max_size_bytes × max_files`. Tune for your policy.
Log files are **not** automatically deleted after a time-based retention period —
external tooling (logrotate, cron) is needed if you require time-based purges.

## File permissions

On Unix, the live file and all rotated files are created with mode `0600`
(owner read/write only). The directory is not created with any special mode —
ensure the directory itself has appropriate permissions.

Tamper-evidence (hash chains, signed events) is **out of scope** for the
current release. Treat the audit log as protected by host trust and filesystem
ACLs, not by cryptography.

## Where it's consumed

`GET /v1/elections` reads from the **in-memory ring buffer** (last
`ring_buffer_capacity` events, default 100) and enriches `our_participants`
with:

- `stake_submissions` — stake submission history from audit
- `last_error` — latest error-class event (stake skipped, stake failed, withdraw failed)

The JSONL file on disk is **not** parsed on the hot path.

## Analyzing the log

```sh
# Count events by type
jq -r .event_type logs/audit.jsonl | sort | uniq -c | sort -rn

# All events for one election round
jq 'select(.target.election_id == 1779265552)' logs/audit.jsonl

# Failed or skipped stakes in the current (live) file
jq 'select(.outcome == "failure" or .outcome == "skipped")
| select(.event_type | startswith("elections.stake"))' logs/audit.jsonl

# Config mutations by a specific user
jq 'select(.event_type == "rest_api.config_updated" and .actor.id == "alice")' \
logs/audit.jsonl

# Tail-follow live events
tail -f logs/audit.jsonl | jq .

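# All events in a time range. jq compares `ts` as plain strings, so the bounds
# must use the same millisecond format as the log (`...:00.000Z`, not `...:00Z`).
jq 'select(.ts >= "2026-05-22T10:00:00.000Z" and .ts < "2026-05-22T11:00:00.000Z")' \
logs/audit.jsonl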

# Events across rotated files (oldest first)
cat logs/audit.jsonl.1 logs/audit.jsonl | jq .
```
2 changes: 1 addition & 1 deletion src/node-control/service/src/audit/enums.rs
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ pub enum AuditEventPayload {

// ── system ───────────────────────────────────────────────────────────────
#[serde(rename = "system.service_started")]
SystemServiceStarted { version: String },
SystemServiceStarted { version: String, host: String },

#[serde(rename = "system.service_stopped")]
SystemServiceStopped {},
Expand Down
77 changes: 33 additions & 44 deletions src/node-control/service/src/audit/event.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ use serde::{Deserialize, Deserializer, Serialize, Serializer};
use uuid::Uuid;

/// Renders timestamps as RFC3339 with millisecond precision and a trailing `Z`
/// (e.g. `2026-05-22T12:10:30.123Z`), used for `ts` and `started_at`.
/// (e.g. `2026-05-22T12:10:30.123Z`), used for the `ts` field.
mod ts_millis_rfc3339 {
use super::*;

Expand All @@ -31,26 +31,11 @@ mod ts_millis_rfc3339 {
}
}

/// First JSONL line of every (rotated) audit file. Readers distinguish it from
/// events by the absence of an `event_type` field.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct AuditFileHeader {
pub schema_version: u16,
/// Logical service name, e.g. `"nodectl"`.
pub service: String,
/// Service semver.
pub service_version: String,
pub host: String,
#[serde(with = "ts_millis_rfc3339")]
pub started_at: DateTime<Utc>,
}

/// A single audit record.
///
/// Wire shape: `id`, `ts`, `outcome`, the flattened payload
/// (`event_type` + `data`), `actor`, `target`. `severity`/`source` are derived
/// from the payload at the display layer and `schema_version` lives in
/// [`AuditFileHeader`], so none of them are stored per event.
/// from the payload at the display layer, so they are not stored per event.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct AuditEvent {
/// UUID v7 — sortable by creation time.
Expand Down Expand Up @@ -287,12 +272,12 @@ impl AuditEvent {
)
}

pub fn system_service_started(version: impl Into<String>) -> Self {
pub fn system_service_started(version: impl Into<String>, host: impl Into<String>) -> Self {
Self::new(
AuditActor::System,
AuditTarget::System,
AuditOutcome::Success,
AuditEventPayload::SystemServiceStarted { version: version.into() },
AuditEventPayload::SystemServiceStarted { version: version.into(), host: host.into() },
)
}

Expand Down Expand Up @@ -341,6 +326,31 @@ mod tests {
AuditEvent { id: fixture_id(), ts: fixture_ts(), outcome, payload, actor, target }
}

/// Pins the JSON wire shape of a `system.service_started` event: the payload's
/// `version` and `host` are expected under `data`, alongside the dotted
/// `event_type`, with both actor and target of kind `system`.
#[test]
fn serializes_service_started_to_expected_json() {
// Build the event through the `fixed` test helper with a system actor/target.
let event = fixed(
AuditOutcome::Success,
AuditActor::System,
AuditTarget::System,
AuditEventPayload::SystemServiceStarted {
version: "0.5.1".into(),
host: "node-host".into(),
},
);
// The expected document references FIXTURE_ID / FIXTURE_TS for `id` and `ts`,
// so the comparison does not depend on the current time or a random UUID.
assert_json_eq(
&event,
json!({
"id": FIXTURE_ID,
"ts": FIXTURE_TS,
"outcome": "success",
"event_type": "system.service_started",
"data": { "version": "0.5.1", "host": "node-host" },
"actor": { "kind": "system" },
"target": { "kind": "system" }
}),
);
}

#[test]
fn serializes_stake_submitted_to_expected_json() {
let event = fixed(
Expand Down Expand Up @@ -405,30 +415,6 @@ mod tests {
);
}

#[test]
fn file_header_serializes_with_millis_ts() {
let header = AuditFileHeader {
schema_version: 1,
service: "nodectl".into(),
service_version: "0.5.1".into(),
host: "node-host".into(),
started_at: fixture_ts(),
};
let value = serde_json::to_value(&header).expect("serialize header");
assert_eq!(
value,
json!({
"schema_version": 1,
"service": "nodectl",
"service_version": "0.5.1",
"host": "node-host",
"started_at": FIXTURE_TS
})
);
// Header has no event_type — that is how readers tell it apart from events.
assert!(value.get("event_type").is_none());
}

fn sample_event(payload: AuditEventPayload) -> AuditEvent {
fixed(
AuditOutcome::Success,
Expand Down Expand Up @@ -481,7 +467,10 @@ mod tests {
AuditEventPayload::RestApiTokenRejected { reason: "expired".into() },
AuditEventPayload::VaultKeyCreated {},
AuditEventPayload::VaultKeyRemoved {},
AuditEventPayload::SystemServiceStarted { version: "0.5.0".into() },
AuditEventPayload::SystemServiceStarted {
version: "0.5.0".into(),
host: "test-host".into(),
},
AuditEventPayload::SystemServiceStopped {},
AuditEventPayload::SystemAuditEventsDropped {
dropped_events: 3,
Expand Down
Loading