From f5bedb31a88d29c86260a7cf6ed3048bac98e939 Mon Sep 17 00:00:00 2001 From: Pringled Date: Fri, 12 Jun 2026 12:13:12 +0200 Subject: [PATCH 1/6] Make savings pretty --- README.md | 25 +++++- src/semble/stats.py | 208 ++++++++++++++++++++++++++++++++++++++------ tests/test_stats.py | 6 +- 3 files changed, 205 insertions(+), 34 deletions(-) diff --git a/README.md b/README.md index dabd5999..f6181a23 100644 --- a/README.md +++ b/README.md @@ -127,13 +127,30 @@ semble savings --verbose # also show breakdown by call type ``` Semble Token Savings ════════════════════════════════════════════════════════════════ - Period Calls Savings + + Total saved: ~714.2M tokens (94%) + Total calls: 14.3k + Efficiency: ███████████████████████░ 94% + + By Period + ──────────────────────────────────────────────────────────────── + Period Calls Saved Ratio + ──────────────────────────────────────────────────────────────── + Today 198 ~1.4M tokens ███████████████████████░ 95% + Last 7 days 13.1k ~707.2M tokens ███████████████████████░ 94% + All time 14.3k ~714.2M tokens ███████████████████████░ 94% + + By Call Type + ──────────────────────────────────────────────────────────────── + # Call type Calls Share ──────────────────────────────────────────────────────────────── - Today 42 [███████████████░] ~58.4k tokens (95%) - Last 7 days 287 [██████████████░░] ~312.4k tokens (90%) - All time 1.4k [██████████████░░] ~1.2M tokens (89%) + 1. search 14.1k ████████████████ 99% + 2. find_related 205 █░░░░░░░░░░░░░░░ 1% + ════════════════════════════════════════════════════════════════ ``` +The report is colourised when stdout is a TTY and respects `NO_COLOR`. + Savings are calculated as follows: for each call, semble records the total character count of the unique files containing returned chunks and the character count of the snippets returned. Estimated tokens saved is `(file chars − snippet chars) / 4` (4 chars per token). This is a conservative estimate: the baseline is reading matched files in full, which is how coding agents often explore unfamiliar code. diff --git a/src/semble/stats.py b/src/semble/stats.py index bebc9888..ccb27830 100644 --- a/src/semble/stats.py +++ b/src/semble/stats.py @@ -1,5 +1,8 @@ import json import logging +import os +import re +import sys from collections import defaultdict from dataclasses import dataclass from datetime import datetime, timedelta, timezone @@ -10,12 +13,73 @@ logger = logging.getLogger(__name__) +_ANSI_RE = re.compile(r"\033\[[0-9;]*m") + def _get_stats_file() -> Path: """Safely create a stats file.""" return resolve_cache_folder() / "savings.jsonl" +def _use_color() -> bool: + """Return True when ANSI color codes should be emitted.""" + if os.environ.get("NO_COLOR"): + return False + if os.environ.get("TERM") == "dumb": + return False + return sys.stdout.isatty() + + +def _vis_len(s: str) -> int: + """Visible length of a string, ignoring ANSI escape sequences.""" + return len(_ANSI_RE.sub("", s)) + + +def _align_left(s: str, width: int) -> str: + """Pad `s` on the right so its visible width matches `width` (left-aligned).""" + pad = max(0, width - _vis_len(s)) + return s + " " * pad + + +def _align_right(s: str, width: int) -> str: + """Pad `s` on the left so its visible width matches `width` (right-aligned).""" + pad = max(0, width - _vis_len(s)) + return " " * pad + s + + +class _C: + """ANSI color helpers; no-op when color is disabled.""" + + __slots__ = ("enabled",) + + def __init__(self, enabled: bool) -> None: + self.enabled = enabled + + def _wrap(self, code: str, text: str) -> str: + return f"\033[{code}m{text}\033[0m" if self.enabled else text + + def title(self, text: str) -> str: + return self._wrap("1;36", text) + + def dim(self, text: str) -> str: + return self._wrap("38;5;244", text) + + def label(self, text: str) -> str: + return self._wrap("1", text) + + def num(self, text: str) -> str: + return self._wrap("1;33", text) + + def good(self, text: str) -> str: + return self._wrap("32", text) + + def bad(self, text: str) -> str: + return self._wrap("31", text) + + def mid(self, text: str) -> str: + return self._wrap("33", text) + + @dataclass class BucketStats: calls: int = 0 @@ -35,6 +99,7 @@ def add(self, snippet_chars: int, file_chars: int) -> None: class SavingsSummary: buckets: dict[str, BucketStats] call_type_counts: dict[str, int] + call_type_saved_chars: dict[str, int] def save_search_stats( @@ -78,6 +143,7 @@ def build_savings_summary(path: Path | None = None) -> SavingsSummary: "All time": BucketStats(), } call_type_counts: defaultdict[str, int] = defaultdict(int) + call_type_saved_chars: defaultdict[str, int] = defaultdict(int) with path.open() as f: for line in f: @@ -90,6 +156,7 @@ def build_savings_summary(path: Path | None = None) -> SavingsSummary: file_chars = record["file_chars"] call_type = record["call"] call_type_counts[call_type] += 1 + call_type_saved_chars[call_type] += max(0, file_chars - snippet_chars) dt = datetime.fromtimestamp(record["ts"], tz=timezone.utc) in_today = dt.date() == today in_last_7 = dt.date() > seven_days_ago @@ -99,7 +166,48 @@ def build_savings_summary(path: Path | None = None) -> SavingsSummary: if in_today: buckets["Today"].add(snippet_chars, file_chars) - return SavingsSummary(buckets=buckets, call_type_counts=dict(call_type_counts)) + return SavingsSummary( + buckets=buckets, + call_type_counts=dict(call_type_counts), + call_type_saved_chars=dict(call_type_saved_chars), + ) + + +def _format_token_count(tokens: int) -> str: + """Format a token count with k/M suffix, keeping the ~ prefix for estimates.""" + if tokens >= 1_000_000: + return f"~{tokens / 1_000_000:.1f}M" + if tokens >= 1_000: + return f"~{tokens / 1_000:.1f}k" + return f"~{tokens}" + + +def _format_calls(calls: int) -> str: + """Format a call count with k suffix for thousands.""" + return f"{calls / 1_000:.1f}k" if calls >= 1_000 else str(calls) + + +def _ratio_color(pct: int, c: _C) -> str: + """Pick a color for a savings ratio percentage.""" + if pct >= 80: + return c.good(f"{pct}%") + if pct >= 50: + return c.mid(f"{pct}%") + return c.bad(f"{pct}%") + + +def _row(c: _C, cols: list[tuple[str, int, str]]) -> str: + """Build a table row with 2-space gutters between columns.""" + gutter = " " + parts: list[str] = [] + for i, (align, width, text) in enumerate(cols): + if i > 0: + parts.append(gutter) + if align == "left": + parts.append(_align_left(text, width)) + else: + parts.append(_align_right(text, width)) + return "".join(parts) def format_savings_report(path: Path | None = None, *, verbose: bool = False) -> str: @@ -110,39 +218,85 @@ def format_savings_report(path: Path | None = None, *, verbose: bool = False) -> return "No stats yet. Run a search first." summary = build_savings_summary(path) - bar_width = 16 - heavy_line = " " + "═" * 64 - light_line = " " + "─" * 64 - - lines = [ - "", - " Semble Token Savings", - heavy_line, - f" {'Period':<12} {'Calls':<6} Savings", - light_line, - ] + c = _C(_use_color()) + bar_width = 24 + border_w = 64 + heavy_line = " " + c.dim("═" * border_w) + light_line = " " + c.dim("─" * border_w) + + all_time = summary.buckets["All time"] + total_saved_tokens = all_time.saved_chars // 4 + overall_pct = round(all_time.saved_chars / all_time.file_chars * 100) if all_time.file_chars else 0 + + lines: list[str] = ["", " " + c.title("Semble Token Savings"), heavy_line, ""] + + total_label = c.label("Total saved:") + total_value = c.num(_format_token_count(total_saved_tokens) + " tokens") + pct_value = _ratio_color(overall_pct, c) + lines.append(f" {total_label} {total_value} {c.dim('(')}{pct_value}{c.dim(')')}") + + calls_label = c.label("Total calls:") + calls_value = c.num(_format_calls(all_time.calls)) + lines.append(f" {calls_label} {calls_value}") + + eff_label = c.label("Efficiency:") + eff_filled = round(overall_pct / 100 * bar_width) + eff_bar = c.good("█" * eff_filled) + c.dim("░" * (bar_width - eff_filled)) + lines.append(f" {eff_label} {eff_bar} {pct_value}") + lines.append("") + + lines.append(" " + c.label("By Period")) + lines.append(light_line) + period_cols = [("left", 14, "Period"), ("right", 8, "Calls"), ("right", 14, "Saved")] + lines.append(" " + _row(c, period_cols) + " " + c.dim("Ratio")) + lines.append(light_line) for label, bucket in summary.buckets.items(): - saved_tokens = bucket.saved_chars // 4 # standard ~4 chars/token approximation - if saved_tokens >= 1_000_000: - saved_str = f"~{saved_tokens / 1_000_000:.1f}M" - elif saved_tokens >= 1000: - saved_str = f"~{saved_tokens / 1000:.1f}k" - else: - saved_str = f"~{saved_tokens}" - calls_str = f"{bucket.calls / 1000:.1f}k" if bucket.calls >= 1000 else str(bucket.calls) + saved_tokens = bucket.saved_chars // 4 + saved_str = c.num(_format_token_count(saved_tokens) + " tokens") + calls_str = c.num(_format_calls(bucket.calls)) if bucket.file_chars > 0: ratio = bucket.saved_chars / bucket.file_chars filled = round(ratio * bar_width) - bar = "█" * filled + "░" * (bar_width - filled) + row_bar = c.good("█" * filled) + c.dim("░" * (bar_width - filled)) pct = round(ratio * 100) - lines.append(f" {label:<12} {calls_str:<6} [{bar}] {saved_str} tokens ({pct}%)") + pct_str = _ratio_color(pct, c) else: - lines.append(f" {label:<12} {calls_str:<6} [{'░' * bar_width}] {saved_str} tokens") + row_bar = c.dim("░" * bar_width) + pct_str = c.dim("–") + data_cols = [("left", 14, c.label(label)), ("right", 8, calls_str), ("right", 14, saved_str)] + lines.append(" " + _row(c, data_cols) + " " + row_bar + " " + pct_str) + + if summary.call_type_counts: + lines.append("") + lines.append(" " + c.label("By Call Type")) + lines.append(light_line) + call_cols = [("left", 4, "#"), ("left", 16, "Call type"), ("right", 8, "Calls")] + lines.append(" " + _row(c, call_cols) + " " + c.dim("Share")) + lines.append(light_line) + top = sorted(summary.call_type_counts.items(), key=lambda kv: -kv[1]) + total = max(1, sum(summary.call_type_counts.values())) + max_bar = 16 + for i, (call_type, count) in enumerate(top, start=1): + share = count / total + filled = max(1, round(share * max_bar)) if share > 0 else 0 + bar = c.good("█" * filled) + c.dim("░" * (max_bar - filled)) + calls_str = c.num(_format_calls(count)) + share_str = c.dim(f"{share * 100:>4.0f}%") + data_cols = [("left", 4, c.dim(f"{i}.")), ("left", 16, call_type), ("right", 8, calls_str)] + lines.append(" " + _row(c, data_cols) + " " + bar + " " + share_str) + if verbose and summary.call_type_counts: - lines += ["", " Usage Breakdown", light_line, f" {'Call type':<16} Calls"] + lines.append("") + lines.append(" " + c.label("Per-Call-Type Savings")) + lines.append(light_line) + vcols = [("left", 16, "Call type"), ("right", 8, "Calls"), ("right", 14, "Saved")] + lines.append(" " + _row(c, vcols)) + lines.append(light_line) for call_type, count in sorted(summary.call_type_counts.items()): - count_str = f"{count / 1000:.1f}k" if count >= 1000 else str(count) - lines.append(f" {call_type:<16} {count_str}") - lines.append(heavy_line) + count_str = c.num(_format_calls(count)) + saved_str = c.num(_format_token_count(summary.call_type_saved_chars.get(call_type, 0) // 4) + " tokens") + lines.append(" " + _row(c, [("left", 16, call_type), ("right", 8, count_str), ("right", 14, saved_str)])) + + lines.append(heavy_line) lines.append("") return "\n".join(lines) diff --git a/tests/test_stats.py b/tests/test_stats.py index e3c1f321..8dca0a63 100644 --- a/tests/test_stats.py +++ b/tests/test_stats.py @@ -51,13 +51,13 @@ def test_savings_no_file(tmp_path: Path) -> None: @pytest.mark.parametrize( ("verbose", "expected"), [ - (False, ["Savings", "Today"]), - (True, ["Savings", "Today", "Usage Breakdown", "search", "find_related"]), + (False, ["Savings", "Today", "By Period", "By Call Type", "search", "find_related"]), + (True, ["Savings", "Today", "By Call Type", "Per-Call-Type Savings", "search", "find_related"]), ], ids=["default", "verbose"], ) def test_savings_output(sample_stats_file: Path, verbose: bool, expected: list[str]) -> None: - """format_savings_report displays period buckets; --verbose adds call-type breakdown.""" + """format_savings_report displays period buckets and a call-type table; --verbose adds per-type savings.""" result = format_savings_report(path=sample_stats_file, verbose=verbose) for s in expected: assert s in result From aca146979a2c5ec6defa1e7d4e596f520386b8ac Mon Sep 17 00:00:00 2001 From: Pringled Date: Fri, 12 Jun 2026 12:18:19 +0200 Subject: [PATCH 2/6] Drop verbose flag --- README.md | 4 +--- src/semble/cli.py | 5 ++--- src/semble/stats.py | 23 ++--------------------- tests/test_stats.py | 32 ++++++++------------------------ 4 files changed, 13 insertions(+), 51 deletions(-) diff --git a/README.md b/README.md index f6181a23..a53d8f31 100644 --- a/README.md +++ b/README.md @@ -120,8 +120,7 @@ Semble also always skips a set of well-known non-source directories regardless o `semble savings` shows how many tokens semble has saved across all your searches: ```bash -semble savings # summary by period -semble savings --verbose # also show breakdown by call type +semble savings ``` ``` @@ -149,7 +148,6 @@ semble savings --verbose # also show breakdown by call type ════════════════════════════════════════════════════════════════ ``` -The report is colourised when stdout is a TTY and respects `NO_COLOR`. Savings are calculated as follows: for each call, semble records the total character count of the unique files containing returned chunks and the character count of the snippets returned. Estimated tokens saved is `(file chars − snippet chars) / 4` (4 chars per token). This is a conservative estimate: the baseline is reading matched files in full, which is how coding agents often explore unfamiliar code. diff --git a/src/semble/cli.py b/src/semble/cli.py index 98254d19..5294269f 100644 --- a/src/semble/cli.py +++ b/src/semble/cli.py @@ -187,8 +187,7 @@ def _cli_main() -> None: related_p.add_argument("-k", "--top-k", type=int, default=5, help="Number of results (default: 5).") _add_content_args(related_p) - savings_p = sub.add_parser("savings", help="Show token savings and usage stats.") - savings_p.add_argument("--verbose", action="store_true", help="Also show usage breakdown by call type.") + sub.add_parser("savings", help="Show token savings and usage stats.") sub.add_parser("install", help="Interactively configure semble across coding agents.") sub.add_parser("uninstall", help="Interactively remove semble configuration from coding agents.") @@ -196,7 +195,7 @@ def _cli_main() -> None: args = parser.parse_args() if args.command == "savings": - print(format_savings_report(verbose=args.verbose)) + print(format_savings_report()) elif args.command in ("install", "uninstall"): from semble.installer import run diff --git a/src/semble/stats.py b/src/semble/stats.py index ccb27830..174ea473 100644 --- a/src/semble/stats.py +++ b/src/semble/stats.py @@ -99,7 +99,6 @@ def add(self, snippet_chars: int, file_chars: int) -> None: class SavingsSummary: buckets: dict[str, BucketStats] call_type_counts: dict[str, int] - call_type_saved_chars: dict[str, int] def save_search_stats( @@ -143,7 +142,6 @@ def build_savings_summary(path: Path | None = None) -> SavingsSummary: "All time": BucketStats(), } call_type_counts: defaultdict[str, int] = defaultdict(int) - call_type_saved_chars: defaultdict[str, int] = defaultdict(int) with path.open() as f: for line in f: @@ -156,7 +154,6 @@ def build_savings_summary(path: Path | None = None) -> SavingsSummary: file_chars = record["file_chars"] call_type = record["call"] call_type_counts[call_type] += 1 - call_type_saved_chars[call_type] += max(0, file_chars - snippet_chars) dt = datetime.fromtimestamp(record["ts"], tz=timezone.utc) in_today = dt.date() == today in_last_7 = dt.date() > seven_days_ago @@ -166,11 +163,7 @@ def build_savings_summary(path: Path | None = None) -> SavingsSummary: if in_today: buckets["Today"].add(snippet_chars, file_chars) - return SavingsSummary( - buckets=buckets, - call_type_counts=dict(call_type_counts), - call_type_saved_chars=dict(call_type_saved_chars), - ) + return SavingsSummary(buckets=buckets, call_type_counts=dict(call_type_counts)) def _format_token_count(tokens: int) -> str: @@ -210,7 +203,7 @@ def _row(c: _C, cols: list[tuple[str, int, str]]) -> str: return "".join(parts) -def format_savings_report(path: Path | None = None, *, verbose: bool = False) -> str: +def format_savings_report(path: Path | None = None) -> str: """Return a formatted token-savings report.""" if path is None: path = _get_stats_file() @@ -285,18 +278,6 @@ def format_savings_report(path: Path | None = None, *, verbose: bool = False) -> data_cols = [("left", 4, c.dim(f"{i}.")), ("left", 16, call_type), ("right", 8, calls_str)] lines.append(" " + _row(c, data_cols) + " " + bar + " " + share_str) - if verbose and summary.call_type_counts: - lines.append("") - lines.append(" " + c.label("Per-Call-Type Savings")) - lines.append(light_line) - vcols = [("left", 16, "Call type"), ("right", 8, "Calls"), ("right", 14, "Saved")] - lines.append(" " + _row(c, vcols)) - lines.append(light_line) - for call_type, count in sorted(summary.call_type_counts.items()): - count_str = c.num(_format_calls(count)) - saved_str = c.num(_format_token_count(summary.call_type_saved_chars.get(call_type, 0) // 4) + " tokens") - lines.append(" " + _row(c, [("left", 16, call_type), ("right", 8, count_str), ("right", 14, saved_str)])) - lines.append(heavy_line) lines.append("") return "\n".join(lines) diff --git a/tests/test_stats.py b/tests/test_stats.py index 8dca0a63..093a963a 100644 --- a/tests/test_stats.py +++ b/tests/test_stats.py @@ -48,18 +48,10 @@ def test_savings_no_file(tmp_path: Path) -> None: assert "No stats yet" in format_savings_report(path=tmp_path / "nonexistent.jsonl") -@pytest.mark.parametrize( - ("verbose", "expected"), - [ - (False, ["Savings", "Today", "By Period", "By Call Type", "search", "find_related"]), - (True, ["Savings", "Today", "By Call Type", "Per-Call-Type Savings", "search", "find_related"]), - ], - ids=["default", "verbose"], -) -def test_savings_output(sample_stats_file: Path, verbose: bool, expected: list[str]) -> None: - """format_savings_report displays period buckets and a call-type table; --verbose adds per-type savings.""" - result = format_savings_report(path=sample_stats_file, verbose=verbose) - for s in expected: +def test_savings_output(sample_stats_file: Path) -> None: + """format_savings_report displays period buckets and a call-type table.""" + result = format_savings_report(path=sample_stats_file) + for s in ["Savings", "Today", "By Period", "By Call Type", "search", "find_related"]: assert s in result @@ -100,22 +92,14 @@ def test_savings_tolerates_bad_json(tmp_path: Path) -> None: assert "Savings" in format_savings_report(path=stats_file) -@pytest.mark.parametrize( - ("argv", "expected"), - [ - (["semble", "savings"], "No stats yet"), - (["semble", "savings", "--verbose"], "No stats yet"), - ], - ids=["default", "verbose"], -) def test_savings_cli_dispatch( - argv: list[str], expected: str, tmp_path: Path, monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str] + tmp_path: Path, monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str] ) -> None: - """Savings subcommand dispatches to format_savings_report, with and without --verbose.""" - monkeypatch.setattr(sys, "argv", argv) + """Savings subcommand dispatches to format_savings_report.""" + monkeypatch.setattr(sys, "argv", ["semble", "savings"]) monkeypatch.setattr("semble.stats._get_stats_file", lambda: tmp_path / "nonexistent.jsonl") _cli_main() - assert expected in capsys.readouterr().out + assert "No stats yet" in capsys.readouterr().out def test_savings_buckets_exclude_old_records(tmp_path: Path) -> None: From bdee2bdbb224efab4a34ed45279c42eab28e3534 Mon Sep 17 00:00:00 2001 From: Pringled Date: Fri, 12 Jun 2026 12:31:20 +0200 Subject: [PATCH 3/6] Simplify code --- README.md | 12 +-- src/semble/stats.py | 187 ++++++++++++++------------------------------ tests/test_stats.py | 32 +++++++- uv.lock | 2 +- 4 files changed, 94 insertions(+), 139 deletions(-) diff --git a/README.md b/README.md index a53d8f31..e41c89e3 100644 --- a/README.md +++ b/README.md @@ -125,27 +125,27 @@ semble savings ``` Semble Token Savings - ════════════════════════════════════════════════════════════════ + ════════════════════════════════════════════════════════════════════════ Total saved: ~714.2M tokens (94%) Total calls: 14.3k Efficiency: ███████████████████████░ 94% By Period - ──────────────────────────────────────────────────────────────── + ──────────────────────────────────────────────────────────────────────── Period Calls Saved Ratio - ──────────────────────────────────────────────────────────────── + ──────────────────────────────────────────────────────────────────────── Today 198 ~1.4M tokens ███████████████████████░ 95% Last 7 days 13.1k ~707.2M tokens ███████████████████████░ 94% All time 14.3k ~714.2M tokens ███████████████████████░ 94% By Call Type - ──────────────────────────────────────────────────────────────── + ──────────────────────────────────────────────────────────────────────── # Call type Calls Share - ──────────────────────────────────────────────────────────────── + ──────────────────────────────────────────────────────────────────────── 1. search 14.1k ████████████████ 99% 2. find_related 205 █░░░░░░░░░░░░░░░ 1% - ════════════════════════════════════════════════════════════════ + ════════════════════════════════════════════════════════════════════════ ``` diff --git a/src/semble/stats.py b/src/semble/stats.py index 174ea473..7e767eb8 100644 --- a/src/semble/stats.py +++ b/src/semble/stats.py @@ -1,7 +1,6 @@ import json import logging import os -import re import sys from collections import defaultdict from dataclasses import dataclass @@ -13,8 +12,6 @@ logger = logging.getLogger(__name__) -_ANSI_RE = re.compile(r"\033\[[0-9;]*m") - def _get_stats_file() -> Path: """Safely create a stats file.""" @@ -22,62 +19,13 @@ def _get_stats_file() -> Path: def _use_color() -> bool: - """Return True when ANSI color codes should be emitted.""" - if os.environ.get("NO_COLOR"): - return False - if os.environ.get("TERM") == "dumb": - return False - return sys.stdout.isatty() - - -def _vis_len(s: str) -> int: - """Visible length of a string, ignoring ANSI escape sequences.""" - return len(_ANSI_RE.sub("", s)) - - -def _align_left(s: str, width: int) -> str: - """Pad `s` on the right so its visible width matches `width` (left-aligned).""" - pad = max(0, width - _vis_len(s)) - return s + " " * pad - - -def _align_right(s: str, width: int) -> str: - """Pad `s` on the left so its visible width matches `width` (right-aligned).""" - pad = max(0, width - _vis_len(s)) - return " " * pad + s - - -class _C: - """ANSI color helpers; no-op when color is disabled.""" - - __slots__ = ("enabled",) - - def __init__(self, enabled: bool) -> None: - self.enabled = enabled - - def _wrap(self, code: str, text: str) -> str: - return f"\033[{code}m{text}\033[0m" if self.enabled else text - - def title(self, text: str) -> str: - return self._wrap("1;36", text) + """Return whether ANSI colors should be emitted.""" + return "NO_COLOR" not in os.environ and os.environ.get("TERM") != "dumb" and sys.stdout.isatty() - def dim(self, text: str) -> str: - return self._wrap("38;5;244", text) - def label(self, text: str) -> str: - return self._wrap("1", text) - - def num(self, text: str) -> str: - return self._wrap("1;33", text) - - def good(self, text: str) -> str: - return self._wrap("32", text) - - def bad(self, text: str) -> str: - return self._wrap("31", text) - - def mid(self, text: str) -> str: - return self._wrap("33", text) +def _color(code: str, text: str, enabled: bool) -> str: + """Apply an ANSI color code when enabled.""" + return f"\033[{code}m{text}\033[0m" if enabled else text @dataclass @@ -180,27 +128,10 @@ def _format_calls(calls: int) -> str: return f"{calls / 1_000:.1f}k" if calls >= 1_000 else str(calls) -def _ratio_color(pct: int, c: _C) -> str: - """Pick a color for a savings ratio percentage.""" - if pct >= 80: - return c.good(f"{pct}%") - if pct >= 50: - return c.mid(f"{pct}%") - return c.bad(f"{pct}%") - - -def _row(c: _C, cols: list[tuple[str, int, str]]) -> str: - """Build a table row with 2-space gutters between columns.""" - gutter = " " - parts: list[str] = [] - for i, (align, width, text) in enumerate(cols): - if i > 0: - parts.append(gutter) - if align == "left": - parts.append(_align_left(text, width)) - else: - parts.append(_align_right(text, width)) - return "".join(parts) +def _color_ratio(pct: int, enabled: bool) -> str: + """Color a savings percentage according to its value.""" + code = "32" if pct >= 80 else "33" if pct >= 50 else "31" + return _color(code, f"{pct}%", enabled) def format_savings_report(path: Path | None = None) -> str: @@ -211,72 +142,72 @@ def format_savings_report(path: Path | None = None) -> str: return "No stats yet. Run a search first." summary = build_savings_summary(path) - c = _C(_use_color()) + color = _use_color() bar_width = 24 - border_w = 64 - heavy_line = " " + c.dim("═" * border_w) - light_line = " " + c.dim("─" * border_w) + border_width = 72 + heavy_line = " " + _color("38;5;244", "═" * border_width, color) + light_line = " " + _color("38;5;244", "─" * border_width, color) all_time = summary.buckets["All time"] total_saved_tokens = all_time.saved_chars // 4 overall_pct = round(all_time.saved_chars / all_time.file_chars * 100) if all_time.file_chars else 0 - - lines: list[str] = ["", " " + c.title("Semble Token Savings"), heavy_line, ""] - - total_label = c.label("Total saved:") - total_value = c.num(_format_token_count(total_saved_tokens) + " tokens") - pct_value = _ratio_color(overall_pct, c) - lines.append(f" {total_label} {total_value} {c.dim('(')}{pct_value}{c.dim(')')}") - - calls_label = c.label("Total calls:") - calls_value = c.num(_format_calls(all_time.calls)) - lines.append(f" {calls_label} {calls_value}") - - eff_label = c.label("Efficiency:") - eff_filled = round(overall_pct / 100 * bar_width) - eff_bar = c.good("█" * eff_filled) + c.dim("░" * (bar_width - eff_filled)) - lines.append(f" {eff_label} {eff_bar} {pct_value}") - lines.append("") - - lines.append(" " + c.label("By Period")) - lines.append(light_line) - period_cols = [("left", 14, "Period"), ("right", 8, "Calls"), ("right", 14, "Saved")] - lines.append(" " + _row(c, period_cols) + " " + c.dim("Ratio")) - lines.append(light_line) + efficiency_filled = round(overall_pct / 100 * bar_width) + efficiency_bar = _color("32", "█" * efficiency_filled, color) + efficiency_bar += _color("38;5;244", "░" * (bar_width - efficiency_filled), color) + + lines = [ + "", + " " + _color("1;36", "Semble Token Savings", color), + heavy_line, + "", + f" {_color('1', 'Total saved:', color)} " + f"{_color('1;33', _format_token_count(total_saved_tokens) + ' tokens', color)} " + f"({_color_ratio(overall_pct, color)})", + f" {_color('1', 'Total calls:', color)} {_color('1;33', _format_calls(all_time.calls), color)}", + f" {_color('1', 'Efficiency:', color)} {efficiency_bar} {_color_ratio(overall_pct, color)}", + "", + " " + _color("1", "By Period", color), + light_line, + f" {'Period':<14} {'Calls':>8} {'Saved':>14} Ratio", + light_line, + ] for label, bucket in summary.buckets.items(): saved_tokens = bucket.saved_chars // 4 - saved_str = c.num(_format_token_count(saved_tokens) + " tokens") - calls_str = c.num(_format_calls(bucket.calls)) + saved_str = _format_token_count(saved_tokens) + " tokens" + calls_str = _format_calls(bucket.calls) if bucket.file_chars > 0: ratio = bucket.saved_chars / bucket.file_chars filled = round(ratio * bar_width) - row_bar = c.good("█" * filled) + c.dim("░" * (bar_width - filled)) - pct = round(ratio * 100) - pct_str = _ratio_color(pct, c) + row_bar = _color("32", "█" * filled, color) + _color("38;5;244", "░" * (bar_width - filled), color) + ratio_str = _color_ratio(round(ratio * 100), color) else: - row_bar = c.dim("░" * bar_width) - pct_str = c.dim("–") - data_cols = [("left", 14, c.label(label)), ("right", 8, calls_str), ("right", 14, saved_str)] - lines.append(" " + _row(c, data_cols) + " " + row_bar + " " + pct_str) + row_bar = _color("38;5;244", "░" * bar_width, color) + ratio_str = _color("38;5;244", "–", color) + lines.append( + f" {_color('1', f'{label:<14}', color)} {_color('1;33', f'{calls_str:>8}', color)} " + f"{_color('1;33', f'{saved_str:>14}', color)} {row_bar} {ratio_str}" + ) if summary.call_type_counts: - lines.append("") - lines.append(" " + c.label("By Call Type")) - lines.append(light_line) - call_cols = [("left", 4, "#"), ("left", 16, "Call type"), ("right", 8, "Calls")] - lines.append(" " + _row(c, call_cols) + " " + c.dim("Share")) - lines.append(light_line) + lines += [ + "", + " " + _color("1", "By Call Type", color), + light_line, + f" {'#':<4} {'Call type':<16} {'Calls':>8} Share", + light_line, + ] top = sorted(summary.call_type_counts.items(), key=lambda kv: -kv[1]) - total = max(1, sum(summary.call_type_counts.values())) - max_bar = 16 + total = sum(summary.call_type_counts.values()) for i, (call_type, count) in enumerate(top, start=1): share = count / total - filled = max(1, round(share * max_bar)) if share > 0 else 0 - bar = c.good("█" * filled) + c.dim("░" * (max_bar - filled)) - calls_str = c.num(_format_calls(count)) - share_str = c.dim(f"{share * 100:>4.0f}%") - data_cols = [("left", 4, c.dim(f"{i}.")), ("left", 16, call_type), ("right", 8, calls_str)] - lines.append(" " + _row(c, data_cols) + " " + bar + " " + share_str) + filled = max(1, round(share * 16)) + bar = _color("32", "█" * filled, color) + _color("38;5;244", "░" * (16 - filled), color) + rank = f"{i}." + lines.append( + f" {_color('38;5;244', f'{rank:<4}', color)} {call_type:<16} " + f"{_color('1;33', f'{_format_calls(count):>8}', color)} {bar} " + f"{_color('38;5;244', f'{share * 100:>4.0f}%', color)}" + ) lines.append(heavy_line) lines.append("") diff --git a/tests/test_stats.py b/tests/test_stats.py index 093a963a..1b6a776a 100644 --- a/tests/test_stats.py +++ b/tests/test_stats.py @@ -7,7 +7,7 @@ import pytest from semble.cli import _cli_main -from semble.stats import build_savings_summary, format_savings_report, save_search_stats +from semble.stats import _use_color, build_savings_summary, format_savings_report, save_search_stats from semble.types import CallType, SearchResult from tests.conftest import make_chunk @@ -49,10 +49,34 @@ def test_savings_no_file(tmp_path: Path) -> None: def test_savings_output(sample_stats_file: Path) -> None: - """format_savings_report displays period buckets and a call-type table.""" + """format_savings_report displays aligned period and call-type tables.""" result = format_savings_report(path=sample_stats_file) - for s in ["Savings", "Today", "By Period", "By Call Type", "search", "find_related"]: - assert s in result + lines = result.splitlines() + border_width = len(next(line for line in lines if "═" in line)) + period_line = next(line for line in lines if line.lstrip().startswith("Today")) + call_type_line = next(line for line in lines if line.lstrip().startswith("1.")) + + assert "\033[" not in result + assert period_line.split() == ["Today", "2", "~9.5k", "tokens", "███████████████████████░", "95%"] + assert call_type_line.split() == ["1.", "search", "1", "████████░░░░░░░░", "50%"] + assert all(len(line) <= border_width for line in lines if "tokens █" in line) + + +def test_savings_output_uses_color_in_tty(sample_stats_file: Path) -> None: + """format_savings_report colors the presentation when supported.""" + with patch("semble.stats._use_color", return_value=True): + result = format_savings_report(path=sample_stats_file) + + assert "\033[1;36mSemble Token Savings\033[0m" in result + assert "\033[32m███████████████████████\033[0m" in result + assert "\033[1;33m ~9.5k tokens\033[0m" in result + + +def test_no_color_disables_color_when_empty(monkeypatch: pytest.MonkeyPatch) -> None: + """NO_COLOR disables colors regardless of its value.""" + monkeypatch.setenv("NO_COLOR", "") + with patch("semble.stats.sys.stdout.isatty", return_value=True): + assert not _use_color() def test_savings_output_millions(tmp_path: Path) -> None: diff --git a/uv.lock b/uv.lock index d0db5aa6..63f2277d 100644 --- a/uv.lock +++ b/uv.lock @@ -10,7 +10,7 @@ resolution-markers = [ [options] exclude-newer = "0001-01-01T00:00:00Z" # This has no effect and is included for backwards compatibility when using relative exclude-newer values. -exclude-newer-span = "P3D" +exclude-newer-span = "P1W" [[package]] name = "annotated-doc" From 494bf4c4cd59d635c8ba0cc710a9358007012549 Mon Sep 17 00:00:00 2001 From: Pringled Date: Fri, 12 Jun 2026 12:34:35 +0200 Subject: [PATCH 4/6] Update tests --- tests/test_stats.py | 39 +++++++++++---------------------------- 1 file changed, 11 insertions(+), 28 deletions(-) diff --git a/tests/test_stats.py b/tests/test_stats.py index 1b6a776a..85de80b9 100644 --- a/tests/test_stats.py +++ b/tests/test_stats.py @@ -48,30 +48,6 @@ def test_savings_no_file(tmp_path: Path) -> None: assert "No stats yet" in format_savings_report(path=tmp_path / "nonexistent.jsonl") -def test_savings_output(sample_stats_file: Path) -> None: - """format_savings_report displays aligned period and call-type tables.""" - result = format_savings_report(path=sample_stats_file) - lines = result.splitlines() - border_width = len(next(line for line in lines if "═" in line)) - period_line = next(line for line in lines if line.lstrip().startswith("Today")) - call_type_line = next(line for line in lines if line.lstrip().startswith("1.")) - - assert "\033[" not in result - assert period_line.split() == ["Today", "2", "~9.5k", "tokens", "███████████████████████░", "95%"] - assert call_type_line.split() == ["1.", "search", "1", "████████░░░░░░░░", "50%"] - assert all(len(line) <= border_width for line in lines if "tokens █" in line) - - -def test_savings_output_uses_color_in_tty(sample_stats_file: Path) -> None: - """format_savings_report colors the presentation when supported.""" - with patch("semble.stats._use_color", return_value=True): - result = format_savings_report(path=sample_stats_file) - - assert "\033[1;36mSemble Token Savings\033[0m" in result - assert "\033[32m███████████████████████\033[0m" in result - assert "\033[1;33m ~9.5k tokens\033[0m" in result - - def test_no_color_disables_color_when_empty(monkeypatch: pytest.MonkeyPatch) -> None: """NO_COLOR disables colors regardless of its value.""" monkeypatch.setenv("NO_COLOR", "") @@ -79,13 +55,20 @@ def test_no_color_disables_color_when_empty(monkeypatch: pytest.MonkeyPatch) -> assert not _use_color() -def test_savings_output_millions(tmp_path: Path) -> None: - """Token counts >= 1M are formatted as M, not k.""" +@pytest.mark.parametrize( + ("file_chars", "expected"), + [ + (40_000, "~10.0k tokens"), + (4_000_000, "~1.0M tokens"), + ], +) +def test_savings_output_token_suffixes(tmp_path: Path, file_chars: int, expected: str) -> None: + """Token counts use the expected suffix formatting.""" stats_file = tmp_path / "stats.jsonl" stats_file.write_text( - _make_stats_record(datetime.now(timezone.utc).timestamp(), snippet_chars=0, file_chars=4_000_000) + "\n" + _make_stats_record(datetime.now(timezone.utc).timestamp(), snippet_chars=0, file_chars=file_chars) + "\n" ) - assert "M tokens" in format_savings_report(path=stats_file) + assert expected in format_savings_report(path=stats_file) def test_savings_do_not_subtract_unknown_baselines(tmp_path: Path) -> None: From 55e8db26a7aa3f10c8207691c7461c1bafdc55db Mon Sep 17 00:00:00 2001 From: Pringled Date: Fri, 12 Jun 2026 12:45:02 +0200 Subject: [PATCH 5/6] Remove dead code --- tests/test_stats.py | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/tests/test_stats.py b/tests/test_stats.py index 85de80b9..ddd61e19 100644 --- a/tests/test_stats.py +++ b/tests/test_stats.py @@ -16,17 +16,6 @@ def _make_stats_record(ts: float, call: str = "search", snippet_chars: int = 1_0 return json.dumps({"ts": ts, "call": call, "results": 3, "snippet_chars": snippet_chars, "file_chars": file_chars}) -@pytest.fixture -def sample_stats_file(tmp_path: Path) -> Path: - """Stats file with one search and one find_related record from today.""" - stats_file = tmp_path / "stats.jsonl" - now = datetime.now(timezone.utc).timestamp() - stats_file.write_text( - _make_stats_record(now, call="search") + "\n" + _make_stats_record(now, call="find_related") + "\n" - ) - return stats_file - - def test_save_search_stats(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None: """save_search_stats deduplicates file paths and silences write errors.""" chunk = make_chunk("hello", "src/foo.py") From c08ba43b8d1ede14972e2d832c7b7fbf3ee56583 Mon Sep 17 00:00:00 2001 From: Pringled Date: Fri, 12 Jun 2026 14:36:21 +0200 Subject: [PATCH 6/6] Bump version --- src/semble/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/semble/version.py b/src/semble/version.py index 9bfefb0c..f0768802 100644 --- a/src/semble/version.py +++ b/src/semble/version.py @@ -1,2 +1,2 @@ -__version_triple__ = (0, 3, 3) +__version_triple__ = (0, 3, 4) __version__ = ".".join(map(str, __version_triple__))