diff --git a/README.md b/README.md index dabd599..e41c89e 100644 --- a/README.md +++ b/README.md @@ -120,20 +120,35 @@ Semble also always skips a set of well-known non-source directories regardless o `semble savings` shows how many tokens semble has saved across all your searches: ```bash -semble savings # summary by period -semble savings --verbose # also show breakdown by call type +semble savings ``` ``` Semble Token Savings - ════════════════════════════════════════════════════════════════ - Period Calls Savings - ──────────────────────────────────────────────────────────────── - Today 42 [███████████████░] ~58.4k tokens (95%) - Last 7 days 287 [██████████████░░] ~312.4k tokens (90%) - All time 1.4k [██████████████░░] ~1.2M tokens (89%) + ════════════════════════════════════════════════════════════════════════ + + Total saved: ~714.2M tokens (94%) + Total calls: 14.3k + Efficiency: ███████████████████████░ 94% + + By Period + ──────────────────────────────────────────────────────────────────────── + Period Calls Saved Ratio + ──────────────────────────────────────────────────────────────────────── + Today 198 ~1.4M tokens ███████████████████████░ 95% + Last 7 days 13.1k ~707.2M tokens ███████████████████████░ 94% + All time 14.3k ~714.2M tokens ███████████████████████░ 94% + + By Call Type + ──────────────────────────────────────────────────────────────────────── + # Call type Calls Share + ──────────────────────────────────────────────────────────────────────── + 1. search 14.1k ████████████████ 99% + 2. find_related 205 █░░░░░░░░░░░░░░░ 1% + ════════════════════════════════════════════════════════════════════════ ``` + Savings are calculated as follows: for each call, semble records the total character count of the unique files containing returned chunks and the character count of the snippets returned. Estimated tokens saved is `(file chars − snippet chars) / 4` (4 chars per token). This is a conservative estimate: the baseline is reading matched files in full, which is how coding agents often explore unfamiliar code. diff --git a/src/semble/cli.py b/src/semble/cli.py index 98254d1..5294269 100644 --- a/src/semble/cli.py +++ b/src/semble/cli.py @@ -187,8 +187,7 @@ def _cli_main() -> None: related_p.add_argument("-k", "--top-k", type=int, default=5, help="Number of results (default: 5).") _add_content_args(related_p) - savings_p = sub.add_parser("savings", help="Show token savings and usage stats.") - savings_p.add_argument("--verbose", action="store_true", help="Also show usage breakdown by call type.") + sub.add_parser("savings", help="Show token savings and usage stats.") sub.add_parser("install", help="Interactively configure semble across coding agents.") sub.add_parser("uninstall", help="Interactively remove semble configuration from coding agents.") @@ -196,7 +195,7 @@ def _cli_main() -> None: args = parser.parse_args() if args.command == "savings": - print(format_savings_report(verbose=args.verbose)) + print(format_savings_report()) elif args.command in ("install", "uninstall"): from semble.installer import run diff --git a/src/semble/stats.py b/src/semble/stats.py index bebc988..7e767eb 100644 --- a/src/semble/stats.py +++ b/src/semble/stats.py @@ -1,5 +1,7 @@ import json import logging +import os +import sys from collections import defaultdict from dataclasses import dataclass from datetime import datetime, timedelta, timezone @@ -16,6 +18,16 @@ def _get_stats_file() -> Path: return resolve_cache_folder() / "savings.jsonl" +def _use_color() -> bool: + """Return whether ANSI colors should be emitted.""" + return "NO_COLOR" not in os.environ and os.environ.get("TERM") != "dumb" and sys.stdout.isatty() + + +def _color(code: str, text: str, enabled: bool) -> str: + """Apply an ANSI color code when enabled.""" + return f"\033[{code}m{text}\033[0m" if enabled else text + + @dataclass class BucketStats: calls: int = 0 @@ -102,7 +114,27 @@ def build_savings_summary(path: Path | None = None) -> SavingsSummary: return SavingsSummary(buckets=buckets, call_type_counts=dict(call_type_counts)) -def format_savings_report(path: Path | None = None, *, verbose: bool = False) -> str: +def _format_token_count(tokens: int) -> str: + """Format a token count with k/M suffix, keeping the ~ prefix for estimates.""" + if tokens >= 1_000_000: + return f"~{tokens / 1_000_000:.1f}M" + if tokens >= 1_000: + return f"~{tokens / 1_000:.1f}k" + return f"~{tokens}" + + +def _format_calls(calls: int) -> str: + """Format a call count with k suffix for thousands.""" + return f"{calls / 1_000:.1f}k" if calls >= 1_000 else str(calls) + + +def _color_ratio(pct: int, enabled: bool) -> str: + """Color a savings percentage according to its value.""" + code = "32" if pct >= 80 else "33" if pct >= 50 else "31" + return _color(code, f"{pct}%", enabled) + + +def format_savings_report(path: Path | None = None) -> str: """Return a formatted token-savings report.""" if path is None: path = _get_stats_file() @@ -110,39 +142,73 @@ def format_savings_report(path: Path | None = None, *, verbose: bool = False) -> return "No stats yet. Run a search first." summary = build_savings_summary(path) - bar_width = 16 - heavy_line = " " + "═" * 64 - light_line = " " + "─" * 64 + color = _use_color() + bar_width = 24 + border_width = 72 + heavy_line = " " + _color("38;5;244", "═" * border_width, color) + light_line = " " + _color("38;5;244", "─" * border_width, color) + + all_time = summary.buckets["All time"] + total_saved_tokens = all_time.saved_chars // 4 + overall_pct = round(all_time.saved_chars / all_time.file_chars * 100) if all_time.file_chars else 0 + efficiency_filled = round(overall_pct / 100 * bar_width) + efficiency_bar = _color("32", "█" * efficiency_filled, color) + efficiency_bar += _color("38;5;244", "░" * (bar_width - efficiency_filled), color) lines = [ "", - " Semble Token Savings", + " " + _color("1;36", "Semble Token Savings", color), heavy_line, - f" {'Period':<12} {'Calls':<6} Savings", + "", + f" {_color('1', 'Total saved:', color)} " + f"{_color('1;33', _format_token_count(total_saved_tokens) + ' tokens', color)} " + f"({_color_ratio(overall_pct, color)})", + f" {_color('1', 'Total calls:', color)} {_color('1;33', _format_calls(all_time.calls), color)}", + f" {_color('1', 'Efficiency:', color)} {efficiency_bar} {_color_ratio(overall_pct, color)}", + "", + " " + _color("1", "By Period", color), + light_line, + f" {'Period':<14} {'Calls':>8} {'Saved':>14} Ratio", light_line, ] for label, bucket in summary.buckets.items(): - saved_tokens = bucket.saved_chars // 4 # standard ~4 chars/token approximation - if saved_tokens >= 1_000_000: - saved_str = f"~{saved_tokens / 1_000_000:.1f}M" - elif saved_tokens >= 1000: - saved_str = f"~{saved_tokens / 1000:.1f}k" - else: - saved_str = f"~{saved_tokens}" - calls_str = f"{bucket.calls / 1000:.1f}k" if bucket.calls >= 1000 else str(bucket.calls) + saved_tokens = bucket.saved_chars // 4 + saved_str = _format_token_count(saved_tokens) + " tokens" + calls_str = _format_calls(bucket.calls) if bucket.file_chars > 0: ratio = bucket.saved_chars / bucket.file_chars filled = round(ratio * bar_width) - bar = "█" * filled + "░" * (bar_width - filled) - pct = round(ratio * 100) - lines.append(f" {label:<12} {calls_str:<6} [{bar}] {saved_str} tokens ({pct}%)") + row_bar = _color("32", "█" * filled, color) + _color("38;5;244", "░" * (bar_width - filled), color) + ratio_str = _color_ratio(round(ratio * 100), color) else: - lines.append(f" {label:<12} {calls_str:<6} [{'░' * bar_width}] {saved_str} tokens") - if verbose and summary.call_type_counts: - lines += ["", " Usage Breakdown", light_line, f" {'Call type':<16} Calls"] - for call_type, count in sorted(summary.call_type_counts.items()): - count_str = f"{count / 1000:.1f}k" if count >= 1000 else str(count) - lines.append(f" {call_type:<16} {count_str}") - lines.append(heavy_line) + row_bar = _color("38;5;244", "░" * bar_width, color) + ratio_str = _color("38;5;244", "–", color) + lines.append( + f" {_color('1', f'{label:<14}', color)} {_color('1;33', f'{calls_str:>8}', color)} " + f"{_color('1;33', f'{saved_str:>14}', color)} {row_bar} {ratio_str}" + ) + + if summary.call_type_counts: + lines += [ + "", + " " + _color("1", "By Call Type", color), + light_line, + f" {'#':<4} {'Call type':<16} {'Calls':>8} Share", + light_line, + ] + top = sorted(summary.call_type_counts.items(), key=lambda kv: -kv[1]) + total = sum(summary.call_type_counts.values()) + for i, (call_type, count) in enumerate(top, start=1): + share = count / total + filled = max(1, round(share * 16)) + bar = _color("32", "█" * filled, color) + _color("38;5;244", "░" * (16 - filled), color) + rank = f"{i}." + lines.append( + f" {_color('38;5;244', f'{rank:<4}', color)} {call_type:<16} " + f"{_color('1;33', f'{_format_calls(count):>8}', color)} {bar} " + f"{_color('38;5;244', f'{share * 100:>4.0f}%', color)}" + ) + + lines.append(heavy_line) lines.append("") return "\n".join(lines) diff --git a/src/semble/version.py b/src/semble/version.py index 9bfefb0..f076880 100644 --- a/src/semble/version.py +++ b/src/semble/version.py @@ -1,2 +1,2 @@ -__version_triple__ = (0, 3, 3) +__version_triple__ = (0, 3, 4) __version__ = ".".join(map(str, __version_triple__)) diff --git a/tests/test_stats.py b/tests/test_stats.py index e3c1f32..ddd61e1 100644 --- a/tests/test_stats.py +++ b/tests/test_stats.py @@ -7,7 +7,7 @@ import pytest from semble.cli import _cli_main -from semble.stats import build_savings_summary, format_savings_report, save_search_stats +from semble.stats import _use_color, build_savings_summary, format_savings_report, save_search_stats from semble.types import CallType, SearchResult from tests.conftest import make_chunk @@ -16,17 +16,6 @@ def _make_stats_record(ts: float, call: str = "search", snippet_chars: int = 1_0 return json.dumps({"ts": ts, "call": call, "results": 3, "snippet_chars": snippet_chars, "file_chars": file_chars}) -@pytest.fixture -def sample_stats_file(tmp_path: Path) -> Path: - """Stats file with one search and one find_related record from today.""" - stats_file = tmp_path / "stats.jsonl" - now = datetime.now(timezone.utc).timestamp() - stats_file.write_text( - _make_stats_record(now, call="search") + "\n" + _make_stats_record(now, call="find_related") + "\n" - ) - return stats_file - - def test_save_search_stats(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None: """save_search_stats deduplicates file paths and silences write errors.""" chunk = make_chunk("hello", "src/foo.py") @@ -48,28 +37,27 @@ def test_savings_no_file(tmp_path: Path) -> None: assert "No stats yet" in format_savings_report(path=tmp_path / "nonexistent.jsonl") +def test_no_color_disables_color_when_empty(monkeypatch: pytest.MonkeyPatch) -> None: + """NO_COLOR disables colors regardless of its value.""" + monkeypatch.setenv("NO_COLOR", "") + with patch("semble.stats.sys.stdout.isatty", return_value=True): + assert not _use_color() + + @pytest.mark.parametrize( - ("verbose", "expected"), + ("file_chars", "expected"), [ - (False, ["Savings", "Today"]), - (True, ["Savings", "Today", "Usage Breakdown", "search", "find_related"]), + (40_000, "~10.0k tokens"), + (4_000_000, "~1.0M tokens"), ], - ids=["default", "verbose"], ) -def test_savings_output(sample_stats_file: Path, verbose: bool, expected: list[str]) -> None: - """format_savings_report displays period buckets; --verbose adds call-type breakdown.""" - result = format_savings_report(path=sample_stats_file, verbose=verbose) - for s in expected: - assert s in result - - -def test_savings_output_millions(tmp_path: Path) -> None: - """Token counts >= 1M are formatted as M, not k.""" +def test_savings_output_token_suffixes(tmp_path: Path, file_chars: int, expected: str) -> None: + """Token counts use the expected suffix formatting.""" stats_file = tmp_path / "stats.jsonl" stats_file.write_text( - _make_stats_record(datetime.now(timezone.utc).timestamp(), snippet_chars=0, file_chars=4_000_000) + "\n" + _make_stats_record(datetime.now(timezone.utc).timestamp(), snippet_chars=0, file_chars=file_chars) + "\n" ) - assert "M tokens" in format_savings_report(path=stats_file) + assert expected in format_savings_report(path=stats_file) def test_savings_do_not_subtract_unknown_baselines(tmp_path: Path) -> None: @@ -100,22 +88,14 @@ def test_savings_tolerates_bad_json(tmp_path: Path) -> None: assert "Savings" in format_savings_report(path=stats_file) -@pytest.mark.parametrize( - ("argv", "expected"), - [ - (["semble", "savings"], "No stats yet"), - (["semble", "savings", "--verbose"], "No stats yet"), - ], - ids=["default", "verbose"], -) def test_savings_cli_dispatch( - argv: list[str], expected: str, tmp_path: Path, monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str] + tmp_path: Path, monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str] ) -> None: - """Savings subcommand dispatches to format_savings_report, with and without --verbose.""" - monkeypatch.setattr(sys, "argv", argv) + """Savings subcommand dispatches to format_savings_report.""" + monkeypatch.setattr(sys, "argv", ["semble", "savings"]) monkeypatch.setattr("semble.stats._get_stats_file", lambda: tmp_path / "nonexistent.jsonl") _cli_main() - assert expected in capsys.readouterr().out + assert "No stats yet" in capsys.readouterr().out def test_savings_buckets_exclude_old_records(tmp_path: Path) -> None: diff --git a/uv.lock b/uv.lock index d0db5aa..63f2277 100644 --- a/uv.lock +++ b/uv.lock @@ -10,7 +10,7 @@ resolution-markers = [ [options] exclude-newer = "0001-01-01T00:00:00Z" # This has no effect and is included for backwards compatibility when using relative exclude-newer values. -exclude-newer-span = "P3D" +exclude-newer-span = "P1W" [[package]] name = "annotated-doc"