Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 23 additions & 8 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -120,20 +120,35 @@ Semble also always skips a set of well-known non-source directories regardless o
`semble savings` shows how many tokens semble has saved across all your searches:

```bash
semble savings # summary by period
semble savings --verbose # also show breakdown by call type
semble savings
```

```
Semble Token Savings
════════════════════════════════════════════════════════════════
Period Calls Savings
────────────────────────────────────────────────────────────────
Today 42 [███████████████░] ~58.4k tokens (95%)
Last 7 days 287 [██████████████░░] ~312.4k tokens (90%)
All time 1.4k [██████████████░░] ~1.2M tokens (89%)
════════════════════════════════════════════════════════════════════════
Total saved: ~714.2M tokens (94%)
Total calls: 14.3k
Efficiency: ███████████████████████░ 94%
By Period
────────────────────────────────────────────────────────────────────────
Period Calls Saved Ratio
────────────────────────────────────────────────────────────────────────
Today 198 ~1.4M tokens ███████████████████████░ 95%
Last 7 days 13.1k ~707.2M tokens ███████████████████████░ 94%
All time 14.3k ~714.2M tokens ███████████████████████░ 94%
By Call Type
────────────────────────────────────────────────────────────────────────
# Call type Calls Share
────────────────────────────────────────────────────────────────────────
1. search 14.1k ████████████████ 99%
2. find_related 205 █░░░░░░░░░░░░░░░ 1%
════════════════════════════════════════════════════════════════════════
```


Savings are calculated as follows: for each call, semble records the total character count of the unique files that contain the returned chunks, and the character count of the snippets actually returned. The estimated number of tokens saved is `(file chars − snippet chars) / 4`, assuming roughly 4 characters per token. This is a conservative estimate: the baseline is reading matched files in full, which is how coding agents often explore unfamiliar code.

</details>
Expand Down
5 changes: 2 additions & 3 deletions src/semble/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,16 +187,15 @@ def _cli_main() -> None:
related_p.add_argument("-k", "--top-k", type=int, default=5, help="Number of results (default: 5).")
_add_content_args(related_p)

savings_p = sub.add_parser("savings", help="Show token savings and usage stats.")
savings_p.add_argument("--verbose", action="store_true", help="Also show usage breakdown by call type.")
sub.add_parser("savings", help="Show token savings and usage stats.")

sub.add_parser("install", help="Interactively configure semble across coding agents.")
sub.add_parser("uninstall", help="Interactively remove semble configuration from coding agents.")

args = parser.parse_args()

if args.command == "savings":
print(format_savings_report(verbose=args.verbose))
print(format_savings_report())
elif args.command in ("install", "uninstall"):
from semble.installer import run

Expand Down
114 changes: 90 additions & 24 deletions src/semble/stats.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import json
import logging
import os
import sys
from collections import defaultdict
from dataclasses import dataclass
from datetime import datetime, timedelta, timezone
Expand All @@ -16,6 +18,16 @@ def _get_stats_file() -> Path:
return resolve_cache_folder() / "savings.jsonl"


def _use_color() -> bool:
    """Decide whether the report may contain ANSI color escapes.

    Color is turned off when the ``NO_COLOR`` environment variable is set
    (to any value, including the empty string), when ``TERM`` is ``dumb``,
    or when standard output is not attached to a terminal.
    """
    environment = os.environ
    if "NO_COLOR" in environment:
        return False
    if environment.get("TERM") == "dumb":
        return False
    # Only consult the stream once the environment has not vetoed color.
    return sys.stdout.isatty()


def _color(code: str, text: str, enabled: bool) -> str:
    """Wrap *text* in an ANSI SGR sequence, or return it untouched.

    *code* is the parameter string placed between ``ESC[`` and ``m``
    (for example ``"32"`` or ``"1;33"``). When *enabled* is false the
    text is returned unchanged so output stays free of escape sequences.
    """
    if not enabled:
        return text
    return f"\033[{code}m{text}\033[0m"


@dataclass
class BucketStats:
calls: int = 0
Expand Down Expand Up @@ -102,47 +114,101 @@ def build_savings_summary(path: Path | None = None) -> SavingsSummary:
return SavingsSummary(buckets=buckets, call_type_counts=dict(call_type_counts))


def format_savings_report(path: Path | None = None, *, verbose: bool = False) -> str:
def _format_token_count(tokens: int) -> str:
    """Format an estimated token count with a ``~`` prefix and k/M suffix.

    Counts below 1,000 are shown verbatim (``~999``); larger counts are
    scaled to one decimal place with a ``k`` or ``M`` suffix.

    The suffix is chosen *after* rounding: a value such as 999,999 rounds
    to 1000.0 thousands, so it is promoted to ``~1.0M`` rather than being
    rendered as the misleading ``~1000.0k``.
    """
    if tokens < 1_000:
        return f"~{tokens}"
    thousands = round(tokens / 1_000, 1)
    if thousands < 1_000:
        return f"~{thousands:.1f}k"
    return f"~{tokens / 1_000_000:.1f}M"


def _format_calls(calls: int) -> str:
    """Render a call count, abbreviating 1,000 and above as e.g. ``14.3k``."""
    if calls < 1_000:
        return str(calls)
    return f"{calls / 1_000:.1f}k"


def _color_ratio(pct: int, enabled: bool) -> str:
    """Render ``pct`` as ``"<pct>%"``, colored by how high it is.

    80 and above uses SGR code 32, 50-79 uses 33, and anything lower
    uses 31. Coloring is applied only when *enabled* is true.
    """
    if pct >= 80:
        sgr_code = "32"
    elif pct >= 50:
        sgr_code = "33"
    else:
        sgr_code = "31"
    label = f"{pct}%"
    return _color(sgr_code, label, enabled)


def format_savings_report(path: Path | None = None) -> str:
"""Return a formatted token-savings report."""
if path is None:
path = _get_stats_file()
if not path.exists():
return "No stats yet. Run a search first."

summary = build_savings_summary(path)
bar_width = 16
heavy_line = " " + "═" * 64
light_line = " " + "─" * 64
color = _use_color()
bar_width = 24
border_width = 72
heavy_line = " " + _color("38;5;244", "═" * border_width, color)
light_line = " " + _color("38;5;244", "─" * border_width, color)

all_time = summary.buckets["All time"]
total_saved_tokens = all_time.saved_chars // 4
overall_pct = round(all_time.saved_chars / all_time.file_chars * 100) if all_time.file_chars else 0
efficiency_filled = round(overall_pct / 100 * bar_width)
efficiency_bar = _color("32", "█" * efficiency_filled, color)
efficiency_bar += _color("38;5;244", "░" * (bar_width - efficiency_filled), color)

lines = [
"",
" Semble Token Savings",
" " + _color("1;36", "Semble Token Savings", color),
heavy_line,
f" {'Period':<12} {'Calls':<6} Savings",
"",
f" {_color('1', 'Total saved:', color)} "
f"{_color('1;33', _format_token_count(total_saved_tokens) + ' tokens', color)} "
f"({_color_ratio(overall_pct, color)})",
f" {_color('1', 'Total calls:', color)} {_color('1;33', _format_calls(all_time.calls), color)}",
f" {_color('1', 'Efficiency:', color)} {efficiency_bar} {_color_ratio(overall_pct, color)}",
"",
" " + _color("1", "By Period", color),
light_line,
f" {'Period':<14} {'Calls':>8} {'Saved':>14} Ratio",
light_line,
]
for label, bucket in summary.buckets.items():
saved_tokens = bucket.saved_chars // 4 # standard ~4 chars/token approximation
if saved_tokens >= 1_000_000:
saved_str = f"~{saved_tokens / 1_000_000:.1f}M"
elif saved_tokens >= 1000:
saved_str = f"~{saved_tokens / 1000:.1f}k"
else:
saved_str = f"~{saved_tokens}"
calls_str = f"{bucket.calls / 1000:.1f}k" if bucket.calls >= 1000 else str(bucket.calls)
saved_tokens = bucket.saved_chars // 4
saved_str = _format_token_count(saved_tokens) + " tokens"
calls_str = _format_calls(bucket.calls)
if bucket.file_chars > 0:
ratio = bucket.saved_chars / bucket.file_chars
filled = round(ratio * bar_width)
bar = "█" * filled + "░" * (bar_width - filled)
pct = round(ratio * 100)
lines.append(f" {label:<12} {calls_str:<6} [{bar}] {saved_str} tokens ({pct}%)")
row_bar = _color("32", "█" * filled, color) + _color("38;5;244", "░" * (bar_width - filled), color)
ratio_str = _color_ratio(round(ratio * 100), color)
else:
lines.append(f" {label:<12} {calls_str:<6} [{'░' * bar_width}] {saved_str} tokens")
if verbose and summary.call_type_counts:
lines += ["", " Usage Breakdown", light_line, f" {'Call type':<16} Calls"]
for call_type, count in sorted(summary.call_type_counts.items()):
count_str = f"{count / 1000:.1f}k" if count >= 1000 else str(count)
lines.append(f" {call_type:<16} {count_str}")
lines.append(heavy_line)
row_bar = _color("38;5;244", "░" * bar_width, color)
ratio_str = _color("38;5;244", "–", color)
lines.append(
f" {_color('1', f'{label:<14}', color)} {_color('1;33', f'{calls_str:>8}', color)} "
f"{_color('1;33', f'{saved_str:>14}', color)} {row_bar} {ratio_str}"
)

if summary.call_type_counts:
lines += [
"",
" " + _color("1", "By Call Type", color),
light_line,
f" {'#':<4} {'Call type':<16} {'Calls':>8} Share",
light_line,
]
top = sorted(summary.call_type_counts.items(), key=lambda kv: -kv[1])
total = sum(summary.call_type_counts.values())
for i, (call_type, count) in enumerate(top, start=1):
share = count / total
filled = max(1, round(share * 16))
bar = _color("32", "█" * filled, color) + _color("38;5;244", "░" * (16 - filled), color)
rank = f"{i}."
lines.append(
f" {_color('38;5;244', f'{rank:<4}', color)} {call_type:<16} "
f"{_color('1;33', f'{_format_calls(count):>8}', color)} {bar} "
f"{_color('38;5;244', f'{share * 100:>4.0f}%', color)}"
)

lines.append(heavy_line)
lines.append("")
return "\n".join(lines)
2 changes: 1 addition & 1 deletion src/semble/version.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
__version_triple__ = (0, 3, 3)
__version_triple__ = (0, 3, 4)
__version__ = ".".join(map(str, __version_triple__))
58 changes: 19 additions & 39 deletions tests/test_stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import pytest

from semble.cli import _cli_main
from semble.stats import build_savings_summary, format_savings_report, save_search_stats
from semble.stats import _use_color, build_savings_summary, format_savings_report, save_search_stats
from semble.types import CallType, SearchResult
from tests.conftest import make_chunk

Expand All @@ -16,17 +16,6 @@ def _make_stats_record(ts: float, call: str = "search", snippet_chars: int = 1_0
return json.dumps({"ts": ts, "call": call, "results": 3, "snippet_chars": snippet_chars, "file_chars": file_chars})


@pytest.fixture
def sample_stats_file(tmp_path: Path) -> Path:
"""Stats file with one search and one find_related record from today."""
stats_file = tmp_path / "stats.jsonl"
now = datetime.now(timezone.utc).timestamp()
stats_file.write_text(
_make_stats_record(now, call="search") + "\n" + _make_stats_record(now, call="find_related") + "\n"
)
return stats_file


def test_save_search_stats(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None:
"""save_search_stats deduplicates file paths and silences write errors."""
chunk = make_chunk("hello", "src/foo.py")
Expand All @@ -48,28 +37,27 @@ def test_savings_no_file(tmp_path: Path) -> None:
assert "No stats yet" in format_savings_report(path=tmp_path / "nonexistent.jsonl")


def test_no_color_disables_color_when_empty(monkeypatch: pytest.MonkeyPatch) -> None:
    """An empty NO_COLOR value still switches color off, even on a TTY."""
    monkeypatch.setenv("NO_COLOR", "")
    # Pretend stdout is a terminal so only NO_COLOR can be what disables color.
    fake_tty = patch("semble.stats.sys.stdout.isatty", return_value=True)
    with fake_tty:
        color_enabled = _use_color()
    assert not color_enabled


@pytest.mark.parametrize(
("verbose", "expected"),
("file_chars", "expected"),
[
(False, ["Savings", "Today"]),
(True, ["Savings", "Today", "Usage Breakdown", "search", "find_related"]),
(40_000, "~10.0k tokens"),
(4_000_000, "~1.0M tokens"),
],
ids=["default", "verbose"],
)
def test_savings_output(sample_stats_file: Path, verbose: bool, expected: list[str]) -> None:
"""format_savings_report displays period buckets; --verbose adds call-type breakdown."""
result = format_savings_report(path=sample_stats_file, verbose=verbose)
for s in expected:
assert s in result


def test_savings_output_millions(tmp_path: Path) -> None:
"""Token counts >= 1M are formatted as M, not k."""
def test_savings_output_token_suffixes(tmp_path: Path, file_chars: int, expected: str) -> None:
"""Token counts use the expected suffix formatting."""
stats_file = tmp_path / "stats.jsonl"
stats_file.write_text(
_make_stats_record(datetime.now(timezone.utc).timestamp(), snippet_chars=0, file_chars=4_000_000) + "\n"
_make_stats_record(datetime.now(timezone.utc).timestamp(), snippet_chars=0, file_chars=file_chars) + "\n"
)
assert "M tokens" in format_savings_report(path=stats_file)
assert expected in format_savings_report(path=stats_file)


def test_savings_do_not_subtract_unknown_baselines(tmp_path: Path) -> None:
Expand Down Expand Up @@ -100,22 +88,14 @@ def test_savings_tolerates_bad_json(tmp_path: Path) -> None:
assert "Savings" in format_savings_report(path=stats_file)


@pytest.mark.parametrize(
("argv", "expected"),
[
(["semble", "savings"], "No stats yet"),
(["semble", "savings", "--verbose"], "No stats yet"),
],
ids=["default", "verbose"],
)
def test_savings_cli_dispatch(
argv: list[str], expected: str, tmp_path: Path, monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str]
tmp_path: Path, monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str]
) -> None:
"""Savings subcommand dispatches to format_savings_report, with and without --verbose."""
monkeypatch.setattr(sys, "argv", argv)
"""Savings subcommand dispatches to format_savings_report."""
monkeypatch.setattr(sys, "argv", ["semble", "savings"])
monkeypatch.setattr("semble.stats._get_stats_file", lambda: tmp_path / "nonexistent.jsonl")
_cli_main()
assert expected in capsys.readouterr().out
assert "No stats yet" in capsys.readouterr().out


def test_savings_buckets_exclude_old_records(tmp_path: Path) -> None:
Expand Down
2 changes: 1 addition & 1 deletion uv.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading