latent-to · ai-hpc · Jun 8, 2026 · xavierlyu · Jun 9, 2026 · xavierlyu
@@ -0,0 +1,71 @@
+# Baseline Cache
+
+Pre-computed baseline eval results that let local development and CI skip the ~12-minute vLLM v0.22.0 baseline container.
+
+## How it works
+
+Setting the `CACHEON_BASELINE_CACHE_DIR` environment variable (read into `BASELINE_CACHE_DIR` in `validator/config.py`) enables persistent baseline caching; leaving it unset or empty disables it:
+
+- On **first run** with a given `(block_hash, baseline_digest, PROMPT_ENGINE_VERSION)`:
+  - The vLLM baseline container runs as normal
+  - `BaselineCache` is serialized to `{cache_dir}/{cache_key}.json`
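+- On **subsequent runs** with the same key: the JSON is loaded directly, skipping the container entirely (~12 min saved). If the file cannot be loaded, a warning is logged, the baseline container runs again, and the cache file is rewritten.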
+
+## Cache key derivation
+
+```python
+raw = f"{block_hash}:{baseline_digest}:v{PROMPT_ENGINE_VERSION}:eval"
+cache_key = sha256(raw.encode()).hexdigest()[:16]
+```
+
+## File format
+
+Each `.json` file is the output of `BaselineCache.to_dict()`:
+
+```json
+{
+  "cache_key": "<16-char hex>",
+  "results": [
+    {
+      "tokens": ["▁Hello", "▁world", ...],
+      "top_logprobs": [[{"▁Hello": -0.12, ...}], ...],
+      "ttft_s": 0.195,
+      "throughput_tps": 92.3,
+      "output_tokens": 512,
+      "decode_elapsed_secs": [0.011, 0.012, ...]
+    },
+    ...
+  ]
+}
+```
+
+## Generating a cache file
+
+Run a full validator eval with `--baseline-cache-dir` pointed here:
+
+```bash
+python3 scripts/run_validator_eval.py \
+  --block-hash 0xdeadbeef1234567890abcdef \
+  --miner-image <your-image> \
+  --model-volume /path/to/Qwen2.5-72B-Instruct \
+  --gpu-count 8 \
+  --state-dir /tmp/state-eval \
+  --baseline-cache-dir /path/to/cacheon/tests/baseline_cache
+```
+
+The file `tests/baseline_cache/<cache_key>.json` will be written after the baseline completes.
+
+## Using the cache
+
+```bash
+export CACHEON_BASELINE_CACHE_DIR=/path/to/cacheon/tests/baseline_cache
+python3 scripts/run_validator_eval.py ...
+# OR
+python3 -m validator.gpu_eval  # reads CACHEON_BASELINE_CACHE_DIR from env
+```
+
+## Notes
+
+- Cache files are block-hash-specific: different block hashes generate different prompts and therefore produce different cache files (the key also covers `baseline_digest` and `PROMPT_ENGINE_VERSION`)
+- For local dev/CI with a fixed test block hash, one cache file covers all test runs
+- Each cache file is roughly 5–50 MB, depending on `EVAL_PROMPT_COUNT` and `top_logprobs` depth
@@ -121,6 +121,14 @@ def _parse_preferred_gpu() -> str:
 SKIP_S3: bool = os.environ.get("CACHEON_SKIP_S3", "0") == "1"
 """When True, ``gpu_eval`` skips Hippius S3 download and upload (local pod testing)."""
 
+BASELINE_CACHE_DIR: str = os.environ.get("CACHEON_BASELINE_CACHE_DIR", "")
+"""Host directory for persisting baseline eval results (JSON).
+When set, gpu_eval saves BaselineCache to ``{dir}/{cache_key}.json`` after a
+fresh baseline run and loads it on later runs with the same block_hash +
+baseline_digest + PROMPT_ENGINE_VERSION, skipping the ~12-minute vLLM
+baseline container entirely. Empty (the default) disables the cache.
+"""
+
 # --------------------------------------------------------------------------- #
 # Winner defender-advantage window
 # --------------------------------------------------------------------------- #

@@ -15,6 +15,7 @@
     CACHEON_BASELINE_DIGEST    (required)
     CACHEON_GPU_COUNT          (default: 8, auto-detect via nvidia-smi)
     CACHEON_VLLM_CACHE_DIR     (optional; host path for vLLM torch.compile cache)
+    CACHEON_BASELINE_CACHE_DIR (optional; host path for persisted baseline JSON cache)
     HIPPIUS_ACCESS_KEY         (required for S3 unless CACHEON_SKIP_S3=1)
     HIPPIUS_SECRET_KEY         (required for S3 unless CACHEON_SKIP_S3=1)
     CACHEON_SKIP_S3            (default: 0; set 1 for local pod testing without S3)
@@ -219,28 +220,67 @@ def main() -> int:
 
     update_progress(state_dir, phase="baseline_running", image=baseline_image)
     _upload_progress(state_dir)
-    try:
-        eval_baseline = run_baseline(
-            eval_prompts,
-            baseline_image=baseline_image,
-            baseline_digest=baseline_digest,
-            model_volume=model_volume,
-            gpu_count=gpu_count,
-            block_hash=block_hash,
-            evaluation_block=block,
-            state_dir=state_dir,
-        )
-    except Exception as exc:
-        logger.exception("Baseline failed: %s", exc)
-        update_progress(
-            state_dir,
-            phase="baseline_failed",
-            error=str(exc),
-            challengers_affected=len(eval_job.challengers),
-        )
-        _upload_state(state_dir)
-        _upload_progress(state_dir)
-        return 4
+
+    # ------------------------------------------------------------------
+    # Baseline cache: skip the 12-min vLLM container if a cached result
+    # exists for this (block_hash, baseline_digest, prompt_version) key.
+    # ------------------------------------------------------------------
+    import json as _json
+    from .baseline import BaselineCache, derive_cache_key
+
+    _baseline_cache_dir = validator_config.BASELINE_CACHE_DIR
+    _cache_key = derive_cache_key(block_hash, baseline_digest)
+    _cache_file = (
+        Path(_baseline_cache_dir) / f"{_cache_key}.json"
+        if _baseline_cache_dir
+        else None
+    )
+
+    eval_baseline: BaselineCache | None = None
+    if _cache_file and _cache_file.exists():
+        try:
+            eval_baseline = BaselineCache.from_dict(_json.loads(_cache_file.read_text()))
+            logger.info(
+                "Loaded baseline from cache: key=%s file=%s (%d prompts)",
+                _cache_key,
+                _cache_file,
+                len(eval_baseline.results),
+            )
+        except Exception as exc:
+            logger.warning("Baseline cache load failed (%s), re-running baseline", exc)
+            eval_baseline = None
+
+    if eval_baseline is None:
+        try:
+            eval_baseline = run_baseline(
+                eval_prompts,
+                baseline_image=baseline_image,
+                baseline_digest=baseline_digest,
+                model_volume=model_volume,
+                gpu_count=gpu_count,
+                block_hash=block_hash,
+                evaluation_block=block,
+                state_dir=state_dir,
+            )
+        except Exception as exc:
+            logger.exception("Baseline failed: %s", exc)
+            update_progress(
+                state_dir,
+                phase="baseline_failed",
+                error=str(exc),
+                challengers_affected=len(eval_job.challengers),
+            )
+            _upload_state(state_dir)
+            _upload_progress(state_dir)
+            return 4
+
+        if _cache_file:
+            try:
+                _cache_file.parent.mkdir(parents=True, exist_ok=True)
+                _cache_file.write_text(_json.dumps(eval_baseline.to_dict()))
+                logger.info("Saved baseline to cache: key=%s file=%s", _cache_key, _cache_file)
+            except Exception as exc:
+                logger.warning("Baseline cache save failed: %s", exc)
 
     update_progress(state_dir, phase="baseline_complete")
     _upload_state(state_dir)