diff --git a/.dockerignore b/.dockerignore index 363eff4..4bb65aa 100644 --- a/.dockerignore +++ b/.dockerignore @@ -6,7 +6,8 @@ __pycache__ *.pyc *.pyo data/ -tests/ +# tests/ must stay in the build context: Dockerfile `test` / `test-builder` targets +# copy them for CI in-container pytest (production stages never COPY tests into the image). deploy/ coverage.xml .coverage diff --git a/.env.example b/.env.example index 3a14286..1db9e7b 100644 --- a/.env.example +++ b/.env.example @@ -1,30 +1,30 @@ -# Slack credentials (required) +# --- Required credentials --- +# Slack (required for production; tests set _PAPERSCOUT_TESTING=1 to skip validation) SLACK_SIGNING_SECRET=your-signing-secret SLACK_BOT_TOKEN=xoxb-your-bot-token -# Server +# PostgreSQL (required) — shared PostgreSQL on the host. +# When running in Docker, use host.docker.internal to reach the host: +DATABASE_URL=postgresql://paperscout:secret@host.docker.internal:5432/paperscout + +# --- Server (optional) --- PORT=3000 HEALTH_PORT=8080 # Bind address for GET /health (127.0.0.1 = localhost only). Docker Compose sets HEALTH_BIND_HOST=0.0.0.0. HEALTH_BIND_HOST=127.0.0.1 -# Database (required) — shared PostgreSQL on the host. -# When running in Docker, use host.docker.internal to reach the host: -DATABASE_URL=postgresql://paperscout:secret@host.docker.internal:5432/paperscout - -# Scheduling +# --- Scheduling (optional) --- POLL_INTERVAL_MINUTES=30 # Min sleep after an overrun cycle (poll took > poll_interval_minutes). 
POLL_OVERRUN_COOLDOWN_SECONDS=300 ENABLE_BULK_WG21=true -ENABLE_BULK_OPENSTD=true ENABLE_ISO_PROBE=true -# Probe prefixes / extensions (used for gap/unknown numbers) +# --- Probe prefixes / extensions (optional) --- PROBE_PREFIXES=["D","P"] PROBE_EXTENSIONS=[".pdf",".html"] -# Frontier +# --- Frontier (optional) --- FRONTIER_WINDOW_ABOVE=60 FRONTIER_WINDOW_BELOW=30 FRONTIER_EXPLICIT_RANGES=[] @@ -32,13 +32,13 @@ FRONTIER_EXPLICIT_RANGES=[] # (filters pre-assigned far-future numbers like P5000 from the frontier). FRONTIER_GAP_THRESHOLD=50 -# Hot probing (every poll cycle) +# --- Hot probing (optional) --- # Papers with a date within this many months are probed every 30 min. HOT_LOOKBACK_MONTHS=6 # Revisions ahead of the known latest to probe for hot papers. HOT_REVISION_DEPTH=2 -# Cold probing (distributed across cold_cycle_divisor cycles ≈ once per day) +# --- Cold probing (optional) --- COLD_REVISION_DEPTH=1 # 48 × 30 min = 24 h; every P-number gets probed once within this window. COLD_CYCLE_DIVISOR=48 @@ -46,27 +46,30 @@ COLD_CYCLE_DIVISOR=48 # Gap / unknown numbers (no index entry): probe R0 through this revision. GAP_MAX_REV=1 -# Timestamp-based alerting +# --- Timestamp-based alerting (optional) --- # Only notify for drafts where the server's Last-Modified header is within # this many hours of now. Falls back to "alert" when the header is absent. ALERT_MODIFIED_HOURS=24 -# HTTP client +# --- HTTP client (optional) --- HTTP_CONCURRENCY=20 HTTP_TIMEOUT_SECONDS=10 HTTP_USE_HTTP2=true -# Notifications +# --- Notifications (optional) --- +# Slack channel ID for draft notifications; empty = channel posts disabled. NOTIFICATION_CHANNEL= +# Slack channel ID for ops alerts (stale poll); empty = disabled. 
+OPS_ALERT_CHANNEL= NOTIFY_ON_FRONTIER_HIT=true NOTIFY_ON_ANY_DRAFT=true NOTIFY_ON_DP_TRANSITION=true -# Storage +# --- Storage (optional) --- DATA_DIR=./data CACHE_TTL_HOURS=1 -# Logging +# --- Logging (optional) --- # Log level for both console and file (DEBUG|INFO|WARNING|ERROR). LOG_LEVEL=INFO LOG_RETENTION_DAYS=7 diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 6aed6e7..201ac8a 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -16,7 +16,9 @@ jobs: name: Lockfile (uv) runs-on: ubuntu-latest steps: - - uses: actions/checkout@v6 + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 + with: + persist-credentials: false - name: Install uv uses: astral-sh/setup-uv@v7 @@ -31,7 +33,9 @@ jobs: runs-on: ubuntu-latest needs: lockfile steps: - - uses: actions/checkout@v6 + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 + with: + persist-credentials: false - name: Install uv uses: astral-sh/setup-uv@v7 @@ -61,7 +65,9 @@ jobs: python-version: ["3.10", "3.11", "3.12"] steps: - - uses: actions/checkout@v6 + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 + with: + persist-credentials: false - name: Install uv uses: astral-sh/setup-uv@v7 @@ -98,7 +104,9 @@ jobs: runs-on: ubuntu-latest needs: lockfile steps: - - uses: actions/checkout@v6 + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 + with: + persist-credentials: false - name: Install uv uses: astral-sh/setup-uv@v7 @@ -111,3 +119,25 @@ jobs: - name: Run probe benchmark (mock HTTP) run: uv run pytest benchmarks/ -m benchmark -v --tb=short + + docker: + name: Docker image (build + tests in container) + runs-on: ubuntu-latest + needs: lockfile + steps: + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 + with: + persist-credentials: false + + - name: Build test image + run: docker build --target test -t paperscout:ci . 
+ + - name: Run tests in container + run: | + docker run --rm --entrypoint python \ + -e _PAPERSCOUT_TESTING=1 \ + -e SLACK_BOT_TOKEN=xoxb-ci-placeholder \ + -e SLACK_SIGNING_SECRET=ci-placeholder-secret \ + -e COVERAGE_FILE=/tmp/.coverage \ + paperscout:ci \ + -m pytest tests/ -q --cov=paperscout --cov-fail-under=90 diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index b55e701..320a0f7 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -51,6 +51,41 @@ Runtime and dev dependencies are pinned in **`uv.lock`**, generated from [`pypro **To verify locally before pushing:** `uv lock --check` +### Docker image rebuild + +Production images install from [`uv.lock`](uv.lock) via `uv sync --frozen` in the multi-stage [`Dockerfile`](Dockerfile) (not a floating `pip install .`). The base `python:3.12-slim` image is pinned by digest in the Dockerfile. + +**After changing dependencies** (`pyproject.toml` / `uv.lock`): + +1. Commit the updated lockfile. +2. Rebuild: `docker compose build --no-cache` or `docker build --target production -t paperscout:production .` + +**When upgrading the Python base image:** + +```bash +docker pull python:3.12-slim +docker inspect --format='{{index .RepoDigests 0}}' python:3.12-slim +``` + +Update both digest-pinned `FROM python:3.12-slim@sha256:…` lines (the `builder` and `runtime` stages) in the Dockerfile with the new digest, then rebuild. + +**Verify tests inside the image** (no live Postgres required): + +```bash +docker build --target test -t paperscout:test . +docker run --rm --entrypoint python \ + -e _PAPERSCOUT_TESTING=1 \ + -e SLACK_BOT_TOKEN=xoxb-test \ + -e SLACK_SIGNING_SECRET=test-secret \ + -e COVERAGE_FILE=/tmp/.coverage \ + paperscout:test \ + -m pytest tests/ -q --cov=paperscout --cov-fail-under=90 +``` + +Production deploys use the default image target (runtime only, no dev dependencies). + +See also [deploy/SERVER_SETUP.md](deploy/SERVER_SETUP.md) for operator rebuild steps on the server. 
+ ### Tests and coverage ```bash diff --git a/Dockerfile b/Dockerfile index 98566fb..c223da3 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,5 +1,6 @@ # Reproducible install: dependency graph from uv.lock (--frozen) with bytecode compile. -FROM python:3.12-slim AS builder +# Update python digest when intentionally upgrading the base image (see CONTRIBUTING.md). +FROM python:3.12-slim@sha256:401f6e1a67dad31a1bd78e9ad22d0ee0a3b52154e6bd30e90be696bb6a3d7461 AS builder RUN apt-get update && apt-get install -y --no-install-recommends \ gcc libpq-dev \ @@ -14,8 +15,13 @@ COPY src/ src/ ENV UV_COMPILE_BYTECODE=1 RUN uv sync --frozen --no-dev --no-editable +# Dev deps for CI in-container pytest (target: test). +FROM builder AS test-builder +COPY tests/ tests/ +RUN uv sync --frozen --extra dev --no-editable -FROM python:3.12-slim + +FROM python:3.12-slim@sha256:401f6e1a67dad31a1bd78e9ad22d0ee0a3b52154e6bd30e90be696bb6a3d7461 AS runtime RUN apt-get update && apt-get install -y --no-install-recommends \ libpq5 \ @@ -37,3 +43,20 @@ USER paperscout EXPOSE 3000 8080 ENTRYPOINT ["python", "-m", "paperscout"] + + +FROM runtime AS test + +USER root +COPY --from=test-builder /build/.venv /app/.venv +COPY --from=test-builder /build/tests /app/tests +# pytest / coverage read [tool.pytest] and [tool.coverage] from pyproject.toml +COPY --from=test-builder /build/pyproject.toml /app/pyproject.toml +RUN chown -R paperscout:paperscout /app +USER paperscout +ENV COVERAGE_FILE=/tmp/.coverage +# CI runs pytest via `docker run … python -m pytest`; do not inherit paperscout ENTRYPOINT. +ENTRYPOINT [] + +# Default image for production (must remain the final stage). +FROM runtime AS production diff --git a/README.md b/README.md index be3bbc0..cc821ab 100644 --- a/README.md +++ b/README.md @@ -242,7 +242,6 @@ All parameters are configurable via environment variables or a `.env` file. 
See | `POLL_INTERVAL_MINUTES` | `30` | Main polling cycle interval | | `POLL_OVERRUN_COOLDOWN_SECONDS` | `300` | Minimum sleep after a poll cycle that overran the interval (avoids tight loops when work or errors stretch a cycle) | | `ENABLE_BULK_WG21` | `true` | Fetch wg21.link/index.json each cycle | -| `ENABLE_BULK_OPENSTD` | `true` | Reserved for open-std.org scraping (not yet scheduled) | | `ENABLE_ISO_PROBE` | `true` | Run isocpp.org HEAD probing each cycle | ### Probe Prefixes / Extensions diff --git a/deploy/SERVER_SETUP.md b/deploy/SERVER_SETUP.md index c54229e..17a5058 100644 --- a/deploy/SERVER_SETUP.md +++ b/deploy/SERVER_SETUP.md @@ -226,6 +226,12 @@ curl -sf http://localhost:9101/health | python3 -m json.tool docker compose logs -f paperscout ``` +### Rebuilding after updates + +Production deploys typically run `git pull` followed by `docker compose up -d --build paperscout` (see CD workflow). Dependencies are frozen in `uv.lock` and installed with `uv sync --frozen` during the image build; the base Python image is digest-pinned in the Dockerfile. + +When upgrading dependencies or the base image locally, see **Docker image rebuild** in [CONTRIBUTING.md](../CONTRIBUTING.md). + ### Example: staging-style host If you use a **separate** staging deployment (second clone path and GitHub Environment `staging`), typical placeholders are: diff --git a/docker-compose.yml b/docker-compose.yml index 28f450d..c4c7386 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,6 +1,9 @@ +# Image built from digest-pinned Dockerfile + frozen uv.lock (see CONTRIBUTING.md). services: paperscout: - build: . + build: + context: . 
+ target: production ports: - "127.0.0.1:9100:3000" - "127.0.0.1:9101:8080" diff --git a/docs/handoff.md b/docs/handoff.md index 100c92e..c862a6a 100644 --- a/docs/handoff.md +++ b/docs/handoff.md @@ -49,7 +49,7 @@ Outbound Slack messages go through a **background queue** (see [`scout.py`](../s ## Open TODOs and deferred items -- **`ENABLE_BULK_OPENSTD` / open-std.org** — Code paths exist in [`sources.py`](../src/paperscout/sources.py); bulk open-std scheduling is **not** integrated into the main poll loop yet (README notes “not yet scheduled”). +- **open-std.org bulk fetch** — Code paths exist in [`sources.py`](../src/paperscout/sources.py); bulk open-std scheduling is **not** integrated into the main poll loop yet (no env toggle). - **Eval / roadmap items** — If your org keeps a separate eval or ticket backlog, link it here; this repo does not ship a frozen “eval” document. ## Related documents diff --git a/docs/onboarding.md b/docs/onboarding.md index 3a03c35..4a025d6 100644 --- a/docs/onboarding.md +++ b/docs/onboarding.md @@ -147,7 +147,6 @@ Every key from [`.env.example`](../.env.example) is listed below. Names in `.env | `POLL_INTERVAL_MINUTES` | `30` | Target wall-clock spacing between poll cycles (see [Scheduling](#scheduling-asyncio-loop) below). | | `POLL_OVERRUN_COOLDOWN_SECONDS` | `300` | **Minimum** sleep after any cycle that ran longer than one interval — avoids hammering the network if a cycle overruns. | | `ENABLE_BULK_WG21` | `true` | Fetch and parse wg21.link index each cycle when enabled. | -| `ENABLE_BULK_OPENSTD` | `true` | Reserved for open-std.org bulk fetch (not yet wired into the scheduler). | | `ENABLE_ISO_PROBE` | `true` | Run isocpp.org HEAD probing each cycle when enabled. 
| ### Probe prefixes / extensions diff --git a/src/paperscout/__main__.py b/src/paperscout/__main__.py index e282f10..2303661 100644 --- a/src/paperscout/__main__.py +++ b/src/paperscout/__main__.py @@ -5,7 +5,6 @@ import asyncio import logging import logging.handlers -import os import sys import threading from datetime import datetime, timezone @@ -94,16 +93,6 @@ async def _async_main() -> None: log.error("DATABASE_URL is not set — cannot start") sys.exit(1) - if os.environ.get("_PAPERSCOUT_TESTING") != "1" and ( - not (settings.slack_bot_token or "").strip() - or not (settings.slack_signing_secret or "").strip() - ): - log.error( - "Slack is not configured: SLACK_BOT_TOKEN and SLACK_SIGNING_SECRET must be " - "non-empty — cannot start" - ) - sys.exit(1) - launch_time = datetime.now(timezone.utc) pool = init_pool(settings.database_url) diff --git a/src/paperscout/config.py b/src/paperscout/config.py index 057980d..ac83213 100644 --- a/src/paperscout/config.py +++ b/src/paperscout/config.py @@ -34,7 +34,6 @@ class Settings(BaseSettings): # poll_interval_minutes). Acts as a short cooldown before the next cycle. poll_overrun_cooldown_seconds: int = Field(default=300, ge=1) # 5 min enable_bulk_wg21: bool = True - enable_bulk_openstd: bool = True enable_iso_probe: bool = True # -- Paper prefixes / extensions (globals used for gap/unknown numbers) -- diff --git a/src/paperscout/db.py b/src/paperscout/db.py index 98068dd..02a627b 100644 --- a/src/paperscout/db.py +++ b/src/paperscout/db.py @@ -8,9 +8,6 @@ log = logging.getLogger(__name__) -# Module-level pool; set by __main__ before anything else runs. 
-pool: pg_pool.ThreadedConnectionPool | None = None - _DDL = """ CREATE TABLE IF NOT EXISTS paper_cache ( key TEXT PRIMARY KEY, diff --git a/tests/conftest.py b/tests/conftest.py index 5691682..a19f546 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -217,7 +217,6 @@ def make_test_settings(**overrides) -> Settings: poll_interval_minutes=30, poll_overrun_cooldown_seconds=300, enable_bulk_wg21=True, - enable_bulk_openstd=True, enable_iso_probe=True, probe_prefixes=["D", "P"], probe_extensions=[".pdf"],