dxworks · casianaoprut · Apr 2, 2026 · Apr 3, 2026 · Apr 7, 2026 · Apr 9, 2026
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
@@ -45,13 +45,18 @@ jobs:
         id: assets
         run: |
           set -euo pipefail
-          mkdir -p codeframe
+          mkdir -p codeframe/templates
           # Use constant fat jar name produced by Shadow config
           JAR_PATH="build/libs/codeframe.jar"
           cp "$JAR_PATH" codeframe/codeframe.jar
           cp instrument.yml codeframe/instrument.yml
+          cp instrument.v2.yml codeframe/instrument.v2.yml
           cp .ignore codeframe/.ignore
           cp codeframe-config.yml codeframe/codeframe-config.yml
+          cp codeframe-summary.py codeframe/codeframe-summary.py
+          cp summary_extract.py codeframe/summary_extract.py
+          cp summary_render.py codeframe/summary_render.py
+          cp templates/summary.html codeframe/templates/summary.html
           # Voyager archive name
           ARCHIVE_NAME="codeframe-${{ steps.tag.outputs.suffix }}"
           echo "archive_name=$ARCHIVE_NAME" >> $GITHUB_OUTPUT

diff --git a/AGENTS.md b/AGENTS.md
@@ -0,0 +1,167 @@
+# AGENTS.md
+
+Agent playbook for working in `codeframe`.
+
+## 1) Project at a Glance
+
+- Language/runtime: Java 17, Gradle 8.x.
+- Main entry point: `org.dxworks.codeframe.App`.
+- Packaging: Shadow JAR output is `build/libs/codeframe.jar`.
+- Core purpose: parse multi-language source files and emit JSONL analysis.
+- Parser stack:
+  - Tree-sitter: Java, JavaScript, TypeScript, Python, C#, PHP, Ruby, Rust.
+  - Hybrid: SQL (JSqlParser + ANTLR), COBOL (ANTLR), Markdown (commonmark).
+
+## 2) Environment Requirements
+
+- Required JDK: 17+.
+- If Gradle fails with "build uses a Java 11 JVM", set `JAVA_HOME` to JDK 17.
+- Gradle wrapper is included; prefer wrapper over system Gradle.
+- Cross-platform command variants:
+  - Unix/macOS: `./gradlew ...`
+  - Windows: `.\gradlew.bat ...`
+
+## 3) Build, Test, and Run Commands
+
+### Build
+
+- Full build (includes tests + shadow jar):
+  - `./gradlew build`
+- Clean build:
+  - `./gradlew clean build`
+- Build shadow jar directly:
+  - `./gradlew shadowJar`
+
+### Test
+
+- Run all tests:
+  - `./gradlew test`
+- Run a single test class:
+  - `./gradlew test --tests "*JavaAnalyzeApprovalTest"`
+- Run a single test method (most common agent workflow):
+  - `./gradlew test --tests "*JavaAnalyzeApprovalTest.analyze_Java_GenericsSample"`
+- Run a package slice:
+  - `./gradlew test --tests "org.dxworks.codeframe.analyzer.sql.*"`
+- Useful debugging flags:
+  - `./gradlew test --stacktrace --info`
+
+### Lint / Static Checks
+
+- There is no dedicated Checkstyle/SpotBugs/Spotless task configured.
+- Use `./gradlew check` for standard Gradle verification lifecycle.
+- In this repo, test suites are the primary quality gate.
+
+### Run Application
+
+- Run via Gradle:
+  - `./gradlew run --args="<input-path> <output-file>"`
+- Run packaged jar:
+  - `java -jar build/libs/codeframe.jar <input-path> <output-file>`
+- Typical local example:
+  - `./gradlew run --args="src codeframe-out/analysis.jsonl"`
+
+### Grammar Generation (ANTLR)
+
+- Generate SQL + COBOL grammars:
+  - `./gradlew generateAllGrammarSource`
+- `compileJava` and `test` already depend on grammar generation.
+
+## 4) CI and Release Behavior
+
+- CI workflow (`.github/workflows/build-and-test.yml`) runs:
+  - `./gradlew --no-daemon --stacktrace test`
+- Release workflow (`.github/workflows/release.yml`) runs:
+  - `./gradlew --no-daemon clean shadowJar`
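+- Release artifact zip contains:
+  - `codeframe.jar`, `instrument.yml`, `instrument.v2.yml`, `.ignore`, `codeframe-config.yml`,
+    `codeframe-summary.py`, `summary_extract.py`, `summary_render.py`, `templates/summary.html`.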
+
+## 5) Test Conventions (ApprovalTests)
+
+- Framework: JUnit Jupiter + ApprovalTests.
+- Approval snapshots live beside tests as `*.approved.txt`.
+- On failure, inspect `*.received.txt` and confirm expected behavior.
+- To accept intentional output changes, replace approved files with received files.
+- Commit code and corresponding approved-output updates together.
+
+## 6) Code Style and Structure
+
+### Formatting
+
+- Use 4-space indentation, UTF-8 source files.
+- Keep methods focused and small where practical.
+- Prefer early returns for guard clauses.
+- Preserve deterministic output ordering when possible.
+
+### Imports
+
+- Prefer explicit imports over wildcard imports.
+- Keep `java.*` imports grouped and readable.
+- Use static imports only for well-scoped utility usage.
+
+### Types and Data Modeling
+
+- Use concrete model classes under `org.dxworks.codeframe.model`.
+- Public model fields are used intentionally in this project; follow existing model style.
+- Use `Optional` only where APIs already expose optionality (e.g., language detection).
+- Favor immutable/unmodifiable collections for configuration and registries.
+
+### Naming
+
+- Classes/interfaces/enums: PascalCase.
+- Methods/fields/local vars: camelCase.
+- Constants: UPPER_SNAKE_CASE.
+- Tests use descriptive snake-like method names with scenario context
+  (example: `analyze_Java_GenericsSample`).
+
+### Analyzer-Specific Design
+
+- Implement new analyzers via `LanguageAnalyzer` and register in `LanguageRegistry`.
+- Reuse `TreeSitterHelper` utilities rather than duplicating traversal logic.
+- Prefer AST-driven extraction over regex.
+- Keep extraction syntactic/factual; avoid semantic inference.
+- Ensure stable sorting/deduplication for method call outputs.
+
+### Error Handling
+
+- Fail fast for invalid startup conditions (e.g., missing input path).
+- During file analysis, catch exceptions per file and emit error JSONL records.
+- Prefer partial results over hard failure when parsing problematic inputs.
+- Do not add side effects (writing extra files, network calls) inside analyzers.
+
+### Backward Compatibility and Complexity
+
+- Do not add complexity solely for backward compatibility unless requested.
+- Do not ship hacks; understand grammar/tree shape and solve at parser level.
+- Use as little regex as practical; regex is a fallback, not default strategy.
+
+## 7) Extraction Contract (Must Follow)
+
+- Extract facts only.
+- Keep output deterministic.
+- Avoid analyzer side effects.
+- Return partial results on parse errors when feasible.
+- Keep analyzers simple and fast; enrichment belongs elsewhere.
+
+## 8) Repository-Specific Agent Rules
+
+- Relevant guidance source files:
+  - `docs/CONTRIBUTING.md`
+  - `docs/ARCHITECTURE.md`
+  - `README.md`
+  - `docs/AGENTS.md` (legacy short guidance)
+- Cursor rules:
+  - No `.cursor/rules/` directory found.
+  - No `.cursorrules` file found.
+- Copilot rules:
+  - No `.github/copilot-instructions.md` file found.
+
+## 9) Practical Agent Workflow
+
+- Before changing analyzers:
+  - Read matching analyzer tests and approved snapshots first.
+- After changes:
+  - Run targeted single-test command.
+  - Run broader language test class.
+  - Run full `./gradlew test` if scope is cross-cutting.
+- If output shape changes:
+  - Update approved files intentionally and review diffs carefully.
diff --git a/README.md b/README.md
@@ -88,6 +88,23 @@ docker run --rm -it -v "$PWD:/workspace" -v "/path/to/code:/src:ro" -w /workspac
 
 The analysis results are written to the path you pass as the second argument (e.g., `/workspace/.out/analysis.jsonl`) in **JSONL format** (JSON Lines - one JSON object per line). Parent directories for the output file are created automatically, and `.out/` is gitignored by default.
 
+### Summary artifacts
+
+CodeFrame includes a Voyager summary generator that parses `results/*.jsonl` and emits:
+
+- `results/summary.md`
+- `results/summary.html`
+
+Run it with:
+
+```bash
+# Unix/macOS
+python3 codeframe-summary.py results
+
+# Windows
+py -3 codeframe-summary.py results
+```
+
 ### Ignore patterns (.ignore)
 
 - Location: project root `.ignore` (included in releases).

diff --git a/codeframe-summary.py b/codeframe-summary.py
@@ -0,0 +1,59 @@
+#!/usr/bin/env python3
+
+from __future__ import annotations
+
+import argparse
+from pathlib import Path
+
+from summary_extract import extract_codeframe_summary
+from summary_render import render_summary
+
+
+def build_missing_payload() -> dict[str, object]:
+    """Build the placeholder payload used when no ``*.jsonl`` input is found.
+
+    Returns:
+        A dict with ``status`` set to ``'missing'``, empty ``metadata``, a short
+        markdown notice, and a ``templateModel`` flagged with ``isMissing``.
+    """
+    # Heading, an empty separator line, then the single bullet.
+    notice_lines = ('## CodeFrame', '', '- Summary input is missing')
+    return dict(
+        tool='codeframe',
+        status='missing',
+        metadata={},
+        markdown='\n'.join(notice_lines),
+        templateModel={'isMissing': True},
+    )
+
+
+def main() -> int:
+    """Generate the Voyager summary artifacts for a results directory.
+
+    Reads the optional ``results_directory`` command-line argument (default
+    ``results``), builds a payload from that directory's ``*.jsonl`` files, or a
+    placeholder payload when there are none, and passes it to ``render_summary``.
+    The markdown and HTML paths reported by ``render_summary`` are then printed.
+
+    Returns:
+        0 when rendering completed, 1 when any step raised an exception.
+    """
+    import sys  # local import: only needed for the failure diagnostic below
+
+    parser = argparse.ArgumentParser(
+        prog='codeframe-summary.py',
+        description='Generates codeframe summary artifacts for Voyager',
+    )
+    parser.add_argument('results_directory', nargs='?', default='results')
+    args = parser.parse_args()
+
+    target_directory = Path(args.results_directory).resolve()
+
+    try:
+        # Non-recursive: only *.jsonl files directly inside the directory count.
+        if not any(target_directory.glob('*.jsonl')):
+            print(
+                "summary input missing for codeframe: expected '*.jsonl' files "
+                f"in '{target_directory}'; generating missing summary artifacts"
+            )
+            payload = build_missing_payload()
+        else:
+            payload = extract_codeframe_summary(target_directory)
+        rendered = render_summary(target_directory, payload)
+
+        print(f"Generated summary markdown at {rendered['summaryMdPath']}")
+        print(f"Generated summary html at {rendered['summaryHtmlPath']}")
+        return 0
+    except Exception as error:
+        # Failures go to stderr so they are not mixed into the normal stdout
+        # report; the non-zero return value still signals the failure.
+        print(
+            f"summary generation failed for '{target_directory}': {error}",
+            file=sys.stderr,
+        )
+        return 1
+
+
+if __name__ == '__main__':  # only when executed as a script, not when imported
+    raise SystemExit(main())  # main()'s return value becomes the process exit status
diff --git a/instrument.v2.yml b/instrument.v2.yml
@@ -0,0 +1,35 @@
+name: codeframe
+id: codeframe
+version: 0.6.3
+
+actions:
+  start:  # runs codeframe.jar on ${repo}, writing results/${repoName}.jsonl
+    commands:
+      run-codeframe:
+        id: run-codeframe
+        dir: ${instrumentPath}
+        command: >-
+          java -jar codeframe.jar "${repo}" "${instrumentPath}/results/${repoName}.jsonl"
+
+  summary:  # runs codeframe-summary.py over the results directory
+    md-file: results/summary.md
+    html-file: results/summary.html
+    category: Structural Relations
+    commands:
+      generate-summary:
+        id: generate-summary
+        dir: ${instrumentPath}
+        command:  # same invocation, one variant per operating system
+          windows: py -3 codeframe-summary.py "${instrumentPath}/results"
+          unix: python3 codeframe-summary.py "${instrumentPath}/results"
+
+  clean:
+    with:
+      locations:
+        - source: ${instrumentDir}/results  # NOTE(review): commands use ${instrumentPath}; confirm ${instrumentDir} is also defined
+
+  pack:
+    with:
+      locations:
+        - source: ${instrumentDir}/results  # NOTE(review): commands use ${instrumentPath}; confirm ${instrumentDir} is also defined
+          destination: /results