Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 53 additions & 0 deletions doc/artifacts.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# Derived-data artifacts

The compiler publishes a bundle's data as a versioned set of artifacts, so
that consumers (notably the viewer) can load precomputed deductions instead of
re-running the prover, and can load only the slices of data a page actually
needs. Schemas and serialization live in `packages/core/src/Artifacts.ts`.

The legacy monolithic `bundle.json` continues to be published alongside these
artifacts and remains the stable public data API. Consumers without artifact
support (or reading a bundle predating them) fall back to it and derive traits
client-side.

## Artifact set

| Path | Contents |
| ------------------ | ------------------------------------------------------------ |
| `manifest.json` | Format version, data version (`ref`/`sha`), artifact paths |
| `core.json` | Deduction-minimal data: uids, names, aliases, theorem formulas, asserted trait values |
| `spaces/{id}.json` | Traits derivable for one space, with proofs |
| `text.json` | Descriptions and refs for all entities and asserted traits |

Paths are relative to the manifest's location and are declared in the
manifest, so consumers should discover them from `manifest.json` rather than
hard-coding. `paths.spaces` is a template in which `{id}` stands for a space's
uid.

Rationale for the split: descriptions and refs are ~85% of bundle bytes but
are only read on detail pages, while `core.json` alone supports lists, search,
and full client-side deduction. Derived traits with proofs are far too large
to ship whole (~8 MB raw) but shard to tens of KB per space.

## Format versioning

`manifest.json` carries a `format` number (`artifacts.FORMAT`). It is bumped
whenever the serialized shape of any artifact changes incompatibly; consumers
should check it before assuming they can read the other paths, and fall back
to `bundle.json` when it is unrecognized.

## Determinism

Serialization is canonical: the same logical input produces byte-identical
artifacts, regardless of source file enumeration order. Artifact diffs and
sha-keyed caching rely on this, and `test/Artifacts.test.ts` enforces it.

Two mechanisms provide it:

1. **Pinned prover inputs.** Proof content depends on the order theorems and
traits are fed to the prover, so `artifacts.implications` and
`artifacts.deduceSpace` sort both by uid. Derived artifacts must be
produced through these entry points.
2. **Canonical serialization.** Every collection in every artifact is sorted
(entities by uid, traits by space then property) and object keys are
emitted in schema order.
13 changes: 9 additions & 4 deletions packages/compile/src/load.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ export default async function load(
version?: Version,
): Promise<{
bundle?: B.Bundle
deductions?: Validations.Deductions
errors?: Map<string, string[]>
}> {
return validate({
Expand Down Expand Up @@ -51,7 +52,10 @@ export function validate({
return format(Validations.bundle(bundle))
} catch (e) {
if (e instanceof ValidationError) {
return format({ value: bundle, errors: e.messages })
return format({
value: bundle && { bundle, deductions: new Map() },
errors: e.messages,
})
} else {
throw e
}
Expand Down Expand Up @@ -82,8 +86,9 @@ function checkAll<I, O>(
return result.value
}

function format({ value, errors }: Validations.Result<B.Bundle>): {
function format({ value, errors }: Validations.Result<Validations.Deduced>): {
bundle?: B.Bundle
deductions?: Validations.Deductions
errors?: Map<string, string[]>
} {
if (errors.length > 0) {
Expand All @@ -95,8 +100,8 @@ function format({ value, errors }: Validations.Result<B.Bundle>): {
grouped.get(path).push(message)
})

return { bundle: value, errors: grouped }
return { ...value, errors: grouped }
} else {
return { bundle: value }
return { ...value }
}
}
52 changes: 50 additions & 2 deletions packages/compile/src/main.test.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import { beforeEach, afterAll, expect, it } from 'vitest'
import { spawnSync } from 'node:child_process'
import { readFile, stat, unlink } from 'node:fs/promises'
import { readFile, rm, stat, unlink } from 'node:fs/promises'
import { join } from 'node:path'

const main = join(__dirname, '..', 'src', 'main.ts')
Expand Down Expand Up @@ -38,6 +38,11 @@ async function cleanup() {
if (await exists(out)) {
await unlink(out)
}
await rm(join(repo, 'valid', 'artifacts'), { recursive: true, force: true })
}

async function readJson(path: string) {
return JSON.parse((await readFile(path)).toString())
}

beforeEach(cleanup)
Expand All @@ -48,14 +53,57 @@ it('builds a bundle', async () => {
expect(output).toContain(`::debug Compiling repo=${repo}/valid to out=`)
expect(error).toBe(false)

const bundle = JSON.parse((await readFile(`${repo}/valid/${out}`)).toString())
const bundle = await readJson(`${repo}/valid/${out}`)

expect(bundle.properties.length).toEqual(3)
expect(bundle.spaces.length).toEqual(2)
expect(bundle.theorems.length).toEqual(1)
expect(bundle.traits.length).toEqual(3)
})

it('emits the derived-data artifact set', async () => {
const { error } = await run('valid')
expect(error).toBe(false)

const dir = join(repo, 'valid', 'artifacts')

const manifest = await readJson(join(dir, 'manifest.json'))
expect(manifest.format).toEqual(1)
expect(manifest.version.sha).toEqual(
'c74d99cf46f6ed23e742f2617e9908294b4a608b',
)

const core = await readJson(join(dir, manifest.paths.core))
expect(core.properties.length).toEqual(3)
expect(core.spaces.length).toEqual(2)
expect(core.theorems.length).toEqual(1)
expect(core.traits).toEqual([
{ space: 'S000001', property: 'P000016', value: true },
{ space: 'S000001', property: 'P000036', value: false },
{ space: 'S000002', property: 'P000036', value: false },
])

const text = await readJson(join(dir, manifest.paths.text))
expect(text.properties.length).toEqual(3)
expect(text.properties.every((p: any) => p.description.length > 0)).toBe(true)

const shard = await readJson(
join(dir, manifest.paths.spaces.replace('{id}', 'S000001')),
)
expect(shard.traits).toEqual([
{
property: 'P000019',
value: true,
proof: { theorems: ['T000001'], properties: ['P000016'] },
},
])

const empty = await readJson(
join(dir, manifest.paths.spaces.replace('{id}', 'S000002')),
)
expect(empty.traits).toEqual([])
})

it('writes error messages for invalid bundles', async () => {
const { output, error } = await run('invalid')

Expand Down
33 changes: 28 additions & 5 deletions packages/compile/src/main.ts
Original file line number Diff line number Diff line change
@@ -1,13 +1,18 @@
import * as fs from 'fs'
import * as path from 'path'
import * as process from 'process'
import { bundle as B } from '@pi-base/core'
import { artifacts as A, bundle as B } from '@pi-base/core'

import load from './load.js'

async function run(repo = '.', out = 'bundle.json'): Promise<void> {
log(`Compiling repo=${repo} to out=${out}`, 'debug')
async function run(
repo = '.',
out = 'bundle.json',
artifactsOut = 'artifacts',
): Promise<void> {
log(`Compiling repo=${repo} to out=${out} artifacts=${artifactsOut}`, 'debug')

const { bundle, errors } = await load(repo)
const { bundle, deductions, errors } = await load(repo)

if (errors) {
errors.forEach((messages, path) => {
Expand All @@ -17,12 +22,30 @@ async function run(repo = '.', out = 'bundle.json'): Promise<void> {
})
}

if (errors || !bundle) {
if (errors || !bundle || !deductions) {
log('Compilation finished with errors', 'error')
process.exit(1)
}

fs.writeFileSync(`${repo}/${out}`, JSON.stringify(B.serialize(bundle)))
writeArtifacts(`${repo}/${artifactsOut}`, A.serialize(bundle, deductions))
}

function writeArtifacts(dir: string, artifacts: A.Artifacts): void {
const write = (relative: string, data: unknown) => {
const file = path.join(dir, relative)
fs.mkdirSync(path.dirname(file), { recursive: true })
fs.writeFileSync(file, JSON.stringify(data))
}

const { manifest, core, text, spaces } = artifacts

write('manifest.json', manifest)
write(manifest.paths.core, core)
write(manifest.paths.text, text)
spaces.forEach((artifact, space) => {
write(A.spacePath(manifest, space), artifact)
})
}

type Level = 'debug' | 'info' | 'error'
Expand Down
26 changes: 14 additions & 12 deletions packages/compile/src/validations.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -127,8 +127,8 @@ describe('bundle', () => {
])
})

it('adds deductions', () => {
const { value: b, errors } = bundle({
it('returns the derivations for each space', () => {
const { value, errors } = bundle({
properties: [
F.property({ uid: 'P1' }),
F.property({ uid: 'P2' }),
Expand Down Expand Up @@ -157,15 +157,17 @@ describe('bundle', () => {

expect(errors).toEqual([])

// TODO
// const { value, proof } = b!.traits.get(
// traitId({ space: 'S1', property: 'P3' }),
// )!

// expect(value).toEqual(true)
// expect(proof).toEqual({
// properties: ['P1'],
// theorems: ['T1', 'T2'],
// })
expect(value!.deductions.get('S1')!.all()).toEqual([
{
property: 'P2',
value: true,
proof: { theorems: ['T1'], properties: ['P1'] },
},
{
property: 'P3',
value: true,
proof: { theorems: ['T1', 'T2'], properties: ['P1'] },
},
])
})
})
56 changes: 18 additions & 38 deletions packages/compile/src/validations.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,42 +3,21 @@ import yaml from 'yaml-front-matter'

import {
type Bundle,
ImplicationIndex,
type Derivations,
type Property,
type Space,
deduceTraits,
artifacts,
formula as Formula,
schemas,
} from '@pi-base/core'

import type { File } from './fs.js'

// FIXME
type CheckResult =
| { kind: 'bundle'; bundle: Bundle }
| {
kind: 'contradiction'
contradiction: { theorems: unknown[]; properties: unknown[] }
}
function check(bundle: Bundle, space: Space): CheckResult {
const implications = new ImplicationIndex(
Array.from(bundle.theorems.values()).map(({ uid, when, then }) => ({
id: uid,
when,
then,
})),
)
const traits = new Map(
Array.from(bundle.traits.values())
.filter(trait => trait.space === space.uid)
.map(trait => [trait.property, trait.value]),
)

const result = deduceTraits(implications, traits)
if (result.kind === 'contradiction') {
return result
}
return { kind: 'bundle', bundle }
// Traits derivable from each space's asserted traits, keyed by space uid
export type Deductions = Map<string, Derivations<string, string>>

export type Deduced = {
bundle: Bundle
deductions: Deductions
}

export type Message = {
Expand Down Expand Up @@ -282,7 +261,7 @@ export function trait(input: File) {
})
}

export function bundle(bundle: Bundle): Result<Bundle> {
export function bundle(bundle: Bundle): Result<Deduced> {
return validate('', error => {
const duplicatePropertyNames = duplicated(
Array.from(bundle.properties.values()).map(s => s.name),
Expand Down Expand Up @@ -333,24 +312,25 @@ export function bundle(bundle: Bundle): Result<Bundle> {
}
})

let result = bundle
const deductions: Deductions = new Map()
const implications = artifacts.implications(bundle)

for (const space of result.spaces.values()) {
for (const space of bundle.spaces.values()) {
const key = paths.space(space.uid)
const checked = check(bundle, space)
switch (checked.kind) {
case 'bundle':
result = checked.bundle
const result = artifacts.deduceSpace(bundle, space.uid, implications)
switch (result.kind) {
case 'derivations':
deductions.set(space.uid, result.derivations)
break
case 'contradiction':
error(
`properties=${checked.contradiction.properties} contradict theorems=${checked.contradiction.theorems}`,
`properties=${result.contradiction.properties} contradict theorems=${result.contradiction.theorems}`,
key,
)
break
}
}

return result
return { bundle, deductions }
})
}
Loading
Loading