From 0f34aedc2a898f3d5dba53c81e7460d656f2efb0 Mon Sep 17 00:00:00 2001 From: barslev Date: Thu, 30 Apr 2026 12:40:53 +0200 Subject: [PATCH 1/4] refactor: brotli-compress .socket.facts.json on upload Compress `.socket.facts.json` with brotli at upload time, just before `fetchCreateOrgFullScan` POSTs the multipart `/full-scans` body. Coana keeps writing plain JSON; the local readers (`extractTier1ReachabilityScanId`, `extractReachabilityErrors`) keep reading plain JSON; only the on-the-wire bytes between socket-cli and depscan change. depscan transparently decompresses at the api-v0 multipart ingest boundary in a coordinated change. Why: * Mono-repo `.socket.facts.json` files routinely exceed 60MB. On a representative simple-npm fixture, brotli compresses 150,748 bytes to 19,971 (~86.8% reduction); production mono-repos see a much larger absolute saving. * Coana has a second consumer downstream - teaching coana to write `.br` would break it. Brotli on the wire-only path keeps coana's contract invariant. * `tier1ReachabilityScanId` and reachability-error reporting still read plain JSON locally; no brotli round-trip on those paths. * Compression is a transport detail owned by the upload site; cleanup is one `finally` block. Implementation: * `src/utils/coana.mts` - new exported `compressSocketFactsForUpload(scanPaths)` returning `{ paths, cleanup }`. Per `.socket.facts.json` entry, `mkdtempSync` a fresh `.socket-br-XXX/` directory inside the source's parent dir (NOT under `os.tmpdir()` - see below), `brotliCompressSync` bytes to `${td}/.socket.facts.json.br`, swap the path. Other paths pass through. Missing facts paths pass through. Cleanup is idempotent with `force: true`. * `src/commands/scan/handle-create-new-scan.mts` - wraps the `fetchCreateOrgFullScan` call in `try { compress; upload; } finally { cleanup(); }`. Cleanup runs on success, throw, or any abort path. Why the temp lives next to the source: The SDK computes `path.relative(cwd, brPath)` for the multipart name field. depscan's multipart ingest (`addStreamEntry`) checks `containsTraversal(unixified)` and bails on any `..` segment. A tmpdir-based path resolves to `../../../var/folders/...`, gets dropped to `unmatchedFiles`, and the SocketFacts content never lands in the scan. Putting the temp inside `path.dirname(originalFactsPath)` produces a relative path like `.socket-br-XXX/.socket.facts.json.br` - traversal-free, ingests cleanly. Tests: * `src/utils/coana.test.mts` - 16 cases. - `compressSocketFactsForUpload` x 5: round-trip JSON via `brotliDecompressSync`, basename swap to `.socket.facts.json.br`, non-facts paths pass through, missing facts paths pass through, cleanup idempotent, mixed-entry order. Pins the contract that the temp lives in a subdir of the source's parent (traversal-free). - `extractTier1ReachabilityScanId` x 7: plain JSON, missing file, missing field, null, empty/blank, trim, numeric coercion. - `extractReachabilityErrors` x 4: extraction, missing file, no-components, no-inner-reachability. This change requires the matching depscan multipart decompression patch on the receiving side; that change ships first. --- src/commands/scan/handle-create-new-scan.mts | 57 ++-- src/utils/coana.mts | 82 ++++++ src/utils/coana.test.mts | 278 +++++++++++++++++++ 3 files changed, 395 insertions(+), 22 deletions(-) create mode 100644 src/utils/coana.test.mts diff --git a/src/commands/scan/handle-create-new-scan.mts b/src/commands/scan/handle-create-new-scan.mts index 33d8b1df6..6c76abc3c 100644 --- a/src/commands/scan/handle-create-new-scan.mts +++ b/src/commands/scan/handle-create-new-scan.mts @@ -15,6 +15,7 @@ import { outputCreateNewScan } from './output-create-new-scan.mts' import { performReachabilityAnalysis } from './perform-reachability-analysis.mts' import constants from '../../constants.mts' import { checkCommandInput } from '../../utils/check-input.mts' +import { compressSocketFactsForUpload } from '../../utils/coana.mts' import { findSocketYmlSync } from '../../utils/config.mts' import { getPackageFilesForScan } from '../../utils/path-resolve.mts' import { readOrDefaultSocketJson } from '../../utils/socket-json.mts' @@ -279,28 +280,40 @@ export async function handleCreateNewScan({ tier1ReachabilityScanId = reachResult.data?.tier1ReachabilityScanId } - const fullScanCResult = await fetchCreateOrgFullScan( - scanPaths, - orgSlug, - { - commitHash, - commitMessage, - committers, - pullRequest, - repoName, - branchName, - scanType: reach.runReachabilityAnalysis - ? constants.SCAN_TYPE_SOCKET_TIER1 - : constants.SCAN_TYPE_SOCKET, - workspace, - }, - { - cwd, - defaultBranch, - pendingHead, - tmp, - }, - ) + // Brotli-compress any .socket.facts.json paths in scanPaths just before + // upload. depscan's api-v0 multipart boundary streams brotli decode based + // on the .br filename suffix. Coana keeps writing plain .socket.facts.json + // on disk, so the local read paths (extractTier1ReachabilityScanId, + // extractReachabilityErrors) stay correct. The cleanup() in the finally + // block removes the temp dirs whether the upload succeeded or threw. + const compressed = await compressSocketFactsForUpload(scanPaths) + let fullScanCResult: Awaited> + try { + fullScanCResult = await fetchCreateOrgFullScan( + compressed.paths, + orgSlug, + { + commitHash, + commitMessage, + committers, + pullRequest, + repoName, + branchName, + scanType: reach.runReachabilityAnalysis + ? constants.SCAN_TYPE_SOCKET_TIER1 + : constants.SCAN_TYPE_SOCKET, + workspace, + }, + { + cwd, + defaultBranch, + pendingHead, + tmp, + }, + ) + } finally { + await compressed.cleanup() + } const scanId = fullScanCResult.ok ? fullScanCResult.data?.id : undefined diff --git a/src/utils/coana.mts b/src/utils/coana.mts index c80d66fe4..4c2737f31 100644 --- a/src/utils/coana.mts +++ b/src/utils/coana.mts @@ -3,6 +3,11 @@ * Manages reachability analysis via Coana tech CLI. * * Key Functions: + * - compressSocketFactsForUpload: Brotli-compress any .socket.facts.json + * entries in scanPaths just before upload, returning swapped paths plus a + * cleanup callback. Coana keeps writing plain JSON; the on-the-wire form + * to depscan is brotli (api-v0 decodes at the multipart boundary). + * - extractReachabilityErrors: Extract per-component reachability errors * - extractTier1ReachabilityScanId: Extract scan ID from socket facts file * * Integration: @@ -11,8 +16,85 @@ * - Extracts tier 1 reachability scan identifiers */ +import { createReadStream, createWriteStream, existsSync } from 'node:fs' +import { mkdtemp, rm } from 'node:fs/promises' +import path from 'node:path' +import { pipeline } from 'node:stream/promises' +import { createBrotliCompress } from 'node:zlib' + import { readJsonSync } from '@socketsecurity/registry/lib/fs' +import constants from '../constants.mts' + +const { DOT_SOCKET_DOT_FACTS_JSON } = constants + +export type CompressedScanPaths = { + paths: string[] + cleanup: () => Promise +} + +/** + * For each `.socket.facts.json` in `scanPaths`, stream-brotli-compress a + * `.socket.facts.json.br` into a fresh temp dir SIDE-BY-SIDE with the + * original file (inside the source's parent directory) and swap its path + * in. Other paths pass through unchanged. Missing files also pass through + * unchanged (the upload will fail downstream with the same error it would + * have). + * + * Streaming + worker-thread compression keeps the event loop responsive: + * default brotli quality (11) on a 60+MB facts file takes multiple seconds + * of CPU, which would otherwise freeze the spinner / signal handlers / + * any concurrent work. + * + * The temp dir lives next to the source rather than under the OS temp dir + * because depscan's multipart ingest (`addStreamEntry`) rejects entries + * whose names contain `..` traversal segments. The SDK computes the + * multipart entry name via `path.relative(cwd, brPath)`, so an OS-tmpdir + * temp path turns into `../../../var/folders/...` and gets dropped as + * `unmatchedFiles`. Keeping the temp file inside cwd-or-below keeps the + * relative path traversal-free. + * + * Each compressed file gets its own `mkdtemp` directory so the filename + * stays `.socket.facts.json.br` (no index/suffix collision) and + * concurrent scans against the same source dir don't race on the same + * path. The api-v0 boundary uses the `.br` filename suffix to trigger + * streaming decode. + * + * Caller MUST `await cleanup()` (typically in a `finally` block) once the + * upload completes — successful or not — to remove the temp dirs. + */ +export async function compressSocketFactsForUpload( + scanPaths: string[], +): Promise { + const tmpDirs: string[] = [] + const paths = await Promise.all( + scanPaths.map(async p => { + if (path.basename(p) !== DOT_SOCKET_DOT_FACTS_JSON) { + return p + } + if (!existsSync(p)) { + return p + } + const td = await mkdtemp(path.join(path.dirname(p), '.socket-br-')) + tmpDirs.push(td) + const brPath = path.join(td, `${DOT_SOCKET_DOT_FACTS_JSON}.br`) + await pipeline( + createReadStream(p), + createBrotliCompress(), + createWriteStream(brPath), + ) + return brPath + }), + ) + const cleanup = async () => { + const dirs = tmpDirs.splice(0) + await Promise.all( + dirs.map(d => rm(d, { recursive: true, force: true })), + ) + } + return { paths, cleanup } +} + export type ReachabilityError = { componentName: string componentVersion: string diff --git a/src/utils/coana.test.mts b/src/utils/coana.test.mts new file mode 100644 index 000000000..39d405e27 --- /dev/null +++ b/src/utils/coana.test.mts @@ -0,0 +1,278 @@ +/** + * Unit tests for Coana facts-file utilities. + * + * Test Coverage: + * - compressSocketFactsForUpload: swaps .socket.facts.json paths for + * brotli-compressed .br temps, leaves other paths alone, cleans up. + * - extractTier1ReachabilityScanId: plain JSON + edge cases. + * - extractReachabilityErrors: plain JSON + missing + malformed. + * + * Related Files: + * - utils/coana.mts (implementation) + */ + +import { + existsSync, + mkdtempSync, + readFileSync, + rmSync, + writeFileSync, +} from 'node:fs' +import { tmpdir } from 'node:os' +import path from 'node:path' +import { brotliDecompressSync } from 'node:zlib' + +import { afterAll, beforeAll, describe, expect, it } from 'vitest' + +import { + compressSocketFactsForUpload, + extractReachabilityErrors, + extractTier1ReachabilityScanId, +} from './coana.mts' + +describe('coana facts-file utils', () => { + let tmpDir: string + + beforeAll(() => { + tmpDir = mkdtempSync(path.join(tmpdir(), 'socket-coana-facts-')) + }) + + afterAll(() => { + rmSync(tmpDir, { recursive: true, force: true }) + }) + + function writePlain(name: string, body: unknown): string { + const filePath = path.join(tmpDir, name) + writeFileSync(filePath, JSON.stringify(body)) + return filePath + } + + describe('compressSocketFactsForUpload', () => { + it('swaps a .socket.facts.json path for a brotli .br temp file', async () => { + const wrapDir = mkdtempSync(path.join(tmpdir(), 'socket-coana-wrap-')) + const inputPath = path.join(wrapDir, '.socket.facts.json') + const payload = { tier1ReachabilityScanId: 'compress-test', a: 1, b: 2 } + writeFileSync(inputPath, JSON.stringify(payload)) + + const result = await compressSocketFactsForUpload([inputPath]) + const swappedPath = result.paths[0]! + try { + expect(result.paths).toHaveLength(1) + expect(swappedPath).not.toBe(inputPath) + expect(path.basename(swappedPath)).toBe('.socket.facts.json.br') + expect(existsSync(swappedPath)).toBe(true) + // Temp file lives in a subdir of the source's parent directory, NOT + // under tmpdir(). This keeps `path.relative(cwd, swappedPath)` + // traversal-free so depscan's multipart ingest accepts the entry. + expect(path.dirname(path.dirname(swappedPath))).toBe(wrapDir) + // The temp file is real brotli that round-trips to the original JSON. + const roundTripped = brotliDecompressSync( + readFileSync(swappedPath), + ).toString('utf8') + expect(JSON.parse(roundTripped)).toEqual(payload) + } finally { + await result.cleanup() + rmSync(wrapDir, { recursive: true, force: true }) + } + // Cleanup removes the temp .br file. + expect(existsSync(swappedPath)).toBe(false) + }) + + it('leaves non-facts paths unchanged', async () => { + const wrapDir = mkdtempSync(path.join(tmpdir(), 'socket-coana-wrap-')) + const lock = path.join(wrapDir, 'package-lock.json') + const pkg = path.join(wrapDir, 'package.json') + writeFileSync(lock, '{}') + writeFileSync(pkg, '{}') + + const result = await compressSocketFactsForUpload([lock, pkg]) + try { + expect(result.paths).toEqual([lock, pkg]) + } finally { + await result.cleanup() + rmSync(wrapDir, { recursive: true, force: true }) + } + }) + + it('leaves a missing .socket.facts.json path unchanged', async () => { + const wrapDir = mkdtempSync(path.join(tmpdir(), 'socket-coana-wrap-')) + const missingFacts = path.join(wrapDir, '.socket.facts.json') + // Note: no writeFileSync — file does not exist. + + const result = await compressSocketFactsForUpload([missingFacts]) + try { + expect(result.paths).toEqual([missingFacts]) + } finally { + await result.cleanup() + rmSync(wrapDir, { recursive: true, force: true }) + } + }) + + it('mixes facts and non-facts entries correctly', async () => { + const wrapDir = mkdtempSync(path.join(tmpdir(), 'socket-coana-wrap-')) + const facts = path.join(wrapDir, '.socket.facts.json') + const lock = path.join(wrapDir, 'package-lock.json') + writeFileSync(facts, JSON.stringify({ tier1ReachabilityScanId: 'mix' })) + writeFileSync(lock, '{"name":"x"}') + + const result = await compressSocketFactsForUpload([lock, facts]) + try { + expect(result.paths[0]).toBe(lock) + expect(result.paths[1]).not.toBe(facts) + expect(path.basename(result.paths[1]!)).toBe('.socket.facts.json.br') + const roundTripped = JSON.parse( + brotliDecompressSync(readFileSync(result.paths[1]!)).toString('utf8'), + ) + expect(roundTripped.tier1ReachabilityScanId).toBe('mix') + } finally { + await result.cleanup() + rmSync(wrapDir, { recursive: true, force: true }) + } + }) + + it('cleanup is idempotent (safe to call twice)', async () => { + const wrapDir = mkdtempSync(path.join(tmpdir(), 'socket-coana-wrap-')) + const facts = path.join(wrapDir, '.socket.facts.json') + writeFileSync(facts, JSON.stringify({ tier1ReachabilityScanId: 'idem' })) + + const result = await compressSocketFactsForUpload([facts]) + await result.cleanup() + await expect(result.cleanup()).resolves.not.toThrow() + rmSync(wrapDir, { recursive: true, force: true }) + }) + }) + + describe('extractTier1ReachabilityScanId', () => { + it('reads scan ID from plain JSON file', () => { + const file = writePlain('plain-id.json', { + tier1ReachabilityScanId: 'scan-123', + }) + + expect(extractTier1ReachabilityScanId(file)).toBe('scan-123') + }) + + it('returns undefined for missing file', () => { + expect( + extractTier1ReachabilityScanId(path.join(tmpDir, 'missing.json')), + ).toBeUndefined() + }) + + it('returns undefined when tier1ReachabilityScanId is missing', () => { + const file = writePlain('missing-field.json', { otherField: 'value' }) + + expect(extractTier1ReachabilityScanId(file)).toBeUndefined() + }) + + it('returns undefined for null scan ID', () => { + const file = writePlain('null-id.json', { tier1ReachabilityScanId: null }) + + expect(extractTier1ReachabilityScanId(file)).toBeUndefined() + }) + + it('returns undefined for empty / whitespace scan ID', () => { + const blank = writePlain('blank-id.json', { + tier1ReachabilityScanId: ' ', + }) + const empty = writePlain('empty-id.json', { + tier1ReachabilityScanId: '', + }) + + expect(extractTier1ReachabilityScanId(blank)).toBeUndefined() + expect(extractTier1ReachabilityScanId(empty)).toBeUndefined() + }) + + it('trims whitespace from scan ID', () => { + const file = writePlain('padded-id.json', { + tier1ReachabilityScanId: ' scan-456 ', + }) + + expect(extractTier1ReachabilityScanId(file)).toBe('scan-456') + }) + + it('coerces numeric scan ID to string', () => { + const file = writePlain('numeric-id.json', { + tier1ReachabilityScanId: 12345, + }) + + expect(extractTier1ReachabilityScanId(file)).toBe('12345') + }) + }) + + describe('extractReachabilityErrors', () => { + const errorComponentsBody = { + components: [ + { + name: 'lodash', + version: '4.17.21', + reachability: [ + { + ghsa_id: 'GHSA-aaaa-bbbb-cccc', + reachability: [ + { type: 'error', subprojectPath: 'packages/web' }, + { type: 'reachable', subprojectPath: 'packages/api' }, + ], + }, + ], + }, + { + name: 'axios', + version: '1.4.0', + reachability: [ + { + ghsa_id: 'GHSA-xxxx-yyyy-zzzz', + reachability: [{ type: 'error', subprojectPath: 'packages/api' }], + }, + ], + }, + ], + } + + const expectedErrors = [ + { + componentName: 'lodash', + componentVersion: '4.17.21', + ghsaId: 'GHSA-aaaa-bbbb-cccc', + subprojectPath: 'packages/web', + }, + { + componentName: 'axios', + componentVersion: '1.4.0', + ghsaId: 'GHSA-xxxx-yyyy-zzzz', + subprojectPath: 'packages/api', + }, + ] + + it('extracts errors from plain JSON', () => { + const file = writePlain('errors-plain.json', errorComponentsBody) + + expect(extractReachabilityErrors(file)).toEqual(expectedErrors) + }) + + it('returns empty array for missing file', () => { + expect( + extractReachabilityErrors(path.join(tmpDir, 'missing-errors.json')), + ).toEqual([]) + }) + + it('returns empty array when components is missing', () => { + const file = writePlain('errors-no-components.json', { other: true }) + + expect(extractReachabilityErrors(file)).toEqual([]) + }) + + it('skips components with no reachability arrays', () => { + const file = writePlain('errors-skip.json', { + components: [ + { name: 'just-name', version: '1.0.0' }, + { + name: 'no-inner', + version: '1.0.0', + reachability: [{ ghsa_id: 'GHSA-1' }], + }, + ], + }) + + expect(extractReachabilityErrors(file)).toEqual([]) + }) + }) +}) From 3ac4350826444cfc6aebd67da734354793019e0a Mon Sep 17 00:00:00 2001 From: barslev Date: Fri, 1 May 2026 11:01:30 +0200 Subject: [PATCH 2/4] fix: write brotli .br as sibling of source, not in temp subdir Previous form wrapped each `.br` in a per-scan `mkdtempSync` subdir under the source dir for concurrency isolation. That created a directory-handling asymmetry on depscan's side: a wire path of `dirA/.socket.facts.json.br` flattened to `.socket.facts.json` at root via depscan's basename-strip, while plain `dirA/.socket.facts.json` preserved the dir. depscan dropped the basename-strip in the corresponding PR; switch to writing `.br` as a sibling of the source so wire and storage paths match for both branches. Net effect: a brotli upload from `/.socket.facts.json` lands at `/.socket.facts.json` in the manifest tarball - identical to plain. Concurrent-scan note: coana already writes to a single source path, so the source `.socket.facts.json` itself is racy under concurrent runs against the same dir; the sibling `.br` doesn't introduce a new race that wasn't already there. * `src/utils/coana.mts`: `compressSocketFactsForUpload` writes `${p}.br` next to the source; `cleanup()` does `rm(brPath, { force: true })` per file. Drops `mkdtemp` import that's no longer used. * `src/utils/coana.test.mts`: directory-shape assertion replaced with `swappedPath === ${input}.br` (sibling). First test now also asserts the source survives `cleanup()`. 16 cases. --- src/utils/coana.mts | 41 ++++++++++++++++++++-------------------- src/utils/coana.test.mts | 28 +++++++++++++-------------- 2 files changed, 33 insertions(+), 36 deletions(-) diff --git a/src/utils/coana.mts b/src/utils/coana.mts index 4c2737f31..aa8800695 100644 --- a/src/utils/coana.mts +++ b/src/utils/coana.mts @@ -17,7 +17,7 @@ */ import { createReadStream, createWriteStream, existsSync } from 'node:fs' -import { mkdtemp, rm } from 'node:fs/promises' +import { rm } from 'node:fs/promises' import path from 'node:path' import { pipeline } from 'node:stream/promises' import { createBrotliCompress } from 'node:zlib' @@ -35,38 +35,38 @@ export type CompressedScanPaths = { /** * For each `.socket.facts.json` in `scanPaths`, stream-brotli-compress a - * `.socket.facts.json.br` into a fresh temp dir SIDE-BY-SIDE with the - * original file (inside the source's parent directory) and swap its path - * in. Other paths pass through unchanged. Missing files also pass through - * unchanged (the upload will fail downstream with the same error it would - * have). + * sibling `.socket.facts.json.br` next to the original file and swap its + * path in. Other paths pass through unchanged. Missing files also pass + * through unchanged (the upload will fail downstream with the same error + * it would have). * * Streaming + worker-thread compression keeps the event loop responsive: * default brotli quality (11) on a 60+MB facts file takes multiple seconds * of CPU, which would otherwise freeze the spinner / signal handlers / * any concurrent work. * - * The temp dir lives next to the source rather than under the OS temp dir + * The `.br` lives next to the source rather than under the OS temp dir * because depscan's multipart ingest (`addStreamEntry`) rejects entries * whose names contain `..` traversal segments. The SDK computes the * multipart entry name via `path.relative(cwd, brPath)`, so an OS-tmpdir * temp path turns into `../../../var/folders/...` and gets dropped as - * `unmatchedFiles`. Keeping the temp file inside cwd-or-below keeps the - * relative path traversal-free. + * `unmatchedFiles`. Sibling-write keeps the relative path inside cwd, and + * keeps the directory shape symmetric with the plain `.socket.facts.json` + * upload (depscan strips only the `.br` suffix at ingest, so + * `/.socket.facts.json.br` and `/.socket.facts.json` resolve to + * the same storage path). * - * Each compressed file gets its own `mkdtemp` directory so the filename - * stays `.socket.facts.json.br` (no index/suffix collision) and - * concurrent scans against the same source dir don't race on the same - * path. The api-v0 boundary uses the `.br` filename suffix to trigger - * streaming decode. + * Concurrent scans against the same source directory are already racy on + * `.socket.facts.json` itself (coana writes to a single path), so the + * sibling `.br` doesn't introduce a new race. * * Caller MUST `await cleanup()` (typically in a `finally` block) once the - * upload completes — successful or not — to remove the temp dirs. + * upload completes — successful or not — to remove the sibling files. */ export async function compressSocketFactsForUpload( scanPaths: string[], ): Promise { - const tmpDirs: string[] = [] + const brPaths: string[] = [] const paths = await Promise.all( scanPaths.map(async p => { if (path.basename(p) !== DOT_SOCKET_DOT_FACTS_JSON) { @@ -75,21 +75,20 @@ export async function compressSocketFactsForUpload( if (!existsSync(p)) { return p } - const td = await mkdtemp(path.join(path.dirname(p), '.socket-br-')) - tmpDirs.push(td) - const brPath = path.join(td, `${DOT_SOCKET_DOT_FACTS_JSON}.br`) + const brPath = `${p}.br` await pipeline( createReadStream(p), createBrotliCompress(), createWriteStream(brPath), ) + brPaths.push(brPath) return brPath }), ) const cleanup = async () => { - const dirs = tmpDirs.splice(0) + const targets = brPaths.splice(0) await Promise.all( - dirs.map(d => rm(d, { recursive: true, force: true })), + targets.map(t => rm(t, { force: true })), ) } return { paths, cleanup } diff --git a/src/utils/coana.test.mts b/src/utils/coana.test.mts index 39d405e27..7b2ad30f8 100644 --- a/src/utils/coana.test.mts +++ b/src/utils/coana.test.mts @@ -48,34 +48,33 @@ describe('coana facts-file utils', () => { } describe('compressSocketFactsForUpload', () => { - it('swaps a .socket.facts.json path for a brotli .br temp file', async () => { + it('writes brotli .br as a sibling of the source file', async () => { const wrapDir = mkdtempSync(path.join(tmpdir(), 'socket-coana-wrap-')) const inputPath = path.join(wrapDir, '.socket.facts.json') const payload = { tier1ReachabilityScanId: 'compress-test', a: 1, b: 2 } writeFileSync(inputPath, JSON.stringify(payload)) - const result = await compressSocketFactsForUpload([inputPath]) - const swappedPath = result.paths[0]! try { + const result = await compressSocketFactsForUpload([inputPath]) + const swappedPath = result.paths[0]! + expect(result.paths).toHaveLength(1) - expect(swappedPath).not.toBe(inputPath) - expect(path.basename(swappedPath)).toBe('.socket.facts.json.br') + expect(swappedPath).toBe(`${inputPath}.br`) expect(existsSync(swappedPath)).toBe(true) - // Temp file lives in a subdir of the source's parent directory, NOT - // under tmpdir(). This keeps `path.relative(cwd, swappedPath)` - // traversal-free so depscan's multipart ingest accepts the entry. - expect(path.dirname(path.dirname(swappedPath))).toBe(wrapDir) - // The temp file is real brotli that round-trips to the original JSON. + // The sibling file is real brotli that round-trips to the original + // JSON. const roundTripped = brotliDecompressSync( readFileSync(swappedPath), ).toString('utf8') expect(JSON.parse(roundTripped)).toEqual(payload) - } finally { + + // Cleanup removes the sibling .br file but leaves the source intact. await result.cleanup() + expect(existsSync(swappedPath)).toBe(false) + expect(existsSync(inputPath)).toBe(true) + } finally { rmSync(wrapDir, { recursive: true, force: true }) } - // Cleanup removes the temp .br file. - expect(existsSync(swappedPath)).toBe(false) }) it('leaves non-facts paths unchanged', async () => { @@ -118,8 +117,7 @@ describe('coana facts-file utils', () => { const result = await compressSocketFactsForUpload([lock, facts]) try { expect(result.paths[0]).toBe(lock) - expect(result.paths[1]).not.toBe(facts) - expect(path.basename(result.paths[1]!)).toBe('.socket.facts.json.br') + expect(result.paths[1]).toBe(`${facts}.br`) const roundTripped = JSON.parse( brotliDecompressSync(readFileSync(result.paths[1]!)).toString('utf8'), ) From 75855f00a8875277226bf9f41c20d13e53764bc8 Mon Sep 17 00:00:00 2001 From: barslev Date: Wed, 20 May 2026 05:30:19 +0200 Subject: [PATCH 3/4] fix: clean up partial brotli siblings on compress failure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Per Cursor Bugbot finding on src/utils/coana.mts: if any brotli pipeline in the `Promise.all` batch rejects, the helper rejected before returning its `cleanup` callback, so already-completed sibling `.br` files were orphaned next to their source. Three changes that together make the helper failure-safe: * Track the intended `.br` path BEFORE the pipeline starts. Previous code pushed to `brPaths` after `await pipeline(...)`, so a sibling that the pipeline started writing but didn't finish was invisible to cleanup. `rm({ force: true })` no-ops on missing paths, so tracking the intent is safe. * Switch the parallel batch from `Promise.all` to `Promise.allSettled`. With `all`, the first rejection bubbled out while sibling pipelines were still writing bytes; calling `cleanup()` then would race the live writes and could `rm` a sibling only to have it re-created immediately. `allSettled` waits for every pipeline to settle, so cleanup sees a stable set of files to remove. * On batch failure, run `cleanup()` internally before re-throwing so the caller (which only gets a chance to call cleanup when the helper resolves) doesn't have to. Add `recursive: true` to the rm call so a defensively-occupied directory at a `.br` path doesn't halt cleanup partway through the list. Test added: `removes partial .br siblings if compression fails mid-batch` — pre-occupies entry B's `.br` destination with a directory so its `createWriteStream` rejects with EISDIR, then asserts that entry A's `.br` is not orphaned on disk after the batch rejects and both source files survive untouched. --- src/utils/coana.mts | 35 ++++++++++++++++++++++++++++------- src/utils/coana.test.mts | 27 +++++++++++++++++++++++++++ 2 files changed, 55 insertions(+), 7 deletions(-) diff --git a/src/utils/coana.mts b/src/utils/coana.mts index aa8800695..e1777cd57 100644 --- a/src/utils/coana.mts +++ b/src/utils/coana.mts @@ -67,7 +67,20 @@ export async function compressSocketFactsForUpload( scanPaths: string[], ): Promise { const brPaths: string[] = [] - const paths = await Promise.all( + const cleanup = async () => { + const targets = brPaths.splice(0) + // `recursive: true` defends against the (defensive) case where a sibling + // path was somehow created as a directory — `rm` would otherwise throw + // on the first such entry and skip the rest. `force: true` no-ops on + // missing paths so the function stays idempotent. + await Promise.all(targets.map(t => rm(t, { recursive: true, force: true }))) + } + // Use `allSettled` (not `all`) so a failure in one entry doesn't leak the + // others' in-flight pipelines past our `catch`. If we used `all`, the + // first rejection would bubble out while sibling pipelines were still + // writing bytes — `cleanup()` would race with those writes and could + // remove a `.br` only to have it re-created after we returned. + const results = await Promise.allSettled( scanPaths.map(async p => { if (path.basename(p) !== DOT_SOCKET_DOT_FACTS_JSON) { return p @@ -76,21 +89,29 @@ export async function compressSocketFactsForUpload( return p } const brPath = `${p}.br` + // Track the sibling path BEFORE the pipeline starts so a + // partially-written `.br` is removed even if the pipeline rejects. + // `rm({ force: true })` no-ops on missing files, so tracking before + // creation is safe. + brPaths.push(brPath) await pipeline( createReadStream(p), createBrotliCompress(), createWriteStream(brPath), ) - brPaths.push(brPath) return brPath }), ) - const cleanup = async () => { - const targets = brPaths.splice(0) - await Promise.all( - targets.map(t => rm(t, { force: true })), - ) + const failure = results.find( + (r): r is PromiseRejectedResult => r.status === 'rejected', + ) + if (failure) { + // All pipelines have settled, so cleanup() can safely remove every + // `.br` we tracked (succeeded or partial) without racing live writes. + await cleanup() + throw failure.reason } + const paths = results.map(r => (r as PromiseFulfilledResult).value) return { paths, cleanup } } diff --git a/src/utils/coana.test.mts b/src/utils/coana.test.mts index 7b2ad30f8..528d84dbc 100644 --- a/src/utils/coana.test.mts +++ b/src/utils/coana.test.mts @@ -138,6 +138,33 @@ describe('coana facts-file utils', () => { await expect(result.cleanup()).resolves.not.toThrow() rmSync(wrapDir, { recursive: true, force: true }) }) + + it('removes partial .br siblings if compression fails mid-batch', async () => { + // Two facts files; B's `.br` destination is pre-occupied by a + // directory so `createWriteStream` open() fails with EISDIR. The + // whole batch rejects, but A's `.br` must not be orphaned. + const { mkdirSync } = await import('node:fs') + const wrapDirA = mkdtempSync(path.join(tmpdir(), 'socket-coana-wrap-a-')) + const wrapDirB = mkdtempSync(path.join(tmpdir(), 'socket-coana-wrap-b-')) + const factsA = path.join(wrapDirA, '.socket.facts.json') + const factsB = path.join(wrapDirB, '.socket.facts.json') + writeFileSync(factsA, JSON.stringify({ tier1ReachabilityScanId: 'A' })) + writeFileSync(factsB, JSON.stringify({ tier1ReachabilityScanId: 'B' })) + mkdirSync(`${factsB}.br`, { recursive: true }) + + try { + await expect( + compressSocketFactsForUpload([factsA, factsB]), + ).rejects.toThrow() + // A's sibling must not be on disk; sources must still exist. + expect(existsSync(`${factsA}.br`)).toBe(false) + expect(existsSync(factsA)).toBe(true) + expect(existsSync(factsB)).toBe(true) + } finally { + rmSync(wrapDirA, { recursive: true, force: true }) + rmSync(wrapDirB, { recursive: true, force: true }) + } + }) }) describe('extractTier1ReachabilityScanId', () => { From 084f311a79073a63c51e2d0b465b8fc919ef09c4 Mon Sep 17 00:00:00 2001 From: barslev Date: Wed, 20 May 2026 09:30:06 +0200 Subject: [PATCH 4/4] chore(release): 1.1.98 Bump version to 1.1.98 and add changelog entry for the brotli upload-compression refactor in this PR. --- CHANGELOG.md | 5 +++++ package.json | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d2514e0a2..d9d527f2a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,11 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). - **`socket manifest bazel [beta]`** — Generate Bazel JVM SBOM manifests by running `bazel query` against discovered Maven repos in a Bazel workspace. Closes the inline-Maven-declaration gap that lockfile-only parsing misses for repos like envoy, ray, tensorflow, tink-java, and or-tools. Auto-detects Bzlmod and legacy `WORKSPACE`. - **`socket scan create --auto-manifest`** now covers Bazel workspaces in addition to Gradle/Scala/Kotlin/Conda. Repos with `MODULE.bazel`, `WORKSPACE`, or `WORKSPACE.bazel` are detected automatically and their Maven dependencies extracted as part of the standard scan-create flow. +## [1.1.98](https://github.com/SocketDev/socket-cli/releases/tag/v1.1.98) - 2026-05-20 + +### Changed +- `socket scan create --reach` now uploads the reachability facts file as brotli on the wire, shrinking mono-repo upload sizes by roughly 85% with no change to the on-disk or stored format. Faster scan submissions on slow connections. + ## [1.1.97](https://github.com/SocketDev/socket-cli/releases/tag/v1.1.97) - 2026-05-18 ### Changed diff --git a/package.json b/package.json index 9a63add5e..1b2127153 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "socket", - "version": "1.1.97", + "version": "1.1.98", "description": "CLI for Socket.dev", "homepage": "https://github.com/SocketDev/socket-cli", "license": "MIT AND OFL-1.1",