Skip to content

Commit c9b5045

Browse files
committed
Add orchestrator and image encoder infrastructure
Introduces a new orchestrator layer for PDF engines, including task queueing, priority scheduling, and visibility-aware task ranking. Adds a dedicated image encoder worker pool to offload image encoding from the main PDFium worker, with supporting modules for worker management and encoding. Refactors PDFium engine integration to use the orchestrator and encoder pool, and removes legacy test files. Updates related plugin and shared code to support the new architecture.
1 parent 66f2e7c commit c9b5045

27 files changed

Lines changed: 2817 additions & 866 deletions

.husky/pre-commit

Lines changed: 12 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,2 +1,13 @@
11
#!/usr/bin/env sh
# Cross-platform pre-commit hook for GitHub Desktop, macOS, Windows (Git Bash), Linux.
# Runs lint-staged from the project's local devDependencies.

# Make Node visible for GUI apps, which often start with a minimal PATH
# (harmless elsewhere).
export PATH="/opt/homebrew/bin:/usr/local/bin:$PATH"

# Load nvm if present (no-op if missing).
# Keep an NVM_DIR the user has already exported; only fall back to the
# default location when it is unset or empty.
export NVM_DIR="${NVM_DIR:-$HOME/.nvm}"
[ -s "$NVM_DIR/nvm.sh" ] && . "$NVM_DIR/nvm.sh"

# Prefer Node's built-in npx so we don't rely on pnpm/yarn on PATH.
# --no-install ensures it uses the local devDependency.
exec npx --no-install lint-staged
Lines changed: 94 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,94 @@
1+
/**
2+
* Dedicated worker for image encoding operations
3+
* Offloads OffscreenCanvas.convertToBlob() from the main PDFium worker
4+
*/
5+
6+
/**
 * Message posted to the image encoder worker to request one encode.
 */
export interface EncodeImageRequest {
  /** Correlation id; the worker echoes it back on the matching response. */
  id: string;
  /** Message discriminator; the worker ignores any other value. */
  type: 'encode';
  data: {
    /** Raw pixels plus dimensions, shaped like a DOM `ImageData`. */
    imageData: {
      data: Uint8ClampedArray;
      width: number;
      height: number;
    };
    /** MIME type of the encoded output. */
    imageType: 'image/png' | 'image/jpeg' | 'image/webp';
    /** Optional quality, forwarded to `OffscreenCanvas.convertToBlob()`. */
    quality?: number;
  };
}

/**
 * Message posted back by the image encoder worker.
 *
 * Modelled as a discriminated union on `type`, so that checking
 * `response.type` narrows `response.data` without a cast and a
 * 'result' carrying an error payload (or vice versa) is rejected
 * by the compiler.
 */
export type EncodeImageResponse =
  | {
      /** Id of the request this response answers. */
      id: string;
      type: 'result';
      /** The encoded image. */
      data: Blob;
    }
  | {
      /** Id of the request this response answers. */
      id: string;
      type: 'error';
      /** Description of why the encode failed. */
      data: { message: string };
    };
25+
26+
/**
 * Encode raw RGBA pixels to a Blob using OffscreenCanvas.
 *
 * @param imageData - Pixel buffer and its dimensions; `data` must hold
 *   exactly `width * height * 4` bytes.
 * @param imageType - MIME type of the encoded output.
 * @param quality - Optional quality passed through to `convertToBlob()`.
 * @returns The encoded image.
 * @throws Error if the dimensions are not positive integers, if the buffer
 *   length does not match the dimensions, if OffscreenCanvas is missing, or
 *   if no 2D context can be obtained.
 */
async function encodeImage(
  imageData: { data: Uint8ClampedArray; width: number; height: number },
  imageType: 'image/png' | 'image/jpeg' | 'image/webp',
  quality?: number,
): Promise<Blob> {
  const { data, width, height } = imageData;

  // Validate up front so callers get an actionable message instead of an
  // opaque exception from the ImageData constructor further down.
  if (!Number.isInteger(width) || !Number.isInteger(height) || width <= 0 || height <= 0) {
    throw new Error(
      `Invalid image dimensions: ${width}x${height} (width and height must be positive integers)`,
    );
  }

  const expectedLength = width * height * 4;
  if (data.length !== expectedLength) {
    throw new Error(
      `Pixel data length ${data.length} does not match ${width}x${height} RGBA image ` +
        `(expected ${expectedLength} bytes)`,
    );
  }

  if (typeof OffscreenCanvas === 'undefined') {
    throw new Error('OffscreenCanvas is not available in this worker environment');
  }

  // ImageData needs a Uint8ClampedArray over a plain ArrayBuffer. When the
  // input already is one, wrap the same memory in a new view rather than
  // copying every pixel; only fall back to a copy for other buffer kinds.
  const backing = data.buffer;
  const pixels =
    backing instanceof ArrayBuffer
      ? new Uint8ClampedArray(backing, data.byteOffset, data.length)
      : new Uint8ClampedArray(data);
  const imgData = new ImageData(pixels, width, height);

  // Create OffscreenCanvas and render
  const canvas = new OffscreenCanvas(width, height);
  const ctx = canvas.getContext('2d');

  if (!ctx) {
    throw new Error('Failed to get 2D context from OffscreenCanvas');
  }

  ctx.putImageData(imgData, 0, 0);

  // Convert to blob
  return canvas.convertToBlob({ type: imageType, quality });
}
57+
58+
/**
 * Worker entry point: services `encode` requests posted by the owner.
 *
 * Messages whose `type` is not 'encode' are ignored. Every handled request
 * produces exactly one reply carrying the request's `id`: a 'result' with the
 * encoded Blob, or an 'error' with a message describing the failure.
 */
self.onmessage = async ({ data: request }: MessageEvent<EncodeImageRequest>) => {
  if (request.type !== 'encode') {
    return;
  }

  try {
    const { imageData, imageType, quality } = request.data;
    const encoded = await encodeImage(imageData, imageType, quality);

    const success: EncodeImageResponse = {
      id: request.id,
      type: 'result',
      data: encoded,
    };
    self.postMessage(success);
  } catch (failure) {
    // Errors cross the worker boundary as plain text.
    let message: string;
    if (failure instanceof Error) {
      message = failure.message;
    } else {
      message = String(failure);
    }

    const rejection: EncodeImageResponse = {
      id: request.id,
      type: 'error',
      data: { message },
    };
    self.postMessage(rejection);
  }
};
Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,2 @@
1+
export * from './worker-pool';
2+
export type { EncodeImageRequest, EncodeImageResponse } from './image-encoder-worker';
Lines changed: 190 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,190 @@
1+
import { Logger, NoopLogger } from '@embedpdf/models';
2+
import type { EncodeImageRequest, EncodeImageResponse } from './image-encoder-worker';
3+
4+
const LOG_SOURCE = 'ImageEncoderPool';
const LOG_CATEGORY = 'Encoder';

/**
 * Bookkeeping for one in-flight encode request.
 */
interface EncodingTask {
  resolve: (blob: Blob) => void;
  reject: (error: Error) => void;
  /** Worker the request was posted to, so the task can be failed if that worker errors. */
  worker: Worker;
}

/**
 * Pool of image encoding workers to offload OffscreenCanvas operations
 * from the main PDFium worker thread.
 *
 * Requests are spread over the workers round-robin and matched to their
 * responses by a generated request id.
 */
export class ImageEncoderWorkerPool {
  private workers: Worker[] = [];
  private pendingTasks = new Map<string, EncodingTask>();
  private nextWorkerId = 0;
  private requestCounter = 0;
  private logger: Logger;

  /**
   * Create a pool of image encoding workers
   * @param poolSize - Number of workers to create. Because a required
   *   parameter follows, the default of 2 only applies when `undefined`
   *   is passed explicitly.
   * @param workerUrl - URL to the worker script (loaded as a module worker)
   * @param logger - Logger instance; a NoopLogger is used when omitted
   */
  constructor(
    private poolSize: number = 2,
    private workerUrl: string,
    logger?: Logger,
  ) {
    this.logger = logger ?? new NoopLogger();
    this.initialize();
  }

  /**
   * Initialize the worker pool.
   *
   * A worker that fails to construct is logged and skipped, so the pool may
   * end up with fewer than `poolSize` workers (possibly none).
   */
  private initialize() {
    this.logger.debug(
      LOG_SOURCE,
      LOG_CATEGORY,
      `Creating worker pool with ${this.poolSize} workers`,
    );

    for (let i = 0; i < this.poolSize; i++) {
      try {
        const worker = new Worker(this.workerUrl, { type: 'module' });
        worker.onmessage = this.handleWorkerMessage.bind(this);
        // Capture the worker so its in-flight tasks can be located on failure.
        worker.onerror = (event) => this.handleWorkerError(worker, event);
        this.workers.push(worker);

        this.logger.debug(LOG_SOURCE, LOG_CATEGORY, `Worker ${i} created successfully`);
      } catch (error) {
        this.logger.error(LOG_SOURCE, LOG_CATEGORY, `Failed to create worker ${i}:`, error);
      }
    }
  }

  /**
   * Handle messages from workers: settle the pending task whose id matches
   * the response. Responses with an unknown id are logged and dropped.
   */
  private handleWorkerMessage(event: MessageEvent<EncodeImageResponse>) {
    const response = event.data;
    const task = this.pendingTasks.get(response.id);

    if (!task) {
      this.logger.warn(
        LOG_SOURCE,
        LOG_CATEGORY,
        `Received response for unknown task: ${response.id}`,
      );
      return;
    }

    this.pendingTasks.delete(response.id);

    if (response.type === 'result') {
      task.resolve(response.data as Blob);
    } else {
      const errorData = response.data as { message: string };
      task.reject(new Error(errorData.message));
    }
  }

  /**
   * Handle an `error` event raised on a worker.
   *
   * The worker is terminated and removed from rotation, and every request
   * that was posted to it is rejected. Without this, those promises would
   * never settle and later requests would keep being routed to a worker
   * that cannot answer.
   */
  private handleWorkerError(worker: Worker, error: ErrorEvent) {
    this.logger.error(LOG_SOURCE, LOG_CATEGORY, 'Worker error:', error.message);

    const index = this.workers.indexOf(worker);
    if (index !== -1) {
      this.workers.splice(index, 1);
      // Keep the round-robin cursor on the worker it was already pointing at.
      if (index < this.nextWorkerId) {
        this.nextWorkerId--;
      }
      if (this.nextWorkerId >= this.workers.length) {
        this.nextWorkerId = 0;
      }
    }
    worker.terminate();

    const reason = error.message || 'unknown error';
    this.pendingTasks.forEach((task, id) => {
      if (task.worker !== worker) {
        return;
      }
      this.pendingTasks.delete(id);
      task.reject(new Error(`Image encoder worker failed: ${reason}`));
    });
  }

  /**
   * Get the next available worker using round-robin
   * @returns The selected worker, or null when the pool is empty
   */
  private getNextWorker(): Worker | null {
    if (this.workers.length === 0) {
      return null;
    }

    const worker = this.workers[this.nextWorkerId] ?? null;
    this.nextWorkerId = (this.nextWorkerId + 1) % this.workers.length;
    return worker;
  }

  /**
   * Encode ImageData to Blob using a worker from the pool.
   *
   * When the pixel data is backed by a plain ArrayBuffer, that buffer is
   * transferred to the worker, so `imageData.data` is detached afterwards
   * and must not be reused by the caller.
   *
   * @param imageData - Raw image data
   * @param imageType - Target image format
   * @param quality - Image quality (0-1) for lossy formats
   * @returns Promise that resolves to encoded Blob; rejects if the pool has
   *   no workers, the request cannot be posted, the worker reports an error,
   *   the worker itself fails, or the pool is destroyed first
   */
  encode(
    imageData: { data: Uint8ClampedArray; width: number; height: number },
    imageType: 'image/png' | 'image/jpeg' | 'image/webp' = 'image/webp',
    quality?: number,
  ): Promise<Blob> {
    return new Promise((resolve, reject) => {
      const worker = this.getNextWorker();

      if (!worker) {
        reject(new Error('No workers available in the pool'));
        return;
      }

      const requestId = `encode-${Date.now()}-${this.requestCounter++}`;

      const request: EncodeImageRequest = {
        id: requestId,
        type: 'encode',
        data: {
          imageData: {
            data: imageData.data,
            width: imageData.width,
            height: imageData.height,
          },
          imageType,
          quality,
        },
      };

      this.logger.debug(
        LOG_SOURCE,
        LOG_CATEGORY,
        `Sending encoding request ${requestId} (${imageData.width}x${imageData.height})`,
      );

      // Transfer the buffer for better performance. Only a plain ArrayBuffer
      // is transferable; any other backing store is structured-cloned.
      const buffer = imageData.data.buffer;
      const transfer: Transferable[] = buffer instanceof ArrayBuffer ? [buffer] : [];

      this.pendingTasks.set(requestId, { resolve, reject, worker });

      try {
        worker.postMessage(request, transfer);
      } catch (error) {
        // The message never left, so no response will arrive: drop the
        // bookkeeping entry instead of leaking it.
        this.pendingTasks.delete(requestId);
        reject(error instanceof Error ? error : new Error(String(error)));
      }
    });
  }

  /**
   * Destroy all workers in the pool.
   *
   * Pending requests are rejected with 'Worker pool destroyed'. After this,
   * `encode()` rejects because no workers remain.
   */
  destroy() {
    this.logger.debug(LOG_SOURCE, LOG_CATEGORY, 'Destroying worker pool');

    // Reject all pending tasks
    this.pendingTasks.forEach((task, id) => {
      task.reject(new Error('Worker pool destroyed'));
      this.logger.debug(LOG_SOURCE, LOG_CATEGORY, `Rejected pending task: ${id}`);
    });
    this.pendingTasks.clear();

    // Terminate all workers
    this.workers.forEach((worker, index) => {
      worker.terminate();
      this.logger.debug(LOG_SOURCE, LOG_CATEGORY, `Worker ${index} terminated`);
    });
    this.workers = [];
    this.nextWorkerId = 0;
  }

  /**
   * Get the number of active workers in the pool
   */
  get activeWorkers(): number {
    return this.workers.length;
  }

  /**
   * Get the number of pending encoding tasks
   */
  get pendingTasksCount(): number {
    return this.pendingTasks.size;
  }
}
Lines changed: 17 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,17 @@
1+
/**
2+
* Orchestrator layer for PDF engines
3+
*
4+
* This module provides the "smart" orchestration layer that wraps
5+
* the "dumb" executor (PdfiumNative or RemoteExecutor) with:
6+
* - Priority-based task scheduling
7+
* - Visibility-aware task ranking
8+
* - Parallel image encoding
9+
* - Multi-page operation orchestration
10+
*
11+
* @packageDocumentation
12+
*/
13+
14+
export * from './task-queue';
15+
export * from './pdf-engine';
16+
export * from './remote-executor';
17+
export * from './pdfium-native-runner';

0 commit comments

Comments
 (0)