Skip to content

Commit 2af550d

Browse files
author
Andy Weiss
committed
zlib: add support for brotli compression dictionary
This change adds JS API support for custom compression dictionaries with Brotli in the zlib library. The underlying Brotli dependency already supports this and zstd exposes something similar. This follows the zstd approach for using a custom dictionary but for Brotli. Fixes: #52250
1 parent 4dc0d20 commit 2af550d

3 files changed

Lines changed: 168 additions & 11 deletions

File tree

lib/zlib.js

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -834,7 +834,12 @@ function Brotli(opts, mode) {
834834
new binding.BrotliDecoder(mode) : new binding.BrotliEncoder(mode);
835835

836836
this._writeState = new Uint32Array(2);
837-
handle.init(brotliInitParamsArray, this._writeState, processCallback);
837+
handle.init(
838+
brotliInitParamsArray,
839+
this._writeState,
840+
processCallback,
841+
opts?.dictionary && isArrayBufferView(opts.dictionary) ? opts.dictionary : undefined,
842+
);
838843

839844
ZlibBase.call(this, opts, mode, handle, brotliDefaultOpts);
840845
}

src/node_zlib.cc

Lines changed: 90 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@
4040

4141
#include "brotli/decode.h"
4242
#include "brotli/encode.h"
43+
#include "brotli/shared_dictionary.h"
4344
#include "zlib.h"
4445
#include "zstd.h"
4546
#include "zstd_errors.h"
@@ -256,7 +257,7 @@ class BrotliEncoderContext final : public BrotliContext {
256257
public:
257258
void Close();
258259
void DoThreadPoolWork();
259-
CompressionError Init();
260+
CompressionError Init(std::string_view dictionary = {});
260261
CompressionError ResetStream();
261262
CompressionError SetParams(int key, uint32_t value);
262263
CompressionError GetErrorInfo() const;
@@ -268,13 +269,17 @@ class BrotliEncoderContext final : public BrotliContext {
268269
private:
269270
bool last_result_ = false;
270271
DeleteFnPtr<BrotliEncoderState, BrotliEncoderDestroyInstance> state_;
272+
DeleteFnPtr<BrotliEncoderPreparedDictionary,
273+
BrotliEncoderDestroyPreparedDictionary> prepared_dictionary_;
274+
// Dictionary data must remain valid while the prepared dictionary is alive.
275+
std::vector<uint8_t> dictionary_;
271276
};
272277

273278
class BrotliDecoderContext final : public BrotliContext {
274279
public:
275280
void Close();
276281
void DoThreadPoolWork();
277-
CompressionError Init();
282+
CompressionError Init(std::string_view dictionary = {});
278283
CompressionError ResetStream();
279284
CompressionError SetParams(int key, uint32_t value);
280285
CompressionError GetErrorInfo() const;
@@ -288,6 +293,8 @@ class BrotliDecoderContext final : public BrotliContext {
288293
BrotliDecoderErrorCode error_ = BROTLI_DECODER_NO_ERROR;
289294
std::string error_string_;
290295
DeleteFnPtr<BrotliDecoderState, BrotliDecoderDestroyInstance> state_;
296+
// Dictionary data must remain valid for the lifetime of the decoder.
297+
std::vector<uint8_t> dictionary_;
291298
};
292299

293300
class ZstdContext : public MemoryRetainer {
@@ -830,7 +837,8 @@ class BrotliCompressionStream final :
830837
static void Init(const FunctionCallbackInfo<Value>& args) {
831838
BrotliCompressionStream* wrap;
832839
ASSIGN_OR_RETURN_UNWRAP(&wrap, args.This());
833-
CHECK(args.Length() == 3 && "init(params, writeResult, writeCallback)");
840+
CHECK((args.Length() == 3 || args.Length() == 4) &&
841+
"init(params, writeResult, writeCallback[, dictionary])");
834842

835843
CHECK(args[1]->IsUint32Array());
836844
CHECK_GE(args[1].As<Uint32Array>()->Length(), 2);
@@ -841,7 +849,19 @@ class BrotliCompressionStream final :
841849
wrap->InitStream(write_result, write_js_callback);
842850

843851
AllocScope alloc_scope(wrap);
844-
CompressionError err = wrap->context()->Init();
852+
std::string_view dictionary;
853+
ArrayBufferViewContents<char> contents;
854+
if (args.Length() == 4 && !args[3]->IsUndefined()) {
855+
if (!args[3]->IsArrayBufferView()) {
856+
THROW_ERR_INVALID_ARG_TYPE(
857+
wrap->env(), "dictionary must be an ArrayBufferView if provided");
858+
return;
859+
}
860+
contents.ReadValue(args[3]);
861+
dictionary = std::string_view(contents.data(), contents.length());
862+
}
863+
864+
CompressionError err = wrap->context()->Init(dictionary);
845865
if (err.IsError()) {
846866
wrap->EmitError(err);
847867
// TODO(addaleax): Sometimes we generate better error codes in C++ land,
@@ -1387,23 +1407,60 @@ void BrotliEncoderContext::DoThreadPoolWork() {
13871407

13881408
void BrotliEncoderContext::Close() {
13891409
state_.reset();
1410+
prepared_dictionary_.reset();
1411+
dictionary_.clear();
13901412
mode_ = NONE;
13911413
}
13921414

1393-
CompressionError BrotliEncoderContext::Init() {
1415+
// Creates a fresh Brotli encoder instance and, when `dictionary` is non-empty,
// prepares it as a raw shared dictionary and attaches it to the encoder.
//
// The bytes of `dictionary` are copied into `dictionary_`, so the caller's
// buffer only needs to stay valid for the duration of this call. An empty view
// (the default) initializes the encoder without a dictionary.
//
// Returns an empty CompressionError on success. On failure the error carries
// ERR_ZLIB_INITIALIZATION_FAILED (encoder could not be created) or
// ERR_ZLIB_DICTIONARY_LOAD_FAILED (dictionary could not be prepared/attached).
CompressionError BrotliEncoderContext::Init(std::string_view dictionary) {
  brotli_alloc_func alloc = CompressionStreamMemoryOwner::AllocForBrotli;
  brotli_free_func free = CompressionStreamMemoryOwner::FreeForZlib;
  void* opaque =
      CompressionStream<BrotliEncoderContext>::AllocatorOpaquePointerForContext(
          this);

  // Clean up any previous state before re-initializing. The order mirrors
  // Close(): the old encoder goes first because it may still have the old
  // prepared dictionary attached, and the prepared dictionary in turn needs
  // the bytes in `dictionary_` to remain valid while it is alive.
  state_.reset();
  prepared_dictionary_.reset();
  dictionary_.clear();

  state_.reset(BrotliEncoderCreateInstance(alloc, free, opaque));
  if (!state_) {
    return CompressionError("Could not initialize Brotli instance",
                            "ERR_ZLIB_INITIALIZATION_FAILED",
                            -1);
  }

  if (dictionary.empty()) {
    return CompressionError {};
  }

  // The dictionary data must remain valid for the lifetime of the prepared
  // dictionary, so copy it into a member vector.
  const uint8_t* dictionary_bytes =
      reinterpret_cast<const uint8_t*>(dictionary.data());
  dictionary_.assign(dictionary_bytes, dictionary_bytes + dictionary.size());

  prepared_dictionary_.reset(BrotliEncoderPrepareDictionary(
      BROTLI_SHARED_DICTIONARY_RAW,
      dictionary_.size(),
      dictionary_.data(),
      BROTLI_MAX_QUALITY,
      alloc,
      free,
      opaque));
  if (!prepared_dictionary_) {
    return CompressionError("Failed to prepare brotli dictionary",
                            "ERR_ZLIB_DICTIONARY_LOAD_FAILED",
                            -1);
  }

  if (!BrotliEncoderAttachPreparedDictionary(state_.get(),
                                             prepared_dictionary_.get())) {
    return CompressionError("Failed to attach brotli dictionary",
                            "ERR_ZLIB_DICTIONARY_LOAD_FAILED",
                            -1);
  }

  return CompressionError {};
}
14081465

14091466
CompressionError BrotliEncoderContext::ResetStream() {
@@ -1435,6 +1492,7 @@ CompressionError BrotliEncoderContext::GetErrorInfo() const {
14351492

14361493
void BrotliDecoderContext::Close() {
14371494
state_.reset();
1495+
dictionary_.clear();
14381496
mode_ = NONE;
14391497
}
14401498

@@ -1455,20 +1513,42 @@ void BrotliDecoderContext::DoThreadPoolWork() {
14551513
}
14561514
}
14571515

1458-
CompressionError BrotliDecoderContext::Init() {
1516+
// Creates a fresh Brotli decoder instance and, when `dictionary` is non-empty,
// attaches it to the decoder as a raw shared dictionary.
//
// The bytes of `dictionary` are copied into `dictionary_`, so the caller's
// buffer only needs to stay valid for the duration of this call. An empty view
// (the default) initializes the decoder without a dictionary.
//
// Returns an empty CompressionError on success. On failure the error carries
// ERR_ZLIB_INITIALIZATION_FAILED (decoder could not be created) or
// ERR_ZLIB_DICTIONARY_LOAD_FAILED (dictionary could not be attached).
CompressionError BrotliDecoderContext::Init(std::string_view dictionary) {
  brotli_alloc_func alloc = CompressionStreamMemoryOwner::AllocForBrotli;
  brotli_free_func free = CompressionStreamMemoryOwner::FreeForZlib;
  void* opaque =
      CompressionStream<BrotliDecoderContext>::AllocatorOpaquePointerForContext(
          this);

  // Clean up any previous state before re-initializing. The order mirrors
  // Close(): the old decoder is destroyed first, because the dictionary bytes
  // must stay valid for as long as a decoder attached to them exists.
  state_.reset();
  dictionary_.clear();

  state_.reset(BrotliDecoderCreateInstance(alloc, free, opaque));
  if (!state_) {
    return CompressionError("Could not initialize Brotli instance",
                            "ERR_ZLIB_INITIALIZATION_FAILED",
                            -1);
  }

  if (dictionary.empty()) {
    return CompressionError {};
  }

  // The dictionary data must remain valid for the lifetime of the decoder,
  // so copy it into a member vector.
  const uint8_t* dictionary_bytes =
      reinterpret_cast<const uint8_t*>(dictionary.data());
  dictionary_.assign(dictionary_bytes, dictionary_bytes + dictionary.size());

  if (!BrotliDecoderAttachDictionary(state_.get(),
                                     BROTLI_SHARED_DICTIONARY_RAW,
                                     dictionary_.size(),
                                     dictionary_.data())) {
    return CompressionError("Failed to attach brotli dictionary",
                            "ERR_ZLIB_DICTIONARY_LOAD_FAILED",
                            -1);
  }

  return CompressionError {};
}
14731553

14741554
CompressionError BrotliDecoderContext::ResetStream() {
Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
'use strict';

// Exercises the `dictionary` option of the Brotli zlib APIs: the async
// convenience methods, the streaming classes, and the sync methods, plus the
// failure path when the decoder is not given the dictionary.

const common = require('../common');
const assert = require('assert');
const zlib = require('zlib');

const dictionary = Buffer.from(
  `Lorem ipsum dolor sit amet, consectetur adipiscing elit.
Sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.
Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat.`
);

const input = Buffer.from(
  `Lorem ipsum dolor sit amet, consectetur adipiscing elit.
Lorem ipsum dolor sit amet, consectetur adipiscing elit.
Sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.
Sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.
Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur.`
);

// Test with convenience methods (async).
zlib.brotliCompress(input, { dictionary }, common.mustSucceed((compressed) => {
  assert(compressed.length < input.length,
         'compressed data should be smaller with dictionary');
  zlib.brotliDecompress(compressed, { dictionary }, common.mustSucceed((decompressed) => {
    assert.strictEqual(decompressed.toString(), input.toString());
  }));
}));

// Test with streaming API.
{
  const encoder = zlib.createBrotliCompress({ dictionary });
  const decoder = zlib.createBrotliDecompress({ dictionary });

  const chunks = [];
  decoder.on('data', (chunk) => chunks.push(chunk));
  decoder.on('end', common.mustCall(() => {
    const result = Buffer.concat(chunks);
    assert.strictEqual(result.toString(), input.toString());
  }));

  encoder.pipe(decoder);
  encoder.end(input);
}

// Test that dictionary improves compression ratio.
{
  const withDict = zlib.brotliCompressSync(input, { dictionary });
  const withoutDict = zlib.brotliCompressSync(input);

  // Dictionary-based compression should be at least as good as without.
  assert(withDict.length <= withoutDict.length,
         `Dictionary compression (${withDict.length}) should not be ` +
         `larger than non-dictionary compression (${withoutDict.length})`);

  // Verify decompression with dictionary works.
  const decompressed = zlib.brotliDecompressSync(withDict, { dictionary });
  assert.strictEqual(decompressed.toString(), input.toString());
}

// Test that decompression without matching dictionary fails.
{
  const compressed = zlib.brotliCompressSync(input, { dictionary });
  assert.throws(() => {
    zlib.brotliDecompressSync(compressed);
  }, (err) => {
    // The exact error code may vary, so it is not pinned here. The previous
    // `code === ... || err instanceof Error` check accepted any Error, which
    // made the code comparison dead; assert what is actually required instead:
    // a real Error that carries a string error code.
    assert(err instanceof Error);
    assert.strictEqual(typeof err.code, 'string');
    return true;
  });
}

0 commit comments

Comments
 (0)