Skip to content

Commit 08626d8

Browse files
authored
Add a setting for writing PDF files in a more compact way (#72)
1 parent 1d2ed46 commit 08626d8

7 files changed

Lines changed: 418 additions & 71 deletions

File tree

src/annotations.rs

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -796,10 +796,13 @@ mod tests {
796796
#[test]
797797
fn test_annotations() {
798798
test!(
799-
crate::tests::slice(|w| {
800-
w.annotation(Ref::new(1)).rect(Rect::new(0.0, 0.0, 1.0, 1.0));
801-
w.annotation(Ref::new(2)).rect(Rect::new(1.0, 1.0, 0.0, 0.0));
802-
}),
799+
crate::tests::slice(
800+
|w| {
801+
w.annotation(Ref::new(1)).rect(Rect::new(0.0, 0.0, 1.0, 1.0));
802+
w.annotation(Ref::new(2)).rect(Rect::new(1.0, 1.0, 0.0, 0.0));
803+
},
804+
Settings::default()
805+
),
803806
b"1 0 obj",
804807
b"<<",
805808
b" /Type /Annot",

src/chunk.rs

Lines changed: 49 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,25 @@
11
use super::*;
22

3+
/// Settings that should be applied while writing a PDF file.
4+
#[derive(Debug, Clone, Copy)]
5+
pub struct Settings {
6+
/// Whether to enable pretty-writing. In this case, `pdf-writer` will
7+
/// serialize PDFs in such a way that they are easier to read by humans by
8+
/// applying more padding and indentation, at the cost of larger file sizes.
9+
/// If disabled, `pdf-writer` will serialize objects as compactly as
10+
/// possible, leading to better file sizes but making it harder to inspect
11+
/// the file manually.
12+
///
13+
/// _Default value_: `true`.
14+
pub pretty: bool,
15+
}
16+
17+
impl Default for Settings {
18+
fn default() -> Self {
19+
Self { pretty: true }
20+
}
21+
}
22+
323
/// A builder for a collection of indirect PDF objects.
424
///
525
/// This type holds written top-level indirect PDF objects. Typically, you won't
@@ -14,18 +34,37 @@ use super::*;
1434
pub struct Chunk {
1535
pub(crate) buf: Buf,
1636
pub(crate) offsets: Vec<(Ref, usize)>,
37+
pub(crate) settings: Settings,
1738
}
1839

1940
impl Chunk {
20-
/// Create a new chunk with the default capacity (currently 1 KB).
41+
/// Create a new chunk with the default settings and buffer capacity
42+
/// (currently 1 KB).
2143
#[allow(clippy::new_without_default)]
2244
pub fn new() -> Self {
23-
Self::with_capacity(1024)
45+
Self::with_settings(Settings::default())
2446
}
2547

26-
/// Create a new chunk with the specified initial capacity.
48+
/// Create a new chunk with the given settings and the default buffer
49+
/// capacity (currently 1 KB).
50+
pub fn with_settings(settings: Settings) -> Self {
51+
Self::with_settings_and_capacity(settings, 1024)
52+
}
53+
54+
/// Create a new chunk with the default settings and the specified initial
55+
/// capacity.
2756
pub fn with_capacity(capacity: usize) -> Self {
28-
Self { buf: Buf::with_capacity(capacity), offsets: vec![] }
57+
Self::with_settings_and_capacity(Settings::default(), capacity)
58+
}
59+
60+
/// Create a new chunk with the given settings and the specified initial
61+
/// buffer capacity.
62+
pub fn with_settings_and_capacity(settings: Settings, capacity: usize) -> Self {
63+
Self {
64+
buf: Buf::with_capacity(capacity),
65+
offsets: vec![],
66+
settings,
67+
}
2968
}
3069

3170
/// The number of bytes that were written so far.
@@ -35,6 +74,11 @@ impl Chunk {
3574
self.buf.len()
3675
}
3776

77+
/// Reserve an additional number of bytes in the buffer.
78+
pub fn reserve(&mut self, additional: usize) {
79+
self.buf.reserve(additional);
80+
}
81+
3882
/// The bytes already written so far.
3983
pub fn as_bytes(&self) -> &[u8] {
4084
self.buf.as_slice()
@@ -148,7 +192,7 @@ impl Chunk {
148192
/// Start writing an indirectly referenceable object.
149193
pub fn indirect(&mut self, id: Ref) -> Obj<'_> {
150194
self.offsets.push((id, self.buf.len()));
151-
Obj::indirect(&mut self.buf, id)
195+
Obj::indirect(&mut self.buf, id, self.settings)
152196
}
153197

154198
/// Start writing an indirectly referenceable stream.

src/content.rs

Lines changed: 89 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
11
use super::*;
2-
use crate::object::TextStrLike;
2+
use crate::chunk::Settings;
3+
use crate::object::{is_delimiter_character, TextStrLike};
34

45
/// A builder for a content stream.
56
pub struct Content {
67
buf: Buf,
8+
settings: Settings,
79
q_depth: usize,
810
}
911

@@ -16,15 +18,26 @@ impl Content {
1618
Self::with_capacity(1024)
1719
}
1820

21+
/// Create a new content stream with the given settings.
22+
pub fn with_settings(settings: Settings) -> Self {
23+
let mut content = Self::new();
24+
content.settings = settings;
25+
content
26+
}
27+
1928
/// Create a new content stream with the specified initial buffer capacity.
2029
pub fn with_capacity(capacity: usize) -> Self {
21-
Self { buf: Buf::with_capacity(capacity), q_depth: 0 }
30+
Self {
31+
buf: Buf::with_capacity(capacity),
32+
q_depth: 0,
33+
settings: Default::default(),
34+
}
2235
}
2336

2437
/// Start writing an arbitrary operation.
2538
#[inline]
2639
pub fn op<'a>(&'a mut self, operator: &'a str) -> Operation<'a> {
27-
Operation::start(&mut self.buf, operator)
40+
Operation::start(&mut self.buf, operator, self.settings)
2841
}
2942

3043
/// Return the buffer of the content stream.
@@ -51,12 +64,13 @@ pub struct Operation<'a> {
5164
buf: &'a mut Buf,
5265
op: &'a str,
5366
first: bool,
67+
settings: Settings,
5468
}
5569

5670
impl<'a> Operation<'a> {
5771
#[inline]
58-
pub(crate) fn start(buf: &'a mut Buf, op: &'a str) -> Self {
59-
Self { buf, op, first: true }
72+
pub(crate) fn start(buf: &'a mut Buf, op: &'a str, settings: Settings) -> Self {
73+
Self { buf, op, first: true, settings }
6074
}
6175

6276
/// Write a primitive operand.
@@ -79,25 +93,47 @@ impl<'a> Operation<'a> {
7993
self
8094
}
8195

82-
/// Start writing an an arbitrary object operand.
96+
/// Start writing an arbitrary object operand.
8397
#[inline]
8498
pub fn obj(&mut self) -> Obj<'_> {
85-
if !self.first {
86-
self.buf.push(b' ');
87-
}
99+
// In case we are writing the first object, we want a newline to
100+
// separate it from previous operations (looks nicer). Otherwise, a
101+
// space is sufficient.
102+
let pad_byte = if self.first { b'\n' } else { b' ' };
103+
104+
// Similarly to how chunks are handled, we always add padding when
105+
// pretty-writing is enabled, and only lazily add padding depending on
106+
// whether it's really necessary if not.
107+
let needs_padding = if self.settings.pretty {
108+
if !self.buf.is_empty() {
109+
self.buf.push(pad_byte);
110+
}
111+
112+
false
113+
} else {
114+
true
115+
};
116+
88117
self.first = false;
89-
Obj::direct(self.buf, 0)
118+
Obj::direct(self.buf, 0, self.settings, needs_padding)
90119
}
91120
}
92121

93122
impl Drop for Operation<'_> {
94123
#[inline]
95124
fn drop(&mut self) {
96-
if !self.first {
97-
self.buf.push(b' ');
125+
let pad_byte = if self.first { b'\n' } else { b' ' };
126+
127+
// For example, in case we previously wrote a BT operator and then a
128+
// `[]` operand in the next operation, we don't need to pad them.
129+
if (self.settings.pretty
130+
|| self.buf.last().is_some_and(|b| !is_delimiter_character(*b)))
131+
&& !self.buf.is_empty()
132+
{
133+
self.buf.push(pad_byte);
98134
}
135+
99136
self.buf.extend(self.op.as_bytes());
100-
self.buf.push(b'\n');
101137
}
102138
}
103139

@@ -1708,4 +1744,44 @@ mod tests {
17081744
b"/F1 12 Tf\nBT\n[] TJ\n[(AB) 2 (CD)] TJ\nET"
17091745
);
17101746
}
1747+
1748+
#[test]
1749+
fn test_content_array_no_pretty() {
1750+
let mut content = Content::with_settings(Settings { pretty: false });
1751+
1752+
content.set_font(Name(b"F1"), 12.0);
1753+
content.set_font(Name(b"F2"), 15.0);
1754+
content.begin_text();
1755+
content.show_positioned().items();
1756+
content
1757+
.show_positioned()
1758+
.items()
1759+
.show(Str(b"AB"))
1760+
.adjust(2.0)
1761+
.show(Str(b"CD"))
1762+
.adjust(4.0)
1763+
.show(Str(b"EF"));
1764+
content.end_text();
1765+
1766+
assert_eq!(
1767+
content.finish().into_vec(),
1768+
b"/F1 12 Tf/F2 15 Tf\nBT[]TJ[(AB)2(CD)4(EF)]TJ\nET"
1769+
);
1770+
}
1771+
1772+
#[test]
1773+
fn test_content_dict_no_pretty() {
1774+
let mut content = Content::with_settings(Settings { pretty: false });
1775+
1776+
let mut mc = content.begin_marked_content_with_properties(Name(b"Test"));
1777+
let mut properties = mc.properties();
1778+
properties.actual_text(TextStr("Actual")).identify(1);
1779+
properties.artifact().kind(ArtifactType::Background);
1780+
mc.finish();
1781+
1782+
assert_eq!(
1783+
content.finish().into_vec(),
1784+
b"/Test<</ActualText(Actual)/MCID 1/Type/Background>>BDC"
1785+
);
1786+
}
17111787
}

src/lib.rs

Lines changed: 30 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -191,7 +191,7 @@ pub mod types {
191191
}
192192

193193
pub use self::buf::{Buf, Limits};
194-
pub use self::chunk::Chunk;
194+
pub use self::chunk::{Chunk, Settings};
195195
pub use self::content::Content;
196196
pub use self::object::{
197197
Array, Date, Dict, Filter, Finish, LanguageIdentifier, Name, Null, Obj, Primitive,
@@ -221,15 +221,29 @@ pub struct Pdf {
221221
}
222222

223223
impl Pdf {
224-
/// Create a new PDF with the default buffer capacity (currently 8 KB).
224+
/// Create a new PDF with the default settings and buffer capacity
225+
/// (currently 8 KB).
225226
#[allow(clippy::new_without_default)]
226227
pub fn new() -> Self {
227-
Self::with_capacity(8 * 1024)
228+
Self::with_settings(Settings::default())
228229
}
229230

230-
/// Create a new PDF with the specified initial buffer capacity.
231+
/// Create a new PDF with the given settings and the default buffer capacity
232+
/// (currently 8 KB).
233+
pub fn with_settings(settings: Settings) -> Self {
234+
Self::with_settings_and_capacity(settings, 8 * 1024)
235+
}
236+
237+
/// Create a new PDF with the default settings and the specified initial
238+
/// buffer capacity.
231239
pub fn with_capacity(capacity: usize) -> Self {
232-
let mut chunk = Chunk::with_capacity(capacity);
240+
Self::with_settings_and_capacity(Settings::default(), capacity)
241+
}
242+
243+
/// Create a new PDF with the given settings and the specified initial
244+
/// buffer capacity.
245+
pub fn with_settings_and_capacity(settings: Settings, capacity: usize) -> Self {
246+
let mut chunk = Chunk::with_settings_and_capacity(settings, capacity);
233247
chunk.buf.extend(b"%PDF-1.7\n%\x80\x80\x80\x80\n\n");
234248
Self {
235249
chunk,
@@ -298,7 +312,7 @@ impl Pdf {
298312
///
299313
/// Panics if any indirect reference id was used twice.
300314
pub fn finish(self) -> Vec<u8> {
301-
let Chunk { mut buf, mut offsets } = self.chunk;
315+
let Chunk { mut buf, mut offsets, settings } = self.chunk;
302316

303317
offsets.sort();
304318

@@ -346,7 +360,7 @@ impl Pdf {
346360
// Write the trailer dictionary.
347361
buf.extend(b"trailer\n");
348362

349-
let mut trailer = Obj::direct(&mut buf, 0).dict();
363+
let mut trailer = Obj::direct(&mut buf, 0, settings, false).dict();
350364
trailer.pair(Name(b"Size"), xref_len);
351365

352366
if let Some(catalog_id) = self.catalog_id {
@@ -412,11 +426,11 @@ mod tests {
412426
}
413427

414428
/// Return the slice of bytes written during the execution of `f`.
415-
pub fn slice<F>(f: F) -> Vec<u8>
429+
pub fn slice<F>(f: F, settings: Settings) -> Vec<u8>
416430
where
417431
F: FnOnce(&mut Pdf),
418432
{
419-
let mut w = Pdf::new();
433+
let mut w = Pdf::with_settings(settings);
420434
let start = w.len();
421435
f(&mut w);
422436
let end = w.len();
@@ -425,12 +439,16 @@ mod tests {
425439
}
426440

427441
/// Return the slice of bytes written for an object.
428-
pub fn slice_obj<F>(f: F) -> Vec<u8>
442+
pub fn slice_obj<F>(f: F, settings: Settings) -> Vec<u8>
429443
where
430444
F: FnOnce(Obj<'_>),
431445
{
432-
let buf = slice(|w| f(w.indirect(Ref::new(1))));
433-
buf[8..buf.len() - 9].to_vec()
446+
let buf = slice(|w| f(w.indirect(Ref::new(1))), settings);
447+
if settings.pretty {
448+
buf[8..buf.len() - 9].to_vec()
449+
} else {
450+
buf[8..buf.len() - 8].to_vec()
451+
}
434452
}
435453

436454
#[test]

src/macros.rs

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,15 @@ macro_rules! test {
1919
#[cfg(test)]
2020
macro_rules! test_obj {
2121
(|$obj:ident| $write:expr, $($tts:tt)*) => {{
22-
test!(crate::tests::slice_obj(|$obj| { $write; }), $($tts)*)
22+
test!(crate::tests::slice_obj(|$obj| { $write; }, crate::Settings::default()), $($tts)*)
23+
}}
24+
}
25+
26+
/// Test how an object is written, without pretty-printing.
27+
#[cfg(test)]
28+
macro_rules! test_obj_no_pretty {
29+
(|$obj:ident| $write:expr, $($tts:tt)*) => {{
30+
test!(crate::tests::slice_obj(|$obj| { $write; }, crate::Settings { pretty: false }), $($tts)*)
2331
}}
2432
}
2533

0 commit comments

Comments
 (0)