Skip to content

Commit 4a6d8d2

Browse files
committed
Add support for writing xref streams
1 parent 1d2ed46 commit 4a6d8d2

1 file changed

Lines changed: 209 additions & 67 deletions

File tree

src/lib.rs

Lines changed: 209 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -215,9 +215,7 @@ use self::writers::*;
215215
/// type dereferences to.
216216
pub struct Pdf {
217217
chunk: Chunk,
218-
catalog_id: Option<Ref>,
219-
info_id: Option<Ref>,
220-
file_id: Option<(Vec<u8>, Vec<u8>)>,
218+
trailer_data: TrailerData,
221219
}
222220

223221
impl Pdf {
@@ -231,12 +229,7 @@ impl Pdf {
231229
pub fn with_capacity(capacity: usize) -> Self {
232230
let mut chunk = Chunk::with_capacity(capacity);
233231
chunk.buf.extend(b"%PDF-1.7\n%\x80\x80\x80\x80\n\n");
234-
Self {
235-
chunk,
236-
catalog_id: None,
237-
info_id: None,
238-
file_id: None,
239-
}
232+
Self { chunk, trailer_data: TrailerData::default() }
240233
}
241234

242235
/// Set the binary marker in the header of the PDF.
@@ -271,15 +264,15 @@ impl Pdf {
271264
/// the same for a document, the second should change for each revision. It
272265
/// is optional, but recommended. In PDF/A, this is required. PDF 1.1+.
273266
pub fn set_file_id(&mut self, id: (Vec<u8>, Vec<u8>)) {
274-
self.file_id = Some(id);
267+
self.trailer_data.file_id = Some(id);
275268
}
276269

277270
/// Start writing the document catalog. Required.
278271
///
279272
/// This will also register the document catalog with the file trailer,
280273
/// meaning that you don't need to provide the given `id` anywhere else.
281274
pub fn catalog(&mut self, id: Ref) -> Catalog<'_> {
282-
self.catalog_id = Some(id);
275+
self.trailer_data.catalog_id = Some(id);
283276
self.indirect(id).start()
284277
}
285278

@@ -289,7 +282,7 @@ impl Pdf {
289282
/// file trailer, meaning that you don't need to provide the given `id`
290283
/// anywhere else.
291284
pub fn document_info(&mut self, id: Ref) -> DocumentInfo<'_> {
292-
self.info_id = Some(id);
285+
self.trailer_data.info_id = Some(id);
293286
self.indirect(id).start()
294287
}
295288

@@ -298,81 +291,120 @@ impl Pdf {
298291
///
299292
/// Panics if any indirect reference id was used twice.
300293
pub fn finish(self) -> Vec<u8> {
301-
let Chunk { mut buf, mut offsets } = self.chunk;
294+
let Chunk { mut buf, offsets } = self.chunk;
295+
let trailer_data = self.trailer_data;
296+
let xref_offset = buf.len();
297+
298+
let mut writer = PlainXRefWriter::new(&mut buf);
299+
let xref_len = write_offsets(offsets, &mut writer);
302300

303-
offsets.sort();
301+
// Write the trailer dictionary.
302+
buf.extend(b"trailer\n");
303+
let mut trailer = Obj::direct(&mut buf, 0).dict();
304+
trailer_data.write_into_dict(&mut trailer, xref_len);
305+
trailer.finish();
306+
307+
finish_trailer(buf, xref_offset)
308+
}
304309

305-
let xref_len = 1 + offsets.last().map_or(0, |p| p.0.get());
310+
/// TODO
311+
pub fn finish_with_xref_stream(
312+
self,
313+
xref_id: Ref,
314+
hook: Option<Box<dyn FnOnce(&[u8]) -> (Vec<u8>, Filter)>>,
315+
) -> Vec<u8> {
316+
let Chunk { mut buf, mut offsets } = self.chunk;
317+
let trailer_data = self.trailer_data;
318+
319+
// Include the reference of the xref stream in the offsets as well!
306320
let xref_offset = buf.len();
321+
offsets.push((xref_id, xref_offset));
322+
let field_width = determine_field_width(xref_offset);
323+
324+
let mut writer = XRefStreamWriter::new(field_width);
325+
let xref_len = write_offsets(offsets, &mut writer);
307326

308-
buf.extend(b"xref\n0 ");
309-
buf.push_int(xref_len);
310-
buf.push(b'\n');
327+
let (xref_data, filter) = if let Some(hook) = hook {
328+
let (xref_data, filter) = hook(&writer.buf);
329+
(xref_data, Some(filter))
330+
} else {
331+
(writer.buf, None)
332+
};
311333

312-
if offsets.is_empty() {
313-
write!(buf.inner, "0000000000 65535 f\r\n").unwrap();
334+
let mut stream = Stream::start(Obj::indirect(&mut buf, xref_id), &xref_data);
335+
336+
stream.pair(Name(b"Type"), Name(b"XRef"));
337+
338+
if let Some(filter) = filter {
339+
stream.filter(filter);
314340
}
315341

316-
let mut written = 0;
317-
for (i, (object_id, offset)) in offsets.iter().enumerate() {
318-
if written > object_id.get() {
319-
panic!("duplicate indirect reference id: {}", object_id.get());
320-
}
342+
trailer_data.write_into_dict(stream.deref_mut(), xref_len);
321343

322-
// Fill in free list.
323-
let start = written;
324-
for free_id in start..object_id.get() {
325-
let mut next = free_id + 1;
326-
if next == object_id.get() {
327-
// Find next free id.
328-
for (used_id, _) in &offsets[i..] {
329-
if next < used_id.get() {
330-
break;
331-
} else {
332-
next = used_id.get() + 1;
333-
}
334-
}
335-
}
344+
stream
345+
.insert(Name(b"W"))
346+
.array()
347+
.item(1)
348+
.item(field_width as i32)
349+
.item(2);
336350

337-
let gen = if free_id == 0 { "65535" } else { "00000" };
338-
write!(buf.inner, "{:010} {} f\r\n", next % xref_len, gen).unwrap();
339-
written += 1;
340-
}
351+
stream.finish();
341352

342-
write!(buf.inner, "{offset:010} 00000 n\r\n").unwrap();
343-
written += 1;
344-
}
353+
finish_trailer(buf, xref_offset)
354+
}
355+
}
345356

346-
// Write the trailer dictionary.
347-
buf.extend(b"trailer\n");
357+
fn finish_trailer(mut buf: Buf, xref_offset: usize) -> Vec<u8> {
358+
// Write startxref pointing to the xref stream
359+
buf.extend(b"\nstartxref\n");
360+
write!(buf.inner, "{}", xref_offset).unwrap();
348361

349-
let mut trailer = Obj::direct(&mut buf, 0).dict();
350-
trailer.pair(Name(b"Size"), xref_len);
362+
// Write EOF marker
363+
buf.extend(b"\n%%EOF");
364+
buf.into_vec()
365+
}
351366

352-
if let Some(catalog_id) = self.catalog_id {
353-
trailer.pair(Name(b"Root"), catalog_id);
354-
}
367+
fn write_offsets(mut offsets: Vec<(Ref, usize)>, writer: &mut impl XRefWriter) -> i32 {
368+
offsets.sort();
355369

356-
if let Some(info_id) = self.info_id {
357-
trailer.pair(Name(b"Info"), info_id);
358-
}
370+
let xref_len = 1 + offsets.last().map_or(0, |p| p.0.get());
371+
writer.prologue(xref_len);
359372

360-
if let Some(file_id) = self.file_id {
361-
let mut ids = trailer.insert(Name(b"ID")).array();
362-
ids.item(Str(&file_id.0));
363-
ids.item(Str(&file_id.1));
373+
if offsets.is_empty() {
374+
writer.write_free_entry(0, 65535);
375+
}
376+
377+
let mut written = 0;
378+
for (i, (object_id, offset)) in offsets.iter().enumerate() {
379+
if written > object_id.get() {
380+
panic!("duplicate indirect reference id: {}", object_id.get());
364381
}
365382

366-
trailer.finish();
383+
// Fill in free list.
384+
let start = written;
385+
for free_id in start..object_id.get() {
386+
let mut next = free_id + 1;
387+
if next == object_id.get() {
388+
// Find next free id.
389+
for (used_id, _) in &offsets[i..] {
390+
if next < used_id.get() {
391+
break;
392+
} else {
393+
next = used_id.get() + 1;
394+
}
395+
}
396+
}
367397

368-
// Write where the cross-reference table starts.
369-
buf.extend(b"\nstartxref\n");
370-
write!(buf.inner, "{xref_offset}").unwrap();
398+
let gen = if free_id == 0 { 65535 } else { 0 };
399+
writer.write_free_entry((next % xref_len) as usize, gen);
400+
written += 1;
401+
}
371402

372-
// Write the end of file marker.
373-
buf.extend(b"\n%%EOF");
374-
buf.into_vec()
403+
writer.write_occupied_entry(*offset, 0);
404+
written += 1;
375405
}
406+
407+
xref_len
376408
}
377409

378410
impl Debug for Pdf {
@@ -395,6 +427,106 @@ impl DerefMut for Pdf {
395427
}
396428
}
397429

430+
#[derive(Default)]
431+
struct TrailerData {
432+
catalog_id: Option<Ref>,
433+
info_id: Option<Ref>,
434+
file_id: Option<(Vec<u8>, Vec<u8>)>,
435+
}
436+
437+
impl TrailerData {
438+
fn write_into_dict(&self, dict: &mut Dict, xref_len: i32) {
439+
dict.pair(Name(b"Size"), xref_len);
440+
441+
if let Some(catalog_id) = self.catalog_id {
442+
dict.pair(Name(b"Root"), catalog_id);
443+
}
444+
445+
if let Some(info_id) = self.info_id {
446+
dict.pair(Name(b"Info"), info_id);
447+
}
448+
449+
if let Some(file_id) = &self.file_id {
450+
let mut ids = dict.insert(Name(b"ID")).array();
451+
ids.item(Str(&file_id.0));
452+
ids.item(Str(&file_id.1));
453+
}
454+
}
455+
}
456+
457+
/// A sink for cross-reference entries, so the same entry-generation logic
/// can produce different on-disk representations.
trait XRefWriter {
    /// Called once before any entry, with the total number of entries.
    fn prologue(&mut self, xref_len: i32);

    /// Write the entry of an unused object. For free entries `offset`
    /// carries the number of the next free object.
    fn write_free_entry(&mut self, offset: usize, gen_number: u16);

    /// Write the entry of an object located at byte `offset`.
    fn write_occupied_entry(&mut self, offset: usize, gen_number: u16);
}
462+
463+
/// Collects cross-reference entries in the binary layout used inside a
/// cross-reference stream.
///
/// Each entry is one type byte, `field_width` big-endian bytes for the
/// offset (or next free object number) and two big-endian bytes for the
/// generation number.
struct XRefStreamWriter {
    /// The encoded entries written so far.
    buf: Vec<u8>,
    /// Number of bytes used for the middle field of every entry.
    field_width: u32,
}

impl XRefStreamWriter {
    /// Create an empty writer whose middle field is `field_width` bytes wide.
    fn new(field_width: u32) -> Self {
        Self { buf: Vec::new(), field_width }
    }

    /// Append one entry of the given type.
    ///
    /// Exactly `field_width` bytes are emitted for `offset`. Values that
    /// need more bytes than that lose their high bytes, so the caller must
    /// pick a sufficient width up front. Widths beyond eight bytes are
    /// zero-padded on the left instead of underflowing the slice bounds.
    fn write(&mut self, entry_type: u8, offset: usize, gen_number: u16) {
        let offset_bytes = (offset as u64).to_be_bytes();
        let width = self.field_width as usize;
        let padding = width.saturating_sub(offset_bytes.len());
        let significant = width - padding;

        self.buf.push(entry_type);
        self.buf.resize(self.buf.len() + padding, 0);
        self.buf
            .extend_from_slice(&offset_bytes[offset_bytes.len() - significant..]);
        self.buf.extend_from_slice(&gen_number.to_be_bytes());
    }
}
487+
488+
impl XRefWriter for XRefStreamWriter {
    // The entry count is not part of the stream data, so nothing is
    // written up front.
    fn prologue(&mut self, _: i32) {}

    fn write_free_entry(&mut self, offset: usize, gen_number: u16) {
        // Entry type 0 marks a free object.
        const FREE: u8 = 0;
        self.write(FREE, offset, gen_number);
    }

    fn write_occupied_entry(&mut self, offset: usize, gen_number: u16) {
        // Entry type 1 marks an object stored at a byte offset.
        const IN_USE: u8 = 1;
        self.write(IN_USE, offset, gen_number);
    }
}
499+
500+
struct PlainXRefWriter<'a> {
501+
buf: &'a mut Buf,
502+
}
503+
504+
impl<'a> PlainXRefWriter<'a> {
505+
fn new(buf: &'a mut Buf) -> Self {
506+
Self { buf }
507+
}
508+
}
509+
510+
impl<'a> XRefWriter for PlainXRefWriter<'a> {
511+
fn prologue(&mut self, xref_len: i32) {
512+
self.buf.extend(b"xref\n0 ");
513+
self.buf.push_int(xref_len);
514+
self.buf.push(b'\n');
515+
}
516+
517+
fn write_free_entry(&mut self, offset: usize, gen_number: u16) {
518+
write!(self.buf.inner, "{offset:010} {gen_number:05} f\r\n").unwrap();
519+
}
520+
521+
fn write_occupied_entry(&mut self, offset: usize, gen_number: u16) {
522+
write!(self.buf.inner, "{offset:010} {gen_number:05} n\r\n").unwrap();
523+
}
524+
}
525+
526+
/// Return the number of bytes needed to store `offset` as a big-endian
/// unsigned integer.
///
/// The result is at least 1, even for `offset == 0`: a zero width in the
/// `W` array of a cross-reference stream would mean that the field is not
/// present at all.
fn determine_field_width(offset: usize) -> u32 {
    let significant_bits = usize::BITS - offset.leading_zeros();
    significant_bits.div_ceil(8).max(1)
}
529+
398530
#[cfg(test)]
399531
mod tests {
400532
use super::*;
@@ -517,4 +649,14 @@ mod tests {
517649
b"startxref\n16\n%%EOF",
518650
);
519651
}
652+
653+
#[test]
fn field_width() {
    // (offset, expected number of bytes), probing each side of the byte
    // boundaries.
    let cases = [
        (128, 1),
        (255, 1),
        (256, 2),
        (u16::MAX as usize, 2),
        (u16::MAX as usize + 1, 3),
        (u32::MAX as usize, 4),
    ];

    for (offset, expected) in cases {
        assert_eq!(determine_field_width(offset), expected);
    }
}
520662
}

0 commit comments

Comments
 (0)