Skip to content

Commit e99004d

Browse files
authored
Add language escaping for text strings (#65)
This commit adds language escape sequences for Text Strings (see Section 7.9.2.2 of ISO 32000-1:2008). Language escape sequences allow for text strings whose language differs from the document's `Lang` entry or that contain multiple natural languages. Each text string that could be user-facing can now accept both plain and language-tagged text strings through the `TextStrLike` trait. The spec is a bit weird here. According to the PDF 2.0 spec, the escape characters should be encoded in UTF-16BE just like all other characters in the string. This is less clear for the language ID strings, but I assume the same encoding. This is in line with this [PDF Association blog post on multilingual alternative text](https://pdfa.org/two-languages-in-one-alt-try-that-with-html/). Anyway, nothing produces good output in Acrobat's Document Information viewer, where I tried it. Preview just ignores all bytes between two escape characters. And the [PDF Association article on Unicode text strings](https://pdfa.org/understanding-utf-8-in-pdf-2-0/) seems to have forgotten the trailing escape altogether; however, they claim that the feature is usable with `PDFDocEncoding`, which it is not.
1 parent 2bd7f43 commit e99004d

11 files changed

Lines changed: 220 additions & 51 deletions

File tree

src/annotations.rs

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
use super::*;
2+
use crate::object::TextStrLike;
23

34
/// Writer for an _annotation dictionary_.
45
///
@@ -30,7 +31,7 @@ impl Annotation<'_> {
3031

3132
/// Write the `/Contents` attribute. This is the content or alt-text,
3233
/// depending on the [`AnnotationType`].
33-
pub fn contents(&mut self, text: TextStr) -> &mut Self {
34+
pub fn contents(&mut self, text: impl TextStrLike) -> &mut Self {
3435
self.pair(Name(b"Contents"), text);
3536
self
3637
}
@@ -45,7 +46,7 @@ impl Annotation<'_> {
4546

4647
/// Write the `/NM` attribute. This uniquely identifies the annotation on the
4748
/// page. PDF 1.3+.
48-
pub fn name(&mut self, text: TextStr) -> &mut Self {
49+
pub fn name(&mut self, text: impl TextStrLike) -> &mut Self {
4950
self.pair(Name(b"NM"), text);
5051
self
5152
}
@@ -175,14 +176,14 @@ impl Annotation<'_> {
175176

176177
/// Write the `/T` attribute. This is in the title bar of markup annotations
177178
/// and should be the name of the annotation author. PDF 1.1+.
178-
pub fn author(&mut self, text: TextStr) -> &mut Self {
179+
pub fn author(&mut self, text: impl TextStrLike) -> &mut Self {
179180
self.pair(Name(b"T"), text);
180181
self
181182
}
182183

183184
/// Write the `/Subj` attribute. This is the subject of the annotation.
184185
/// PDF 1.5+.
185-
pub fn subject(&mut self, text: TextStr) -> &mut Self {
186+
pub fn subject(&mut self, text: impl TextStrLike) -> &mut Self {
186187
self.pair(Name(b"Subj"), text);
187188
self
188189
}
@@ -464,21 +465,25 @@ impl AppearanceCharacteristics<'_> {
464465

465466
/// Write the `/CA` attribute. This sets the widget annotation's normal
466467
/// caption. Only permissible for button fields.
467-
pub fn normal_caption(&mut self, caption: TextStr) -> &mut Self {
468+
pub fn normal_caption(&mut self, caption: impl TextStrLike) -> &mut Self {
468469
self.pair(Name(b"CA"), caption);
469470
self
470471
}
471472

472473
/// Write the `/RC` attribute. This sets the widget annotation's rollover
473474
/// (hover) caption. Only permissible for push button fields.
474-
pub fn rollover_caption(&mut self, caption: TextStr) -> &mut Self {
475+
///
476+
/// Note that this may be a Rich Text string depending on the annotation
477+
/// type, so you may be able to use some basic XHTML and XFA attributes.
478+
/// In these cases, untrusted input must be properly escaped.
479+
pub fn rollover_caption(&mut self, caption: impl TextStrLike) -> &mut Self {
475480
self.pair(Name(b"RC"), caption);
476481
self
477482
}
478483

479484
/// Write the `/AC` attribute. This sets the widget annotation's alternate
480485
/// (down) caption. Only permissible for push button fields.
481-
pub fn alternate_caption(&mut self, caption: TextStr) -> &mut Self {
486+
pub fn alternate_caption(&mut self, caption: impl TextStrLike) -> &mut Self {
482487
self.pair(Name(b"AC"), caption);
483488
self
484489
}

src/attributes.rs

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
1-
use crate::types::{ArtifactSubtype, ArtifactType};
2-
31
use super::*;
2+
use crate::object::TextStrLike;
3+
use crate::types::{ArtifactSubtype, ArtifactType};
44

55
/// Writer for an _attribute dictionary_. PDF 1.4+
66
///
@@ -90,7 +90,7 @@ writer!(UserProperty: |obj| Self { dict: obj.dict() });
9090

9191
impl UserProperty<'_> {
9292
/// Write the `/N` attribute to set the name of the property.
93-
pub fn name(&mut self, name: TextStr) -> &mut Self {
93+
pub fn name(&mut self, name: impl TextStrLike) -> &mut Self {
9494
self.dict.pair(Name(b"N"), name);
9595
self
9696
}
@@ -101,12 +101,13 @@ impl UserProperty<'_> {
101101
}
102102

103103
/// Write the `/F` attribute to set the format of the property.
104-
pub fn format(&mut self, format: TextStr) -> &mut Self {
104+
pub fn format(&mut self, format: impl TextStrLike) -> &mut Self {
105105
self.dict.pair(Name(b"F"), format);
106106
self
107107
}
108108

109-
/// Write the `/H` attribute to determine whether this property is hidden.
109+
/// Write the `/H` attribute to determine whether this property is hidden in
110+
/// the user interface.
110111
pub fn hidden(&mut self, hide: bool) -> &mut Self {
111112
self.dict.pair(Name(b"H"), hide);
112113
self
@@ -930,7 +931,7 @@ impl<'a> FieldAttributes<'a> {
930931
}
931932

932933
/// Write the `/Desc` attribute to set the description of the form control.
933-
pub fn description(&mut self, desc: TextStr) -> &mut Self {
934+
pub fn description(&mut self, desc: impl TextStrLike) -> &mut Self {
934935
self.dict.pair(Name(b"Desc"), desc);
935936
self
936937
}
@@ -1031,14 +1032,14 @@ impl<'a> TableAttributes<'a> {
10311032

10321033
/// Write the `/Summary` attribute to set the summary of the table. PDF
10331034
/// 1.7+.
1034-
pub fn summary(&mut self, summary: TextStr) -> &mut Self {
1035+
pub fn summary(&mut self, summary: impl TextStrLike) -> &mut Self {
10351036
self.dict.pair(Name(b"Summary"), summary);
10361037
self
10371038
}
10381039

10391040
/// Write the `/Short` attribute to set a short form of the table header's
10401041
/// content. PDF 2.0+.
1041-
pub fn short(&mut self, short: TextStr) -> &mut Self {
1042+
pub fn short(&mut self, short: impl TextStrLike) -> &mut Self {
10421043
self.dict.pair(Name(b"Short"), short);
10431044
self
10441045
}

src/color.rs

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
use super::*;
2+
use crate::object::TextStrLike;
23

34
/// CIE XYZ coordinates of the D65 noon daylight white.
45
const CIE_D65: [f32; 3] = [0.9505, 1.0, 1.0888];
@@ -1261,15 +1262,18 @@ impl OutputIntent<'_> {
12611262
/// Write the `/OutputCondition` attribute.
12621263
///
12631264
/// A human-readable description of the output condition.
1264-
pub fn output_condition(&mut self, condition: TextStr) -> &mut Self {
1265+
pub fn output_condition(&mut self, condition: impl TextStrLike) -> &mut Self {
12651266
self.dict.pair(Name(b"OutputCondition"), condition);
12661267
self
12671268
}
12681269

12691270
/// Write the `/OutputConditionIdentifier` attribute.
12701271
///
12711272
/// A well-known identifier for the output condition.
1272-
pub fn output_condition_identifier(&mut self, identifier: TextStr) -> &mut Self {
1273+
pub fn output_condition_identifier(
1274+
&mut self,
1275+
identifier: impl TextStrLike,
1276+
) -> &mut Self {
12731277
self.dict.pair(Name(b"OutputConditionIdentifier"), identifier);
12741278
self
12751279
}
@@ -1285,7 +1289,7 @@ impl OutputIntent<'_> {
12851289
/// Write the `/Info` attribute.
12861290
///
12871291
/// A human-readable string with additional info about the intended output device.
1288-
pub fn info(&mut self, info: TextStr) -> &mut Self {
1292+
pub fn info(&mut self, info: impl TextStrLike) -> &mut Self {
12891293
self.dict.pair(Name(b"Info"), info);
12901294
self
12911295
}

src/content.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
use super::*;
2+
use crate::object::TextStrLike;
23

34
/// A builder for a content stream.
45
pub struct Content {
@@ -995,7 +996,7 @@ impl<'a> PropertyList<'a> {
995996
/// Write the `/ActualText` attribute to indicate the text replacement of
996997
/// this marked content sequence. PDF 1.5+.
997998
#[inline]
998-
pub fn actual_text(&mut self, text: TextStr) -> &mut Self {
999+
pub fn actual_text(&mut self, text: impl TextStrLike) -> &mut Self {
9991000
self.pair(Name(b"ActualText"), text);
10001001
self
10011002
}

src/files.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
use super::*;
2+
use crate::object::TextStrLike;
23

34
/// Writer for a _file specification dictionary_.
45
///
@@ -44,7 +45,7 @@ impl FileSpec<'_> {
4445
}
4546

4647
/// Write the `/Desc` attribute to set a file description. PDF 1.6+.
47-
pub fn description(&mut self, desc: TextStr) -> &mut Self {
48+
pub fn description(&mut self, desc: impl TextStrLike) -> &mut Self {
4849
self.pair(Name(b"Desc"), desc);
4950
self
5051
}

src/forms.rs

Lines changed: 23 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
use super::*;
2+
use crate::object::TextStrLike;
23
use crate::types::AnnotationType;
34

45
/// Writer for an _interactive forms dictionary_. PDF 1.2+.
@@ -131,7 +132,7 @@ impl<'a> Field<'a> {
131132
/// messages). This text is also useful when extracting the document's
132133
/// contents in support of accessibility to users with disabilities or for
133134
/// other purposes. PDF 1.3+.
134-
pub fn alternate_name(&mut self, alternate: TextStr) -> &mut Self {
135+
pub fn alternate_name(&mut self, alternate: impl TextStrLike) -> &mut Self {
135136
self.pair(Name(b"TU"), alternate);
136137
self
137138
}
@@ -291,14 +292,14 @@ impl Field<'_> {
291292

292293
/// Write the `/V` attribute to set the value of this text field.
293294
/// Only permissible on text fields.
294-
pub fn text_value(&mut self, value: TextStr) -> &mut Self {
295+
pub fn text_value(&mut self, value: impl TextStrLike) -> &mut Self {
295296
self.pair(Name(b"V"), value);
296297
self
297298
}
298299

299300
/// Write the `/DV` attribute to set the default value of this text
300301
/// field. Only permissible on text fields.
301-
pub fn text_default_value(&mut self, value: TextStr) -> &mut Self {
302+
pub fn text_default_value(&mut self, value: impl TextStrLike) -> &mut Self {
302303
self.pair(Name(b"DV"), value);
303304
self
304305
}
@@ -332,7 +333,11 @@ impl Field<'_> {
332333

333334
/// Write the `/RV` attribute to set the value of this variable text field.
334335
/// Only permissible on fields containing variable text. PDF 1.5+.
335-
pub fn vartext_rich_value(&mut self, value: TextStr) -> &mut Self {
336+
///
337+
/// Note that this is a Rich Text string, so you can use some basic XHTML
338+
/// and XFA attributes. That also means that untrusted input must be
339+
/// properly escaped.
340+
pub fn vartext_rich_value(&mut self, value: impl TextStrLike) -> &mut Self {
336341
self.pair(Name(b"RV"), value);
337342
self
338343
}
@@ -443,30 +448,38 @@ writer!(ChoiceOptions: |obj| Self { array: obj.array() });
443448

444449
impl ChoiceOptions<'_> {
445450
/// Add an option with the given value.
446-
pub fn option(&mut self, value: TextStr) -> &mut Self {
451+
pub fn option(&mut self, value: impl TextStrLike) -> &mut Self {
447452
self.array.item(value);
448453
self
449454
}
450455

451456
/// Add options with the given values.
452-
pub fn options<'b>(
457+
pub fn options(
453458
&mut self,
454-
values: impl IntoIterator<Item = TextStr<'b>>,
459+
values: impl IntoIterator<Item = impl TextStrLike>,
455460
) -> &mut Self {
456461
self.array.items(values);
457462
self
458463
}
459464

460465
/// Add an option with the given value and export value.
461-
pub fn export(&mut self, value: TextStr, export_value: TextStr) -> &mut Self {
462-
self.array.push().array().items([export_value, value]);
466+
pub fn export(
467+
&mut self,
468+
value: impl TextStrLike,
469+
export_value: TextStr,
470+
) -> &mut Self {
471+
{
472+
let mut array = self.array.push().array();
473+
array.item(export_value);
474+
array.item(value);
475+
}
463476
self
464477
}
465478

466479
/// Add options with the given pairs of value and export value.
467480
pub fn exports<'b>(
468481
&mut self,
469-
values: impl IntoIterator<Item = (TextStr<'b>, TextStr<'b>)>,
482+
values: impl IntoIterator<Item = (impl TextStrLike, TextStr<'b>)>,
470483
) -> &mut Self {
471484
for (value, export) in values {
472485
self.export(value, export);

src/lib.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -194,8 +194,9 @@ pub use self::buf::{Buf, Limits};
194194
pub use self::chunk::Chunk;
195195
pub use self::content::Content;
196196
pub use self::object::{
197-
Array, Date, Dict, Filter, Finish, Name, Null, Obj, Primitive, Rect, Ref, Rewrite,
198-
Str, Stream, TextStr, TypedArray, TypedDict, Writer,
197+
Array, Date, Dict, Filter, Finish, LanguageIdentifier, Name, Null, Obj, Primitive,
198+
Rect, Ref, Rewrite, Str, Stream, TextStr, TextStrLike, TextStrWithLang, TypedArray,
199+
TypedDict, Writer,
199200
};
200201

201202
use std::fmt::{self, Debug, Formatter};

0 commit comments

Comments
 (0)