diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 813b3b7..a19c0c7 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -18,7 +18,7 @@ jobs:
#- windows-latest
raku-version:
- 'latest'
- - '2021.12'
+ - '2022.07'
runs-on: ${{ matrix.os }}
steps:
- uses: actions/checkout@v2
diff --git a/README.md b/README.md
index 7c2f3c1..d260958 100644
--- a/README.md
+++ b/README.md
@@ -188,12 +188,10 @@ my PDF::XObject::Image $img .= open: "t/images/lightbulb.gif";
my $figure = $doc.Figure: $gfx, $img, :position[50, 70], :Alt("A light-bulb");
```
-An [PDF::XObject::Form](https://pdf-raku.github.io/PDF-Class-raku/PDF/XObject/Form) may be associated with a marked content
-sub-tree. This is achieved by marking the form against a document fragment, then calling `do` to repeatably
-render the form, while inserting the fragment, as demonstrated below:
+A [PDF::XObject::Form](https://pdf-raku.github.io/PDF-Class-raku/PDF/XObject/Form) may be associated with a document fragment. The
+form can then be rendered, and the fragment inserted into the document, by repeatedly calling `do` on the fragment, as demonstrated below:
```raku
-
use PDF::Tags;
use PDF::Tags::Elem;
use PDF::Class;
@@ -222,20 +220,11 @@ $page.graphics: -> $gfx {
};
}
- # multiple rendering of the form, and insertion of its structure tree
+ # multiple rendering of the form, and insertion into the structure tree
$doc.do($gfx, $form-frag, :position[150, 70]);
$doc.do($gfx, $form-frag, :position[150, 20]);
}
-
```
-
-To insert an XObject Form that has marked content:
-
-1. Create a new fragment element.
-2. Create the Form XObject, marking content against the fragment
-3. The `do` method can then be used to both render and insert
-a copy of the fragment into the structure tree.
-
### Links
Links are usually contained in a block element, such as a `Paragraph`. If
diff --git a/docs/PDF/Tags/Mark.md b/docs/PDF/Tags/Mark.md
index 8b363e6..e2b0ce2 100644
--- a/docs/PDF/Tags/Mark.md
+++ b/docs/PDF/Tags/Mark.md
@@ -92,7 +92,7 @@ The Marked Content ID within the content stream. These are usually numbered in s
method value() returns PDF::Content::Tag
-The low-level [PDF::Content::Tag](https://pdf-raku.github.io/PDF-Content-raku) object, which contains further details on the tag:
+The low-level [PDF::Content::Tag](https://pdf-raku.github.io/PDF-Content-raku/PDF/Content/Tag) object, which contains further details on the tag:
* `canvas` - The owner of the content stream; a PDF::Page or PDF::XObject::Form object.
diff --git a/docs/PDF/Tags/Node.md b/docs/PDF/Tags/Node.md
index d8320de..cee287a 100644
--- a/docs/PDF/Tags/Node.md
+++ b/docs/PDF/Tags/Node.md
@@ -13,7 +13,7 @@ Methods
### method cos
-Returns the underlying [PDF::Class](https://pdf-raku.github.io/PDF-Class-raku) or [PDF::Content](https://pdf-raku.github.io/PDF-Content-raku) object. The [PDF::Tags::Node](https://pdf-raku.github.io/PDF-Tags-raku/PDF/Tags/Node) subclass and [PDF::COS](https://pdf-raku.github.io/PDF-raku) type are mapped as follows:
+Returns the underlying [PDF::Class](https://pdf-raku.github.io/PDF-Class-raku) or [PDF::Content](https://pdf-raku.github.io/PDF-Content-raku/PDF/Content) object. The [PDF::Tags::Node](https://pdf-raku.github.io/PDF-Tags-raku/PDF/Tags/Node) subclass and [PDF::COS](https://pdf-raku.github.io/PDF-raku) type are mapped as follows:
diff --git a/docs/PDF/Tags/XML-Writer.md b/docs/PDF/Tags/XML-Writer.md
index 26749c2..7135a57 100644
--- a/docs/PDF/Tags/XML-Writer.md
+++ b/docs/PDF/Tags/XML-Writer.md
@@ -12,10 +12,10 @@ Synopsis
--------
use PDF::Class;
- use PDF::Tags;
+ use PDF::Tags::Reader;
use PDF::Tags::XML-Writer;
my PDF::Class $pdf .= open: "t/write-tags.pdf";
- my PDF::Tags $tags .= read: :$pdf;
+ my PDF::Tags::Reader $tags .= read: :$pdf;
my PDF::Tags::XML-Writer $xml-writer .= new: :debug, :root-tag;
# atomic write
say $xml-writer.Str($tags);
diff --git a/docs/index.md b/docs/index.md
index 7c2f3c1..d260958 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -188,12 +188,10 @@ my PDF::XObject::Image $img .= open: "t/images/lightbulb.gif";
my $figure = $doc.Figure: $gfx, $img, :position[50, 70], :Alt("A light-bulb");
```
-An [PDF::XObject::Form](https://pdf-raku.github.io/PDF-Class-raku/PDF/XObject/Form) may be associated with a marked content
-sub-tree. This is achieved by marking the form against a document fragment, then calling `do` to repeatably
-render the form, while inserting the fragment, as demonstrated below:
+A [PDF::XObject::Form](https://pdf-raku.github.io/PDF-Class-raku/PDF/XObject/Form) may be associated with a document fragment. The
+form can then be rendered, and the fragment inserted into the document, by repeatedly calling `do` on the fragment, as demonstrated below:
```raku
-
use PDF::Tags;
use PDF::Tags::Elem;
use PDF::Class;
@@ -222,20 +220,11 @@ $page.graphics: -> $gfx {
};
}
- # multiple rendering of the form, and insertion of its structure tree
+ # multiple rendering of the form, and insertion into the structure tree
$doc.do($gfx, $form-frag, :position[150, 70]);
$doc.do($gfx, $form-frag, :position[150, 20]);
}
-
```
-
-To insert an XObject Form that has marked content:
-
-1. Create a new fragment element.
-2. Create the Form XObject, marking content against the fragment
-3. The `do` method can then be used to both render and insert
-a copy of the fragment into the structure tree.
-
### Links
Links are usually contained in a block element, such as a `Paragraph`. If
diff --git a/lib/PDF/Tags/Mark.rakumod b/lib/PDF/Tags/Mark.rakumod
index aea027b..fdf2953 100644
--- a/lib/PDF/Tags/Mark.rakumod
+++ b/lib/PDF/Tags/Mark.rakumod
@@ -84,13 +84,23 @@ class PDF::Tags::Mark
fail "todo: update marked content attributes";
callsame();
}
+
+ sub sanitize(Str $_) {
+ # ActualText sometimes contains stray control characters (e.g. backspaces); strip all of U+0000..U+0008
+ .subst(
+ /<[ \x0..\x8 ]>/,
+ '',
+ :g
+ );
+ }
+
method ActualText {
$.attributes unless $!atts-built;
- $!actual-text //= PDF::COS::TextString.COERCE: $_
+ $!actual-text //= sanitize PDF::COS::TextString.COERCE: $_
with %!attributes;
$!actual-text;
}
- method remove-actual-text {
+ method remove-actual-text is DEPRECATED {
with $.ActualText {
$!actual-text = Nil;
$!value.attributes:delete;
diff --git a/lib/PDF/Tags/Node.rakumod b/lib/PDF/Tags/Node.rakumod
index cdd5133..f448197 100644
--- a/lib/PDF/Tags/Node.rakumod
+++ b/lib/PDF/Tags/Node.rakumod
@@ -76,7 +76,7 @@ class PDF::Tags::Node {
Returns the underlying L<PDF::Class> or L<PDF::Content> object. The L<PDF::Tags::Node> subclass and L<PDF::COS> type are mapped as follows:
=begin table
-PDF::Tags::Node object | PDF::Class object |Base class | Notes
+PDF::Tags::Node object | PDF::Class object |Base class | Notes
=================================================
PDF::Tags | PDF::StructTreeRoot | PDF::Tags::Node::Parent | PDF structure tree root
PDF::Tags::Elem | PDF::StructElem | PDF::Tags::Node::Parent | Intermediate structure element node
diff --git a/lib/PDF/Tags/Node/Parent.rakumod b/lib/PDF/Tags/Node/Parent.rakumod
index 7522023..5608040 100644
--- a/lib/PDF/Tags/Node/Parent.rakumod
+++ b/lib/PDF/Tags/Node/Parent.rakumod
@@ -16,7 +16,7 @@ class PDF::Tags::Node::Parent
has UInt $!elems;
has $.style is rw; # Computed CSS style
- method elems is also {
+ method elems(::?CLASS:D:) is also {
$!elems //= do with $.cos.kids {
when Hash { 1 }
default { .elems }
diff --git a/lib/PDF/Tags/XML-Writer.rakumod b/lib/PDF/Tags/XML-Writer.rakumod
index f37e242..7e34c65 100644
--- a/lib/PDF/Tags/XML-Writer.rakumod
+++ b/lib/PDF/Tags/XML-Writer.rakumod
@@ -49,16 +49,15 @@ method !chunk(Str $s is copy, UInt $depth = 0) {
method !line(|c) { $!feed = True; self!chunk(|c); $!feed = True; }
method !frag(|c) { $*inline ?? self!chunk(|c) !! self!line(|c) }
-sub html-escape(Str $_) {
+sub xml-escape(Str $_) {
.trans:
/\&/ => '&amp;',
/\</ => '&lt;',
/\>/ => '&gt;',
-
}
multi sub str-escape(@a) { @a.map(&str-escape).join: ' '; }
multi sub str-escape(Str $_) {
- html-escape($_).trans: /\"/ => '"e;';
+ xml-escape($_).trans: /\"/ => '&quot;';
}
multi sub str-escape(Pair $_) { str-escape(.value) }
multi sub str-escape($_) is default { str-escape(.Str) }
@@ -233,7 +232,7 @@ multi method stream-xml(PDF::Tags::Elem $node, UInt :$depth is copy = 0) {
}
}
- given html-escape($_) {
+ given xml-escape($_) {
my $frag = do {
when $omit-tag.so { $_ }
when .so { '<%s%s>%s</%s>'.sprintf($name, $att, $_, $name) }
@@ -287,7 +286,7 @@ multi method stream-xml(PDF::Tags::Mark $node, :$depth!) {
multi method stream-xml(PDF::Tags::Text $_, :$depth!) {
if .Str -> $text {
- self!chunk(html-escape($text), $depth);
+ self!chunk(xml-escape($text), $depth);
}
}
@@ -297,7 +296,7 @@ method !marked-content(PDF::Tags::Mark $node, :$depth!) {
when PDF::Tags::Mark {
my $text = self!marked-content($_, :$depth);
}
- when PDF::Tags::Text { html-escape(.Str) }
+ when PDF::Tags::Text { xml-escape(.Str) }
default { die "unhandled tagged content: {.WHAT.raku}"; }
}
@text.join;
@@ -319,10 +318,10 @@ method !marked-content(PDF::Tags::Mark $node, :$depth!) {
=head2 Synopsis
use PDF::Class;
- use PDF::Tags;
+ use PDF::Tags::Reader;
use PDF::Tags::XML-Writer;
my PDF::Class $pdf .= open: "t/write-tags.pdf";
- my PDF::Tags $tags .= read: :$pdf;
+ my PDF::Tags::Reader $tags .= read: :$pdf;
my PDF::Tags::XML-Writer $xml-writer .= new: :debug, :root-tag;
# atomic write
say $xml-writer.Str($tags);