Skip to content

Commit 1d8ee00

Browse files
committed
epub: fix fatal errors while parsing EPUB files
After generating the EPUB file for the Elixir docs with this version and reviewing the result with `epubcheck`, I got the following summary: ```console $ epubcheck doc/elixir/Elixir.epub --json elixir_docs.json (base) Check finished with errors Messages: 0 fatals / 141 errors / 0 warnings / 0 infos ``` If you compare this with the previous result reported in #1851 ``` Messages: 9 fatals / 425 errors / 0 warnings / 0 infos ``` you can see that we no longer have any messages with `fatal` severity and that the number of errors has been reduced considerably =) I manually checked the generated EPUB on Apple Books: the previously truncated sections are fixed, I no longer see the banner _Below is a rendering of the page up to the first error_, and the links to the different anchors seem to work. Fixes: #1851
1 parent 67e03ea commit 1d8ee00

File tree

3 files changed

+49
-2
lines changed

3 files changed

+49
-2
lines changed

lib/ex_doc/formatter/epub.ex

+27-2
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,23 @@ defmodule ExDoc.Formatter.EPUB do
5050
Path.relative_to_cwd(epub)
5151
end
5252

53+
@doc """
54+
Helper that replaces anchor names and links that could potentially cause problems in EPUB documents.
55+
56+
This helper replaces all the `&` with `&amp;` found in anchors like
57+
`Kernel.xhtml#&&/2` or `<h2 id="&&&/2-examples" class="section-heading">...</h2>`
58+
59+
These anchor names cause a fatal error while EPUB readers parse the files,
60+
resulting in truncated content.
61+
62+
For more details, see: https://github.com/elixir-lang/ex_doc/issues/1851
63+
"""
64+
def fix_anchors(content) do
65+
content
66+
|> String.replace(~r{id="&+/\d+[^"]*}, &String.replace(&1, "&", "&amp;"))
67+
|> String.replace(~r{href="[^#"]*#&+/\d+[^"]*}, &String.replace(&1, "&", "&amp;"))
68+
end
69+
5370
defp normalize_config(config) do
5471
output =
5572
config.output
@@ -63,7 +80,11 @@ defmodule ExDoc.Formatter.EPUB do
6380
for {_title, extras} <- config.extras do
6481
Enum.each(extras, fn %{id: id, title: title, title_content: title_content, content: content} ->
6582
output = "#{config.output}/OEBPS/#{id}.xhtml"
66-
html = Templates.extra_template(config, title, title_content, content)
83+
84+
html =
85+
config
86+
|> Templates.extra_template(title, title_content, content)
87+
|> fix_anchors()
6788

6889
if File.regular?(output) do
6990
ExDoc.Utils.warn("file #{Path.relative_to_cwd(output)} already exists", [])
@@ -157,7 +178,11 @@ defmodule ExDoc.Formatter.EPUB do
157178
end
158179

159180
defp generate_module_page(module_node, config) do
160-
content = Templates.module_page(config, module_node)
181+
content =
182+
config
183+
|> Templates.module_page(module_node)
184+
|> fix_anchors()
185+
161186
File.write("#{config.output}/OEBPS/#{module_node.id}.xhtml", content)
162187
end
163188

test/ex_doc/formatter/epub_test.exs

+18
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,9 @@ defmodule ExDoc.Formatter.EPUBTest do
151151
assert content =~
152152
~r{<a href="TypesAndSpecs.Sub.xhtml"><code(\sclass="inline")?>TypesAndSpecs.Sub</code></a>}
153153

154+
assert content =~
155+
~r{<a href="https://hexdocs.pm/elixir/Kernel.html#&amp;&amp;/2"><code(\sclass="inline")?>&amp;&amp;/2</code></a>}
156+
154157
content = File.read!(tmp_dir <> "/epub/OEBPS/nav.xhtml")
155158
assert content =~ ~r{<li><a href="readme.xhtml">README</a></li>}
156159
end
@@ -248,4 +251,19 @@ defmodule ExDoc.Formatter.EPUBTest do
248251
after
249252
File.rm_rf!("test/tmp/epub_assets")
250253
end
254+
255+
describe "fix_anchors/1" do
256+
test "adapts anchor names to avoid parsing errors from EPUB readers" do
257+
for {source, expected} <- [
258+
{~S|<a href="Kernel.SpecialForms.xhtml#&/1">its documentation</a>|,
259+
~S|<a href="Kernel.SpecialForms.xhtml#&amp;/1">its documentation</a>|},
260+
{~S|<a href="Kernel.xhtml#&&/2"><code class="inline">&amp;&amp;/2</code></a>|,
261+
~S|<a href="Kernel.xhtml#&amp;&amp;/2"><code class="inline">&amp;&amp;/2</code></a>|},
262+
{~S|<h2 id="&&&/2-examples" class="section-heading">title</h2>|,
263+
~S|<h2 id="&amp;&amp;&amp;/2-examples" class="section-heading">title</h2>|}
264+
] do
265+
assert ExDoc.Formatter.EPUB.fix_anchors(source) == expected
266+
end
267+
end
268+
end
251269
end

test/fixtures/README.md

+4
Original file line numberDiff line numberDiff line change
@@ -15,3 +15,7 @@ hello
1515
## more > than
1616

1717
<p><strong>raw content</strong></p>
18+
19+
The following text includes a reference to an anchor that causes problems in EPUB documents.
20+
21+
To remove this anti-pattern, we can replace `&&/2`, `||/2`, and `!/1` by `and/2`, `or/2`, and `not/1` respectively.

0 commit comments

Comments
 (0)