Skip to content

Commit

Permalink
Better handling of internal tabs in list items.
Browse files Browse the repository at this point in the history
  • Loading branch information
mkende committed Apr 6, 2024
1 parent 2bac801 commit 52cf880
Show file tree
Hide file tree
Showing 7 changed files with 76 additions and 46 deletions.
37 changes: 21 additions & 16 deletions lib/Markdown/Perl/BlockParser.pm
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,8 @@ sub new {
last_pos => 0,
line_ending => '',
continuation_re => qr//,
linkrefs => {}
linkrefs => {},
matched_prefix_size => 0,
}, $class;
lock_keys_plus(%{$this}, qw(forced_line));

Expand Down Expand Up @@ -267,9 +268,12 @@ sub _test_lazy_continuation {

sub _count_matching_blocks {
  # Counts how many of the currently open blocks (from the outermost one) still
  # match the given line.
  #
  # Arguments:
  #   $this: the parser object; reads {blocks_stack} and writes
  #          {matched_prefix_size}.
  #   $lr:   a scalar *reference* to the current line text. Each block condition
  #          sees that text as $_ and may modify it in place.
  #
  # Returns the index of the first block whose condition fails, or the number of
  # blocks in the stack when all the conditions pass.
  #
  # As a side effect, {matched_prefix_size} is set to the sum of the true values
  # returned by the conditions that matched on this line.
  my ($this, $lr) = @_;
  # The size is a per-line quantity, so it is reset before accumulating.
  $this->{matched_prefix_size} = 0;
  for my $i (0 .. $#{$this->{blocks_stack}}) {
    local *::_ = $lr;
    # The condition is evaluated exactly once per block: it can modify $_, so
    # calling it a second time would test an already modified line.
    my $r = $this->{blocks_stack}[$i]{cond}();
    return $i unless $r;
    $this->{matched_prefix_size} += $r;
  }
  return @{$this->{blocks_stack}};
}
Expand Down Expand Up @@ -426,7 +430,8 @@ sub _do_indented_code_block {
if (@{$this->{paragraph}} || $l !~ m/${indented_code_re}/) {
return;
}
my @code_lines = remove_prefix_spaces(4, $l.$this->line_ending(), $this->get_preserve_tabs);
my $preserve_tabs = !$this->get_code_blocks_convert_tabs_to_spaces;
my @code_lines = scalar(remove_prefix_spaces(4, $l.$this->line_ending(), $preserve_tabs));
my $count = 1; # The number of lines we have read
my $valid_count = 1; # The number of lines we know are in the code block.
my $valid_pos = $this->get_pos();
Expand All @@ -437,10 +442,10 @@ sub _do_indented_code_block {
$valid_pos = $this->get_pos();
$valid_count = $count;
push @code_lines,
remove_prefix_spaces(4, $nl.$this->line_ending(), $this->get_preserve_tabs);
scalar(remove_prefix_spaces(4, $nl.$this->line_ending(), $preserve_tabs));
} elsif ($nl eq '') {
push @code_lines,
remove_prefix_spaces(4, $nl.$this->line_ending(), $this->get_preserve_tabs);
scalar(remove_prefix_spaces(4, $nl.$this->line_ending(), $preserve_tabs));
} else {
last;
}
Expand Down Expand Up @@ -482,7 +487,7 @@ sub _do_fenced_code_block {
last;
} else {
# We’re adding one line to the fenced code block
push @code_lines, remove_prefix_spaces($indent, $nl.$this->line_ending());
push @code_lines, scalar(remove_prefix_spaces($indent, $nl.$this->line_ending()));
}
} else {
# We’re out of our enclosing block and we haven’t seen the end of the
Expand Down Expand Up @@ -544,11 +549,7 @@ sub _do_html_block {
while (defined (my $nl = $this->next_line())) {
if ($this->_all_blocks_match(\$nl)) {
if ($nl !~ m/${html_end_condition}/) {
if ($this->get_preserve_tabs) {
push @html_lines, $nl.$this->line_ending();
} else {
push @html_lines, remove_prefix_spaces(0, $nl.$this->line_ending(), 0);
}
push @html_lines, $nl.$this->line_ending();
} elsif ($nl eq '') {
# This can only happen for rules 6 and 7 where the end condition
# line is not part of the HTML block.
Expand Down Expand Up @@ -582,14 +583,16 @@ sub _do_block_quotes {
# the case of a line like '>\t\tfoo' where we need to retain the 6
# spaces of indentation, to produce a code block starting with two
# spaces.
$_ = remove_prefix_spaces(length($1) + 1, $_);
return 1;
my $m;
($_, $m) = remove_prefix_spaces(length($1) + 1, $_);
# Returns the matched horizontal size.
return $m;
}
return $this->_test_lazy_continuation($_);
};
{
local *::_ = \$l;
$cond->();
$this->{matched_prefix_size} += $cond->();
}
$this->{skip_next_block_matching} = 1;
$this->_enter_child_block({type => 'quotes'}, $cond, qr/ {0,3}(?:> ?)?/, $l);
Expand All @@ -609,7 +612,7 @@ sub _do_list_item {
# compute the tab stops. This is better than nothing but won’t work inside
# other container blocks. In all cases, using tabs instead of space should not
# be encouraged.
my $text_indent = indent_size($text, $indent_marker);
my $text_indent = indent_size($text, $indent_marker + $this->{matched_prefix_size});
# When interrupting a paragraph, the rules are stricter.
my $mode = $this->get_lists_can_interrupt_paragraph;
if (@{$this->{paragraph}}) {
Expand Down Expand Up @@ -637,7 +640,8 @@ sub _do_list_item {
}
if (indent_size($_) >= $indent) {
$_ = remove_prefix_spaces($indent, $_);
return 1;
# Returns the matched horizontal size.
return $indent;
}
# TODO: we probably don’t need to test the list_item_re case here, just
# the lazy continuation and the emptiness is enough.
Expand All @@ -650,6 +654,7 @@ sub _do_list_item {
# processing the condition and to correctly handle the case where the
# list marker was followed by tabs.
$forced_next_line = remove_prefix_spaces($indent, (' ' x $indent_marker).$text);
$this->{matched_prefix_size} = $indent;
$this->{skip_next_block_matching} = 1;
}
# Note that we are handling the creation of the lists themselves in the
Expand Down
4 changes: 0 additions & 4 deletions lib/Markdown/Perl/Inlines.pm
Original file line number Diff line number Diff line change
Expand Up @@ -516,10 +516,6 @@ sub process_styles {
my $delim = delim_characters($that);
my %max_delim_run_length = %{$that->get_inline_delimiters_max_run_length};
while (my @match = $tree->find_in_text(qr/([${delim}])\1*/, $current_child, 0)) {
# TODO: add an option to prevent some delimiters to be part of long run
# (e.g. max_delimiter_run_length), typically for ~ which can only be in run
# of lengths 2 according to GitHub spec (to not collide with code block
# probably).
# We extract the delimiter run into a new node, that will be at $index.
my ($delim_tree, $index) = $tree->extract($match[0], $match[1], $match[0], $match[2]);
# We use the type literal so that if we do nothing with the delimiter it
Expand Down
24 changes: 12 additions & 12 deletions lib/Markdown/Perl/Options.pm
Original file line number Diff line number Diff line change
Expand Up @@ -422,6 +422,18 @@ _make_option(code_blocks_info => 'language', _enum(qw(ignored language)));

=pod
=head3 B<code_blocks_convert_tabs_to_spaces> I<(boolean, default: false)>
By default, tabs are preserved inside code blocks. With this option, all tabs
(whether at the beginning of a line or inside it) are turned into spaces,
aligned with the tab stops (currently always a multiple of 4).
=cut

_make_option(code_blocks_convert_tabs_to_spaces => 0, _boolean, (markdown => 1));

=pod
=head3 B<table_blocks_have_cells_for_missing_data> I<(boolean, default: false)>
Whether a table will have a cell in HTML for a missing cell in the markdown
Expand Down Expand Up @@ -633,18 +645,6 @@ _make_option(force_final_new_line => 0, _boolean, (markdown => 1));

=pod
=head3 B<preserve_tabs> I<(boolean, default: true)>
When removing prefix spaces in front of some constructs (typically indented code
blocks), pmarkdown will try to preserve tabs when they are used instead of
spaces. If this option is set to false, prefix tabs will be turned into spaces.
=cut

_make_option(preserve_tabs => 1, _boolean, (markdown => 0));

=pod
=head3 B<preserve_white_lines> I<(boolean, default: true)>
By default, pmarkdown will try to preserve lines that contain only whitespace
Expand Down
41 changes: 28 additions & 13 deletions lib/Markdown/Perl/Util.pm
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,11 @@ use warnings;
use utf8;
use feature ':5.24';

use Carp;
use English;
use Exporter 'import';
use List::MoreUtils 'first_index';
use List::Util 'max';
use List::Util 'max', 'min';
use Unicode::CaseFold 'fc';

our $VERSION = 0.01;
Expand All @@ -30,35 +32,48 @@ sub split_while : prototype(&@) { ## no critic (RequireArgUnpacking)
# matched to a tab-stop of size 4.
# Removes all the spaces if there is less than that.
# If needed, tabs are converted into 4 spaces.
# In list context, also returns how many spaces were actually matched.
sub remove_prefix_spaces {
  # Arguments:
  #   $n:             how many columns of indentation to remove.
  #   $text:          the line to process.
  #   $preserve_tabs: optional, defaults to true. When false, the remaining
  #                   indentation is rebuilt using only spaces.
  #
  # Returns the processed text in scalar context. In list context, returns the
  # pair (text, matched) where matched is the number of columns that were
  # actually removed (a tab counts up to the next tab-stop of size 4).
  my ($n, $text, $preserve_tabs) = @_;
  $preserve_tabs //= 1;  # when not specified we do preserve tabs
  if (!$preserve_tabs) {
    my $s = indent_size($text);  # this sets pos($text);
    my $ret = (' ' x max(0, $s - $n)).(substr $text, pos($text));
    return wantarray ? ($ret, min($s, $n)) : $ret;
  }
  my $t = int($n / 4);  # Number of full tab-stops to remove.
  my $s = $n % 4;  # Number of extra columns to remove after these tab-stops.
  my $m = 0;  # How many columns we have matched.
  for my $i (1 .. $t) {
    if ($text =~ m/^( {0,3}\t| {4})/) {
      # We remove one full tab-stop from the string.
      substr $text, 0, length($1), '';
      $m += 4;
    } else {
      # We didn’t have a full tab-stop, so we remove as many spaces as we had.
      $text =~ m/^( {0,3})/ or confess 'Unexpected match failure';
      my $spaces = length($1);
      my $rest = substr $text, $spaces;
      $m += $spaces;
      return wantarray ? ($rest, $m) : $rest;
    }
  }
  if ($s != 0) {
    # The named groups are: p and l, optional leading tab-stops (l being the
    # last one before the trailing spaces); s, up to 3 spaces; and e, the rest
    # of the line.
    $text =~ m/^(?<p>\ {0,3}\t|\ {4})*?(?<l>\ {0,3}\t|\ {4})?(?<s>\ {0,3})(?<e>[^ \t].*|$)/xs;  ## no critic (ProhibitComplexRegexes)
    my $ns = length $+{s};
    if ($ns >= $s) {
      # There are enough plain spaces in front of the text to take $s of them.
      $text = ($+{p} // '').($+{l} // '').(' ' x ($ns - $s)).$+{e};
      $m += $s;
    } elsif (length($+{l})) {
      # Not enough spaces: the last tab-stop is expanded into spaces and the
      # missing columns are taken out of it.
      $text = ($+{p} // '').(' ' x (4 + $ns - $s)).$+{e};
      $m += $s;
    } else {
      # The indentation is smaller than requested, we remove all of it.
      $text = $+{e};
      $m += $ns;
    }
  }
  return wantarray ? ($text, $m) : $text;
}

# Return the indentation of the given text
Expand Down
1 change: 1 addition & 0 deletions t/303-lists.t
Original file line number Diff line number Diff line change
Expand Up @@ -11,5 +11,6 @@ sub run {

is(run("* a\n* b\n* c\n\n\nfoo"), "<ul>\n<li>a</li>\n<li>b</li>\n<li>c</li>\n</ul>\n<p>foo</p>\n", 'list is tight');
is(run("1.\tfoo\n\n\tbar"), "<ol>\n<li><p>foo</p>\n<p>bar</p>\n</li>\n</ol>\n", 'indent_with_tabs_after_marker');
is(run(">1.\tfoo\n>\n> bar"), "<blockquote>\n<ol>\n<li><p>foo</p>\n<p>bar</p>\n</li>\n</ol>\n</blockquote>\n", 'indent_with_tabs_after_marker_inside_block');

done_testing;
12 changes: 11 additions & 1 deletion t/902-markdown-test-suite.t
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,17 @@ use MmdTest;
use Test2::V0;

# TODO: remove these todos.
my %opt = (todo => [16, 18, 21, 22]);
my %opt = (
todo => [16, 18, 22],
# These are bugs in the Markdown "spec", not in our implementation. All of
# these have been tested to be buggy in the real Markdown.pl implementation.
bugs => [
# The original implementation will emit <strong><em> tag for ***foo***,
# however this does not extrapolate well to other cases. In particular:
# ***foo** bar* is rendered as the buggy <strong><em>foo</strong> bar</em>
21,
],
);

while ($_ = shift) {
$opt{test_num} = shift @ARGV if /^-n$/;
Expand Down
3 changes: 3 additions & 0 deletions t/lib/MmdTest.pm
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ sub test_suite {
skip_all('MMD-Test-Suite must be checked out.') unless -d $test_dir;
my $i = $opt{start_num} // 0;
my %todo = map { $_ => 1 } @{$opt{todo} // []};
my %bugs = map { $_ => 1 } @{$opt{bugs} // []};
my $ext = $opt{ext} // 'html';
for my $md_file (glob "${test_dir}/*.text") {
$i++;
Expand All @@ -52,6 +53,8 @@ sub test_suite {
skip "Missing html file '${html_file}'" unless -f $html_file;
if ($todo{$i}) {
todo 'Not yet supported' => $test;
} elsif ($bugs{$i}) {
todo 'The spec is buggy' => $test;
} else {
$test->();
}
Expand Down

0 comments on commit 52cf880

Please sign in to comment.