diff --git a/lib/Markdown/Perl/BlockParser.pm b/lib/Markdown/Perl/BlockParser.pm index a0961d8..071adad 100644 --- a/lib/Markdown/Perl/BlockParser.pm +++ b/lib/Markdown/Perl/BlockParser.pm @@ -41,7 +41,8 @@ sub new { last_pos => 0, line_ending => '', continuation_re => qr//, - linkrefs => {} + linkrefs => {}, + matched_prefix_size => 0, }, $class; lock_keys_plus(%{$this}, qw(forced_line)); @@ -267,9 +268,12 @@ sub _test_lazy_continuation { sub _count_matching_blocks { my ($this, $lr) = @_; # $lr is a scalar *reference* to the current line text. + $this->{matched_prefix_size} += 0; for my $i (0 .. $#{$this->{blocks_stack}}) { local *::_ = $lr; - return $i unless $this->{blocks_stack}[$i]{cond}(); + my $r = $this->{blocks_stack}[$i]{cond}(); + $this->{matched_prefix_size} += $r if $r; + return $i unless $r; } return @{$this->{blocks_stack}}; } @@ -426,7 +430,8 @@ sub _do_indented_code_block { if (@{$this->{paragraph}} || $l !~ m/${indented_code_re}/) { return; } - my @code_lines = remove_prefix_spaces(4, $l.$this->line_ending(), $this->get_preserve_tabs); + my $preserve_tabs = !$this->get_code_blocks_convert_tabs_to_spaces; + my @code_lines = scalar(remove_prefix_spaces(4, $l.$this->line_ending(), $preserve_tabs)); my $count = 1; # The number of lines we have read my $valid_count = 1; # The number of lines we know are in the code block. my $valid_pos = $this->get_pos(); @@ -437,10 +442,10 @@ sub _do_indented_code_block { $valid_pos = $this->get_pos(); $valid_count = $count; push @code_lines, - remove_prefix_spaces(4, $nl.$this->line_ending(), $this->get_preserve_tabs); + scalar(remove_prefix_spaces(4, $nl.$this->line_ending(), $preserve_tabs)); } elsif ($nl eq '') { push @code_lines, - remove_prefix_spaces(4, $nl.$this->line_ending(), $this->get_preserve_tabs); + scalar(remove_prefix_spaces(4, $nl.$this->line_ending(), $preserve_tabs)); } else { last; } @@ -482,7 +487,7 @@ sub _do_fenced_code_block { last; } else { # We’re adding one line to the fenced code block - push @code_lines, remove_prefix_spaces($indent, $nl.$this->line_ending()); + push @code_lines, scalar(remove_prefix_spaces($indent, $nl.$this->line_ending())); } } else { # We’re out of our enclosing block and we haven’t seen the end of the @@ -544,11 +549,7 @@ sub _do_html_block { while (defined (my $nl = $this->next_line())) { if ($this->_all_blocks_match(\$nl)) { if ($nl !~ m/${html_end_condition}/) { - if ($this->get_preserve_tabs) { - push @html_lines, $nl.$this->line_ending(); - } else { - push @html_lines, remove_prefix_spaces(0, $nl.$this->line_ending(), 0); - } + push @html_lines, $nl.$this->line_ending(); } elsif ($nl eq '') { # This can only happen for rules 6 and 7 where the end condition # line is not part of the HTML block. @@ -582,14 +583,16 @@ sub _do_block_quotes { # the case of a line like '>\t\tfoo' where we need to retain the 6 # spaces of indentation, to produce a code block starting with two # spaces. - $_ = remove_prefix_spaces(length($1) + 1, $_); - return 1; + my $m; + ($_, $m) = remove_prefix_spaces(length($1) + 1, $_); + # Returns the matched horizontal size. + return $m; } return $this->_test_lazy_continuation($_); }; { local *::_ = \$l; - $cond->(); + $this->{matched_prefix_size} += $cond->(); } $this->{skip_next_block_matching} = 1; $this->_enter_child_block({type => 'quotes'}, $cond, qr/ {0,3}(?:> ?)?/, $l); @@ -609,7 +612,7 @@ sub _do_list_item { # compute the tab stops. This is better than nothing but won’t work inside # other container blocks. In all cases, using tabs instead of space should not # be encouraged. - my $text_indent = indent_size($text, $indent_marker); + my $text_indent = indent_size($text, $indent_marker + $this->{matched_prefix_size}); # When interrupting a paragraph, the rules are stricter. my $mode = $this->get_lists_can_interrupt_paragraph; if (@{$this->{paragraph}}) { @@ -637,7 +640,8 @@ sub _do_list_item { } if (indent_size($_) >= $indent) { $_ = remove_prefix_spaces($indent, $_); - return 1; + # Returns the matched horizontal size. + return $indent; } # TODO: we probably don’t need to test the list_item_re case here, just # the lazy continuation and the emptiness is enough. @@ -650,6 +654,7 @@ sub _do_list_item { # processing the condition and to correctly handle the case where the # list marker was followed by tabs. $forced_next_line = remove_prefix_spaces($indent, (' ' x $indent_marker).$text); + $this->{matched_prefix_size} = $indent; $this->{skip_next_block_matching} = 1; } # Note that we are handling the creation of the lists themselves in the diff --git a/lib/Markdown/Perl/Inlines.pm b/lib/Markdown/Perl/Inlines.pm index 36a34d5..ed6705c 100644 --- a/lib/Markdown/Perl/Inlines.pm +++ b/lib/Markdown/Perl/Inlines.pm @@ -516,10 +516,6 @@ sub process_styles { my $delim = delim_characters($that); my %max_delim_run_length = %{$that->get_inline_delimiters_max_run_length}; while (my @match = $tree->find_in_text(qr/([${delim}])\1*/, $current_child, 0)) { - # TODO: add an option to prevent some delimiters to be part of long run - # (e.g. max_delimiter_run_length), typically for ~ which can only be in run - # of lengths 2 according to GitHub spec (to not collide with code block - # probably). # We extract the delimiter run into a new node, that will be at $index. my ($delim_tree, $index) = $tree->extract($match[0], $match[1], $match[0], $match[2]); # We use the type literal so that if we do nothing with the delimiter it diff --git a/lib/Markdown/Perl/Options.pm b/lib/Markdown/Perl/Options.pm index 4b04a29..2614122 100644 --- a/lib/Markdown/Perl/Options.pm +++ b/lib/Markdown/Perl/Options.pm @@ -422,6 +422,18 @@ _make_option(code_blocks_info => 'language', _enum(qw(ignored language))); =pod +=head3 B I<(boolean, default: false)> + +By default, tabs are preserved inside codeblocks. With this option, all tabs (at +the beginning of the lines or inside) are turned into spaces, aligned with the +tab stops (currently always a multiple of 4). + +=cut + +_make_option(code_blocks_convert_tabs_to_spaces => 0, _boolean, (markdown => 1)); + +=pod + =head3 B I<(boolean, default: false)> Whether a table will have a cell in HTML for a missing cell in the markdown @@ -633,18 +645,6 @@ _make_option(force_final_new_line => 0, _boolean, (markdown => 1)); =pod -=head3 B I<(boolean, default: true)> - -When removing prefix spaces in front of some constructs (typically indented code -blocks), pmarkdown will try to preserve tabs when they are used instead of -space. If this option is set to false, prefix tabs will be turned into spaces. - -=cut - -_make_option(preserve_tabs => 1, _boolean, (markdown => 0)); - -=pod - =head3 B I<(boolean, default: true)> By default, pmarkdown will try to preserve lines that contains only whitespace diff --git a/lib/Markdown/Perl/Util.pm b/lib/Markdown/Perl/Util.pm index 5a1f24b..3f09d95 100644 --- a/lib/Markdown/Perl/Util.pm +++ b/lib/Markdown/Perl/Util.pm @@ -5,9 +5,11 @@ use warnings; use utf8; use feature ':5.24'; +use Carp; +use English; use Exporter 'import'; use List::MoreUtils 'first_index'; -use List::Util 'max'; +use List::Util 'max', 'min'; use Unicode::CaseFold 'fc'; our $VERSION = 0.01; @@ -30,35 +32,48 @@ sub split_while : prototype(&@) { ## no critic (RequireArgUnpacking) # matched to a tab-stop of size 4. # Removes all the spaces if there is less than that. # If needed, tabs are converted into 4 spaces. +# In list context, also returns how many spaces were actually matched. sub remove_prefix_spaces { my ($n, $text, $preserve_tabs) = @_; $preserve_tabs //= 1; # when not specified we do preserve tabs if (!$preserve_tabs) { my $s = indent_size($text); # this sets pos($text); - return (' ' x max(0, $s - $n)).(substr $text, pos($text)); + my $ret = (' ' x max(0, $s - $n)).(substr $text, pos($text)); + return $ret unless wantarray; + return ($ret, min($s, $n)); } my $t = int($n / 4); my $s = $n % 4; + my $m = 0; # How many spaces we have matched. for my $i (1 .. $t) { if ($text =~ m/^( {0,3}\t| {4})/) { # We remove one full tab-stop from the string. substr $text, 0, length($1), ''; + $m += 4; } else { # We didn’t have a full tab-stop, so we remove as many spaces as we had. - $text =~ m/^( {0,3})/; - return substr $text, length($1); ## no critic (ProhibitCaptureWithoutTest) + $text =~ m/^( {0,3})/ or confess 'Unexpected match failure'; + $m += $LAST_MATCH_END[0] - $LAST_MATCH_START[0]; + return substr $text, length($1) unless wantarray; + return ((substr $text, length($1)), $m); } } - return $text if $s == 0; - $text =~ m/^(?

\ {0,3}\t|\ {4})*?(?\ {0,3}\t|\ {4})?(?\ {0,3})(?[^ \t].*|$)/xs; ## no critic (ProhibitComplexRegexes) - my $ns = length $+{s}; - if ($ns >= $s) { - return ($+{p} // '').($+{l} // '').(' ' x ($ns - $s)).$+{e}; - } elsif (length($+{l})) { - return ($+{p} // '').(' ' x (4 + $ns - $s)).$+{e}; - } else { - return $+{e}; + if ($s != 0) { + $text =~ m/^(?

\ {0,3}\t|\ {4})*?(?\ {0,3}\t|\ {4})?(?\ {0,3})(?[^ \t].*|$)/xs; ## no critic (ProhibitComplexRegexes) + my $ns = length $+{s}; + if ($ns >= $s) { + $text = ($+{p} // '').($+{l} // '').(' ' x ($ns - $s)).$+{e}; + $m += $s; + } elsif (length($+{l})) { + $text = ($+{p} // '').(' ' x (4 + $ns - $s)).$+{e}; + $m += $s; + } else { + $text = $+{e}; + $m += $ns; + } } + return $text unless wantarray; + return ($text, $m); } # Return the indentation of the given text diff --git a/t/303-lists.t b/t/303-lists.t index 512a56e..8f55ad1 100644 --- a/t/303-lists.t +++ b/t/303-lists.t @@ -11,5 +11,6 @@ sub run { is(run("* a\n* b\n* c\n\n\nfoo"), "

    \n
  • a
  • \n
  • b
  • \n
  • c
  • \n
\n

foo

\n", 'list is tight'); is(run("1.\tfoo\n\n\tbar"), "
    \n
  1. foo

    \n

    bar

    \n
  2. \n
\n", 'indent_with_tabs_after_marker'); +is(run(">1.\tfoo\n>\n> bar"), "
\n
    \n
  1. foo

    \n

    bar

    \n
  2. \n
\n
\n", 'indent_with_tabs_after_marker_inside_block'); done_testing; diff --git a/t/902-markdown-test-suite.t b/t/902-markdown-test-suite.t index cb52912..3c64635 100644 --- a/t/902-markdown-test-suite.t +++ b/t/902-markdown-test-suite.t @@ -10,7 +10,17 @@ use MmdTest; use Test2::V0; # TODO: remove these todos. -my %opt = (todo => [16, 18, 21, 22]); +my %opt = ( + todo => [16, 18, 22], + # These are bugs in the Markdown "spec", not in our implementation. All of + # these have been tested to be buggy in the real Markdown.pl implementation. + bugs => [ + # The original implementation will emit tag for ***foo***, + # however this does not extrapolate well to other cases. In particular: + # ***foo** bar* is rendered as the buggy foo bar + 21, + ], +); while ($_ = shift) { $opt{test_num} = shift @ARGV if /^-n$/; diff --git a/t/lib/MmdTest.pm b/t/lib/MmdTest.pm index 293bbfb..b0fa2ad 100644 --- a/t/lib/MmdTest.pm +++ b/t/lib/MmdTest.pm @@ -42,6 +42,7 @@ sub test_suite { skip_all('MMD-Test-Suite must be checked out.') unless -d $test_dir; my $i = $opt{start_num} // 0; my %todo = map { $_ => 1 } @{$opt{todo} // []}; + my %bugs = map { $_ => 1 } @{$opt{bugs} // []}; my $ext = $opt{ext} // 'html'; for my $md_file (glob "${test_dir}/*.text") { $i++; @@ -52,6 +53,8 @@ sub test_suite { skip "Missing html file '${html_file}'" unless -f $html_file; if ($todo{$i}) { todo 'Not yet supported' => $test; + } elsif ($bugs{$i}) { + todo 'The spec is buggy' => $test; } else { $test->(); }