Skip to content

Commit

Permalink
feat: keep the original content spacing after the list marker (#196)
Browse files Browse the repository at this point in the history
A list item such as ` -   list item` consists of margin indentation
 (A), a list marker (B), 1-4 spaces (C) and contents.
The old code recorded the width from A to C as `ListItem.prepend` and
 B as `ListItem.leader`,
but the Markdown renderer always skipped the margin indentation and output
exactly 1 space after every list marker, such as `- list item`.

After this change, the width of A is recorded as `ListItem.indentation`,
so that the Markdown renderer can correctly restore both A and C.
However, because the formatting features of other tokens skip margin indentation (A),
the Markdown renderer, for consistency across all formatting features,
currently restores only C, such as `-   list item`.
  • Loading branch information
allets authored and pbodnar committed Nov 28, 2023
1 parent 5f4c550 commit ee7ce94
Show file tree
Hide file tree
Showing 4 changed files with 73 additions and 19 deletions.
29 changes: 17 additions & 12 deletions mistletoe/block_token.py
Original file line number Diff line number Diff line change
Expand Up @@ -486,7 +486,7 @@ def check_interrupts_paragraph(cls, lines):
# and the list must either be unordered or start from 1.
marker_tuple = ListItem.parse_marker(lines.peek())
if (marker_tuple is not None):
_, leader, content = marker_tuple
_, _, leader, content = marker_tuple
if not content.strip() == '':
return not leader[0].isdigit() or leader in ['1.', '1)']
return False
Expand All @@ -499,7 +499,7 @@ def read(cls, lines):
while True:
anchor = lines.get_pos()
output, next_marker = ListItem.read(lines, next_marker)
item_leader = output[2]
item_leader = output[3]
if leader is None:
leader = item_leader
elif not cls.same_marker_type(leader, item_leader):
Expand Down Expand Up @@ -532,16 +532,18 @@ class ListItem(BlockToken):
Attributes:
leader (string): a bullet list marker or an ordered list marker.
indentation (int): spaces before the leader.
prepend (int): the start position of the content, i.e., the indentation required
for continuation lines.
loose (bool): whether the list is loose.
"""
repr_attributes = ("leader", "prepend", "loose")
pattern = re.compile(r' {0,3}(\d{0,9}[.)]|[+\-*])($|\s+)')
repr_attributes = ("leader", "indentation", "prepend", "loose")
pattern = re.compile(r'( {0,3})(\d{0,9}[.)]|[+\-*])($|\s+)')
continuation_pattern = re.compile(r'([ \t]*)(\S.*\n|\n)')

def __init__(self, parse_buffer, prepend, leader):
def __init__(self, parse_buffer, indentation, prepend, leader):
self.leader = leader
self.indentation = indentation
self.prepend = prepend
self.children = tokenizer.make_tokens(parse_buffer)
self.loose = parse_buffer.loose
Expand Down Expand Up @@ -574,22 +576,25 @@ def parse_marker(cls, line):
The leader is a bullet list marker, or an ordered list marker.
The indentation is spaces before the leader.
The prepend is the start position of the content, i.e., the indentation required
for continuation lines.
"""
match_obj = cls.pattern.match(line)
if match_obj is None:
return None
indentation = len(match_obj.group(1))
prepend = len(match_obj.group(0).expandtabs(4))
leader = match_obj.group(1)
leader = match_obj.group(2)
content = line[match_obj.end(0):]
n_spaces = prepend - match_obj.end(1)
n_spaces = prepend - match_obj.end(2)
if n_spaces > 4:
# if there are more than 4 spaces after the leader, we treat them as part of the content
# with the exception of the first (marker separator) space.
prepend -= n_spaces - 1
content = ' ' * (n_spaces - 1) + content
return prepend, leader, content
return indentation, prepend, leader, content

@classmethod
def read(cls, lines, prev_marker=None):
Expand All @@ -599,10 +604,10 @@ def read(cls, lines, prev_marker=None):
# first line
line = next(lines)
next_line = lines.peek()
prepend, leader, content = prev_marker if prev_marker else cls.parse_marker(line)
indentation, prepend, leader, content = prev_marker if prev_marker else cls.parse_marker(line)
if content.strip() == '':
# item starting with a blank line: look for the next non-blank line
prepend = len(leader) + 1
prepend = indentation + len(leader) + 1
blanks = 1
while next_line is not None and next_line.strip() == '':
blanks += 1
Expand All @@ -614,7 +619,7 @@ def read(cls, lines, prev_marker=None):
parse_buffer = tokenizer.ParseBuffer()
parse_buffer.loose = True
next_marker = cls.parse_marker(next_line) if next_line is not None else None
return (parse_buffer, prepend, leader), next_marker
return (parse_buffer, indentation, prepend, leader), next_marker
else:
line_buffer.append(content)

Expand Down Expand Up @@ -659,7 +664,7 @@ def read(cls, lines, prev_marker=None):
# block-level tokens are parsed here, so that footnotes can be
# recognized before span-level parsing.
parse_buffer = tokenizer.tokenize_block(line_buffer, _token_types)
return (parse_buffer, prepend, leader), next_marker
return (parse_buffer, indentation, prepend, leader), next_marker


class Table(BlockToken):
Expand Down
6 changes: 4 additions & 2 deletions mistletoe/markdown_renderer.py
Original file line number Diff line number Diff line change
Expand Up @@ -302,15 +302,17 @@ def render_list(
def render_list_item(
self, token: block_token.ListItem, max_line_length: int
) -> Iterable[str]:
indentation = len(token.leader) + 1
indentation = token.prepend - token.indentation
max_child_line_length = (
max_line_length - indentation if max_line_length else None
)
lines = self.blocks_to_lines(
token.children, max_line_length=max_child_line_length
)
return self.prefix_lines(
list(lines) or [""], token.leader + " ", " " * indentation
list(lines) or [""],
token.leader + " " * (indentation - len(token.leader)),
" " * indentation
)

def render_table(
Expand Down
53 changes: 50 additions & 3 deletions test/test_markdown_renderer.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,11 +130,11 @@ def test_setext_headings(self):

def test_numbered_list(self):
input = [
" 22) *emphasized list item*\n",
" 96)\n",
" 22) *emphasized list item*\n",
" 96) \n",
" 128) here begins a nested list.\n",
" + apples\n",
" + bananas\n",
" + bananas\n",
]
output = self.roundtrip(input)
expected = [
Expand All @@ -157,6 +157,53 @@ def test_bulleted_list(self):
output = self.roundtrip(input)
self.assertEqual(output, "".join(input))

# we don't currently support keeping margin indentation:
def test_list_item_margin_indentation_not_preserved(self):
# 0 to 4 spaces of indentation from the margin
input = [
"- 0 space: ok.\n",
" subsequent line.\n",
" - 1 space: ok.\n",
" subsequent line.\n",
" - 2 spaces: ok.\n",
" subsequent line.\n",
" - 3 spaces: ok.\n",
" subsequent line.\n",
" - 4 spaces: in the paragraph of the above list item.\n",
" subsequent line.\n",
]
output = self.roundtrip(input)
expected = [
"- 0 space: ok.\n",
" subsequent line.\n",
"- 1 space: ok.\n",
" subsequent line.\n",
"- 2 spaces: ok.\n",
" subsequent line.\n",
"- 3 spaces: ok.\n",
" subsequent line.\n",
" - 4 spaces: in the paragraph of the above list item.\n",
" subsequent line.\n",
]
self.assertEqual(output, "".join(expected))

def test_list_item_indentation_after_leader_preserved(self):
# leaders followed by 1 to 5 spaces
input = [
"- 1 space: ok.\n",
" subsequent line.\n",
"- 2 spaces: ok.\n",
" subsequent line.\n",
"- 3 spaces: ok.\n",
" subsequent line.\n",
"- 4 spaces: ok.\n",
" subsequent line.\n",
"- 5 spaces: list item starting with indented code.\n",
" subsequent line.\n",
]
output = self.roundtrip(input)
self.assertEqual(output, "".join(input))

def test_code_blocks(self):
input = [
" this is an indented code block\n",
Expand Down
4 changes: 2 additions & 2 deletions test/test_repr.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,12 +45,12 @@ def test_codefence(self):
def test_unordered_list(self):
doc = Document("* Foo\n* Bar\n* Baz")
self._check_repr_matches(doc.children[0], "block_token.List with 3 children loose=False start=None")
self._check_repr_matches(doc.children[0].children[0], "block_token.ListItem with 1 child leader='*' prepend=2 loose=False")
self._check_repr_matches(doc.children[0].children[0], "block_token.ListItem with 1 child leader='*' indentation=0 prepend=2 loose=False")

def test_ordered_list(self):
doc = Document("1. Foo\n2. Bar\n3. Baz")
self._check_repr_matches(doc.children[0], "block_token.List with 3 children loose=False start=1")
self._check_repr_matches(doc.children[0].children[0], "block_token.ListItem with 1 child leader='1.' prepend=3 loose=False")
self._check_repr_matches(doc.children[0].children[0], "block_token.ListItem with 1 child leader='1.' indentation=0 prepend=3 loose=False")

def test_table(self):
doc = Document("| Foo | Bar | Baz |\n|:--- |:---:| ---:|\n| Foo | Bar | Baz |\n")
Expand Down

0 comments on commit ee7ce94

Please sign in to comment.