-
Notifications
You must be signed in to change notification settings - Fork 7
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #37 from krassowski/plain-text
Add plain text and cPython docstring support
- Loading branch information
Showing
8 changed files
with
282 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
from re import sub | ||
|
||
|
||
def escape_markdown(text: str) -> str:
    """Backslash-escape the Markdown control characters found in ``text``.

    Every backslash, ``#``, ``*``, ``_``, ``[`` and ``]`` is prefixed with a
    single backslash; all other characters are returned unchanged.
    """
    special_characters = r'([\\#*_[\]])'
    # A literal backslash followed by the character that was matched.
    escaped_form = r'\\\1'
    return sub(special_characters, escaped_form, text)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
from typing import Union, List | ||
from re import fullmatch | ||
|
||
from ._utils import escape_markdown | ||
|
||
def _is_cpython_signature_line(line: str) -> bool: | ||
"""CPython uses signature lines in the following format: | ||
str(bytes_or_buffer[, encoding[, errors]]) -> str | ||
""" | ||
return fullmatch(r'\w+\(\S*(, \S+)*(\[, \S+\])*\)\s--?>\s.+', line) is not None | ||
|
||
|
||
def cpython_to_markdown(text: str) -> Union[str, None]:
    """Convert a CPython-style docstring to Markdown.

    The leading signature lines, together with the indented continuation
    lines that follow them, are wrapped in a fenced code block. All remaining
    lines are Markdown-escaped and placed after the fence in their original
    order.

    Args:
        text: the docstring to convert.

    Returns:
        The Markdown text, or ``None`` when ``text`` does not begin with a
        CPython signature line (which includes empty input).
    """
    signature_lines: List[str] = []
    other_lines: List[str] = []
    for line in text.splitlines():
        in_signature_section = not other_lines
        if in_signature_section and _is_cpython_signature_line(line):
            signature_lines.append(line)
        elif not signature_lines:
            # The first line is not a signature, so this is not a CPython
            # docstring at all.
            return None
        elif in_signature_section and line.startswith(' '):
            # Indented continuation of the preceding signature. Only accepted
            # before the description starts; otherwise indented description
            # lines would be hoisted into the code fence, out of order.
            signature_lines.append(line)
        else:
            other_lines.append(line)
    if not signature_lines:
        # No lines at all (e.g. empty input): nothing to put in the fence.
        return None
    return '\n'.join([
        '```',
        '\n'.join(signature_lines),
        '```',
        escape_markdown('\n'.join(other_lines))
    ])
|
||
def looks_like_cpython(text: str) -> bool:
    """Return True when ``cpython_to_markdown`` is able to convert ``text``."""
    converted = cpython_to_markdown(text)
    return converted is not None
|
||
|
||
__all__ = ['looks_like_cpython', 'cpython_to_markdown'] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
from re import fullmatch | ||
from ._utils import escape_markdown | ||
|
||
|
||
def looks_like_plain_text(value: str) -> bool:
    """Tell whether ``value`` reads as plain English prose that needs no escaping.

    The whole string must be built from these pieces only:
    - word characters and whitespace
    - a full stop, bang, question mark, closing parenthesis or quote right
      after a word character, when followed by whitespace or the end of string
    - a comma, colon or semicolon right after a word character and followed
      by whitespace
    - a dash or single quote between two word characters (like in `e-mail`)
    - an opening quote or parenthesis preceded by a word character and
      whitespace

    Anything else is rejected, for example square brackets (used in markdown
    a lot). Text containing an underscore is always rejected, and so is the
    empty string.
    """
    plain_text_pattern = r"((\w[\.!\?\)'\"](\s|$))|(\w[,:;]\s)|(\w[-']\w)|(\w\s['\"\(])|\w|\s)+"
    # `\w` would accept underscores, so they are ruled out explicitly.
    has_underscore = '_' in value
    return not has_underscore and fullmatch(plain_text_pattern, value) is not None
|
||
|
||
def plain_text_to_markdown(text: str) -> str:
    """Return ``text`` with its Markdown control characters escaped."""
    markdown = escape_markdown(text)
    return markdown
|
||
__all__ = ['looks_like_plain_text', 'plain_text_to_markdown'] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
from docstring_to_markdown import convert, UnknownFormatError | ||
import pytest | ||
|
||
CPYTHON = """\ | ||
bool(x) -> bool | ||
Returns True when the argument x is true, False otherwise.\ | ||
""" | ||
|
||
|
||
CPYTHON_MD = """\ | ||
``` | ||
bool(x) -> bool | ||
``` | ||
Returns True when the argument x is true, False otherwise.\ | ||
""" | ||
|
||
GOOGLE = """Do **something**. | ||
Args: | ||
a: some arg | ||
b: some arg | ||
""" | ||
|
||
GOOGLE_MD = """Do **something**. | ||
#### Args | ||
- `a`: some arg | ||
- `b`: some arg | ||
""" | ||
|
||
|
||
RST = "Please see `this link<https://example.com>`__." | ||
RST_MD = "Please see [this link](https://example.com)." | ||
|
||
|
||
def test_convert_cpython(): | ||
assert convert(CPYTHON) == CPYTHON_MD | ||
|
||
|
||
def test_convert_plain_text(): | ||
assert convert('This is a sentence.') == 'This is a sentence.' | ||
|
||
|
||
def test_convert_google(): | ||
assert convert(GOOGLE) == GOOGLE_MD | ||
|
||
|
||
def test_convert_rst(): | ||
assert convert(RST) == RST_MD | ||
|
||
|
||
def test_unknown_format(): | ||
with pytest.raises(UnknownFormatError): | ||
convert('ARGS [arg1, arg2] RETURNS: str OR None') |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,103 @@ | ||
import pytest | ||
from docstring_to_markdown.cpython import looks_like_cpython, cpython_to_markdown | ||
|
||
BOOL = """\ | ||
bool(x) -> bool | ||
Returns True when the argument x is true, False otherwise.\ | ||
""" | ||
|
||
BOOL_MD = """\ | ||
``` | ||
bool(x) -> bool | ||
``` | ||
Returns True when the argument x is true, False otherwise.\ | ||
""" | ||
|
||
BYTES = """\ | ||
bytes(iterable_of_ints) -> bytes | ||
bytes(string, encoding[, errors]) -> bytes | ||
bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer | ||
bytes(int) -> bytes object of size given by the parameter initialized with null bytes | ||
bytes() -> empty bytes object | ||
Construct an immutable array of bytes from: | ||
- an iterable yielding integers in range(256) | ||
- a text string encoded using the specified encoding | ||
- any object implementing the buffer API. | ||
- an integer\ | ||
""" | ||
|
||
COLLECTIONS_DEQUEUE = """\ | ||
deque([iterable[, maxlen]]) --> deque object | ||
A list-like sequence optimized for data accesses near its endpoints.\ | ||
""" | ||
|
||
DICT = """\ | ||
dict() -> new empty dictionary | ||
dict(mapping) -> new dictionary initialized from a mapping object's | ||
(key, value) pairs | ||
dict(iterable) -> new dictionary initialized as if via: | ||
d = {} | ||
for k, v in iterable: | ||
d[k] = v | ||
dict(**kwargs) -> new dictionary initialized with the name=value pairs | ||
in the keyword argument list. For example: dict(one=1, two=2)\ | ||
""" | ||
|
||
STR = """\ | ||
str(object='') -> str | ||
str(bytes_or_buffer[, encoding[, errors]]) -> str | ||
Create a new string object from the given object. If encoding or | ||
errors is specified, then the object must expose a data buffer | ||
that will be decoded using the given encoding and error handler. | ||
Otherwise, returns the result of object.__str__() (if defined) | ||
or repr(object).\ | ||
""" | ||
|
||
STR_MD = """\ | ||
``` | ||
str(object='') -> str | ||
str(bytes_or_buffer[, encoding[, errors]]) -> str | ||
``` | ||
Create a new string object from the given object. If encoding or | ||
errors is specified, then the object must expose a data buffer | ||
that will be decoded using the given encoding and error handler. | ||
Otherwise, returns the result of object.\\_\\_str\\_\\_() (if defined) | ||
or repr(object).\ | ||
""" | ||
|
||
|
||
@pytest.mark.parametrize("text", [BYTES, STR, DICT, BOOL, COLLECTIONS_DEQUEUE]) | ||
def test_accepts_cpython_docstrings(text): | ||
assert looks_like_cpython(text) is True | ||
|
||
|
||
@pytest.mark.parametrize("text", [ | ||
"[link label](https://link)", | ||
"![image label](https://source)", | ||
"Some **bold** text", | ||
"More __bold__ text", | ||
"Some *italic* text", | ||
"More _italic_ text", | ||
"This is a sentence.", | ||
"Exclamation!", | ||
"Can I ask a question?", | ||
"Let's send an e-mail", | ||
"Parentheses (are) fine (really)", | ||
"Double \"quotes\" and single 'quotes'" | ||
]) | ||
def test_rejects_markdown_and_plain_text(text): | ||
assert looks_like_cpython(text) is False | ||
|
||
|
||
def test_conversion_bool(): | ||
assert cpython_to_markdown(BOOL) == BOOL_MD | ||
|
||
|
||
def test_conversion_str(): | ||
assert cpython_to_markdown(STR) == STR_MD |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
import pytest | ||
from docstring_to_markdown.plain import looks_like_plain_text, plain_text_to_markdown | ||
|
||
|
||
@pytest.mark.parametrize("text", [ | ||
"This is a sentence.", | ||
"Exclamation!", | ||
"Can I ask a question?", | ||
"Let's send an e-mail", | ||
"Parentheses (are) fine (really)", | ||
"Double \"quotes\" and single 'quotes'" | ||
]) | ||
def test_accepts_english(text): | ||
assert looks_like_plain_text(text) is True | ||
|
||
|
||
@pytest.mark.parametrize("text", [ | ||
"[link label](https://link)", | ||
"![image label](https://source)", | ||
"Some **bold** text", | ||
"More __bold__ text", | ||
"Some *italic* text", | ||
"More _italic_ text" | ||
]) | ||
def test_rejects_markdown(text): | ||
assert looks_like_plain_text(text) is False | ||
|
||
|
||
@pytest.mark.parametrize("text", [ | ||
"def test():", | ||
"print(123)", | ||
"func(arg)", | ||
"2 + 2", | ||
"var['test']", | ||
"x = 'test'" | ||
]) | ||
def test_rejects_code(text): | ||
assert looks_like_plain_text(text) is False | ||
|
||
|
||
def test_conversion(): | ||
assert plain_text_to_markdown("test") == "test" |