5
5
import functools
6
6
import re
7
7
from html import unescape
8
- from typing import List , Tuple
8
+ from typing import List , Optional , Tuple
9
9
10
10
# Sentence delimiter, split on a period followed by any type of
11
11
# whitespace (space, new line, tab, etc.) or by the end of a line.
12
- REGEX_SENTENCE_DELIMITER = re .compile (r"\W (?:\s|$)" )
12
+ REGEX_SENTENCE_DELIMITER = re .compile (r"\. (?:\s|$)" , flags = re . M )
13
13
14
14
# Matches on pattern __prefix__ at the beginning of a description
15
15
# or after a comma
16
- REGEX_TECHDOCS_PREFIX = re .compile (r"(?:, |\A)__([\w- ]+)__" )
16
+ REGEX_TECHDOCS_PREFIX = re .compile (r"(?:, |\A)__([^_ ]+)__" )
17
17
18
18
# Matches on pattern [link title](https://.../)
19
19
REGEX_MARKDOWN_LINK = re .compile (r"\[(?P<text>.*?)]\((?P<link>.*?)\)" )
@@ -121,23 +121,35 @@ def get_short_description(description: str) -> str:
121
121
:rtype: str
122
122
"""
123
123
124
- target_lines = description .splitlines ()
125
- relevant_lines = None
126
-
127
- for i , line in enumerate (target_lines ):
124
+ def __simplify (sentence : str ) -> Optional [str ]:
128
125
# Edge case for descriptions starting with a note
129
- if line .lower ().startswith ("__note__" ):
130
- continue
126
+ if sentence .lower ().startswith ("__note__" ):
127
+ return None
128
+
129
+ sentence = strip_techdocs_prefixes (sentence )
131
130
132
- relevant_lines = target_lines [i :]
133
- break
131
+ # Check that the sentence still has content after stripping prefixes
132
+ if len (sentence ) < 2 :
133
+ return None
134
134
135
- if relevant_lines is None :
135
+ return sentence + "."
136
+
137
+ # Find the first relevant sentence
138
+ result = next (
139
+ simplified
140
+ for simplified in iter (
141
+ __simplify (sentence )
142
+ for sentence in REGEX_SENTENCE_DELIMITER .split (description )
143
+ )
144
+ if simplified is not None
145
+ )
146
+
147
+ if result is None :
136
148
raise ValueError (
137
149
f"description does not contain any relevant lines: { description } " ,
138
150
)
139
151
140
- return REGEX_SENTENCE_DELIMITER . split ( " \n " . join ( relevant_lines ), 1 )[ 0 ] + "."
152
+ return result
141
153
142
154
143
155
def strip_techdocs_prefixes (description : str ) -> str :
@@ -150,14 +162,10 @@ def strip_techdocs_prefixes(description: str) -> str:
150
162
:returns: The stripped description
151
163
:rtype: str
152
164
"""
153
- result_description = REGEX_TECHDOCS_PREFIX .sub (
154
- "" , description .lstrip ()
155
- ).lstrip ()
156
-
157
- return result_description
165
+ return REGEX_TECHDOCS_PREFIX .sub ("" , description .lstrip ()).lstrip ()
158
166
159
167
160
- def process_arg_description (description : str ) -> Tuple [str , str ]:
168
+ def simplify_description (description : str ) -> Tuple [str , str ]:
161
169
"""
162
170
Processes the given raw request argument description into one suitable
163
171
for help pages, etc.
@@ -173,12 +181,12 @@ def process_arg_description(description: str) -> Tuple[str, str]:
173
181
return "" , ""
174
182
175
183
result = get_short_description (description )
176
- result = strip_techdocs_prefixes (result )
177
184
result = result .replace ("\n " , " " ).replace ("\r " , " " )
178
185
179
- description , links = extract_markdown_links (result )
186
+ # NOTE: Links are only extracted (and appended as a trailing "See:" list) for the Rich markup output; the second returned value keeps its Markdown links inline
187
+ result_no_links , links = extract_markdown_links (result )
180
188
181
189
if len (links ) > 0 :
182
- description += f" See: { '; ' .join (links )} "
190
+ result_no_links += f" See: { '; ' .join (links )} "
183
191
184
- return unescape (markdown_to_rich_markup (description )), unescape (description )
192
+ return unescape (markdown_to_rich_markup (result_no_links )), unescape (result )
0 commit comments