Skip to content

Commit

Permalink
Add test for URLs with percents, autoformat, and bump version
Browse files Browse the repository at this point in the history
  • Loading branch information
jponttuset committed Jan 17, 2024
1 parent ada0c67 commit 12228fd
Show file tree
Hide file tree
Showing 4 changed files with 39 additions and 31 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ patterns.
## Usage:

```
usage: arxiv_latex_cleaner@v1.0.2 [-h] [--resize_images] [--im_size IM_SIZE]
usage: arxiv_latex_cleaner@v1.0.3 [-h] [--resize_images] [--im_size IM_SIZE]
[--compress_pdf]
[--pdf_im_resolution PDF_IM_RESOLUTION]
[--images_allowlist IMAGES_ALLOWLIST]
Expand Down
2 changes: 1 addition & 1 deletion arxiv_latex_cleaner/_version.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,4 @@
# See the License for the specific language governing permissions and
# limitations under the License.

__version__ = "v1.0.2"
__version__ = "v1.0.3"
61 changes: 32 additions & 29 deletions arxiv_latex_cleaner/arxiv_latex_cleaner.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,35 +194,38 @@ def _remove_iffalse_block(text):


def _remove_comments_inline(text):
"""Removes the comments from the string 'text' and ignores % inside \\url{}."""
if "auto-ignore" in text:
return text
if text.lstrip(" ").lstrip("\t").startswith("%"):
return ""

url_pattern = r"\\url\{(?>[^{}]|(?R))*\}"
def remove_comments(segment):
"""
Remove comments from a segment of text.
"""
if segment.lstrip().startswith("%"):
return ""
match = regex.search(r"(?<!\\)%", segment)
if match:
return segment[: match.end()] + "\n"
else:
return segment

# split the text into segments based on \url{} tags
segments = regex.split(f"({url_pattern})", text)

for i in range(len(segments)):
# only process segments that are not part of a \url{} tag
if not regex.match(url_pattern, segments[i]):
segments[i] = remove_comments(segments[i])

final_text = "".join(segments)
return final_text if final_text.endswith("\n") or final_text.endswith("\\n") else final_text + "\n"
"""Removes the comments from the string 'text' and ignores % inside \\url{}."""
if 'auto-ignore' in text:
return text
if text.lstrip(' ').lstrip('\t').startswith('%'):
return ''

url_pattern = r'\\url\{(?>[^{}]|(?R))*\}'

def remove_comments(segment):
"""Remove comments from a segment of text."""
if segment.lstrip().startswith('%'):
return ''
match = regex.search(r'(?<!\\)%', segment)
if match:
return segment[: match.end()] + '\n'
else:
return segment

# split the text into segments based on \url{} tags
segments = regex.split(f'({url_pattern})', text)

for i in range(len(segments)):
# only process segments that are not part of a \url{} tag
if not regex.match(url_pattern, segments[i]):
segments[i] = remove_comments(segments[i])

final_text = ''.join(segments)
return (
final_text
if final_text.endswith('\n') or final_text.endswith('\\n')
else final_text + '\n'
)


def _strip_tex_contents(lines, end_str):
Expand Down
5 changes: 5 additions & 0 deletions arxiv_latex_cleaner/tests/arxiv_latex_cleaner_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -293,6 +293,11 @@ def test_merge_args_into_config(self, args, config_params, final_args):
'line_in': 'Foo %Comment\n',
'true_output': 'Foo %\n',
},
{
'testcase_name': 'url_with_percent',
'line_in': '\\url{https://www.example.com/hello%20world}\n',
'true_output': '\\url{https://www.example.com/hello%20world}\n',
},
)
def test_remove_comments_inline(self, line_in, true_output):
self.assertEqual(
Expand Down

0 comments on commit 12228fd

Please sign in to comment.