|
| 1 | +import argparse # Import the argparse module |
1 | 2 | import re
|
2 | 3 | from pathlib import Path
|
3 | 4 | from typing import Match, Union
|
4 | 5 |
|
5 | 6 | from bs4 import BeautifulSoup, Tag
|
6 | 7 |
|
7 |
| -# Install from https://github.com/carpedm20/emoji/ |
8 |
| -# with pip install emoji |
9 | 8 | try:
|
10 | 9 | from emoji import emojize
|
11 | 10 | except ImportError:
|
@@ -35,25 +34,34 @@ def process_html_file(html_file: Union[str, Path]) -> None:
|
35 | 34 | with open(html_file, "r", encoding="utf-8") as file:
|
36 | 35 | content = file.read()
|
37 | 36 |
|
38 |
| - # Convert emojis in the entire HTML content |
39 | 37 | content = emojize_all(content)
|
40 |
| - |
41 | 38 | soup = BeautifulSoup(content, "html.parser")
|
42 |
| - |
43 |
| - # Update all <img> tags with src starting with "utils/" |
44 | 39 | update_image_paths(soup)
|
45 | 40 |
|
46 |
| - # Write the changes back to the HTML file |
47 | 41 | with open(html_file, "w", encoding="utf-8") as file:
|
48 | 42 | file.write(str(soup))
|
49 | 43 |
|
50 | 44 |
|
if __name__ == "__main__":
    # Command-line entry point: post-process every top-level "*.html" file in
    # a directory. The directory comes from --path when given; otherwise it is
    # the "_build/html" folder next to this script.
    parser = argparse.ArgumentParser(description="Process HTML files.")
    parser.add_argument(
        "--path",
        type=str,
        help="Path to the directory containing HTML files to process.",
    )

    args = parser.parse_args()

    if args.path:
        base_path = Path(args.path)
        # An explicitly requested directory that does not exist is a usage
        # error. Without this check the glob below matches nothing and the
        # script reports success without having processed any file.
        if not base_path.is_dir():
            parser.error(f"--path is not a directory: {base_path}")
    else:
        base_path = Path(__file__).parent / "_build" / "html"

    # Sort so the processing order (and the log output) is deterministic;
    # glob() itself makes no ordering guarantee.
    for html_file in sorted(base_path.glob("*.html")):
        print(f"Processing {html_file}...")
        process_html_file(html_file)

    print("HTML postprocessing completed.")
|
0 commit comments