diff --git a/scripts/build.py b/scripts/build.py index 5dd1d26a1..2cd36de6a 100644 --- a/scripts/build.py +++ b/scripts/build.py @@ -5,6 +5,9 @@ import markdown import logging from datetime import datetime +from urllib.parse import quote +import re +from collections import defaultdict # Set up logging logging.basicConfig(level=logging.INFO, format='%(levelname)s: %(message)s') @@ -17,27 +20,43 @@ class Page: path: Path content: str modified_date: datetime + category: Optional[str] + tags: List[str] + description: Optional[str] is_index: bool = False class SiteGenerator: """Generates a static site from a directory of mixed content""" - MARKDOWN_EXTENSIONS = ['meta', 'toc', 'fenced_code', 'tables'] - SUPPORTED_CONTENT = {'.md'} # Expandable for future content types - IGNORED_DIRECTORIES = {'.git', '__pycache__', 'node_modules'} + MARKDOWN_EXTENSIONS = [ + 'meta', + 'toc', + 'fenced_code', + 'tables', + 'attr_list', + 'footnotes', + 'def_list', + 'admonition' + ] + SUPPORTED_CONTENT = {'.md', '.markdown'} + IGNORED_DIRECTORIES = {'.git', '__pycache__', 'node_modules', '.github', 'venv', '.venv'} def __init__(self, input_dir: str, output_dir: str): self.input_dir = Path(input_dir) self.output_dir = Path(output_dir) self.markdown_converter = markdown.Markdown(extensions=self.MARKDOWN_EXTENSIONS) self.pages: Dict[Path, Page] = {} + self.categories: Dict[str, List[Page]] = defaultdict(list) + self.tags: Dict[str, List[Page]] = defaultdict(list) def generate_site(self) -> None: """Main method to generate the static site""" try: self._prepare_output_directory() self._process_content() + self._organize_content() self._copy_assets() + self._generate_special_pages() self._generate_html_pages() logger.info(f"Site generated successfully in {self.output_dir}") except Exception as e: @@ -49,9 +68,13 @@ def _prepare_output_directory(self) -> None: if self.output_dir.exists(): shutil.rmtree(self.output_dir) self.output_dir.mkdir(parents=True) - # Set directory permissions to 755 (rwxr-xr-x) self.output_dir.chmod(0o755) + # Create assets directory + assets_dir = self.output_dir / 'assets' + assets_dir.mkdir(parents=True) + assets_dir.chmod(0o755) + def _process_content(self) -> None: """Process all content in the input directory""" for file_path in self._walk_directory(self.input_dir): @@ -67,32 +90,76 @@ def _walk_directory(self, directory: Path) -> List[Path]: files.append(item) return files + def _extract_metadata(self, file_path: Path) -> dict: + """Extract metadata from markdown file""" + content = file_path.read_text(encoding='utf-8') + self.markdown_converter.reset() + self.markdown_converter.convert(content) + + metadata = {} + if hasattr(self.markdown_converter, 'Meta'): + metadata = { + 'title': self.markdown_converter.Meta.get('title', [file_path.stem.replace('-', ' ').title()])[0], + 'category': self.markdown_converter.Meta.get('category', [None])[0], + 'tags': self.markdown_converter.Meta.get('tags', [''])[0].split(',') if 'tags' in self.markdown_converter.Meta else [], + 'description': self.markdown_converter.Meta.get('description', [None])[0] + } + else: + metadata = { + 'title': file_path.stem.replace('-', ' ').title(), + 'category': None, + 'tags': [], + 'description': None + } + + return metadata + def _process_markdown(self, file_path: Path) -> None: """Process a markdown file into a Page object""" try: content = file_path.read_text(encoding='utf-8') + metadata = self._extract_metadata(file_path) + + # Convert content after metadata extraction self.markdown_converter.reset() html_content = self.markdown_converter.convert(content) - # Get title from metadata or filename - if hasattr(self.markdown_converter, 'Meta') and 'title' in self.markdown_converter.Meta: - title = self.markdown_converter.Meta['title'][0] - else: - title = file_path.stem.replace('-', ' ').title() - relative_path = file_path.relative_to(self.input_dir) is_index = file_path.stem.lower() == 'index' - self.pages[relative_path] = Page( - title=title, + # Clean and normalize tags + tags = [tag.strip().lower() for tag in metadata['tags'] if tag.strip()] + + page = Page( + title=metadata['title'], path=relative_path, content=html_content, modified_date=datetime.fromtimestamp(file_path.stat().st_mtime), + category=metadata['category'], + tags=tags, + description=metadata['description'], is_index=is_index ) + + self.pages[relative_path] = page + + # Organize by category and tags + if page.category: + self.categories[page.category].append(page) + for tag in page.tags: + self.tags[tag].append(page) + except Exception as e: logger.error(f"Failed to process {file_path}: {str(e)}") + def _organize_content(self) -> None: + """Organize pages by category and tags""" + # Sort pages within categories and tags + for category in self.categories: + self.categories[category].sort(key=lambda p: p.title) + for tag in self.tags: + self.tags[tag].sort(key=lambda p: p.title) + def _copy_assets(self) -> None: """Copy non-markdown files to output directory""" for file_path in self._walk_directory(self.input_dir): @@ -100,25 +167,147 @@ def _copy_assets(self) -> None: relative_path = file_path.relative_to(self.input_dir) output_path = self.output_dir / relative_path output_path.parent.mkdir(parents=True, exist_ok=True) - # Set directory permissions to 755 (rwxr-xr-x) output_path.parent.chmod(0o755) shutil.copy2(file_path, output_path) - # Set file permissions to 644 (rw-r--r--) output_path.chmod(0o644) + def _generate_special_pages(self) -> None: + """Generate special pages like category index and tag index""" + # Generate categories index + if self.categories: + categories_content = self._render_categories_index() + categories_page = Page( + title="Categories", + path=Path("categories/index.md"), + content=categories_content, + modified_date=datetime.now(), + category=None, + tags=[], + description="Index of all categories", + is_index=True + ) + self.pages[categories_page.path] = categories_page + + # Generate tags index + if self.tags: + tags_content = self._render_tags_index() + tags_page = Page( + title="Tags", + path=Path("tags/index.md"), + content=tags_content, + modified_date=datetime.now(), + category=None, + tags=[], + description="Index of all tags", + is_index=True + ) + self.pages[tags_page.path] = tags_page + + def _render_categories_index(self) -> str: + """Render the categories index page""" + content = "