diff --git a/notebooks/generate_site_simple.ipynb b/notebooks/generate_site_simple.ipynb new file mode 100644 index 0000000..0a781b3 --- /dev/null +++ b/notebooks/generate_site_simple.ipynb @@ -0,0 +1,116 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Website Generation\n", + "\n", + "This notebook provides a simple interface to generate the website." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import sys\n", + "from pathlib import Path\n", + "\n", + "# Add src to path\n", + "sys.path.insert(0, str(Path.cwd().parent))\n", + "\n", + "from src.main import SiteGenerator" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Generate Full Site" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "generator = SiteGenerator(log_level=\"INFO\")\n", + "generator.run()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Debug Individual Stages\n", + "\n", + "If something fails, you can run stages individually:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Create generator\n", + "generator = SiteGenerator(log_level=\"DEBUG\")\n", + "\n", + "# Load data\n", + "generator.load_articles()\n", + "generator.load_members()\n", + "generator.load_website_data()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Process data\n", + "generator.process_article_categories()\n", + "generator.process_member_roles()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Inspect loaded data\n", + "print(f\"Articles: {len(generator.data['articles_df'])}\")\n", + "print(f\"Members: {len(generator.data['member_info_df'])}\")\n", + "print(f\"Current: 
{len(generator.data['current_members_df'])}\")\n", + "print(f\"Alumni: {len(generator.data['alumni_members_df'])}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# View specific data\n", + "generator.data['current_members_df'][['current_role', 'full_name']]" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.10.0" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/src/__init__.py b/src/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/config.py b/src/config.py new file mode 100644 index 0000000..3abfc60 --- /dev/null +++ b/src/config.py @@ -0,0 +1,66 @@ +from pathlib import Path +from typing import List, Dict + +BASE_DIR = Path(__file__).parent.parent +GROUP_DATA_DIR = BASE_DIR.parent / "group-data" +TEMPLATE_DIR_PATH = BASE_DIR / "templates" +WEBSITE_DATA_PATH = GROUP_DATA_DIR / "website_data" +HOSTING_PATH = BASE_DIR.parent / "kerzendorf-lab.github.io" +ARTICLE_DIR_PATH = BASE_DIR.parent / "research_news" / "articles" +ARTICLE_IMAGE_DESTINATION_DIR = HOSTING_PATH / "website_files" / "images" / "article_content" +MEMBERS_DIR_PATH = GROUP_DATA_DIR / "members" +SUB_RESEARCH_PATH = HOSTING_PATH / "sub_research" +OPPORTUNITIES_PATH = WEBSITE_DATA_PATH / "content" / "opportunities.json" +ROLE_HIERARCHY_PATH = WEBSITE_DATA_PATH / "role_hierarchy.json" +GALLERY_CONTENT_SOURCE = WEBSITE_DATA_PATH / "content" / "gallery" + +GENERAL_TAGS: List[str] = [ + "Paper", "Poster", "Talk", "Award", "New Team Member", + "PhD", "Conference", "Undergraduate", "Event", "Achievement" +] + +TAG_COLORS: Dict[str, str] = { + 'paper': '#FF6B6B', + 'poster': '#4ECDC4', + 'talk': '#45B7D1', + 'award': '#96CEB4', + 'new team member': '#FFBE0B', + 'phd': '#9B5DE5', + 'conference': '#FF006E', + 'undergraduate': '#8338EC', + 'event': '#3A86FF', + 'achievement': 
'#FB5607', + 'astrophysics': '#2EC4B6', + 'machine learning': '#FF9F1C', + 'software': '#E71D36', + 'research': '#011627', + 'news': '#41EAD4' +} + +ARTICLE_METADATA_FIELDS: List[str] = [ + "article_id", "category", "date", "tags", + "title", "cover_image", "short_description" +] + +GROUP_FILTER: List[str] = ["DTI", "TARDIS", "ICER", "kerzendorf"] +INSTITUTION_FILTER: str = "Michigan State University" + +ROLE_MAP: Dict[str, str] = { + "Assistant Professor": "Professor", + "Professorial Assistant": "Undergraduate Student", + "Visiting Researcher": "Postdoctoral Researcher" +} + +DEGREE_MAP: Dict[str, str] = { + "Masters": "Graduate Student", + "PhD": "Postdoctorate", + "Bachelors": "Undergraduate Student", +} + +INDIVIDUAL_MEMBER_SECTION_MAP: Dict[str, str] = { + "education": "Education", + "experiences": "Experience", + "projects": "Projects", + "awards": "Awards & Recognition", + "outreach": "Outreach Programs", +} diff --git a/src/data_loaders/__init__.py b/src/data_loaders/__init__.py new file mode 100644 index 0000000..1887844 --- /dev/null +++ b/src/data_loaders/__init__.py @@ -0,0 +1,6 @@ +from .base import BaseDataLoader +from .articles import ArticleLoader +from .members import MemberLoader +from .website_data import WebsiteDataLoader + +__all__ = ['BaseDataLoader', 'ArticleLoader', 'MemberLoader', 'WebsiteDataLoader'] diff --git a/src/data_loaders/articles.py b/src/data_loaders/articles.py new file mode 100644 index 0000000..474e6d0 --- /dev/null +++ b/src/data_loaders/articles.py @@ -0,0 +1,119 @@ +import pandas as pd +from pathlib import Path +from datetime import datetime, date +from typing import Dict, Any + +from .base import BaseDataLoader +from src.config import ARTICLE_DIR_PATH, ARTICLE_IMAGE_DESTINATION_DIR +from src.utils.path_helpers import set_new_image_path + +class ArticleLoader(BaseDataLoader): + def __init__(self, logger=None): + super().__init__(logger) + self.article_dir = ARTICLE_DIR_PATH + self.image_dest_dir = 
ARTICLE_IMAGE_DESTINATION_DIR + + def load(self) -> pd.DataFrame: + self.logger.info(f"Loading articles from {self.article_dir}") + + if not self.article_dir.exists(): + raise FileNotFoundError( + f"Article directory not found: {self.article_dir}. " + f"Expected at: {self.article_dir.absolute()}. " + f"Check that research_news repo is in correct location." + ) + + article_content_list = [] + today = date.today() + today_datetime = datetime.combine(today, datetime.min.time()) + + info_files = list(self.article_dir.rglob('info.json')) + self.logger.info(f"Found {len(info_files)} article info.json files") + + for content_file_path in info_files: + article_content = self._load_single_article( + content_file_path, + today_datetime + ) + if article_content: + article_content_list.append(article_content) + + if not article_content_list: + self.logger.warning("No articles loaded successfully") + return pd.DataFrame() + + df = pd.DataFrame(article_content_list) + df = self._process_article_dataframe(df) + + self.logger.info( + f"Successfully loaded {len(df)} articles " + f"({len(df[df['category'] == 'News'])} news, " + f"{len(df[df['category'] != 'News'])} research)" + ) + + return df + + def _load_single_article( + self, + content_file_path: Path, + today_datetime: datetime + ) -> Dict[str, Any] | None: + article_content = self.load_json_file(content_file_path) + + required_fields = ['date', 'platforms', 'cover_image', 'content', 'category'] + missing_fields = [f for f in required_fields if f not in article_content] + if missing_fields: + raise ValueError( + f"Article {content_file_path.parent.name} missing required fields: " + f"{', '.join(missing_fields)}. 
" + f"File: {content_file_path}" + ) + + article_date = datetime.strptime(article_content["date"], "%m-%d-%Y") + + if "kg" not in article_content["platforms"]: + self.logger.debug( + f"Skipping {content_file_path.parent.name}: 'kg' not in platforms" + ) + return None + + if article_date > today_datetime: + self.logger.debug( + f"Skipping {content_file_path.parent.name}: " + f"future date {article_content['date']}" + ) + return None + + image_path = Path(article_content["cover_image"]) + article_content["cover_image"] = set_new_image_path( + content_file_path, + image_path, + self.image_dest_dir + ) + + for content_key, content_value in article_content["content"].items(): + if "img" in content_key: + new_content_value = set_new_image_path( + content_file_path, + Path(content_value), + self.image_dest_dir + ) + article_content["content"][content_key] = new_content_value + + return article_content + + def _process_article_dataframe(self, df: pd.DataFrame) -> pd.DataFrame: + df["date"] = pd.to_datetime(df["date"], format="%m-%d-%Y") + + df["cover_image_height"] = ( + df["cover_image_height"].fillna("330px").replace("", "330px") + ) + df["cover_image_width"] = ( + df["cover_image_width"].fillna("520px").replace("", "520px") + ) + + df["category"] = df["category"].replace("Overview", "Computational Metascience") + + df['image_name'] = df['cover_image'].apply(lambda x: Path(x).name) + + return df diff --git a/src/data_loaders/base.py b/src/data_loaders/base.py new file mode 100644 index 0000000..2cab7f2 --- /dev/null +++ b/src/data_loaders/base.py @@ -0,0 +1,65 @@ +import json +import logging +from pathlib import Path +from typing import Any, Dict, List +from abc import ABC, abstractmethod + +class BaseDataLoader(ABC): + def __init__(self, logger: logging.Logger = None): + self.logger = logger or logging.getLogger(__name__) + + def load_json_file(self, file_path: Path) -> Dict[str, Any]: + if not file_path.exists(): + raise FileNotFoundError( + f"JSON file not found: 
{file_path}. " + f"Expected at: {file_path.absolute()}" + ) + + try: + with open(file_path, 'r', encoding='utf-8') as f: + data = json.load(f) + self.logger.debug(f"Successfully loaded JSON: {file_path.name}") + return data + except json.JSONDecodeError as e: + raise ValueError( + f"Invalid JSON in file: {file_path}. " + f"Error at line {e.lineno}, column {e.colno}: {e.msg}. " + f"Check file syntax at {file_path.absolute()}" + ) from e + except UnicodeDecodeError as e: + raise ValueError( + f"Encoding error in file: {file_path}. " + f"File must be UTF-8 encoded. Error: {e}" + ) from e + + def load_json_files(self, pattern: str, base_path: Path) -> List[Dict[str, Any]]: + files = list(base_path.rglob(pattern)) + + if not files: + self.logger.warning( + f"No files matching pattern '{pattern}' found in {base_path}" + ) + return [] + + data_list = [] + failed_files = [] + + for file_path in files: + try: + data = self.load_json_file(file_path) + data_list.append(data) + except (FileNotFoundError, ValueError) as e: + failed_files.append((file_path, str(e))) + self.logger.error(f"Failed to load {file_path}: {e}") + + if failed_files: + self.logger.warning( + f"Failed to load {len(failed_files)} files. " + f"Loaded {len(data_list)} successfully." 
def _load_member_info(self) -> pd.DataFrame:
    """Load every member's ``info.json`` into a DataFrame indexed by ``id``.

    Files that cannot be read, or that lack an ``id``, are logged and
    skipped. A ``full_name`` column is added: the nickname (when it is a
    non-blank string) or otherwise the first name, followed by the last
    name.

    Returns:
        One row per member, indexed by ``id``. When no file could be loaded
        an empty frame (index named ``id``, column ``full_name``) is
        returned so the caller's ``.empty`` check can report the problem.

    Raises:
        FileNotFoundError: If no ``*/info.json`` exists under
            ``self.members_dir``.
    """
    info_files = list(self.members_dir.glob("*/info.json"))

    if not info_files:
        raise FileNotFoundError(
            f"No member info.json files found in {self.members_dir}. "
            f"Expected at least one member directory with info.json"
        )

    info_list = []
    for info_path in info_files:
        try:
            member_data = self.load_json_file(info_path)

            if 'id' not in member_data:
                raise ValueError(
                    f"Member info.json missing 'id' field: {info_path}"
                )

            required_fields = ['first_name', 'last_name']
            missing_fields = [f for f in required_fields if f not in member_data]
            if missing_fields:
                self.logger.warning(
                    f"Member {member_data.get('id', 'unknown')} missing fields: "
                    f"{', '.join(missing_fields)} in {info_path}"
                )

            info_list.append(member_data)
        except (FileNotFoundError, ValueError) as e:
            self.logger.error(f"Failed to load member info {info_path}: {e}")

    if not info_list:
        # set_index("id") would raise KeyError on a frame with no columns.
        return pd.DataFrame(columns=["full_name"]).rename_axis("id")

    df = pd.DataFrame(info_list).set_index("id")

    def usable(value) -> str:
        # NaN (key absent for this member) and blank strings both count as
        # "not provided"; pd.notna("") is True, so an explicit check is needed.
        return value if isinstance(value, str) and value.strip() else ""

    def build_full_name(row) -> str:
        given = usable(row.get("nick_name")) or usable(row.get("first_name"))
        family = usable(row.get("last_name"))
        return " ".join(part for part in (given, family) if part)

    df["full_name"] = [build_full_name(row) for _, row in df.iterrows()]

    return df
+ ) + + for entry in data_entries: + entry["id"] = member_id + + data_list.extend(data_entries) + + except (FileNotFoundError, ValueError) as e: + self.logger.error( + f"Failed to load {filename} for member {member_dir.name}: {e}" + ) + + if not data_list: + return pd.DataFrame() + + df = pd.DataFrame(data_list).set_index("id") + return df + + def _load_social_links(self) -> pd.DataFrame: + social_links_list = [] + + for social_link_path in self.members_dir.rglob("social_links.json"): + try: + member_social_link = self.load_json_file(social_link_path) + + info_path = social_link_path.parent.parent / "info.json" + member_info = self.load_json_file(info_path) + + member_id = member_info.get("id") + if not member_id: + raise ValueError( + f"Cannot find member ID for social links at {social_link_path}" + ) + + member_social_link["id"] = member_id + social_links_list.append(member_social_link) + + except (FileNotFoundError, ValueError) as e: + self.logger.error( + f"Failed to load social links {social_link_path}: {e}" + ) + + if not social_links_list: + return pd.DataFrame() + + df = pd.DataFrame(social_links_list).set_index("id") + df.fillna("", inplace=True) + + return df diff --git a/src/data_loaders/website_data.py b/src/data_loaders/website_data.py new file mode 100644 index 0000000..257298c --- /dev/null +++ b/src/data_loaders/website_data.py @@ -0,0 +1,88 @@ +from pathlib import Path +from typing import Dict, Any + +from .base import BaseDataLoader +from src.config import ( + WEBSITE_DATA_PATH, + OPPORTUNITIES_PATH, + ROLE_HIERARCHY_PATH, + GALLERY_CONTENT_SOURCE +) + +class WebsiteDataLoader(BaseDataLoader): + def __init__(self, logger=None): + super().__init__(logger) + self.website_data_path = WEBSITE_DATA_PATH + + def load(self) -> Dict[str, Any]: + self.logger.info("Loading website configuration data") + + if not self.website_data_path.exists(): + raise FileNotFoundError( + f"Website data directory not found: {self.website_data_path}. 
" + f"Expected at: {self.website_data_path.absolute()}" + ) + + data = { + 'general': self._load_website_json('general.json'), + 'homepage': self._load_website_json('homepage.json'), + 'contact': self._load_website_json('contact.json'), + 'research': self._load_website_json('research_categories.json'), + 'support': self._load_website_json('support.json'), + 'opportunities': self._load_json_with_path(OPPORTUNITIES_PATH), + 'role_hierarchy': self._load_json_with_path(ROLE_HIERARCHY_PATH), + } + + self.logger.info("Successfully loaded all website configuration data") + + return data + + def _load_website_json(self, filename: str) -> Dict[str, Any]: + file_path = self.website_data_path / filename + + if not file_path.exists(): + raise FileNotFoundError( + f"Required website data file not found: {filename}. " + f"Expected at: {file_path}. " + f"This file is required for site generation." + ) + + return self.load_json_file(file_path) + + def _load_json_with_path(self, file_path: Path) -> Dict[str, Any]: + if not file_path.exists(): + raise FileNotFoundError( + f"Required file not found: {file_path}. 
" + f"Expected at: {file_path.absolute()}" + ) + + return self.load_json_file(file_path) + + def load_gallery_events(self) -> list: + self.logger.info("Loading gallery events") + + if not GALLERY_CONTENT_SOURCE.exists(): + self.logger.warning( + f"Gallery content directory not found: {GALLERY_CONTENT_SOURCE}" + ) + return [] + + events = [] + event_files = list(GALLERY_CONTENT_SOURCE.rglob("info.json")) + + for event_file in event_files: + try: + event_data = self.load_json_file(event_file) + + if "event_id" not in event_data: + raise ValueError( + f"Gallery event missing 'event_id': {event_file}" + ) + + events.append((event_file, event_data)) + except (FileNotFoundError, ValueError) as e: + self.logger.error(f"Failed to load gallery event {event_file}: {e}") + + self.logger.info(f"Loaded {len(events)} gallery events") + + return events diff --git a/src/main.py b/src/main.py new file mode 100644 index 0000000..27ea9c1 --- /dev/null +++ b/src/main.py @@ -0,0 +1,271 @@ +import sys +from pathlib import Path + +from src.utils.logging_config import setup_logging +from src.data_loaders import ArticleLoader, MemberLoader, WebsiteDataLoader +from src.processors import ArticleProcessor, MemberProcessor +from src.renderers import ( + HomepageRenderer, + ContactPageRenderer, + SupportPageRenderer, + JoinUsPageRenderer, + CurrentMembersRenderer, + AlumniMembersRenderer, + IndividualMemberRenderer, + ResearchFrontPageRenderer, + SubResearchFrontPageRenderer, + IndividualResearchPageRenderer, + NewsFrontPageRenderer, + IndividualNewsPageRenderer, + GalleryPageRenderer, + AssetCopier +) + +class SiteGenerator: + def __init__(self, log_level: str = "INFO"): + self.logger = setup_logging(log_level) + self.data = {} + self.logger.info("="*80) + self.logger.info("Website Generator Started") + self.logger.info("="*80) + + def load_articles(self) -> None: + loader = ArticleLoader(self.logger) + self.data['articles_df'] = loader.load() + + if self.data['articles_df'].empty: + raise 
ValueError("No articles loaded. Cannot proceed with site generation.") + + def load_members(self) -> None: + loader = MemberLoader(self.logger) + member_data = loader.load() + + self.data['member_info_df'] = member_data['info'] + self.data['experiences_df'] = member_data['experiences'] + self.data['education_df'] = member_data['education'] + self.data['projects_df'] = member_data['projects'] + self.data['awards_df'] = member_data['awards'] + self.data['outreach_df'] = member_data['outreach'] + self.data['social_links_df'] = member_data['social_links'] + self.data['documents_df'] = member_data['documents'] + + if self.data['member_info_df'].empty: + raise ValueError("No members loaded. Cannot proceed with site generation.") + + def load_website_data(self) -> None: + loader = WebsiteDataLoader(self.logger) + website_data = loader.load() + + self.data['general'] = website_data['general'] + self.data['homepage'] = website_data['homepage'] + self.data['contact'] = website_data['contact'] + self.data['research'] = website_data['research'] + self.data['support'] = website_data['support'] + self.data['opportunities'] = website_data['opportunities'] + self.data['role_hierarchy'] = website_data['role_hierarchy'] + + self.data['gallery_events'] = loader.load_gallery_events() + + def process_article_categories(self) -> None: + processor = ArticleProcessor(self.logger) + + news_df, research_df = processor.split_by_category(self.data['articles_df']) + self.data['news_df'] = news_df + self.data['research_df'] = research_df + + self.data['recent_content_df'] = processor.get_recent_content_by_category( + self.data['articles_df'] + ) + + def process_member_roles(self) -> None: + processor = MemberProcessor(self.logger) + + current_df, alumni_df = processor.process_member_status( + self.data['member_info_df'], + self.data['experiences_df'], + self.data['education_df'], + self.data['projects_df'] + ) + + self.data['current_members_df'] = processor.sort_by_role_hierarchy( + current_df, 
+ self.data['role_hierarchy'] + ) + self.data['alumni_members_df'] = alumni_df + + self.data['info_dict'] = self.data['member_info_df'].to_dict('index') + + self.data['education_grouped'] = processor.group_dataframe_by_id( + self.data['education_df'] + ) + self.data['experiences_grouped'] = processor.group_dataframe_by_id( + self.data['experiences_df'] + ) + self.data['projects_grouped'] = processor.group_dataframe_by_id( + self.data['projects_df'] + ) + self.data['awards_grouped'] = processor.group_dataframe_by_id( + self.data['awards_df'] + ) + self.data['outreach_grouped'] = processor.group_dataframe_by_id( + self.data['outreach_df'] + ) + + article_processor = ArticleProcessor(self.logger) + self.data['news_df'] = article_processor.process_news_content_links( + self.data['news_df'], + self.data['member_info_df'], + self.data['current_members_df'] + ) + + def render_homepage(self) -> None: + renderer = HomepageRenderer(self.logger) + renderer.render( + self.data['general'], + self.data['homepage'], + self.data['recent_content_df'] + ) + + def render_contact(self) -> None: + renderer = ContactPageRenderer(self.logger) + renderer.render(self.data['general'], self.data['contact']) + + def render_support(self) -> None: + renderer = SupportPageRenderer(self.logger) + renderer.render(self.data['general'], self.data['support']) + + def render_join_us(self) -> None: + renderer = JoinUsPageRenderer(self.logger) + renderer.render(self.data['general'], self.data['opportunities']) + + def render_member_pages(self) -> None: + current_renderer = CurrentMembersRenderer(self.logger) + current_renderer.render( + self.data['general'], + self.data['current_members_df'], + self.data['social_links_df'] + ) + + alumni_renderer = AlumniMembersRenderer(self.logger) + alumni_renderer.render( + self.data['general'], + self.data['alumni_members_df'] + ) + + individual_renderer = IndividualMemberRenderer(self.logger) + individual_renderer.render( + self.data['general'], + 
self.data['member_info_df'], + self.data['social_links_df'], + self.data['documents_df'], + self.data['education_grouped'], + self.data['experiences_grouped'], + self.data['projects_grouped'], + self.data['awards_grouped'], + self.data['outreach_grouped'], + self.data['articles_df'] + ) + + def render_research_pages(self) -> None: + front_renderer = ResearchFrontPageRenderer(self.logger) + front_renderer.render( + self.data['general'], + self.data['research_df'], + self.data['research'], + self.data['info_dict'] + ) + + sub_front_renderer = SubResearchFrontPageRenderer(self.logger) + sub_front_renderer.render( + self.data['general'], + self.data['research'], + self.data['research_df'], + self.data['info_dict'] + ) + + individual_renderer = IndividualResearchPageRenderer(self.logger) + individual_renderer.render( + self.data['general'], + self.data['research_df'], + self.data['info_dict'] + ) + + def render_news_pages(self) -> None: + front_renderer = NewsFrontPageRenderer(self.logger) + front_renderer.render( + self.data['general'], + self.data['news_df'], + self.data['info_dict'] + ) + + individual_renderer = IndividualNewsPageRenderer(self.logger) + individual_renderer.render( + self.data['general'], + self.data['news_df'], + self.data['info_dict'] + ) + + def render_gallery(self) -> None: + renderer = GalleryPageRenderer(self.logger) + renderer.render( + self.data['general'], + self.data['info_dict'], + self.data['gallery_events'] + ) + + def copy_assets(self) -> None: + copier = AssetCopier(self.logger) + copier.copy_assets() + + def run(self) -> None: + stages = [ + ("Load Articles", self.load_articles), + ("Load Member Data", self.load_members), + ("Load Website Configuration", self.load_website_data), + ("Process Article Categories", self.process_article_categories), + ("Process Member Roles", self.process_member_roles), + ("Render Homepage", self.render_homepage), + ("Render Contact Page", self.render_contact), + ("Render Support Page", 
class ArticleProcessor:
    """Split, rank and post-process the article table built by the loader."""

    def __init__(self, logger: logging.Logger = None):
        """Store *logger*, falling back to this module's logger."""
        self.logger = logger or logging.getLogger(__name__)

    def split_by_category(self, df: pd.DataFrame) -> tuple[pd.DataFrame, pd.DataFrame]:
        """Split articles into a news table and a research table.

        An article is news when its ``category`` is ``"News"`` or its
        ``tags`` list contains ``"news"``. Research is every article whose
        category is not ``"News"``, so an article tagged ``"news"`` under
        another category appears in both results.

        Args:
            df: Article table with ``category``, ``tags`` and ``date``.

        Returns:
            ``(news_df, research_df)``; news sorted newest first, research
            sorted by category and then newest first.
        """
        self.logger.info("Splitting articles by category")

        has_news_tag = df["tags"].apply(
            lambda x: "news" in x if isinstance(x, list) else False
        )
        news_df = df[
            (df["category"] == "News") | has_news_tag
        ].sort_values(by=["date"], ascending=[False])

        research_df = df[
            df["category"] != "News"
        ].sort_values(by=["category", "date"], ascending=[True, False])

        self.logger.info(
            f"Split into {len(news_df)} news articles and "
            f"{len(research_df)} research articles"
        )

        return news_df, research_df

    def get_recent_content_by_category(self, df: pd.DataFrame) -> pd.DataFrame:
        """Return a copy holding the newest article of each category."""
        self.logger.info("Getting most recent article per category")

        sorted_df = df.sort_values(
            by=["category", "date"],
            ascending=[True, False]
        )

        recent = sorted_df.groupby("category").head(1).copy()

        self.logger.info(f"Found {len(recent)} recent articles across categories")

        return recent

    def urlize_content(
        self,
        content: str,
        info_df: pd.DataFrame,
        current_members_df: pd.DataFrame
    ) -> str:
        """Replace ``[member_id]`` placeholders in *content* with names.

        A known id (present in ``info_df.index``) becomes that member's
        ``full_name``; an unknown id becomes the id with underscores turned
        into spaces and title-cased.

        Args:
            content: Text that may contain ``[word]`` placeholders.
            info_df: Member table indexed by id with a ``full_name`` column.
            current_members_df: Table of current members indexed by id.

        Returns:
            The text with every placeholder substituted.
        """
        def replace_id(match):
            member_id = match.group(1)

            if member_id in info_df.index:
                name = info_df.loc[member_id, 'full_name']

                if member_id in current_members_df.index:
                    # NOTE(review): this branch currently yields the same
                    # text as the one below; presumably current members were
                    # meant to be wrapped in a link -- confirm.
                    return (
                        f'{name}'
                    )
                else:
                    return name
            else:
                return member_id.replace('_', ' ').title()

        return re.sub(r'\[(\w+)\]', replace_id, content)

    def _link_paragraphs(
        self,
        content: dict,
        info_df: pd.DataFrame,
        current_members_df: pd.DataFrame
    ) -> dict:
        """Return a new content dict with member ids resolved in "para" entries."""
        return {
            key: (
                self.urlize_content(value, info_df, current_members_df)
                if "para" in key
                else value
            )
            for key, value in content.items()
        }

    def process_news_content_links(
        self,
        news_df: pd.DataFrame,
        info_df: pd.DataFrame,
        current_members_df: pd.DataFrame
    ) -> pd.DataFrame:
        """Resolve member placeholders in the paragraphs of every news item.

        ``DataFrame.copy`` does not copy the Python dicts stored in the
        ``content`` column, so editing them in place would also rewrite the
        dicts held by the caller's frames. A fresh dict is therefore built
        for every row and the input is left untouched.

        Args:
            news_df: News table whose ``content`` column holds dicts.
            info_df: Member table indexed by id with ``full_name``.
            current_members_df: Table of current members indexed by id.

        Returns:
            A copy of *news_df* with a rebuilt ``content`` column.
        """
        self.logger.info("Processing member links in news content")

        df_copy = news_df.copy()
        if df_copy.empty:
            return df_copy

        df_copy["content"] = [
            self._link_paragraphs(content, info_df, current_members_df)
            for content in df_copy["content"]
        ]

        return df_copy
Dict, Any + +from src.config import ( + GROUP_FILTER, + INSTITUTION_FILTER, + ROLE_MAP, + DEGREE_MAP +) + +class MemberProcessor: + def __init__(self, logger: logging.Logger = None): + self.logger = logger or logging.getLogger(__name__) + + def process_member_status( + self, + info_df: pd.DataFrame, + exp_df: pd.DataFrame, + edu_df: pd.DataFrame, + projects_df: pd.DataFrame + ) -> tuple[pd.DataFrame, pd.DataFrame]: + self.logger.info("Processing member current/alumni status") + + exp_processed = self._prepare_experiences(exp_df) + edu_processed = self._prepare_education(edu_df) + + merged_df = self._merge_education_experience(exp_processed, edu_processed) + merged_df = self._determine_current_status(merged_df) + + current_df = merged_df[merged_df['isCurrent']].copy() + alumni_df = merged_df[~merged_df['isCurrent']].copy() + + current_with_info = self._add_member_info( + current_df, + info_df, + projects_df + ) + alumni_with_info = self._add_alumni_info(alumni_df, info_df) + + self.logger.info( + f"Classified {len(current_with_info)} current members and " + f"{len(alumni_with_info)} alumni" + ) + + return current_with_info, alumni_with_info + + def _prepare_experiences(self, exp_df: pd.DataFrame) -> pd.DataFrame: + if exp_df.empty: + return pd.DataFrame() + + df = exp_df.copy() + df['end_date'] = pd.to_datetime(df['end_date'], format='%Y-%m-%d', errors='coerce') + df['start_date'] = pd.to_datetime(df['start_date'], format='%Y-%m-%d', errors='coerce') + df.fillna("", inplace=True) + + return df + + def _prepare_education(self, edu_df: pd.DataFrame) -> pd.DataFrame: + if edu_df.empty: + return pd.DataFrame() + + df = edu_df.copy() + df['end_date'] = pd.to_datetime(df['end_date'], format='%Y-%m-%d', errors='coerce') + df['start_date'] = pd.to_datetime(df['start_date'], format='%Y-%m-%d', errors='coerce') + + def get_most_recent(group): + sorted_group = group.sort_values( + by=['start_date', 'end_date'], + ascending=[False, True] + ) + return sorted_group.iloc[0:1] + + 
def _determine_current_status(self, merged_df: pd.DataFrame) -> pd.DataFrame:
    """Add ``isCurrent`` and ``current_role`` columns to the merged table.

    Per row:

    * Education at ``INSTITUTION_FILTER`` that has not ended: current unless
      the experience has an end date; role is the academic role.
    * Otherwise, experience whose group is in ``GROUP_FILTER`` and has not
      ended: current; role is the experience role.
    * Anything else: not current; role is the academic role, falling back
      to the experience role.

    The role is finally translated through ``ROLE_MAP``.

    After the outer merge a member lacking education or experience carries
    NaN in those columns. NaN is truthy, so roles are normalised to ``""``
    before the fallback is evaluated. End dates are coerced to
    ``Timestamp``/``NaT`` so a blank string is treated as "no end date".
    Results are collected positionally, so a repeated id cannot overwrite
    another row.

    Args:
        merged_df: Table carrying ``*_edu`` / ``*_exp`` suffixed columns.

    Returns:
        A copy with the two new columns, or an empty DataFrame when
        *merged_df* is empty.
    """
    if merged_df.empty:
        return pd.DataFrame()

    def as_text(value) -> str:
        return value if isinstance(value, str) else ""

    def as_timestamp(value):
        return pd.to_datetime(value, errors='coerce')

    df = merged_df.copy()
    now = datetime.now()

    current_flags = []
    roles = []

    for _, row in df.iterrows():
        institution_edu = row.get('institution_edu', '')
        end_date_edu = as_timestamp(row.get('end_date_edu', pd.NaT))
        academic_role = as_text(row.get('academic_role_edu', ''))

        group_exp = row.get('group_exp', '')
        end_date_exp = as_timestamp(row.get('end_date_exp', pd.NaT))
        role_exp = as_text(row.get('role_exp', ''))

        fallback_role = academic_role or role_exp

        if institution_edu == INSTITUTION_FILTER:
            if pd.isna(end_date_edu) or end_date_edu >= now:
                # Still enrolled; an experience end date overrides this.
                is_current = bool(pd.isna(end_date_exp))
                role = academic_role
            else:
                is_current = False
                role = fallback_role
        # NOTE(review): exact membership here, whereas the most-recent
        # experience is selected with a substring match -- confirm intent.
        elif group_exp in GROUP_FILTER:
            if pd.isna(end_date_exp) or end_date_exp >= now:
                is_current = True
                role = role_exp
            else:
                is_current = False
                role = fallback_role
        else:
            is_current = False
            role = fallback_role

        current_flags.append(is_current)
        roles.append(ROLE_MAP.get(role, role))

    df['isCurrent'] = current_flags
    df['current_role'] = roles

    return df
+ + return merged + + def sort_by_role_hierarchy( + self, + df: pd.DataFrame, + role_hierarchy: Dict[str, int] + ) -> pd.DataFrame: + if df.empty: + return df + + df_copy = df.copy() + df_copy['rank'] = df_copy['current_role'].map(role_hierarchy) + + missing_roles = df_copy[df_copy['rank'].isna()]['current_role'].unique() + if len(missing_roles) > 0: + self.logger.warning( + f"Roles not in hierarchy (will be sorted last): " + f"{', '.join(missing_roles)}" + ) + df_copy['rank'].fillna(999, inplace=True) + + df_sorted = df_copy.sort_values(by='rank') + df_sorted.drop(columns='rank', inplace=True) + + return df_sorted + + def group_dataframe_by_id(self, df: pd.DataFrame) -> Dict[str, Dict[str, Any]]: + if df.empty: + return {} + + df_filled = df.fillna("") + grouped = ( + df_filled.groupby("id") + .apply(lambda x: x.to_dict(orient="records")) + .reset_index(name="info") + .set_index("id") + .to_dict(orient="index") + ) + + return grouped diff --git a/src/renderers/__init__.py b/src/renderers/__init__.py new file mode 100644 index 0000000..6894a22 --- /dev/null +++ b/src/renderers/__init__.py @@ -0,0 +1,39 @@ +from .base import BasePageRenderer +from .simple_pages import ( + HomepageRenderer, + ContactPageRenderer, + SupportPageRenderer, + JoinUsPageRenderer +) +from .member_pages import ( + CurrentMembersRenderer, + AlumniMembersRenderer, + IndividualMemberRenderer +) +from .article_pages import ( + ResearchFrontPageRenderer, + SubResearchFrontPageRenderer, + IndividualResearchPageRenderer, + NewsFrontPageRenderer, + IndividualNewsPageRenderer +) +from .gallery_page import GalleryPageRenderer +from .assets import AssetCopier + +__all__ = [ + 'BasePageRenderer', + 'HomepageRenderer', + 'ContactPageRenderer', + 'SupportPageRenderer', + 'JoinUsPageRenderer', + 'CurrentMembersRenderer', + 'AlumniMembersRenderer', + 'IndividualMemberRenderer', + 'ResearchFrontPageRenderer', + 'SubResearchFrontPageRenderer', + 'IndividualResearchPageRenderer', + 'NewsFrontPageRenderer', + 
class SubResearchFrontPageRenderer(BasePageRenderer):
    """Render one landing page per research category (everything but "News")."""

    def render(self, general, research_data, research_content_df, info_dict) -> None:
        """Write ``sub_research/<category>.html`` for every non-news category.

        The category name is lower-cased and passed through ``page_link`` to
        form the file name. Each page receives the full content frame plus
        the category it should display.
        """
        self.logger.info("Rendering sub-research front pages")

        SUB_RESEARCH_PATH.mkdir(parents=True, exist_ok=True)

        not_news = research_content_df["category"] != "News"
        categories = research_content_df["category"][not_news].unique()

        # Everything except the category itself is identical for each page.
        shared_context = {
            "general": general,
            "research": research_data,
            "content": research_content_df,
            "current_members": info_dict,
        }

        for category in categories:
            page_name = page_link(category.lower())
            self.render_page(
                "sub_research_frontpage.html.j2",
                f"sub_research/{page_name}.html",
                category=category,
                **shared_context,
            )

        self.logger.info(f"Rendered {len(categories)} sub-research front pages")
class AssetCopier:
    """Copy the project's static ``assets`` directory into the hosting tree."""

    def __init__(self, logger: logging.Logger = None):
        """Resolve source/destination under ``BASE_DIR`` and ``HOSTING_PATH``."""
        self.logger = logger or logging.getLogger(__name__)
        self.source_assets = BASE_DIR / "assets"
        self.destination_assets = HOSTING_PATH / "assets"

    def copy_assets(self) -> None:
        """Recursively copy the assets, merging into an existing destination.

        Raises:
            FileNotFoundError: If the source assets directory does not exist.
        """
        source = self.source_assets
        destination = self.destination_assets

        self.logger.info("Copying assets to hosting directory")

        if source.exists():
            # dirs_exist_ok lets repeated site builds overwrite in place.
            shutil.copytree(source, destination, dirs_exist_ok=True)
            self.logger.info(f"Assets copied to {destination}")
            return

        raise FileNotFoundError(
            f"Assets directory not found: {source}. "
            f"Expected at: {source.absolute()}"
        )
" + f"Error: {e}" + ) from e + + @abstractmethod + def render(self, **kwargs) -> None: + pass diff --git a/src/renderers/gallery_page.py b/src/renderers/gallery_page.py new file mode 100644 index 0000000..9b08dc4 --- /dev/null +++ b/src/renderers/gallery_page.py @@ -0,0 +1,68 @@ +import shutil +import pandas as pd +from pathlib import Path +from PIL import Image +from typing import List, Dict, Any + +from .base import BasePageRenderer +from src.config import HOSTING_PATH, GALLERY_CONTENT_SOURCE + +class GalleryPageRenderer(BasePageRenderer): + def render(self, general, info_dict, gallery_events: List[tuple]) -> None: + self.logger.info("Rendering gallery page") + + processed_events = [] + + for event_file, event_data in gallery_events: + processed_event = self._process_gallery_event(event_file, event_data) + processed_events.append(processed_event) + + if "date" in processed_events[0] if processed_events else {}: + for event in processed_events: + event["date"] = pd.to_datetime(event["date"]) + + self.render_page( + "gallery.html.j2", + "Gallery.html", + general=general, + member_data=info_dict, + events=processed_events + ) + + self.logger.info(f"Rendered gallery page with {len(processed_events)} events") + + def _process_gallery_event(self, event_file: Path, event_data: Dict[str, Any]) -> Dict[str, Any]: + event_id = event_data.get("event_id", "unknown_event") + + dest_image_dir = ( + HOSTING_PATH / "website_files" / "images" / + "gallery" / event_id / "media" / "images" + ) + dest_image_dir.mkdir(parents=True, exist_ok=True) + + source_image_dir = event_file.parent / "media" / "images" + if source_image_dir.exists(): + shutil.copytree(source_image_dir, dest_image_dir, dirs_exist_ok=True) + else: + self.logger.warning( + f"No images directory found for event {event_id} at {source_image_dir}" + ) + + for image in event_data.get("images", []): + image_path = GALLERY_CONTENT_SOURCE / event_id / image["image_path"] + + if not image_path.exists(): + 
class IndividualMemberRenderer(BasePageRenderer):
    """Render one profile page per member."""

    def render(
        self,
        general,
        info_df: pd.DataFrame,
        social_links_df: pd.DataFrame,
        documents_df: pd.DataFrame,
        education_grouped: Dict[str, Any],
        experience_grouped: Dict[str, Any],
        projects_grouped: Dict[str, Any],
        awards_grouped: Dict[str, Any],
        outreach_grouped: Dict[str, Any],
        article_content_df: pd.DataFrame
    ) -> None:
        """Write ``members/<id>/<id>.html`` for every row of *info_df*.

        Each page receives the member's own row as ``member_data`` together
        with the complete lookup tables (socials, documents, grouped
        education/experience/projects/awards/outreach, article content);
        the template is handed the member id to pick its own entries.
        """
        member_count = len(info_df)
        self.logger.info(f"Rendering {member_count} individual member pages")

        def as_index_dict(frame: pd.DataFrame) -> Dict[Any, Any]:
            # An empty frame becomes an empty mapping.
            return {} if frame.empty else frame.to_dict("index")

        # Context that is the same for every member page.
        shared_context = {
            "general": general,
            "socials": as_index_dict(social_links_df),
            "documents": as_index_dict(documents_df),
            "education": education_grouped,
            "experience": experience_grouped,
            "projects": projects_grouped,
            "awards": awards_grouped,
            "outreach": outreach_grouped,
            "section_headings": INDIVIDUAL_MEMBER_SECTION_MAP,
            "content": as_index_dict(article_content_df),
        }

        for person_id, person_data in info_df.iterrows():
            self.render_page(
                "individual_person.html.j2",
                f"members/{person_id}/{person_id}.html",
                member_id=person_id,
                member_data=person_data,
                **shared_context,
            )

        self.logger.info(f"Rendered {member_count} member pages")
def set_new_image_path(source_dir: Path, old_image_path: Path, destination_dir: Path) -> str:
    """Copy an article image into the site tree and return its site-relative path.

    The image is looked up by file name in ``<source_dir>/../media/images``
    (only the name of *old_image_path* is used) and copied, with metadata,
    to ``destination_dir/<name>``.

    Args:
        source_dir: Directory whose parent contains ``media/images``.
        old_image_path: Original image reference; only its file name is used.
        destination_dir: Target directory; must lie below a path component
            named ``website_files``.

    Returns:
        The destination path starting at the ``website_files`` component,
        with forward slashes so it can be used as a web path on any OS.

    Raises:
        FileNotFoundError: If the source image does not exist.
        ValueError: If *destination_dir* is not inside ``website_files``.
            Raised before anything is copied.
    """
    import shutil  # local import: this module only imports Path at file level

    image_name = Path(old_image_path).name
    article_image_path = source_dir.parent / "media" / "images"
    image_source = article_image_path / image_name
    image_destination = destination_dir / image_name

    if not image_source.exists():
        raise FileNotFoundError(
            f"Source image not found: {image_source}. "
            f"Expected in {article_image_path}. "
            f"Original path reference: {old_image_path}"
        )

    # Validate the destination before touching the file system so a bad
    # destination never leaves a stray copy behind.
    destination_parts = image_destination.parts
    if "website_files" not in destination_parts:
        raise ValueError(
            f"Destination '{image_destination}' is not inside a "
            f"'website_files' directory"
        )
    website_files_index = destination_parts.index("website_files")

    image_destination.parent.mkdir(parents=True, exist_ok=True)
    shutil.copy2(image_source, image_destination)

    new_image_path = Path(*destination_parts[website_files_index:])

    return new_image_path.as_posix()