diff --git a/.github/workflows/update_website.yml b/.github/workflows/update_website.yml index f0aed89..6aed176 100644 --- a/.github/workflows/update_website.yml +++ b/.github/workflows/update_website.yml @@ -55,6 +55,10 @@ jobs: - name: Run Notebooks run: | + jupyter nbconvert --to python members.ipynb + python3 members.py + jupyter nbconvert --to python articles.ipynb + python3 articles.py jupyter nbconvert --to python create_htmls.ipynb python3 create_htmls.py working-directory: ./groupwebsite_generator/notebooks diff --git a/notebooks/articles.ipynb b/notebooks/articles.ipynb new file mode 100644 index 0000000..08ede56 --- /dev/null +++ b/notebooks/articles.ipynb @@ -0,0 +1,1133 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "891e79e2-0a9e-4744-af34-0c53ec563a49", + "metadata": {}, + "outputs": [], + "source": [ + "import json\n", + "import pandas as pd\n", + "from pathlib import Path\n", + "import shutil\n", + "import re\n", + "from datetime import datetime\n", + "\n", + "# Constants\n", + "GROUP_DATA_DIR = Path(\"../../group-data\")\n", + "ARTICLE_DIR_PATH = Path(\"../../research_news/articles\")\n", + "HOSTING_PATH = GROUP_DATA_DIR.parent / \"kerzendorf-lab.github.io\"\n", + "ARTICLE_IMAGE_DESTINATION_DIR = HOSTING_PATH / \"website_files\" / \"images\" / \"article_content\"\n", + "\n", + "DEFAULT_COVER_IMAGE_HEIGHT = \"330px\"\n", + "DEFAULT_COVER_IMAGE_WIDTH = \"520px\"" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "652c31c8-4cbf-4b2e-bf6f-e8a396c85c2f", + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "# Utility function\n", + "def urlize_content(content_text, members_df, current_members_df):\n", + " \"\"\"Replace [member_id] with linked names\"\"\"\n", + " def replace_id(match):\n", + " id_to_fetch = match.group(1)\n", + " if id_to_fetch in members_df.index:\n", + " name = members_df.loc[id_to_fetch, 'full_name']\n", + " if id_to_fetch in current_members_df.index:\n", + " return f'{name}'\n", + " return name\n", + " return id_to_fetch.replace('_', ' ').title()\n", + "\n", + " return re.sub(r'\\[(\\w+)\\]', replace_id, content_text)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "9bf8a6eb-93c8-4831-a71d-d817b6484731", + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "class ArticleDataLoader:\n", + " def __init__(self, article_dir: Path, image_dest_dir: Path, members_df: pd.DataFrame, current_members_df: pd.DataFrame, platform_filter: str = \"kg\"):\n", + " self.article_dir = article_dir\n", + " self.image_dest_dir = image_dest_dir\n", + " self.members_df = members_df\n", + " self.current_members_df = current_members_df\n", + " self.platform_filter = platform_filter\n", + " self.category_replacements = {\"Overview\": \"Computational Metascience\"} if platform_filter == \"kg\" else {}\n", + "\n", + " def _copy_image(self, source_dir, image_path_str):\n", + " \"\"\"Copy image from article media to destination, return new path\"\"\"\n", + " # Skip URLs\n", + " if image_path_str.startswith(('http://', 'https://')):\n", + " return image_path_str\n", + "\n", + " image_name = Path(image_path_str).name\n", + " source = source_dir.parent / \"media\" / \"images\" / image_name\n", + " dest = self.image_dest_dir / image_name\n", + " dest.parent.mkdir(parents=True, exist_ok=True)\n", + " shutil.copy2(source, dest)\n", + " return f\"website_files/images/article_content/{image_name}\"\n", + "\n", + " def _process_images(self, article, source_dir):\n", + " \"\"\"Process all images in article (cover + content)\"\"\"\n", + " if article[\"cover_image\"]:\n", + " article[\"cover_image\"] = self._copy_image(source_dir, article[\"cover_image\"])\n", + "\n", + " for key, val in article[\"content\"].items():\n", + " if \"img\" in key and val:\n", + " article[\"content\"][key] = self._copy_image(source_dir, val)\n", + "\n", + " def split_news_research(self):\n", + " \"\"\"Split articles into news and research dataframes\"\"\"\n", + " is_news = (\n", + " (self.articles_df[\"category\"] == \"News\") |\n", + " self.articles_df[\"tags\"].apply(lambda x: \"news\" in x if isinstance(x, list) else False)\n", + " )\n", + "\n", + " self.news_df = self.articles_df[is_news].sort_values(\"date\", ascending=False)\n", + " self.research_df = self.articles_df[~is_news].sort_values([\"category\", \"date\"], ascending=[True, False])\n", + "\n", + " def load_all_articles(self):\n", + " \"\"\"Load articles filtered by platform and date\"\"\"\n", + " articles = []\n", + " today = datetime.now()\n", + "\n", + " for info_json in self.article_dir.rglob('info.json'):\n", + " article = json.loads(info_json.read_text())\n", + "\n", + " if self.platform_filter not in article[\"platforms\"]:\n", + " continue\n", + "\n", + " article_date = pd.to_datetime(article[\"date\"], format=\"%m-%d-%Y\")\n", + " if article_date > today:\n", + " continue\n", + "\n", + " article[\"date\"] = article_date\n", + " self._process_images(article, info_json)\n", + "\n", + " if article[\"category\"] == \"News\" or (\"news\" in article[\"tags\"]):\n", + " for key, val in article[\"content\"].items():\n", + " if \"para\" in key:\n", + " article[\"content\"][key] = urlize_content(val, self.members_df, self.current_members_df)\n", + "\n", + " articles.append(article)\n", + "\n", + " self.articles_df = pd.DataFrame(articles).set_index('article_id')\n", + " self.articles_df[\"cover_image_height\"] = self.articles_df[\"cover_image_height\"].fillna(DEFAULT_COVER_IMAGE_HEIGHT).replace(\"\", DEFAULT_COVER_IMAGE_HEIGHT)\n", + " self.articles_df[\"cover_image_width\"] = self.articles_df[\"cover_image_width\"].fillna(DEFAULT_COVER_IMAGE_WIDTH).replace(\"\", DEFAULT_COVER_IMAGE_WIDTH)\n", + " self.articles_df[\"category\"] = self.articles_df[\"category\"].replace(self.category_replacements)\n", + " self.articles_df['image_name'] = self.articles_df['cover_image'].apply(lambda x: Path(x).name)\n", + "\n", + " self.split_news_research()\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "b2592170-940e-45e6-b940-166b7ccc30bb", + "metadata": {}, + "outputs": [], + "source": [ + "members_df = pd.read_csv(\"members.csv\", index_col=0)\n", + "current_members_df = pd.read_csv(\"current_members.csv\", index_col=0)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "d316e23c-bbb0-4b55-ac7e-baf2a03f02aa", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
first_namelast_nameimage_pathcover_image_pathintroductionfull_namegithub_handlelinkedinemailnick_namewebsiteorcidtwitter_handlelinkedin_handleadsacademic_rolecurrent_project_title
id
gracie_tvrdikGracieTvrdikmedia/images/gracie.jpgmedia/images/cover.jpgI am an undergraduate student at Bowling Green...Gracie Tvrdikgracietvwww.linkedin.com/in/grayson-tvrdik-34b7872a7graysontvrdik1@gmail.comNaNNaNNaNNaNNaNNaNREU studentNaN
josh_shieldsJoshuaShieldsmedia/images/josh_photo.jpgmedia/images/cover.jpgJosh is a senior graduate student in astrophys...Josh ShieldsjvshieldsNaNshield90@msu.eduJoshhttps://jvshields.github.io/0000-0002-1560-5286NaNNaNNaNGraduate StudentNaN
anirban_duttaAnirbanDuttamedia/images/anirban_dutta.jpgmedia/images/cover.jpgHi there! This is Anirban.Anirban DuttaKnights-TemplarsNaNanirbaniamdutta@gmail.comNaNhttps://sites.google.com/view/anirbaniamdutta0000-0002-7708-3831Anirban29Duttaanirban-dutta-6a0377238NaNPostdoctoral ResearcherNaN
\n", + "
" + ], + "text/plain": [ + " first_name last_name image_path \\\n", + "id \n", + "gracie_tvrdik Gracie Tvrdik media/images/gracie.jpg \n", + "josh_shields Joshua Shields media/images/josh_photo.jpg \n", + "anirban_dutta Anirban Dutta media/images/anirban_dutta.jpg \n", + "\n", + " cover_image_path \\\n", + "id \n", + "gracie_tvrdik media/images/cover.jpg \n", + "josh_shields media/images/cover.jpg \n", + "anirban_dutta media/images/cover.jpg \n", + "\n", + " introduction \\\n", + "id \n", + "gracie_tvrdik I am an undergraduate student at Bowling Green... \n", + "josh_shields Josh is a senior graduate student in astrophys... \n", + "anirban_dutta Hi there! This is Anirban. \n", + "\n", + " full_name github_handle \\\n", + "id \n", + "gracie_tvrdik Gracie Tvrdik gracietv \n", + "josh_shields Josh Shields jvshields \n", + "anirban_dutta Anirban Dutta Knights-Templars \n", + "\n", + " linkedin \\\n", + "id \n", + "gracie_tvrdik www.linkedin.com/in/grayson-tvrdik-34b7872a7 \n", + "josh_shields NaN \n", + "anirban_dutta NaN \n", + "\n", + " email nick_name \\\n", + "id \n", + "gracie_tvrdik graysontvrdik1@gmail.com NaN \n", + "josh_shields shield90@msu.edu Josh \n", + "anirban_dutta anirbaniamdutta@gmail.com NaN \n", + "\n", + " website \\\n", + "id \n", + "gracie_tvrdik NaN \n", + "josh_shields https://jvshields.github.io/ \n", + "anirban_dutta https://sites.google.com/view/anirbaniamdutta \n", + "\n", + " orcid twitter_handle linkedin_handle \\\n", + "id \n", + "gracie_tvrdik NaN NaN NaN \n", + "josh_shields 0000-0002-1560-5286 NaN NaN \n", + "anirban_dutta 0000-0002-7708-3831 Anirban29Dutta anirban-dutta-6a0377238 \n", + "\n", + " ads academic_role current_project_title \n", + "id \n", + "gracie_tvrdik NaN REU student NaN \n", + "josh_shields NaN Graduate Student NaN \n", + "anirban_dutta NaN Postdoctoral Researcher NaN " + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "members_df.head(3)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "3669510d-d4a8-4afe-b3e0-1689c53d2cfe", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
current_rolefirst_namelast_nameimage_pathcover_image_pathintroductionfull_namegithub_handlelinkedinemailnick_namewebsiteorcidtwitter_handlelinkedin_handleadscurrent_project_title
wolfgang_kerzendorfProfessorWolfgangKerzendorfmedia/images/wolfgang.jpgmedia/images/cover.jpgI am an astrophysicist deeply intrigued by nuc...Wolfgang KerzendorfwkerzendorfNaNwkerzend@msu.eduNaNhttps://wolfgangkerzendorf.com0000-0002-0479-7235wkerzendorfwolfgang-kerzendorf-598a0466NaNSupernovae & Computational Metaresearch
connor_mcclellanPostdoctoral ResearcherConnorMcClellanmedia/images/profile.pngmedia/images/cover.jpgI joined the TARDIS group in 2025 as a post-do...Connor McClellanNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
jing_luPostdoctoral ResearcherJingLumedia/images/jing.jpgmedia/images/cover.jpgI will be joining TARDIS group in summer 2023 ...Jing LuDeerWhaleNaNlujingeve158@gmail.comNaNNaN0000-0002-3900-1452NaNjing-lu-bb89211bbNaNExplore the hidden Helium in Type Ic Supernovae
\n", + "
" + ], + "text/plain": [ + " current_role first_name last_name \\\n", + "wolfgang_kerzendorf Professor Wolfgang Kerzendorf \n", + "connor_mcclellan Postdoctoral Researcher Connor McClellan \n", + "jing_lu Postdoctoral Researcher Jing Lu \n", + "\n", + " image_path cover_image_path \\\n", + "wolfgang_kerzendorf media/images/wolfgang.jpg media/images/cover.jpg \n", + "connor_mcclellan media/images/profile.png media/images/cover.jpg \n", + "jing_lu media/images/jing.jpg media/images/cover.jpg \n", + "\n", + " introduction \\\n", + "wolfgang_kerzendorf I am an astrophysicist deeply intrigued by nuc... \n", + "connor_mcclellan I joined the TARDIS group in 2025 as a post-do... \n", + "jing_lu I will be joining TARDIS group in summer 2023 ... \n", + "\n", + " full_name github_handle linkedin \\\n", + "wolfgang_kerzendorf Wolfgang Kerzendorf wkerzendorf NaN \n", + "connor_mcclellan Connor McClellan NaN NaN \n", + "jing_lu Jing Lu DeerWhale NaN \n", + "\n", + " email nick_name \\\n", + "wolfgang_kerzendorf wkerzend@msu.edu NaN \n", + "connor_mcclellan NaN NaN \n", + "jing_lu lujingeve158@gmail.com NaN \n", + "\n", + " website orcid \\\n", + "wolfgang_kerzendorf https://wolfgangkerzendorf.com 0000-0002-0479-7235 \n", + "connor_mcclellan NaN NaN \n", + "jing_lu NaN 0000-0002-3900-1452 \n", + "\n", + " twitter_handle linkedin_handle ads \\\n", + "wolfgang_kerzendorf wkerzendorf wolfgang-kerzendorf-598a0466 NaN \n", + "connor_mcclellan NaN NaN NaN \n", + "jing_lu NaN jing-lu-bb89211bb NaN \n", + "\n", + " current_project_title \n", + "wolfgang_kerzendorf Supernovae & Computational Metaresearch \n", + "connor_mcclellan NaN \n", + "jing_lu Explore the hidden Helium in Type Ic Supernovae " + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "current_members_df.head(3)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "ffc002c3-a837-4d83-a455-cf468fa0611a", + "metadata": {}, + "outputs": [], + "source": [ + "article_loader = ArticleDataLoader(\n", + " ARTICLE_DIR_PATH,\n", + " ARTICLE_IMAGE_DESTINATION_DIR,\n", + " members_df,\n", + " current_members_df\n", + ")\n", + "article_loader.load_all_articles()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "13ce104f-a74d-4d2c-bb3a-3a057c877403", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
titleauthor_iddisplaydatecategorytagsplatformsshort_descriptioncover_imagecontentpeople_involved_idslinkstwittercover_image_heightcover_image_widthresearch_idimage_name
article_id
geonintern_international_benUnveiling Earth's Secrets with AI: Our Undergr...benjamin_mellonTrue2024-03-18News[undergraduate, internship][kg, dti]This upcoming August, Benjamin Mellon and fell...website_files/images/article_content/2BCAFnorw...{'1_para': 'This upcoming August, Benjamin Mel...[benjamin_mellon]{}None330px520pxNaN2BCAFnorway_geo.jpg
reu_student_announcementSummer REU Students Join Kerzendorf Grouprichard_dowTrue2023-06-23News[New Team Member, undergraduate][kg]Two undergraduate research assistants have joi...website_files/images/article_content/nsflogo.jpg{'1_para': 'Tripp Dow and Iliomar Rodriguez Ra...[richard_dow, iliomar_rodriguez_ramos]{}None330px520pxNaNnsflogo.jpg
prur_conferencePeer Review Under Review - Workshop at Europea...vicente_amadoTrue2023-02-12News[Metascience, Conference][dti, kg]DeepThought Initiative and collaborators organ...website_files/images/article_content/img_PRUR.png{'1_para': 'Wolfgang Kerzendorf and collaborat...[vicente_amado, wolfgang_kerzendorf]{'NASA ADS': 'https://ui.adsabs.harvard.edu/ab...None330px520pxNaNimg_PRUR.png
\n", + "
" + ], + "text/plain": [ + " title \\\n", + "article_id \n", + "geonintern_international_ben Unveiling Earth's Secrets with AI: Our Undergr... \n", + "reu_student_announcement Summer REU Students Join Kerzendorf Group \n", + "prur_conference Peer Review Under Review - Workshop at Europea... \n", + "\n", + " author_id display date category \\\n", + "article_id \n", + "geonintern_international_ben benjamin_mellon True 2024-03-18 News \n", + "reu_student_announcement richard_dow True 2023-06-23 News \n", + "prur_conference vicente_amado True 2023-02-12 News \n", + "\n", + " tags platforms \\\n", + "article_id \n", + "geonintern_international_ben [undergraduate, internship] [kg, dti] \n", + "reu_student_announcement [New Team Member, undergraduate] [kg] \n", + "prur_conference [Metascience, Conference] [dti, kg] \n", + "\n", + " short_description \\\n", + "article_id \n", + "geonintern_international_ben This upcoming August, Benjamin Mellon and fell... \n", + "reu_student_announcement Two undergraduate research assistants have joi... \n", + "prur_conference DeepThought Initiative and collaborators organ... \n", + "\n", + " cover_image \\\n", + "article_id \n", + "geonintern_international_ben website_files/images/article_content/2BCAFnorw... \n", + "reu_student_announcement website_files/images/article_content/nsflogo.jpg \n", + "prur_conference website_files/images/article_content/img_PRUR.png \n", + "\n", + " content \\\n", + "article_id \n", + "geonintern_international_ben {'1_para': 'This upcoming August, Benjamin Mel... \n", + "reu_student_announcement {'1_para': 'Tripp Dow and Iliomar Rodriguez Ra... \n", + "prur_conference {'1_para': 'Wolfgang Kerzendorf and collaborat... \n", + "\n", + " people_involved_ids \\\n", + "article_id \n", + "geonintern_international_ben [benjamin_mellon] \n", + "reu_student_announcement [richard_dow, iliomar_rodriguez_ramos] \n", + "prur_conference [vicente_amado, wolfgang_kerzendorf] \n", + "\n", + " links \\\n", + "article_id \n", + "geonintern_international_ben {} \n", + "reu_student_announcement {} \n", + "prur_conference {'NASA ADS': 'https://ui.adsabs.harvard.edu/ab... \n", + "\n", + " twitter cover_image_height cover_image_width \\\n", + "article_id \n", + "geonintern_international_ben None 330px 520px \n", + "reu_student_announcement None 330px 520px \n", + "prur_conference None 330px 520px \n", + "\n", + " research_id image_name \n", + "article_id \n", + "geonintern_international_ben NaN 2BCAFnorway_geo.jpg \n", + "reu_student_announcement NaN nsflogo.jpg \n", + "prur_conference NaN img_PRUR.png " + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "article_loader.articles_df.head(3)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "5b1f488d-f94c-418c-a31c-7d6d53091cfd", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
titleauthor_iddisplaydatecategorytagsplatformsshort_descriptioncover_imagecontentpeople_involved_idslinkstwittercover_image_heightcover_image_widthresearch_idimage_name
article_id
stardis_releaseIntroducing STARDIS - An Open and Modular Stel...josh_shieldsTrue2025-08-28News[paper, Astrophysics][kg, tardis]We introduce STARDIS, a new open-source Python...website_files/images/article_content/halpha_so...{'1_para': 'We are excited to announce the rel...[josh_shields, wolfgang_kerzendorf, ryan_grone...{'manuscript': 'https://iopscience.iop.org/art...330px520pxNaNhalpha_sol.png
tardis_summer_school_25TARDIS Summer School 2025: Explosive Transient...josh_shieldsTrue2025-08-15News[Education, Summer School, TARDIS, Radiative T...[kg, tardis]We hosted a week-long summer school where 14 p...website_files/images/article_content/cachedIma...{'1_para': 'We successfully hosted the TARDIS ...[josh_shields, wolfgang_kerzendorf, jing_lu, a...{}330px520pxNaNcachedImage.PNG
thesis_defense_deekshaDeeksha Mohanty Defends Master's Thesis on Enh...deeksha_mohantyTrue2025-07-07News[Master's Thesis, talk][kg, tardis]Deeksha Mohanty successfully defended her mast...website_files/images/article_content/defense_b...{'1_para': 'We congratulate Deeksha Mohanty on...[deeksha_mohanty]{}None390px520pxNaNdefense_before.jpeg
\n", + "
" + ], + "text/plain": [ + " title \\\n", + "article_id \n", + "stardis_release Introducing STARDIS - An Open and Modular Stel... \n", + "tardis_summer_school_25 TARDIS Summer School 2025: Explosive Transient... \n", + "thesis_defense_deeksha Deeksha Mohanty Defends Master's Thesis on Enh... \n", + "\n", + " author_id display date category \\\n", + "article_id \n", + "stardis_release josh_shields True 2025-08-28 News \n", + "tardis_summer_school_25 josh_shields True 2025-08-15 News \n", + "thesis_defense_deeksha deeksha_mohanty True 2025-07-07 News \n", + "\n", + " tags \\\n", + "article_id \n", + "stardis_release [paper, Astrophysics] \n", + "tardis_summer_school_25 [Education, Summer School, TARDIS, Radiative T... \n", + "thesis_defense_deeksha [Master's Thesis, talk] \n", + "\n", + " platforms \\\n", + "article_id \n", + "stardis_release [kg, tardis] \n", + "tardis_summer_school_25 [kg, tardis] \n", + "thesis_defense_deeksha [kg, tardis] \n", + "\n", + " short_description \\\n", + "article_id \n", + "stardis_release We introduce STARDIS, a new open-source Python... \n", + "tardis_summer_school_25 We hosted a week-long summer school where 14 p... \n", + "thesis_defense_deeksha Deeksha Mohanty successfully defended her mast... \n", + "\n", + " cover_image \\\n", + "article_id \n", + "stardis_release website_files/images/article_content/halpha_so... \n", + "tardis_summer_school_25 website_files/images/article_content/cachedIma... \n", + "thesis_defense_deeksha website_files/images/article_content/defense_b... \n", + "\n", + " content \\\n", + "article_id \n", + "stardis_release {'1_para': 'We are excited to announce the rel... \n", + "tardis_summer_school_25 {'1_para': 'We successfully hosted the TARDIS ... \n", + "thesis_defense_deeksha {'1_para': 'We congratulate Deeksha Mohanty on... \n", + "\n", + " people_involved_ids \\\n", + "article_id \n", + "stardis_release [josh_shields, wolfgang_kerzendorf, ryan_grone... \n", + "tardis_summer_school_25 [josh_shields, wolfgang_kerzendorf, jing_lu, a... \n", + "thesis_defense_deeksha [deeksha_mohanty] \n", + "\n", + " links \\\n", + "article_id \n", + "stardis_release {'manuscript': 'https://iopscience.iop.org/art... \n", + "tardis_summer_school_25 {} \n", + "thesis_defense_deeksha {} \n", + "\n", + " twitter cover_image_height cover_image_width \\\n", + "article_id \n", + "stardis_release 330px 520px \n", + "tardis_summer_school_25 330px 520px \n", + "thesis_defense_deeksha None 390px 520px \n", + "\n", + " research_id image_name \n", + "article_id \n", + "stardis_release NaN halpha_sol.png \n", + "tardis_summer_school_25 NaN cachedImage.PNG \n", + "thesis_defense_deeksha NaN defense_before.jpeg " + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "article_loader.news_df.head(3)" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "08700e38-47ff-49d6-9354-34d9d1146644", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
titleauthor_iddisplaydatecategorytagsplatformsshort_descriptioncover_imagecontentpeople_involved_idslinkstwittercover_image_heightcover_image_widthresearch_idimage_name
article_id
midsure22_poster_beaMIDSURE 2022bea_luTrue2022-07-22Computational Metascience[research][kg, dti]Poster presentation at the Mid-Michigan Sympos...website_files/images/article_content/bea_midsu...{'1_para': 'Abstract: Interdisciplinary scient...[bea_lu, vicente_amado, wolfgang_kerzendorf]{}None330px520pxNaNbea_midsure_poster.jpg
uuraf21_poster_vicenteMSU UURAF 2021vicente_amadoTrue2021-04-19Computational Metascience[research][kg, dti]Poster presentation for MSU's University Under...website_files/images/article_content/MAST_Post...{'1_para': 'Abstract: The modern scientific co...[vicente_amado, wolfgang_kerzendorf, jack_o_br...{}None330px520pxNaNMAST_Poster.jpg
\n", + "
" + ], + "text/plain": [ + " title author_id display date \\\n", + "article_id \n", + "midsure22_poster_bea MIDSURE 2022 bea_lu True 2022-07-22 \n", + "uuraf21_poster_vicente MSU UURAF 2021 vicente_amado True 2021-04-19 \n", + "\n", + " category tags platforms \\\n", + "article_id \n", + "midsure22_poster_bea Computational Metascience [research] [kg, dti] \n", + "uuraf21_poster_vicente Computational Metascience [research] [kg, dti] \n", + "\n", + " short_description \\\n", + "article_id \n", + "midsure22_poster_bea Poster presentation at the Mid-Michigan Sympos... \n", + "uuraf21_poster_vicente Poster presentation for MSU's University Under... \n", + "\n", + " cover_image \\\n", + "article_id \n", + "midsure22_poster_bea website_files/images/article_content/bea_midsu... \n", + "uuraf21_poster_vicente website_files/images/article_content/MAST_Post... \n", + "\n", + " content \\\n", + "article_id \n", + "midsure22_poster_bea {'1_para': 'Abstract: Interdisciplinary scient... \n", + "uuraf21_poster_vicente {'1_para': 'Abstract: The modern scientific co... \n", + "\n", + " people_involved_ids \\\n", + "article_id \n", + "midsure22_poster_bea [bea_lu, vicente_amado, wolfgang_kerzendorf] \n", + "uuraf21_poster_vicente [vicente_amado, wolfgang_kerzendorf, jack_o_br... \n", + "\n", + " links twitter cover_image_height cover_image_width \\\n", + "article_id \n", + "midsure22_poster_bea {} None 330px 520px \n", + "uuraf21_poster_vicente {} None 330px 520px \n", + "\n", + " research_id image_name \n", + "article_id \n", + "midsure22_poster_bea NaN bea_midsure_poster.jpg \n", + "uuraf21_poster_vicente NaN MAST_Poster.jpg " + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "article_loader.research_df" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "6c80e21f-b016-46ce-9242-453e90ad192a", + "metadata": {}, + "outputs": [], + "source": [ + "article_loader.articles_df.to_csv(\"articles.csv\")\n", + "article_loader.news_df.to_csv(\"news.csv\")\n", + "article_loader.research_df.to_csv(\"research.csv\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "29c2640e-2024-4ac8-8251-ba66450f4359", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.13.5" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/notebooks/create_htmls.ipynb b/notebooks/create_htmls.ipynb index 355848f..3775210 100644 --- a/notebooks/create_htmls.ipynb +++ b/notebooks/create_htmls.ipynb @@ -1,1500 +1,480 @@ { "cells": [ - { - "attachments": {}, - "cell_type": "markdown", - "metadata": { - "jp-MarkdownHeadingCollapsed": true - }, - "source": [ - "### This notebook consist of code for creating the html files for the website each time data is updated." - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Set-up" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Importing classes" - ] - }, { "cell_type": "code", - "execution_count": null, - "metadata": { - "ExecuteTime": { - "end_time": "2023-11-21T07:07:36.145701234Z", - "start_time": "2023-11-21T07:07:35.993568325Z" - } - }, + "execution_count": 1, + "id": "788cfd21-41e7-4af6-9c39-61bc422e98d5", + "metadata": {}, "outputs": [], "source": [ + "import ast\n", "import json\n", "import pandas as pd\n", - "from jinja2 import Environment, FileSystemLoader\n", "from pathlib import Path\n", "import shutil\n", - "from datetime import datetime, date\n", - "import numpy as np\n", - "from PIL import Image" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Defining paths" + "from PIL import Image\n", + "from jinja2 import Environment, FileSystemLoader\n" ] }, { "cell_type": "code", - "execution_count": null, - "metadata": { - "ExecuteTime": { - "end_time": "2023-11-21T07:07:36.276371007Z", - "start_time": "2023-11-21T07:07:36.010519700Z" - } - }, + "execution_count": 2, + "id": "25047e15-0a70-4d44-921a-edbda7e3b938", + "metadata": {}, "outputs": [], "source": [ + "# Constants\n", + "CSV_DIR_PATH = Path(\".\")\n", "GROUP_DATA_DIR = Path(\"../../group-data\")\n", + "HOSTING_PATH = GROUP_DATA_DIR.parent / \"kerzendorf-lab.github.io\"\n", "TEMPLATE_DIR_PATH = GROUP_DATA_DIR.parent / \"groupwebsite_generator\" / \"templates\"\n", "WEBSITE_DATA_PATH = GROUP_DATA_DIR / \"website_data/\"\n", - "HOSTING_PATH = GROUP_DATA_DIR.parent / \"kerzendorf-lab.github.io\"\n", - "ARTICLE_DIR_PATH = Path(\"../../research_news/articles\")\n", - "ARTICLE_IMAGE_DESTINATION_DIR = (HOSTING_PATH / \"website_files\" / \"images\" / \"article_content\")\n", - "MEMBERS_DIR_PATH = GROUP_DATA_DIR / \"members/\"\n", + "GALLERY_CONTENT_SOURCE = WEBSITE_DATA_PATH / \"content\" / \"gallery\"\n", + "SOURCE_ASSETS = GROUP_DATA_DIR.parent / \"groupwebsite_generator\" / \"assets\"\n", "SUB_RESEARCH_PATH = HOSTING_PATH / \"sub_research\"\n", "OPPORTUNITIES_PATH = WEBSITE_DATA_PATH / \"content\" / \"opportunities.json\"\n", - "ROLE_HIERARCHY_PATH = WEBSITE_DATA_PATH / \"role_hierarchy.json\"\n", - "\n", - "GENERAL_TAGS = [\n", - " \"Paper\",\n", - " \"Poster\", \n", - " \"Talk\",\n", - " \"Award\",\n", - " \"New Team Member\",\n", - " \"PhD\",\n", - " \"Conference\",\n", - " \"Undergraduate\",\n", - " \"Event\",\n", - " \"Achievement\"\n", - "]\n", "\n", - "# Define tag colors mapping\n", "TAG_COLORS = {\n", - " 'paper': '#FF6B6B', # Coral red\n", - " 'poster': '#4ECDC4', # Turquoise\n", - " 'talk': '#45B7D1', # Light blue\n", - " 'award': '#96CEB4', # Sage green\n", - " 'new team member': '#FFBE0B', # Golden yellow\n", - " 'phd': '#9B5DE5', # Purple\n", - " 'conference': '#FF006E', # Pink\n", - " 'undergraduate': '#8338EC', # Violet\n", - " 'event': '#3A86FF', # Royal blue\n", - " 'achievement': '#FB5607', # Orange\n", - " 'astrophysics': '#2EC4B6', # Teal\n", - " 'machine learning': '#FF9F1C', # Light orange\n", - " 'software': '#E71D36', # Bright red\n", - " 'research': '#011627', # Dark blue\n", - " 'news': '#41EAD4' # Cyan\n", + " 'paper': '#FF6B6B',\n", + " 'poster': '#4ECDC4',\n", + " 'talk': '#45B7D1',\n", + " 'award': '#96CEB4',\n", + " 'new team member': '#FFBE0B',\n", + " 'phd': '#9B5DE5',\n", + " 'conference': '#FF006E',\n", + " 'undergraduate': '#8338EC',\n", + " 'event': '#3A86FF',\n", + " 'achievement': '#FB5607',\n", + " 'astrophysics': '#2EC4B6',\n", + " 'machine learning': '#FF9F1C',\n", + " 'software': '#E71D36',\n", + " 'research': '#011627',\n", + " 'news': '#41EAD4'\n", "}" ] }, { "cell_type": "markdown", - "metadata": { - "jp-MarkdownHeadingCollapsed": true - }, - "source": [ - "Setting up jinja environment" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "ExecuteTime": { - "end_time": "2023-11-21T07:07:36.311056453Z", - "start_time": "2023-11-21T07:07:36.038221785Z" - } - }, - "outputs": [], + "id": "4f3ee0ad-abd4-4603-a275-2ffad7469a12", + "metadata": {}, "source": [ - "# Function to create proper HTML file names by replacing spaces with underscores\n", - "def page_link(a):\n", - " \"\"\"Return the HTML file name after replacing blank spaces(\" \") with underscores(\"-\")\"\"\"\n", - " return a.replace(\" \", \"_\") if \" \" in a else a\n", - "\n", - "# Function to get tag color, returns a default if tag not in mapping\n", - "def get_tag_color(tag):\n", - " \"\"\"Get color for a specific tag, with fallback to default\"\"\"\n", - " tag = tag.lower()\n", - " return TAG_COLORS.get(tag, '#6c757d') # Default gray if tag not found\n", - "\n" + "# Setup Jinja2 environment" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, + "id": "83b7c7e4-fd73-46eb-ab6e-70b179e2dd66", "metadata": {}, "outputs": [], "source": [ + "# Setup Jinja2 environment\n", "environment = Environment(\n", " loader=FileSystemLoader(TEMPLATE_DIR_PATH), extensions=[\"jinja2.ext.loopcontrols\", \"jinja2.ext.do\"]\n", ")\n", - "environment.globals[\"page_link\"] = page_link\n", - "# Add tag colors to jinja environment globals\n", - "environment.globals['tag_colors'] = TAG_COLORS\n", - "environment.globals['get_tag_color'] = get_tag_color" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Data Processing Setup" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Data Processing Parameters" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "ExecuteTime": { - "end_time": "2023-11-21T07:07:36.308005773Z", - "start_time": "2023-11-21T07:07:36.024555660Z" - } - }, - "outputs": [], - "source": [ - "# Needed columns for articles\n", - "ARTICLE_METADATA_FIELDS = [\n", - " \"article_id\",\n", - " \"category\",\n", - " \"date\",\n", - " \"tags\",\n", - " \"title\",\n", - " \"cover_image\",\n", - " \"short_description\"\n", - "]\n", - "# Groups and institution used in filtering data\n", - "GROUP_FILTER = [\"DTI\", \"TARDIS\", \"ICER\", \"kerzendorf\"]\n", - "INSTITUTION_FILTER = \"Michigan State University\"\n", "\n", - "# Map roles to standardized roles for consistency\n", - "ROLE_MAP = {\n", - " \"Assistant Professor\": \"Professor\",\n", - " \"Professorial Assistant\": \"Undergraduate Student\",\n", - " \"Visiting Researcher\": \"Postdoctoral Researcher\"\n", - "}\n", + "# Helper Functions\n", + "def page_link(a):\n", + " \"\"\"Return the HTML file name after replacing blank spaces with underscores\"\"\"\n", + " return a.replace(\" \", \"_\") if \" \" in a else a\n", + "def get_tag_color(tag):\n", + " \"\"\"Get color for a specific tag, with fallback to default\"\"\"\n", + " return TAG_COLORS.get(tag.lower(), '#6c757d')\n", "\n", - "# Map degrees to standardized academic levels\n", - "DEGREE_MAP = {\n", - " \"Masters\": \"Graduate Student\",\n", - " \"PhD\": \"Postdoctorate\", # if end_date is present\n", - " \"Bachelors\": \"Undergraduate Student\",\n", - "}\n", + "environment.globals[\"page_link\"] = page_link\n", + "environment.globals['tag_colors'] = TAG_COLORS\n", + "environment.globals['get_tag_color'] = get_tag_color\n", "\n", - "INDIVIDUAL_MEMBER_SECTION_MAP = {\n", - " \"education\": \"Education\",\n", - " \"experiences\": \"Experience\",\n", - " \"projects\": \"Projects\",\n", - " \"awards\": \"Awards & Recognition\",\n", - " \"outreach\": \"Outreach Programs\",\n", - "}" + "def create_page(template, html, **kwargs):\n", + " \"\"\"Create an HTML page using a Jinja2 template and save it to a specified path\"\"\"\n", + " page_template = environment.get_template(template)\n", + " template_level = html.count(\"/\")\n", + " page_html_path = HOSTING_PATH / html\n", + " page_html_path.parent.mkdir(parents=True, exist_ok=True)\n", + " page_content = page_template.render(TEMPLATE_LEVEL=template_level, **kwargs)\n", + " with open(page_html_path, mode=\"w\", encoding=\"utf-8\") as page:\n", + " page.write(page_content)" ] }, { "cell_type": "markdown", + "id": "d2784b7b-9456-4fd3-92a4-baf46a5a09d5", "metadata": {}, "source": [ - "# Functions for Data Handling" + "# Read Data" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, + "id": "00852806-a096-462c-be87-2ad74af2869e", "metadata": {}, "outputs": [], "source": [ - "def loading_website_data(file_to_load):\n", - " \"\"\"\n", - " Load data from JSON files specified in a list of file names.\n", - "\n", - " Parameters:\n", - " ----------\n", - " json_data_list : list of str\n", - " A list of file names (without extension) to load as JSON.\n", + "# Load member and article data from CSVs\n", + "members_df = pd.read_csv(CSV_DIR_PATH / \"members.csv\", index_col=0)\n", + "education_df = pd.read_csv(CSV_DIR_PATH / \"education.csv\", index_col=0)\n", + "experiences_df = pd.read_csv(CSV_DIR_PATH / \"experiences.csv\", index_col=0)\n", + "projects_df = pd.read_csv(CSV_DIR_PATH / \"projects.csv\", index_col=0)\n", + "awards_df = pd.read_csv(CSV_DIR_PATH / \"awards.csv\", index_col=0)\n", + "outreach_df = pd.read_csv(CSV_DIR_PATH / \"outreach.csv\", index_col=0)\n", + "documents_df = pd.read_csv(CSV_DIR_PATH / \"documents.csv\", index_col=0)\n", "\n", - " Returns:\n", - " -------\n", - " dict\n", - " A dictionary where keys are file names and values are the corresponding JSON data.\n", - "\n", - " Raises:\n", - " ------\n", - " FileNotFoundError:\n", - " If a specified file does not exist.\n", - " json.JSONDecodeError:\n", - " If there's an issue decoding the JSON content from a file.\n", - "\n", - " \"\"\"\n", - " loaded_data = {}\n", - " file_matches = WEBSITE_DATA_PATH/ f\"{file_to_load}.json\"\n", - " if file_matches:\n", - " try:\n", - " with open(file_matches, \"r\") as json_file:\n", - " loaded_data = json.load(json_file)\n", - " except json.JSONDecodeError:\n", - " print(f\"Error decoding JSON in '{file_matches}'.\")\n", - " else:\n", - " print(f\"File '{file_to_load}.json' not found.\")\n", - "\n", - " return loaded_data" + "current_members_with_info = pd.read_csv(CSV_DIR_PATH / \"current_members.csv\", index_col=0)\n", + "# Replace NaN with empty string for current_project_title to avoid displaying \"nan\"\n", + "current_members_with_info['current_project_title'] = current_members_with_info['current_project_title'].fillna('')\n", + "alumni_members_with_info = pd.read_csv(CSV_DIR_PATH / \"alumni_members.csv\", index_col=0)\n", + "articles_df = pd.read_csv(CSV_DIR_PATH / \"articles.csv\", index_col=0, parse_dates=['date'])\n", + "news_df = pd.read_csv(CSV_DIR_PATH / \"news.csv\", index_col=0, parse_dates=['date'])\n", + "research_df = pd.read_csv(CSV_DIR_PATH / \"research.csv\", index_col=0, parse_dates=['date'])\n", + "\n" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, + "id": "8489c0c4-db14-4437-926e-a038f3dc3db7", "metadata": {}, "outputs": [], "source": [ - "def read_member_data_jsons(file_to_read):\n", - " member_data_list = []\n", - " member_data_df = pd.DataFrame([])\n", - " for single_info_file_path in MEMBERS_DIR_PATH.glob(\"*/info.json\"):\n", - " with open(single_info_file_path, \"r\") as f_info:\n", - " member_data = json.load(f_info)\n", - " member_unique_id = member_data[\"id\"]\n", - " file_to_read_path = single_info_file_path.parent / \"jsons\" / file_to_read\n", + "# Parse dates for dataframes with date columns\n", + "for df in [education_df, experiences_df, projects_df, outreach_df]:\n", + " if 'start_date' in df.columns:\n", + " df['start_date'] = pd.to_datetime(df['start_date'])\n", + " if 'end_date' in df.columns:\n", + " df['end_date'] = pd.to_datetime(df['end_date'])\n", "\n", - " if file_to_read_path.exists():\n", - " with file_to_read_path.open(\"r\") as f_data:\n", - " member_other_data = json.load(f_data)\n", - " for entry in member_other_data:\n", - " entry[\"id\"] = member_unique_id\n", - " member_data_list.append(\n", - " pd.DataFrame(member_other_data)\n", - " )\n", - " # else:\n", - " # data_path_in_kl = KERZENDORF_GROUP_DATA / \"members\" / member_unique_id / \"jsons\" / file_to_read\n", - " # if data_path_in_kl.exists():\n", - " # with data_path_in_kl.open(\"r\") as data_file:\n", - " # member_other_data_kl = json.load(data_file)\n", - " # for entry in member_other_data_kl:\n", - " # entry[\"id\"] = member_unique_id\n", - " # member_data_list.append(\n", - " # pd.DataFrame(member_other_data_kl)\n", - " # )\n", - "\n", - " if member_data_list:\n", - " member_data_df = pd.concat(\n", - " member_data_list, ignore_index=True\n", - " )\n", - " member_data_df.set_index(\"id\", inplace=True)\n", - "\n", - " return member_data_df" + "# Awards has additional 'date' column\n", + "if 'date' in awards_df.columns:\n", + " awards_df['date'] = pd.to_datetime(awards_df['date'])\n", + "if 'start_date' in awards_df.columns:\n", + " awards_df['start_date'] = pd.to_datetime(awards_df['start_date'])\n", + "if 'end_date' in awards_df.columns:\n", + " awards_df['end_date'] = pd.to_datetime(awards_df['end_date'])" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, + "id": "83d6e833-c0f7-4818-a362-a8e36d580614", "metadata": {}, "outputs": [], "source": [ - "def set_new_image_path(source_dir, old_image_path):\n", - " article_image_path = source_dir.parent / \"media\" / \"images\"\n", - " image_source = article_image_path / old_image_path.name\n", - " image_destination = ARTICLE_IMAGE_DESTINATION_DIR / old_image_path.name\n", - "\n", - " # Create destination directory if it doesn't exist\n", - " image_destination.parent.mkdir(parents=True, exist_ok=True)\n", - "\n", - " website_files_index = image_destination.parts.index(\"website_files\")\n", - " new_image_path = Path(*image_destination.parts[website_files_index:])\n", - " shutil.copy2(image_source, image_destination)\n", - " return str(new_image_path)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# DataFrame Creation and Processing" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Creating dataframes for articles which can be updated further " + "# Convert string columns back to their original types\n", + "for df in [articles_df, news_df, research_df]:\n", + " df['content'] = df['content'].apply(ast.literal_eval)\n", + " df['links'] = df['links'].apply(ast.literal_eval)\n" ] }, { "cell_type": "code", - "execution_count": null, - "metadata": { - "ExecuteTime": { - "end_time": "2023-11-21T07:07:36.355136270Z", - "start_time": "2023-11-21T07:07:36.080422459Z" - } - }, + "execution_count": 7, + "id": "8b9d6812-991d-4072-aaa2-b9bc185ee83f", + "metadata": {}, "outputs": [], "source": [ - "# Reading all articles\n", - "article_content_list = []\n", - "today = date.today()\n", - "for content_file_name in ARTICLE_DIR_PATH.rglob('info.json'):\n", - " with open(content_file_name, \"r\") as fcontent:\n", - " article_content = json.load(fcontent)\n", - " today_datetime = datetime.combine(today, datetime.min.time())\n", - " article_date = datetime.strptime(article_content[\"date\"], \"%m-%d-%Y\")\n", - " if \"kg\" in article_content[\"platforms\"] and article_date <= today_datetime:\n", - " image_path = Path(article_content[\"cover_image\"])\n", - " article_content[\"cover_image\"] = set_new_image_path(content_file_name, image_path)\n", - " for content_key, content_value in article_content[\"content\"].items():\n", - " if \"img\" in content_key:\n", - " new_content_value = set_new_image_path(content_file_name, Path(content_value))\n", - " article_content[\"content\"][content_key] = new_content_value\n", - " article_content_list.append(article_content)\n", - "article_content_df = pd.DataFrame(article_content_list)\n", - "\n", - "article_content_df[\"date\"] = pd.to_datetime(\n", - " article_content_df[\"date\"], format=\"%m-%d-%Y\"\n", - ")\n", - "\n", - "article_content_df[\"cover_image_height\"] = (\n", - " article_content_df[\"cover_image_height\"].fillna(\"330px\").replace(\"\", \"330px\")\n", - ")\n", - "article_content_df[\"cover_image_width\"] = (\n", - " article_content_df[\"cover_image_width\"].fillna(\"520px\").replace(\"\", \"520px\")\n", - ")\n", + "# Group dataframes\n", + "def group_df(df):\n", + " \"\"\"Group dataframe by index and convert to nested dict format for templates\"\"\"\n", + " return df.fillna(\"\").groupby(level=0).apply(lambda x: x.to_dict('records')).to_frame('info').to_dict('index')\n", "\n", - "#THis line is only for kerzendorf lab and is not needed on dti\n", - "article_content_df[\"category\"] = article_content_df[\"category\"].replace(\n", - " \"Overview\", \"Computational Metascience\"\n", - ")\n", "\n", - "article_content_df['image_name'] = article_content_df['cover_image'].apply(lambda x: Path(x).name)" + "education = group_df(education_df)\n", + "experience = group_df(experiences_df)\n", + "projects = group_df(projects_df)\n", + "awards = group_df(awards_df)\n", + "outreach = group_df(outreach_df)\n", + "documents = group_df(documents_df)" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, + "id": "9b2c0b5c-a413-4484-ad35-ef104801679b", "metadata": {}, "outputs": [], "source": [ - "news_content_df = article_content_df[\n", - " (article_content_df[\"category\"] == \"News\")\n", - " | (\n", - " article_content_df[\"tags\"].apply(\n", - " lambda x: \"news\" in x if isinstance(x, list) else False\n", - " )\n", - " )\n", - "].sort_values(by=[\"date\"], ascending=[False])\n", - "\n", - "research_content_df = article_content_df[\n", - " article_content_df[\"category\"] != \"News\"\n", - "].sort_values(by=[\"category\", \"date\"], ascending=[True, False])" + "# Setup socials and dicts\n", + "# Load social fields from schema\n", + "social_schema_path = GROUP_DATA_DIR / \"schemas/members/social_links.json\"\n", + "social_schema = json.loads(social_schema_path.read_text())\n", + "social_cols = list(social_schema['properties'].keys())\n", + "socials = members_df[social_cols].fillna('').to_dict('index')" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, + "id": "8f1b6484-021f-4df7-8b30-ddbf5d350fbe", "metadata": {}, "outputs": [], "source": [ - "info_json_list = []\n", - "for single_info_file_path in MEMBERS_DIR_PATH.glob(\"*/info.json\"):\n", - " with open(single_info_file_path, \"r\") as f_info:\n", - " member_data = json.load(f_info)\n", - " # if len(member_data.keys()) == 1:\n", - " # info_json_path = (\n", - " # KERZENDORF_GROUP_DATA / \"members\" / member_data[\"id\"] / \"info.json\"\n", - " # )\n", - " # member_images_dir = HOSTING_PATH / \"members\" / member_data[\"id\"] / \"media\"\n", - " # with open(info_json_path, \"r\") as f_info_kl:\n", - " # member_data_from_kl = json.load(f_info_kl)\n", - " # member_images_dir_source = (\n", - " # KERZENDORF_GROUP_DATA / \"members\" / member_data[\"id\"] / \"media\"\n", - " # )\n", - "\n", - " # shutil.copytree(member_images_dir_source, member_images_dir, dirs_exist_ok=True)\n", - " # info_json_list.append(member_data_from_kl)\n", - " # else:\n", - " info_json_list.append(member_data)\n", - "info_json_df = pd.DataFrame(info_json_list)\n", - "info_json_df.set_index(\"id\", inplace=True)\n", - "info_json_df[\"full_name\"] = info_json_df.apply(\n", - " lambda row: (\n", - " row[\"nick_name\"] + \" \" + row[\"last_name\"]\n", - " if pd.notna(row[\"nick_name\"])\n", - " else row[\"first_name\"] + \" \" + row[\"last_name\"]\n", - " ),\n", - " axis=1,\n", - ")\n", - "info_json_dict = info_json_df.to_dict(\"index\")" + "all_members_dict = members_df.to_dict(\"index\")\n", + "all_articles_dict = {\n", + " aid: {**data, 'article_id': aid}\n", + " for aid, data in articles_df.to_dict(\"index\").items()\n", + "}\n" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, + "id": "22ab9d8b-7bc2-4e85-9801-2d6414106978", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Created 34 individual member pages\n" + ] + } + ], "source": [ - "exp_df = read_member_data_jsons(\"experiences.json\")[\n", - " [\n", - " \"role\",\n", - " \"start_date\",\n", - " \"end_date\",\n", - " \"institution\",\n", - " \"group\",\n", - " ]\n", - "]\n", - "edu_df = read_member_data_jsons(\"education.json\")[\n", - " [\"start_date\", \"end_date\", \"institution\", \"subject\", \"degree\"]\n", - "]" + "general = json.loads((WEBSITE_DATA_PATH / \"general.json\").read_text())\n", + "\n", + "for person_id, person_data in members_df.iterrows():\n", + " create_page(\n", + " \"individual_person.html.j2\",\n", + " f\"members/{person_id}/{person_id}.html\",\n", + " general=general,\n", + " member_id=person_id,\n", + " member_data=person_data,\n", + " socials=socials,\n", + " documents=documents,\n", + " education=education,\n", + " experience=experience,\n", + " projects=projects,\n", + " awards=awards,\n", + " outreach=outreach,\n", + " content=all_articles_dict,\n", + " )\n", + "print(f\"Created {len(members_df)} individual member pages\")\n" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, + "id": "c7e55573-9b43-44e2-b214-88fd870724ee", "metadata": {}, "outputs": [], "source": [ - "edu_df['end_date'] = pd.to_datetime(edu_df['end_date'], format='%Y-%m-%d')\n", - "edu_df['start_date'] = pd.to_datetime(edu_df['start_date'], format='%Y-%m-%d')\n", + "# Copy assets and load JSON files\n", + "shutil.copytree(SOURCE_ASSETS, HOSTING_PATH / \"assets\", dirs_exist_ok=True)\n", "\n", - "def most_recent_row(group):\n", - " sorted_group = group.sort_values(by=['start_date', 'end_date'], ascending=[False, True])\n", - " return sorted_group.iloc[0:1] # Force single row\n", + "homepage = json.loads((WEBSITE_DATA_PATH / \"homepage.json\").read_text())\n", + "contact = json.loads((WEBSITE_DATA_PATH / \"contact.json\").read_text())\n", + "support = json.loads((WEBSITE_DATA_PATH / \"support.json\").read_text())\n", + "research = json.loads((WEBSITE_DATA_PATH / \"research_categories.json\").read_text())\n", "\n", - " \n", - "edu_df_most_recent = (\n", - " edu_df.groupby(\"id\").apply(most_recent_row).droplevel(0)\n", - ")\n", + "# Get recent content for homepage\n", + "recent_content_df = articles_df.sort_values(\n", + " [\"category\", \"date\"], ascending=[True, False]\n", + ").groupby(\"category\").head(1)\n", "\n", "\n", - "edu_df_most_recent['academic_role'] = \"\"\n", - "for edu_mem_id, edu_mem_value in edu_df_most_recent.iterrows():\n", - " if edu_mem_value['institution'] == INSTITUTION_FILTER:\n", - " if edu_mem_value['degree'] == \"Bachelors\":\n", - " edu_df_most_recent.at[edu_mem_id, 'academic_role'] = \"Undergraduate Student\"\n", - " elif edu_mem_value['degree'] in [\"PhD\", \"Masters\"]:\n", - " edu_df_most_recent.at[edu_mem_id, 'academic_role'] = \"Graduate Student\"\n", - "edu_df_most_recent_diff_suffix = edu_df_most_recent.add_suffix(\"_edu\")" + "# Create homepage\n", + "create_page(\n", + " \"homepage.html.j2\",\n", + " \"index.html\",\n", + " general=general,\n", + " homepage=homepage,\n", + " recent_content=recent_content_df.reset_index().to_dict(orient=\"records\"),\n", + ")\n" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, + "id": "bf471fde-5a05-4ddd-9ae8-038f6f35e9be", "metadata": {}, "outputs": [], "source": [ - "social_link_list = []\n", - "for single_member_file_path in MEMBERS_DIR_PATH.rglob(\"social_links.json\"):\n", - " with open(single_member_file_path, \"r\") as fname:\n", - " member_social_link = json.load(fname)\n", - " info_json_file_path = single_member_file_path.parent.parent / \"info.json\"\n", - " with open(info_json_file_path, \"r\") as file_info:\n", - " member_info_data = json.load(file_info)\n", - " mem_id = member_info_data[\"id\"]\n", - " member_social_link[\"id\"] = mem_id\n", - " social_link_list.append(member_social_link)\n", - "social_links_df = pd.DataFrame(social_link_list)\n", - "social_links_df.set_index(\"id\", inplace=True)\n", - "social_links_df.fillna(\"\", inplace=True)" + "\n", + "# Create current members page\n", + "create_page(\n", + " \"current_members.html.j2\",\n", + " \"current_members.html\",\n", + " general=general,\n", + " current_members=current_members_with_info,\n", + " socials=socials\n", + ")\n" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 13, + "id": "daa3b00e-9fa3-4468-b42f-fa0fbbd88d00", "metadata": {}, "outputs": [], "source": [ - "recent_content = article_content_df.sort_values(\n", - " by=[\"category\", \"date\"], ascending=[True, False]\n", - ")\n", - "# Get the first row for each category using groupby and head\n", - "recent_content = recent_content.groupby(\"category\").head(1).copy()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Page Creation" + "# Create alumni page\n", + "create_page(\n", + " \"alumni_members.html.j2\",\n", + " \"alumni_members.html\",\n", + " general=general,\n", + " alumni_members=alumni_members_with_info,\n", + ")" ] }, { - "cell_type": "markdown", + "cell_type": "code", + "execution_count": 14, + "id": "4a638ad6-aad1-4b3a-b3db-46b8f3f36aed", "metadata": {}, + "outputs": [], "source": [ - "Function to create a page" + "# Create contact page\n", + "create_page(\n", + " \"contact.html.j2\",\n", + " \"Contact.html\",\n", + " general=general,\n", + " contact=contact\n", + ")" ] }, { "cell_type": "code", - "execution_count": null, - "metadata": { - "ExecuteTime": { - "end_time": "2023-11-21T07:07:36.337418487Z", - "start_time": "2023-11-21T07:07:36.065742781Z" - } - }, + "execution_count": 15, + "id": "04895fd5-cc5e-4238-bfdb-30377b5d8f5d", + "metadata": {}, "outputs": [], "source": [ - "def create_page(template, html, **kwargs):\n", - " \"\"\"\n", - " Create an HTML page using a Jinja2 template and save it to a specified path.\n", "\n", - " Parameters:\n", - " ----------\n", - " template : str\n", - " The filename of the Jinja2 template to be used.\n", - " html : str\n", - " The filename of the HTML file to be generated.\n", - " **kwargs : dict\n", - " Additional keyword arguments to be passed to the Jinja2 template for rendering.\n", - "\n", - " Returns:\n", - " -------\n", - " None\n", - "\n", - " \"\"\"\n", - " page_template = environment.get_template(template)\n", - " template_level = html.count(\"/\")\n", - " page_html_path = HOSTING_PATH / html\n", - " page_html_path.parent.mkdir(parents=True, exist_ok=True)\n", - " page_content = page_template.render(TEMPLATE_LEVEL=template_level, **kwargs)\n", - " with open(page_html_path, mode=\"w\", encoding=\"utf-8\") as page:\n", - " page.write(page_content)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Processing List Of JSON files" + "# Create support page\n", + "create_page(\n", + " \"support.html.j2\",\n", + " \"Support.html\",\n", + " general=general,\n", + " support=support\n", + ")\n" ] }, { "cell_type": "code", - "execution_count": null, - "metadata": { - "ExecuteTime": { - "end_time": "2023-11-21T07:07:36.392640967Z", - "start_time": "2023-11-21T07:07:36.099540795Z" - } - }, - "outputs": [], - "source": [ - "# Function Call\n", - "general = loading_website_data(\"general\")\n", - "homepage = loading_website_data(\"homepage\")\n", - "contact = loading_website_data(\"contact\")\n", - "research = loading_website_data(\"research_categories\")\n", - "support = loading_website_data(\"support\")" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Homepage" - ] - }, - { - "cell_type": "markdown", + "execution_count": 16, + "id": "505c0a4f-9b47-4499-9dc4-22d54b36a125", "metadata": {}, - "source": [ - "Storing selected columns for Homepage only" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "ExecuteTime": { - "end_time": "2023-11-21T07:07:36.460321141Z", - "start_time": "2023-11-21T07:07:36.164866903Z" - } - }, "outputs": [], "source": [ + "# Create research page\n", "create_page(\n", - " \"homepage.html.j2\",\n", - " \"index.html\",\n", + " \"research.html.j2\",\n", + " \"Research.html\",\n", " general=general,\n", - " homepage=homepage,\n", - " recent_content=recent_content.to_dict(orient=\"records\"),\n", + " content=research_df.reset_index(),\n", + " research=research,\n", + " current_members=all_members_dict,\n", ")" ] }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Current Members Page" - ] - }, { "cell_type": "code", - "execution_count": null, - "metadata": { - "ExecuteTime": { - "end_time": "2023-11-21T07:07:36.460567962Z", - "start_time": "2023-11-21T07:07:36.165051029Z" - } - }, + "execution_count": 17, + "id": "006d5615-5c19-4cc2-ada0-c86535bf2118", + "metadata": {}, "outputs": [], "source": [ - "exp_df['end_date'] = pd.to_datetime(exp_df['end_date'], format='%Y-%m-%d')\n", - "exp_df['start_date'] = pd.to_datetime(exp_df['start_date'], format='%Y-%m-%d')\n", - "exp_df = exp_df.fillna(\"\")\n", - "filtered_exp_df = exp_df[(exp_df[\"end_date\"].isna()) | (exp_df[\"end_date\"].dt.date >= datetime.now().date())]\n", - "\n", - "def most_recent_row(group):\n", - " sorted_group = group.sort_values(by=['start_date', 'end_date'], ascending=[False, True])\n", - " # Filter the sorted group by the condition that the group name is in GROUP_FILTER\n", - " relevant_group = sorted_group[sorted_group['group'].str.contains('|'.join(GROUP_FILTER))]\n", - " # Return the most recent relevant experience\n", - " return relevant_group.iloc[0:1] if len(relevant_group) > 0 else sorted_group.iloc[0:1]\n", - " \n", - " \n", - "filtered_exp_df_most_recent = exp_df.groupby(\"id\").apply(most_recent_row).droplevel(0)\n", - "exp_df_most_recent = exp_df.groupby(\"id\").apply(most_recent_row).droplevel(0)\n", - "exp_df_most_recent_diff_suffix = exp_df_most_recent.add_suffix('_exp')" + "# Create sub_research directory\n", + "SUB_RESEARCH_PATH.mkdir(parents=True, exist_ok=True)" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 18, + "id": "17654174-2dfc-4446-9298-aa5f791c3c6b", "metadata": {}, "outputs": [], "source": [ - "merged_edu_exp_df = exp_df_most_recent_diff_suffix.merge(edu_df_most_recent_diff_suffix, on='id', how='outer')" + "\n", + "# Create category pages\n", + "for category in research_df[\"category\"].unique():\n", + " create_page(\n", + " \"sub_research_frontpage.html.j2\",\n", + " f\"sub_research/{page_link(category.lower())}.html\",\n", + " general=general,\n", + " research=research,\n", + " content=research_df.reset_index(),\n", + " category=category,\n", + " current_members=all_members_dict,\n", + " )\n" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 19, + "id": "48dd8fd1-8983-4a3f-8883-4249409733f1", "metadata": {}, "outputs": [], "source": [ - "merged_edu_exp_df" + "\n", + "# Create individual research pages\n", + "for article_id, ind_research_values in research_df.iterrows():\n", + " destination_research_path = f\"sub_research/{page_link(ind_research_values.category.lower())}/{page_link(article_id.lower())}.html\"\n", + " if ind_research_values['category'] == \"Software\":\n", + " destination_research_path = f\"sub_research/{page_link(article_id.lower())}.html\"\n", + "\n", + " folder_path = SUB_RESEARCH_PATH / page_link(ind_research_values.category.lower())\n", + " folder_path.mkdir(parents=True, exist_ok=True)\n", + " create_page(\n", + " \"research_page_no_twitter.html.j2\",\n", + " destination_research_path,\n", + " general=general,\n", + " content=ind_research_values,\n", + " member_data=all_members_dict,\n", + " article_id=article_id,\n", + " )\n" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 20, + "id": "f532fcfd-f0b3-4b79-9e08-0db138ad3d4c", "metadata": {}, "outputs": [], "source": [ - "merged_edu_exp_df['isCurrent'] = False\n", - "merged_edu_exp_df['current_role'] = \"\"\n", - "for merged_mem_id, merged_mem_value in merged_edu_exp_df.iterrows():\n", - " if merged_mem_value['institution_edu'] == INSTITUTION_FILTER:\n", - " if pd.isna(merged_mem_value['end_date_edu']) or merged_mem_value['end_date_edu'] >= datetime.now():\n", - " merged_edu_exp_df.at[merged_mem_id, 'isCurrent'] = True\n", - " if pd.notna(merged_mem_value['end_date_exp']):\n", - " merged_edu_exp_df.at[merged_mem_id, 'isCurrent'] = False\n", - " acad_role = merged_mem_value.get('academic_role_edu')\n", - " if acad_role:\n", - " merged_edu_exp_df.at[merged_mem_id, 'current_role'] = merged_mem_value[\"academic_role_edu\"]\n", - " else:\n", - " merged_edu_exp_df.at[merged_mem_id, 'isCurrent'] = False\n", - " acad_role = merged_mem_value.get('academic_role_edu')\n", - " if acad_role:\n", - " merged_edu_exp_df.at[merged_mem_id, 'current_role'] = merged_mem_value[\"academic_role_edu\"]\n", - " else:\n", - " merged_edu_exp_df.at[merged_mem_id, 'current_role'] = merged_mem_value[\"role_exp\"]\n", - " elif merged_mem_value['group_exp'] in GROUP_FILTER and (pd.isna(merged_mem_value['end_date_exp']) or merged_mem_value['end_date_exp'] >= datetime.now()):\n", - " merged_edu_exp_df.at[merged_mem_id, 'isCurrent'] = True\n", - " merged_edu_exp_df.at[merged_mem_id, 'current_role'] = merged_mem_value[\"role_exp\"]\n", - " else:\n", - " merged_edu_exp_df.at[merged_mem_id, 'isCurrent'] = False\n", - " acad_role = merged_mem_value.get('academic_role_edu')\n", - " if acad_role:\n", - " merged_edu_exp_df.at[merged_mem_id, 'current_role'] = merged_mem_value[\"academic_role_edu\"]\n", - " else:\n", - " merged_edu_exp_df.at[merged_mem_id, 'current_role'] = merged_mem_value[\"role_exp\"]\n", - "merged_edu_exp_df['current_role'] = merged_edu_exp_df['current_role'].replace(ROLE_MAP)" + "\n", + "# Create news page\n", + "create_page(\n", + " \"news.html.j2\",\n", + " \"News.html\",\n", + " general=general,\n", + " content=news_df.reset_index(),\n", + " category=\"News\",\n", + " member_data=all_members_dict,\n", + ")\n", + "\n", + "# Create individual news pages\n", + "news_dict_list = news_df.reset_index().to_dict('records')\n", + "for news_item in news_dict_list:\n", + " create_page(\n", + " \"news_page_no_twitter.html.j2\",\n", + " f\"news/{page_link(news_item['article_id'].lower())}.html\",\n", + " general=general,\n", + " content=news_item,\n", + " member_data=all_members_dict,\n", + " category=\"News\"\n", + " )\n" ] }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "current_member_df = merged_edu_exp_df[merged_edu_exp_df['isCurrent'] == True][[\"current_role\"]]\n", - "current_member_df_with_info = pd.merge(current_member_df, info_json_df, on='id', how='inner')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "alumni_member_df = merged_edu_exp_df[merged_edu_exp_df['isCurrent'] == False][[\"current_role\"]]\n", - "alumni_member_df_with_info = pd.merge(alumni_member_df, info_json_df, on='id', how='inner')[['current_role', 'full_name']]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "projects_df = read_member_data_jsons(\"projects.json\").sort_values(\n", - " by=[\"end_date\"], ascending=False\n", - ")\n", - "projects_df['end_date'] = pd.to_datetime(projects_df['end_date'], format='%Y-%m-%d')\n", - "projects_df['start_date'] = pd.to_datetime(projects_df['start_date'], format='%Y-%m-%d')\n", - "projects_df.fillna(\"\", inplace=True)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "for mem_key, mem_value in current_member_df.iterrows():\n", - " if mem_key in projects_df.index:\n", - " mem_projects = projects_df.loc[mem_key]\n", - " if not mem_projects.empty:\n", - " if isinstance(mem_projects, pd.Series):\n", - " current_project_title = mem_projects[\"project_title\"]\n", - " else:\n", - " current_project_title = mem_projects.iloc[0][\"project_title\"]\n", - " else:\n", - " current_project_title = \"\"\n", - " current_member_df_with_info.loc[mem_key, \"current_project_title\"] = current_project_title" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "current_member_df" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Function to sort the members on basis of their roles" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "with open(ROLE_HIERARCHY_PATH, \"r\") as file_name: \n", - " role_hierarchy = json.load(file_name)\n", - "current_member_df_with_info['rank'] = current_member_df_with_info['current_role'].map(role_hierarchy)\n", - "\n", - "current_member_df_with_info = current_member_df_with_info.sort_values(by='rank')\n", - "current_member_df_with_info = current_member_df_with_info.drop(columns='rank')\n", - "current_member_df_with_info[['current_role', 'full_name', 'image_path', 'cover_image_path','current_project_title']]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Current Members Page" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "create_page(\n", - " \"current_members.html.j2\",\n", - " \"current_members.html\",\n", - " general=general,\n", - " current_members=current_member_df_with_info,\n", - " socials=social_links_df.to_dict(\"index\")\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Alumni Members Page" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "create_page(\n", - " \"alumni_members.html.j2\",\n", - " \"alumni_members.html\",\n", - " general=general,\n", - " alumni_members=alumni_member_df_with_info,\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Individual People Page" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def group_df(df):\n", - " new_df = (df.fillna(\"\").groupby(\"id\")\n", - " .apply(lambda x: x.to_dict(orient=\"records\"))\n", - " .reset_index(name=\"info\")\n", - " .set_index(\"id\")\n", - " .to_dict(orient=\"index\"))\n", - " return new_df" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "document_df = read_member_data_jsons(\"documents.json\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "outreach_df = read_member_data_jsons(\"outreach.json\")\n", - "if not outreach_df.empty:\n", - " outreach_grouped = group_df(outreach_df)\n", - "else:\n", - " outreach_grouped = {}" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "awards_df = read_member_data_jsons(\"awards.json\")\n", - "awards_grouped = group_df(awards_df)\n", - "\n", - "exp_grouped = group_df(exp_df)\n", - "edu_grouped = group_df(edu_df)\n", - "projects_grouped = group_df(projects_df)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "info_json_df.fillna(\"\", inplace=True)\n", - "info_json_df[\"academic_role\"] = \"\"\n", - "info_json_df[\"current_project_title\"] = \"\"\n", - "for member_id, member_data in info_json_df.iterrows():\n", - " if member_id in current_member_df_with_info.index:\n", - " # Handle case where member has duplicate entries (returns Series)\n", - " current_role_value = current_member_df_with_info.loc[member_id, \"current_role\"]\n", - " if isinstance(current_role_value, pd.Series):\n", - " current_role_value = current_role_value.iloc[0]\n", - " \n", - " current_project_value = current_member_df_with_info.loc[member_id, \"current_project_title\"]\n", - " if isinstance(current_project_value, pd.Series):\n", - " current_project_value = current_project_value.iloc[0]\n", - " \n", - " info_json_df.at[member_id, \"academic_role\"] = current_role_value\n", - " info_json_df.at[member_id, \"current_project_title\"] = current_project_value\n", - " elif member_id in alumni_member_df.index:\n", - " role_value = alumni_member_df.loc[member_id, \"current_role\"]\n", - " # Handle case where member has duplicate entries (returns Series)\n", - " if isinstance(role_value, pd.Series):\n", - " role_value = role_value.iloc[0]\n", - " # Convert NaN to empty string\n", - " if pd.isna(role_value):\n", - " role_value = \"\"\n", - " info_json_df.at[member_id, \"academic_role\"] = role_value\n", - "alumni_member_df.replace(\"nan\", np.nan, inplace=True)\n", - "alumni_member_df.fillna(\"\", inplace=True)\n", - "current_member_df_with_info.fillna(\"\", inplace=True)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "for person_id, person_data in info_json_df.iterrows():\n", - " create_page(\n", - " \"individual_person.html.j2\",\n", - " f\"members/{person_id}/{person_id}.html\",\n", - " general=general,\n", - " member_id=person_id,\n", - " member_data=person_data,\n", - " socials=social_links_df.to_dict(\"index\"),\n", - " documents=document_df.to_dict(\"index\"),\n", - " education=edu_grouped,\n", - " experience=exp_grouped,\n", - " projects=projects_grouped,\n", - " awards=awards_grouped,\n", - " outreach=outreach_grouped,\n", - " section_headings=INDIVIDUAL_MEMBER_SECTION_MAP,\n", - " content=article_content_df.to_dict(\"index\"),\n", - " )" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Contact Page" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "ExecuteTime": { - "end_time": "2023-11-21T07:07:36.569018310Z", - "start_time": "2023-11-21T07:07:36.457030906Z" - } - }, - "outputs": [], - "source": [ - "create_page(\n", - " \"contact.html.j2\",\n", - " \"Contact.html\",\n", - " general=general,\n", - " contact=contact\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Support Page" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "ExecuteTime": { - "end_time": "2023-11-21T07:07:36.588932886Z", - "start_time": "2023-11-21T07:07:36.457249500Z" - } - }, - "outputs": [], - "source": [ - "create_page(\n", - " \"support.html.j2\",\n", - " \"Support.html\",\n", - " general=general,\n", - " support=support\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Research Front Page" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "For adding more columns in dataframe to render front pages and individual article pages" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "ExecuteTime": { - "end_time": "2023-11-21T07:07:36.589247191Z", - "start_time": "2023-11-21T07:07:36.501093779Z" - } - }, - "outputs": [], - "source": [ - "create_page(\n", - " \"research.html.j2\",\n", - " \"Research.html\",\n", - " general=general,\n", - " content=research_content_df,\n", - " research=research,\n", - " current_members=info_json_dict,\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "ExecuteTime": { - "end_time": "2023-11-21T07:07:36.589414282Z", - "start_time": "2023-11-21T07:07:36.544920583Z" - } - }, - "outputs": [], - "source": [ - "SUB_RESEARCH_PATH.mkdir(parents=True, exist_ok=True)\n", - "\n", - "for category in article_content_df.loc[\n", - " article_content_df.category != \"News\", \"category\"\n", - "].unique():\n", - " create_page(\n", - " \"sub_research_frontpage.html.j2\",\n", - " f\"sub_research/{page_link(category.lower())}.html\",\n", - " general=general,\n", - " research=research,\n", - " content=research_content_df,\n", - " category=category,\n", - " current_members=info_json_dict,\n", - " )" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "ExecuteTime": { - "end_time": "2023-11-21T07:07:36.589414282Z", - "start_time": "2023-11-21T07:07:36.544920583Z" - } - }, - "source": [ - "Individual Research Page\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "ExecuteTime": { - "end_time": "2023-11-21T07:07:36.589414282Z", - "start_time": "2023-11-21T07:07:36.544920583Z" - } - }, - "outputs": [], - "source": [ - "for ind_research_keys, ind_research_values in research_content_df.iterrows():\n", - " destination_research_path = f\"sub_research/{page_link(ind_research_values.category.lower())}/{page_link(ind_research_values.article_id.lower())}.html\"\n", - " if ind_research_values['category'] == \"Software\":\n", - " destination_research_path = f\"sub_research/{page_link(ind_research_values.article_id.lower())}.html\"\n", - "\n", - " folder_path = SUB_RESEARCH_PATH / page_link(ind_research_values.category.lower())\n", - " folder_path.mkdir(parents=True, exist_ok=True)\n", - " create_page(\n", - " \"research_page_no_twitter.html.j2\",\n", - " destination_research_path,\n", - " general=general,\n", - " content=ind_research_values,\n", - " member_data=info_json_dict,\n", - " article_id=ind_research_values[\"article_id\"],\n", - " )" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# News Page" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import re\n", - "\n", - "def urlize_content(content):\n", - " \"\"\"\n", - " Replaces IDs wrapped in [] with corresponding names from an existing DataFrame,\n", - " and wraps the names in anchor tags.\n", - "\n", - " Args:\n", - " content (str): The text content containing IDs in square brackets.\n", - "\n", - " Returns:\n", - " str: The updated content with IDs replaced by anchor tags.\n", - " \"\"\"\n", - "\n", - " def replace_id(match):\n", - " id_to_fetch= match.group(1)\n", - " replace_string=\"\"\n", - " if id_to_fetch in info_json_df.index:\n", - " name = info_json_df.loc[id_to_fetch, 'full_name']\n", - " if id_to_fetch in current_member_df_with_info.index:\n", - " replace_string =f'{name}'\n", - " else:\n", - " replace_string = name\n", - " else:\n", - " replace_string = id_to_fetch.replace('_', ' ').title()\n", - "\n", - " return replace_string\n", - "\n", - " urlized_content = re.sub(r'\\[(\\w+)\\]', replace_id, content)\n", - "\n", - " return urlized_content\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "for index, row in news_content_df.iterrows():\n", - " content = row['content']\n", - " for content_key in content:\n", - " if \"para\" in content_key:\n", - " content[content_key] = urlize_content(content[content_key])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "ExecuteTime": { - "end_time": "2023-11-21T07:10:29.508008736Z", - "start_time": "2023-11-21T07:10:29.418263240Z" - } - }, - "outputs": [], - "source": [ - "create_page(\n", - " \"news.html.j2\",\n", - " \"News.html\",\n", - " general=general,\n", - " content=news_content_df,\n", - " category=\"News\",\n", - " member_data=info_json_dict,\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Individual News Page" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "for ind_news_keys, ind_news_values in news_content_df.iterrows():\n", - " folder_path = HOSTING_PATH / \"news\" / page_link(ind_news_values.article_id.lower())\n", - " create_page(\n", - " \"news_page_no_twitter.html.j2\",\n", - " f\"news/{page_link(ind_news_values.article_id.lower())}.html\",\n", - " general=general,\n", - " content=ind_news_values,\n", - " member_data=info_json_dict,\n", - " category=\"News\"\n", - " )" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Join Us Page" - ] - }, - { - "cell_type": "code", - "execution_count": null, + "execution_count": 21, + "id": "c5df40b7-acb7-47c7-9a87-54cb6327373b", "metadata": {}, "outputs": [], "source": [ + "# Create join us page\n", "with open(OPPORTUNITIES_PATH, 'r') as f_opp:\n", - " OPPORTUNITIES = json.load(f_opp)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ + " opportunities = json.load(f_opp)\n", + "\n", "create_page(\n", " \"join_us.html.j2\",\n", " \"Join_Us.html\",\n", " general=general,\n", - " opportunities=OPPORTUNITIES\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# New Research" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# all_research_data = []\n", - "# for json_file in RESEARCH_CONTENT_SOURCE.rglob(\"info.json\"):\n", - "# sub_research = []\n", - "# relative_path = json_file.relative_to(RESEARCH_CONTENT_SOURCE.parent).with_suffix(\"\")\n", - "# for sub_dir in json_file.parent.iterdir():\n", - "# if sub_dir.is_dir():\n", - "# if sub_dir.name != \"media\":\n", - "# sub_dir_name = sub_dir.name\n", - "# sub_research.append(sub_dir_name)\n", - "# else:\n", - "# dest_path = HOSTING_PATH / relative_path.parent\n", - "# shutil.copytree(sub_dir, dest_path / \"media\", dirs_exist_ok=True)\n", - " \n", - "# # Parse the JSON file\n", - "# with open(json_file, \"r\") as f_research:\n", - "# data = json.load(f_research)\n", - "# if 'research_id' in data:\n", - "# data['sub_research'] = sub_research\n", - "# data['url'] = f\"{relative_path}.html\"\n", - "# all_research_data.append(data)\n", - "# all_research_df = pd.DataFrame(all_research_data)\n", - "# indexed_research_df = all_research_df.set_index('research_id')\n", - "# # Fill all NaN values with empty strings in the DataFrame\n", - "# indexed_research_df = indexed_research_df.fillna(\"\")\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# d = {}\n", - "\n", - "# for index, article in article_content_df.iterrows():\n", - "# res_articles, news_articles = [], []\n", - "# if pd.notna(article[\"research_id\"]):\n", - "# res_id = article[\"research_id\"]\n", - "# article_id = article[\"article_id\"]\n", - "# if article['category'] == 'Research':\n", - "# res_articles.append((article_id, article['date']))\n", - "# if article['category'] == 'News':\n", - "# news_articles.append((article_id, article['date']))\n", - "\n", - "# if res_id not in d:\n", - "# d[res_id] = {\"res_articles\": [], \"news_articles\": []}\n", - "# d[res_id][\"res_articles\"].extend(res_articles)\n", - "# d[res_id][\"news_articles\"].extend(news_articles)\n", - "\n", - "# def get_aggregated_articles(research_id, visited=None):\n", - "# if visited is None:\n", - "# visited = set()\n", - "\n", - "# # Avoid processing the same research_id multiple times\n", - "# if research_id in visited:\n", - "# return {\"res_articles\": [], \"news_articles\": []}\n", - " \n", - "# visited.add(research_id)\n", - "\n", - "# # Start with articles for the current research_id\n", - "# aggregated_articles = d.get(research_id, {\"res_articles\": [], \"news_articles\": []}).copy()\n", - "\n", - "# # Get sub-research IDs from `indexed_research_df`\n", - "# sub_researches = indexed_research_df.loc[research_id, \"sub_research\"] if research_id in indexed_research_df.index else []\n", - "# if isinstance(sub_researches, list) and len(sub_researches) > 0:\n", - "# for sub_research in sub_researches:\n", - "# sub_articles = get_aggregated_articles(sub_research, visited)\n", - "# aggregated_articles[\"res_articles\"].extend(sub_articles[\"res_articles\"])\n", - "# aggregated_articles[\"news_articles\"].extend(sub_articles[\"news_articles\"])\n", - "\n", - "# return aggregated_articles\n", - "\n", - "# f = {}\n", - "# for research_index in indexed_research_df.index:\n", - "# f[research_index] = get_aggregated_articles(research_index)\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# def sort_articles(articles):\n", - "# # Sort by date in descending order\n", - "# sorted_articles = sorted(articles, key=lambda x: x[1], reverse=True)\n", - "# # Extract only article IDs\n", - "# return [article[0] for article in sorted_articles]\n", - "\n", - "# # Update `f` with sorted articles\n", - "# for research_index in f:\n", - "# f[research_index][\"res_articles\"] = sort_articles(f[research_index][\"res_articles\"])\n", - "# f[research_index][\"news_articles\"] = sort_articles(f[research_index][\"news_articles\"])\n", - "\n", - "# # Add sorted articles to `indexed_research_df`\n", - "# indexed_research_df[\"res_articles\"] = indexed_research_df.index.map(\n", - "# lambda idx: f.get(idx, {}).get(\"res_articles\", [])\n", - "# )\n", - "# indexed_research_df[\"news_articles\"] = indexed_research_df.index.map(\n", - "# lambda idx: f.get(idx, {}).get(\"news_articles\", [])\n", - "# )\n", - "\n", - "# # Display the updated DataFrame\n", - "# indexed_research_df" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# indexed_article_df = article_content_df.set_index('article_id', inplace=False)\n", - "# for index, research in indexed_research_df.iterrows():\n", - "# create_page(\n", - "# \"sub_research_frontpage.html.j2\",\n", - "# research['url'],\n", - "# general=general,\n", - "# data=research,\n", - "# current_research_id=index,\n", - "# indexed_research_df=indexed_research_df,\n", - "# indexed_article_df=indexed_article_df,\n", - "# member_data=info_json_dict\n", - "# )" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Gallery page" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "GALLERY_CONTENT_SOURCE = WEBSITE_DATA_PATH / \"content\" / \"gallery\"\n", - "events = []\n", - "\n", - "for event_file in GALLERY_CONTENT_SOURCE.rglob(\"info.json\"):\n", - " with open(event_file, \"r\") as f_event:\n", - " event_data = json.load(f_event)\n", - " \n", - " if \"date\" in event_data:\n", - " event_data[\"date\"] = pd.to_datetime(event_data[\"date\"])\n", - " event_id = event_data.get(\"event_id\", \"unknown_event\") # Default if event_id is missing\n", - " \n", - " # Define new destination path using event_id\n", - " dest_image_dir = HOSTING_PATH / \"website_files\" / \"images\" / \"gallery\" / event_id / \"media\" / \"images\"\n", - " \n", - " # Create destination directory if it doesn't exist\n", - " dest_image_dir.mkdir(parents=True, exist_ok=True)\n", - " \n", - " # Copy images directory to the structured destination\n", - " source_image_dir = event_file.parent / \"media\" / \"images\"\n", - " if source_image_dir.exists():\n", - " shutil.copytree(source_image_dir, dest_image_dir, dirs_exist_ok=True)\n", - "\n", - " # # Update image paths in event data to use website path\n", - " # for key in event_data:\n", - " # if isinstance(event_data[key], str) and \"images\" in event_data[key]:\n", - " # event_data[key] = str(Path(\"website_files\") / \"images\" / \"gallery\" / Path(event_data[key]).name)\n", - " for image in event_data.get(\"images\", []):\n", - " image_path = GALLERY_CONTENT_SOURCE / event_id / image[\"image_path\"]\n", - " with Image.open(image_path) as img:\n", - " width, height = img.size\n", - " new_width = int(width * 0.7) # Reduce by 30%\n", - " new_height = int(height * 0.7) # Reduce by 30%\n", - "\n", - " image[\"scaled_width\"] = new_width\n", - " image[\"scaled_height\"] = new_height\n", - " events.append(event_data)\n", - "\n", - "create_page(\n", - " \"gallery.html.j2\",\n", - " \"Gallery.html\",\n", - " general=general,\n", - " member_data=info_json_dict,\n", - " events=events\n", + " opportunities=opportunities\n", ")" ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Copy assets" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "source_assets = GROUP_DATA_DIR.parent / \"groupwebsite_generator\" / \"assets\"\n", - "shutil.copytree(source_assets, HOSTING_PATH / \"assets\", dirs_exist_ok=True)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { @@ -1517,5 +497,5 @@ } }, "nbformat": 4, - "nbformat_minor": 4 + "nbformat_minor": 5 } diff --git a/notebooks/members.ipynb b/notebooks/members.ipynb new file mode 100644 index 0000000..efd970f --- /dev/null +++ b/notebooks/members.ipynb @@ -0,0 +1,611 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 2, + "id": "b9379b22-dbcf-48a4-8a82-db05404d12ec", + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "import json\n", + "import pandas as pd\n", + "from pathlib import Path\n", + "from datetime import datetime\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "9e91e830-4d98-4a57-b412-4c1e237437f1", + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "# Constants\n", + "GROUP_DATA_DIR = Path(\"../../group-data\")\n", + "MEMBERS_DIR_PATH = GROUP_DATA_DIR / \"members/\"\n", + "WEBSITE_DATA_PATH = GROUP_DATA_DIR / \"website_data/\"\n", + "ROLE_HIERARCHY_PATH = WEBSITE_DATA_PATH / \"role_hierarchy.json\"\n", + "\n", + "GROUP_FILTER = [\"DTI\", \"TARDIS\", \"kerzendorf\"]\n", + "INSTITUTION_FILTER = \"Michigan State University\"\n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "8e5497ad-454d-4fb6-a90d-78a7cbade567", + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "class MemberDataLoader:\n", + " def __init__(self, members_dir: Path = MEMBERS_DIR_PATH):\n", + " self.members_dir = members_dir\n", + "\n", + " def _load_records(self, jsons_dir, filename, member_id):\n", + " path = jsons_dir / filename\n", + " if not path.exists():\n", + " return []\n", + " records = json.loads(path.read_text())\n", + " for record in records:\n", + " record['member_id'] = member_id\n", + " return records\n", + "\n", + " def _parse_dates(self, records, date_fields, member_id=None):\n", + " for record in records:\n", + " for field in date_fields:\n", + " if field not in record:\n", + " continue\n", + "\n", + " if not record[field]:\n", + " record[field] = pd.NaT\n", + " continue\n", + "\n", + " try:\n", + " record[field] = pd.to_datetime(record[field])\n", + " except ValueError as e:\n", + " if member_id:\n", + " print(e, member_id)\n", + " return records\n", + "\n", + " def load_all_data(self):\n", + " data_types = ['education', 'experiences', 'projects', 'awards', 'outreach', 'documents', 'posters', 'publications']\n", + " data_config = {dt: f\"{dt}.json\" for dt in data_types}\n", + " data = {key: [] for key in data_config}\n", + "\n", + " start_end_dates = ['education.json', 'experiences.json', 'projects.json', 'outreach.json']\n", + " single_date_with_errors = ['publications.json']\n", + " dual_date_format = ['awards.json']\n", + "\n", + " members_data = []\n", + "\n", + " for member_dir in self.members_dir.glob(\"*\"):\n", + " info_path = member_dir / \"info.json\"\n", + " member_info = json.loads(info_path.read_text())\n", + " member_id = member_info[\"id\"]\n", + "\n", + " full_name = (\n", + " f\"{member_info.get('nick_name', member_info.get('first_name', ''))} {member_info.get('last_name', '')}\"\n", + " if member_info.get('nick_name')\n", + " else f\"{member_info.get('first_name', '')} {member_info.get('last_name', '')}\"\n", + " )\n", + " member_info['full_name'] = full_name.strip()\n", + "\n", + " jsons_dir = member_dir / \"jsons\"\n", + "\n", + " social_path = jsons_dir / \"social_links.json\"\n", + " if social_path.exists():\n", + " social_data = json.loads(social_path.read_text())\n", + " member_info.update(social_data)\n", + "\n", + " members_data.append(member_info)\n", + "\n", + " for key, filename in data_config.items():\n", + " records = self._load_records(jsons_dir, filename, member_id)\n", + " if filename in start_end_dates:\n", + " records = self._parse_dates(records, ['start_date', 'end_date'])\n", + " elif filename in single_date_with_errors:\n", + " records = self._parse_dates(records, ['date'], member_id)\n", + " elif filename in dual_date_format:\n", + " records = self._parse_dates(records, ['date', 'start_date', 'end_date'], member_id)\n", + " data[key].extend(records)\n", + "\n", + " members_df = pd.DataFrame(members_data).set_index('id')\n", + " self.members_df = members_df\n", + "\n", + " for key in data:\n", + " df = pd.DataFrame(data[key]).set_index('member_id')\n", + " setattr(self, f\"{key}_df\", df)\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "db3ee435-4a70-4f62-aed4-7f0ef64cd4fc", + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "class CurrentMemberProcessor:\n", + " def __init__(self, members_df, education_df, experiences_df, projects_df):\n", + " self.members_df = members_df\n", + " self.education_df = education_df\n", + " self.experiences_df = experiences_df\n", + " self.projects_df = projects_df\n", + "\n", + " with open(ROLE_HIERARCHY_PATH, \"r\") as file_name:\n", + " self.role_hierarchy = json.load(file_name)\n", + "\n", + " def process_education(self):\n", + " \"\"\"Get most recent education and determine academic role\"\"\"\n", + " def most_recent_row(group):\n", + " sorted_group = group.sort_values(by=['start_date', 'end_date'], ascending=[False, True])\n", + " return sorted_group.iloc[0:1]\n", + "\n", + " self.edu_most_recent = self.education_df.groupby(level=0).apply(most_recent_row).droplevel(0)\n", + "\n", + " self.edu_most_recent['academic_role'] = \"\"\n", + " msu_mask = self.edu_most_recent['institution'] == INSTITUTION_FILTER\n", + " bachelors_mask = msu_mask & (self.edu_most_recent['degree'] == \"Bachelors\")\n", + " grad_mask = msu_mask & (self.edu_most_recent['degree'].isin([\"PhD\", \"Masters\"]))\n", + "\n", + " self.edu_most_recent.loc[bachelors_mask, 'academic_role'] = \"Undergraduate Student\"\n", + " self.edu_most_recent.loc[grad_mask, 'academic_role'] = \"Graduate Student\"\n", + "\n", + " def process_experiences(self):\n", + " \"\"\"Get most recent experience per member\"\"\"\n", + " self.experiences_df = self.experiences_df.fillna(\"\")\n", + "\n", + " def most_recent_row(group):\n", + " sorted_group = group.sort_values(by=['start_date', 'end_date'], ascending=[False, True])\n", + " relevant_group = sorted_group[sorted_group['group'].str.contains('|'.join(GROUP_FILTER))]\n", + " return relevant_group.iloc[0:1] if not relevant_group.empty else sorted_group.iloc[0:1]\n", + "\n", + " self.exp_most_recent = self.experiences_df.groupby(level=0).apply(most_recent_row).droplevel(0)\n", + "\n", + " def _merge_edu_exp(self):\n", + " \"\"\"Merge education and experience dataframes\"\"\"\n", + " exp_suffixed = self.exp_most_recent.add_suffix('_exp')\n", + " edu_suffixed = self.edu_most_recent.add_suffix('_edu')\n", + " return exp_suffixed.merge(edu_suffixed, left_index=True, right_index=True, how='outer')\n", + "\n", + " def _determine_status_and_role(self, row):\n", + " \"\"\"Determine if member is current and their role\"\"\"\n", + " if row['institution_edu'] == INSTITUTION_FILTER:\n", + " is_current_edu = pd.isna(row['end_date_edu']) or row['end_date_edu'] >= datetime.now()\n", + " has_ended_exp = pd.notna(row['end_date_exp'])\n", + " is_current = is_current_edu and not has_ended_exp\n", + "\n", + " if row['academic_role_edu']:\n", + " current_role = row['academic_role_edu']\n", + " else:\n", + " current_role = row['role_exp']\n", + "\n", + " return pd.Series({'isCurrent': is_current, 'current_role': current_role})\n", + " elif row['group_exp'] in GROUP_FILTER and (pd.isna(row['end_date_exp']) or row['end_date_exp'] >= datetime.now()):\n", + " return pd.Series({'isCurrent': True, 'current_role': row['role_exp']})\n", + " else:\n", + " current_role = row['academic_role_edu'] if row['academic_role_edu'] else row['role_exp']\n", + " return pd.Series({'isCurrent': False, 'current_role': current_role})\n", + "\n", + " def _add_projects(self, df):\n", + " \"\"\"Add current project titles to members\"\"\"\n", + " df[\"current_project_title\"] = \"\"\n", + "\n", + " common_members = df.index.intersection(self.projects_df.index)\n", + " projects_first = self.projects_df.loc[common_members].groupby(level=0).first()\n", + " df.loc[common_members, \"current_project_title\"] = projects_first[\"project_title\"]\n", + "\n", + " def _sort_by_hierarchy(self, df):\n", + " \"\"\"Sort members by role hierarchy\"\"\"\n", + " df['rank'] = df['current_role'].map(self.role_hierarchy)\n", + " df = df.sort_values(by='rank')\n", + " return df.drop(columns='rank')\n", + "\n", + " def merge_and_determine_status(self):\n", + " \"\"\"Merge edu/exp and determine current vs alumni status\"\"\"\n", + " merged = self._merge_edu_exp()\n", + " status_role = merged.apply(self._determine_status_and_role, axis=1)\n", + " merged = pd.concat([merged, status_role], axis=1)\n", + "\n", + " self.current_members = merged[merged['isCurrent']][[\"current_role\"]]\n", + " self.alumni_members = merged[~merged['isCurrent']][[\"current_role\"]]\n", + "\n", + " self.current_members_with_info = pd.merge(self.current_members, self.members_df, left_index=True, right_index=True, how='inner')\n", + " self.alumni_members_with_info = pd.merge(self.alumni_members, self.members_df, left_index=True, right_index=True, how='inner')[['current_role', 'full_name']]\n", + "\n", + " self._add_projects(self.current_members_with_info)\n", + " self.current_members_with_info = self._sort_by_hierarchy(self.current_members_with_info)\n", + "\n", + " def process(self):\n", + " \"\"\"Run full pipeline\"\"\"\n", + " self.process_education()\n", + " self.process_experiences()\n", + " self.merge_and_determine_status()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "54df076c-712d-4ce4-ae1f-0432802421f1", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Members: 34\n", + "Education records: 44\n", + "Experiences records: 53\n", + "Projects records: 31\n" + ] + } + ], + "source": [ + "# Load member data\n", + "loader = MemberDataLoader()\n", + "loader.load_all_data()\n", + "\n", + "print(f\"Members: {len(loader.members_df)}\")\n", + "print(f\"Education records: {len(loader.education_df)}\")\n", + "print(f\"Experiences records: {len(loader.experiences_df)}\")\n", + "print(f\"Projects records: {len(loader.projects_df)}\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "5312ec3d-4e35-4925-9e0a-e9d68060b71a", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
first_namelast_nameimage_pathcover_image_pathintroductionfull_namegithub_handlelinkedinemailnick_namewebsiteorcidtwitter_handlelinkedin_handleads
id
gracie_tvrdikGracieTvrdikmedia/images/gracie.jpgmedia/images/cover.jpgI am an undergraduate student at Bowling Green...Gracie Tvrdikgracietvwww.linkedin.com/in/grayson-tvrdik-34b7872a7graysontvrdik1@gmail.comNaNNaNNaNNaNNaNNaN
josh_shieldsJoshuaShieldsmedia/images/josh_photo.jpgmedia/images/cover.jpgJosh is a senior graduate student in astrophys...Josh ShieldsjvshieldsNaNshield90@msu.eduJoshhttps://jvshields.github.io/0000-0002-1560-5286NaNNaNNaN
anirban_duttaAnirbanDuttamedia/images/anirban_dutta.jpgmedia/images/cover.jpgHi there! This is Anirban.Anirban DuttaKnights-TemplarsNaNanirbaniamdutta@gmail.comNaNhttps://sites.google.com/view/anirbaniamdutta0000-0002-7708-3831Anirban29Duttaanirban-dutta-6a0377238NaN
erin_visserErinVissermedia/images/erin_visser_website_pic.jpgmedia/images/cover.jpgNaNErin VissererinvisserNaNvisserer@msu.eduNaNNaN0009-0001-8470-275XNaNNaNNaN
abhinav_ohriAbhinavOhrimedia/images/abhinav_ohri.jpgmedia/images/cover.jpgHi there! This is Abhinav.Abhinav OhriKasukabeDefenceForceNaNabhinavohri13@gmail.comNaNNaNNaNNaNNaNNaN
\n", + "
" + ], + "text/plain": [ + " first_name last_name image_path \\\n", + "id \n", + "gracie_tvrdik Gracie Tvrdik media/images/gracie.jpg \n", + "josh_shields Joshua Shields media/images/josh_photo.jpg \n", + "anirban_dutta Anirban Dutta media/images/anirban_dutta.jpg \n", + "erin_visser Erin Visser media/images/erin_visser_website_pic.jpg \n", + "abhinav_ohri Abhinav Ohri media/images/abhinav_ohri.jpg \n", + "\n", + " cover_image_path \\\n", + "id \n", + "gracie_tvrdik media/images/cover.jpg \n", + "josh_shields media/images/cover.jpg \n", + "anirban_dutta media/images/cover.jpg \n", + "erin_visser media/images/cover.jpg \n", + "abhinav_ohri media/images/cover.jpg \n", + "\n", + " introduction \\\n", + "id \n", + "gracie_tvrdik I am an undergraduate student at Bowling Green... \n", + "josh_shields Josh is a senior graduate student in astrophys... \n", + "anirban_dutta Hi there! This is Anirban. \n", + "erin_visser NaN \n", + "abhinav_ohri Hi there! This is Abhinav. \n", + "\n", + " full_name github_handle \\\n", + "id \n", + "gracie_tvrdik Gracie Tvrdik gracietv \n", + "josh_shields Josh Shields jvshields \n", + "anirban_dutta Anirban Dutta Knights-Templars \n", + "erin_visser Erin Visser erinvisser \n", + "abhinav_ohri Abhinav Ohri KasukabeDefenceForce \n", + "\n", + " linkedin \\\n", + "id \n", + "gracie_tvrdik www.linkedin.com/in/grayson-tvrdik-34b7872a7 \n", + "josh_shields NaN \n", + "anirban_dutta NaN \n", + "erin_visser NaN \n", + "abhinav_ohri NaN \n", + "\n", + " email nick_name \\\n", + "id \n", + "gracie_tvrdik graysontvrdik1@gmail.com NaN \n", + "josh_shields shield90@msu.edu Josh \n", + "anirban_dutta anirbaniamdutta@gmail.com NaN \n", + "erin_visser visserer@msu.edu NaN \n", + "abhinav_ohri abhinavohri13@gmail.com NaN \n", + "\n", + " website \\\n", + "id \n", + "gracie_tvrdik NaN \n", + "josh_shields https://jvshields.github.io/ \n", + "anirban_dutta https://sites.google.com/view/anirbaniamdutta \n", + "erin_visser NaN \n", + "abhinav_ohri NaN \n", + "\n", + " orcid twitter_handle linkedin_handle \\\n", + "id \n", + "gracie_tvrdik NaN NaN NaN \n", + "josh_shields 0000-0002-1560-5286 NaN NaN \n", + "anirban_dutta 0000-0002-7708-3831 Anirban29Dutta anirban-dutta-6a0377238 \n", + "erin_visser 0009-0001-8470-275X NaN NaN \n", + "abhinav_ohri NaN NaN NaN \n", + "\n", + " ads \n", + "id \n", + "gracie_tvrdik NaN \n", + "josh_shields NaN \n", + "anirban_dutta NaN \n", + "erin_visser NaN \n", + "abhinav_ohri NaN " + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "loader.members_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8b3545f7-65ef-45ae-93fd-433381fbbe5a", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4f9ef05e-fe14-41c4-9389-131d9a66f299", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "6d7f9710-9ba9-466a-8290-13c52c9e07c7", + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "# Process current and alumni members\n", + "processor = CurrentMemberProcessor(loader.members_df, loader.education_df, loader.experiences_df, loader.projects_df)\n", + "processor.process()\n", + "\n", + "# Add academic role and project info to members_df\n", + "loader.members_df[\"academic_role\"] = \"\"\n", + "loader.members_df[\"current_project_title\"] = \"\"\n", + "\n", + "loader.members_df.loc[processor.current_members_with_info.index, \"academic_role\"] = processor.current_members_with_info[\"current_role\"]\n", + "loader.members_df.loc[processor.current_members_with_info.index, \"current_project_title\"] = processor.current_members_with_info[\"current_project_title\"]\n", + "\n", + "alumni_only = processor.alumni_members_with_info.index.difference(processor.current_members_with_info.index)\n", + "loader.members_df.loc[alumni_only, \"academic_role\"] = processor.alumni_members_with_info.loc[alumni_only, \"current_role\"]\n", + "\n", + "processor.alumni_members_with_info = processor.alumni_members_with_info.replace(\"nan\", pd.NA).fillna(\"\")\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "21c8fe04-8b7e-49ab-9742-d56738fb3eb2", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Saved members.csv (34 members)\n", + "Saved current_members.csv (10 current members)\n", + "Saved alumni_members.csv (24 alumni members)\n" + ] + } + ], + "source": [ + "# Save to CSV\n", + "loader.members_df.to_csv(\"members.csv\")\n", + "processor.current_members_with_info.to_csv(\"current_members.csv\")\n", + "processor.alumni_members_with_info.to_csv(\"alumni_members.csv\")\n", + "loader.education_df.to_csv(\"education.csv\")\n", + "loader.experiences_df.to_csv(\"experiences.csv\")\n", + "loader.projects_df.to_csv(\"projects.csv\")\n", + "loader.awards_df.to_csv(\"awards.csv\")\n", + "loader.outreach_df.to_csv(\"outreach.csv\")\n", + "loader.documents_df.to_csv(\"documents.csv\")\n", + "\n", + "print(f\"Saved members.csv ({len(loader.members_df)} members)\")\n", + "print(f\"Saved current_members.csv ({len(processor.current_members_with_info)} current members)\")\n", + "print(f\"Saved alumni_members.csv ({len(processor.alumni_members_with_info)} alumni members)\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "95f09b05-33b3-467f-ba70-2f352a19c259", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4f788225-0603-4c85-91db-9a499ec3d52d", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.13.5" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/templates/individual_person.html.j2 b/templates/individual_person.html.j2 index 82d1082..f86cb87 100644 --- a/templates/individual_person.html.j2 +++ b/templates/individual_person.html.j2 @@ -1,6 +1,14 @@ {% extends 'base.html.j2' %} {% from 'macros.html.j2' import format_years %} +{% set section_headings = { + "education": "Education", + "experiences": "Experience", + "projects": "Projects", + "awards": "Awards & Recognition", + "outreach": "Outreach Programs" +} %} + {% block title %} {{member_data["full_name"]}} | {{ general.website_title }} {% endblock %} diff --git a/templates/macros.html.j2 b/templates/macros.html.j2 index 11c2156..1621678 100644 --- a/templates/macros.html.j2 +++ b/templates/macros.html.j2 @@ -11,14 +11,11 @@ {%- endmacro -%} {%- macro format_years(row) -%} - {%- if row.get("start_date") is not none -%} - {{ row["start_date"].year }} - - {%- if row.get("end_date") is not none -%} - {{ row["end_date"].year }} - {%- else -%} - Present - {%- endif -%} - {%- elif row.get("date") is string -%} - {{ row["date"][:4] }} + {%- if row.get("start_date") is not none and row["start_date"]|string != 'NaT' -%} + {{ row["start_date"].year }} - {{ row["end_date"].year if (row.get("end_date") is not none and row["end_date"]|string != 'NaT') else "Present" }} + {%- elif row.get("end_date") is not none and row["end_date"]|string != 'NaT' -%} + {{ row["end_date"].year }} + {%- elif row.get("date") is not none and row["date"]|string != 'NaT' -%} + {{ row["date"].year }} {%- endif -%} {%- endmacro -%} \ No newline at end of file