diff --git a/.github/workflows/update_website.yml b/.github/workflows/update_website.yml
index f0aed89..6aed176 100644
--- a/.github/workflows/update_website.yml
+++ b/.github/workflows/update_website.yml
@@ -55,6 +55,10 @@ jobs:
- name: Run Notebooks
run: |
+ jupyter nbconvert --to python members.ipynb
+ python3 members.py
+ jupyter nbconvert --to python articles.ipynb
+ python3 articles.py
jupyter nbconvert --to python create_htmls.ipynb
python3 create_htmls.py
working-directory: ./groupwebsite_generator/notebooks
diff --git a/notebooks/articles.ipynb b/notebooks/articles.ipynb
new file mode 100644
index 0000000..08ede56
--- /dev/null
+++ b/notebooks/articles.ipynb
@@ -0,0 +1,1133 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "891e79e2-0a9e-4744-af34-0c53ec563a49",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import json\n",
+ "import pandas as pd\n",
+ "from pathlib import Path\n",
+ "import shutil\n",
+ "import re\n",
+ "from datetime import datetime\n",
+ "\n",
+ "# Constants\n",
+ "GROUP_DATA_DIR = Path(\"../../group-data\")\n",
+ "ARTICLE_DIR_PATH = Path(\"../../research_news/articles\")\n",
+ "HOSTING_PATH = GROUP_DATA_DIR.parent / \"kerzendorf-lab.github.io\"\n",
+ "ARTICLE_IMAGE_DESTINATION_DIR = HOSTING_PATH / \"website_files\" / \"images\" / \"article_content\"\n",
+ "\n",
+ "DEFAULT_COVER_IMAGE_HEIGHT = \"330px\"\n",
+ "DEFAULT_COVER_IMAGE_WIDTH = \"520px\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "id": "652c31c8-4cbf-4b2e-bf6f-e8a396c85c2f",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "\n",
+ "# Utility function\n",
+ "def urlize_content(content_text, members_df, current_members_df):\n",
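+ "    \"\"\"Replace [member_id] tokens with linked names; ids missing from members_df fall back to the id with underscores turned into spaces and title-cased\"\"\"\n",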
+ " def replace_id(match):\n",
+ " id_to_fetch = match.group(1)\n",
+ " if id_to_fetch in members_df.index:\n",
+ " name = members_df.loc[id_to_fetch, 'full_name']\n",
+ " if id_to_fetch in current_members_df.index:\n",
+ " return f'{name}'\n",
+ " return name\n",
+ " return id_to_fetch.replace('_', ' ').title()\n",
+ "\n",
+ " return re.sub(r'\\[(\\w+)\\]', replace_id, content_text)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "id": "9bf8a6eb-93c8-4831-a71d-d817b6484731",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "\n",
+ "class ArticleDataLoader:\n",
+ " def __init__(self, article_dir: Path, image_dest_dir: Path, members_df: pd.DataFrame, current_members_df: pd.DataFrame, platform_filter: str = \"kg\"):\n",
+ " self.article_dir = article_dir\n",
+ " self.image_dest_dir = image_dest_dir\n",
+ " self.members_df = members_df\n",
+ " self.current_members_df = current_members_df\n",
+ " self.platform_filter = platform_filter\n",
+ " self.category_replacements = {\"Overview\": \"Computational Metascience\"} if platform_filter == \"kg\" else {}\n",
+ "\n",
+ " def _copy_image(self, source_dir, image_path_str):\n",
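+ "        \"\"\"Copy a local image from the media/images folder beside source_dir into image_dest_dir and return its website_files/images/article_content/ path; http(s) URLs are returned unchanged\"\"\"\n",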
+ " # Skip URLs\n",
+ " if image_path_str.startswith(('http://', 'https://')):\n",
+ " return image_path_str\n",
+ "\n",
+ " image_name = Path(image_path_str).name\n",
+ " source = source_dir.parent / \"media\" / \"images\" / image_name\n",
+ " dest = self.image_dest_dir / image_name\n",
+ " dest.parent.mkdir(parents=True, exist_ok=True)\n",
+ " shutil.copy2(source, dest)\n",
+ " return f\"website_files/images/article_content/{image_name}\"\n",
+ "\n",
+ " def _process_images(self, article, source_dir):\n",
+ "        \"\"\"Copy the cover image and every non-empty content value whose key contains 'img', replacing each path in the article dict in place\"\"\"\n",
+ " if article[\"cover_image\"]:\n",
+ " article[\"cover_image\"] = self._copy_image(source_dir, article[\"cover_image\"])\n",
+ "\n",
+ " for key, val in article[\"content\"].items():\n",
+ " if \"img\" in key and val:\n",
+ " article[\"content\"][key] = self._copy_image(source_dir, val)\n",
+ "\n",
+ " def split_news_research(self):\n",
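+ "        \"\"\"Split articles_df into news_df (category 'News' or a 'news' tag, newest first) and research_df (everything else, sorted by category, then newest first)\"\"\"\n",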
+ " is_news = (\n",
+ " (self.articles_df[\"category\"] == \"News\") |\n",
+ " self.articles_df[\"tags\"].apply(lambda x: \"news\" in x if isinstance(x, list) else False)\n",
+ " )\n",
+ "\n",
+ " self.news_df = self.articles_df[is_news].sort_values(\"date\", ascending=False)\n",
+ " self.research_df = self.articles_df[~is_news].sort_values([\"category\", \"date\"], ascending=[True, False])\n",
+ "\n",
+ " def load_all_articles(self):\n",
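+ "        \"\"\"Load every info.json under article_dir, skipping articles whose platforms lack platform_filter or whose date is in the future, then build articles_df and its news/research splits\"\"\"\n",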
+ " articles = []\n",
+ " today = datetime.now()\n",
+ "\n",
+ " for info_json in self.article_dir.rglob('info.json'):\n",
+ " article = json.loads(info_json.read_text())\n",
+ "\n",
+ " if self.platform_filter not in article[\"platforms\"]:\n",
+ " continue\n",
+ "\n",
+ " article_date = pd.to_datetime(article[\"date\"], format=\"%m-%d-%Y\")\n",
+ " if article_date > today:\n",
+ " continue\n",
+ "\n",
+ " article[\"date\"] = article_date\n",
+ " self._process_images(article, info_json)\n",
+ "\n",
+ " if article[\"category\"] == \"News\" or (\"news\" in article[\"tags\"]):\n",
+ " for key, val in article[\"content\"].items():\n",
+ " if \"para\" in key:\n",
+ " article[\"content\"][key] = urlize_content(val, self.members_df, self.current_members_df)\n",
+ "\n",
+ " articles.append(article)\n",
+ "\n",
+ " self.articles_df = pd.DataFrame(articles).set_index('article_id')\n",
+ " self.articles_df[\"cover_image_height\"] = self.articles_df[\"cover_image_height\"].fillna(DEFAULT_COVER_IMAGE_HEIGHT).replace(\"\", DEFAULT_COVER_IMAGE_HEIGHT)\n",
+ " self.articles_df[\"cover_image_width\"] = self.articles_df[\"cover_image_width\"].fillna(DEFAULT_COVER_IMAGE_WIDTH).replace(\"\", DEFAULT_COVER_IMAGE_WIDTH)\n",
+ " self.articles_df[\"category\"] = self.articles_df[\"category\"].replace(self.category_replacements)\n",
+ " self.articles_df['image_name'] = self.articles_df['cover_image'].apply(lambda x: Path(x).name)\n",
+ "\n",
+ " self.split_news_research()\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 19,
+ "id": "b2592170-940e-45e6-b940-166b7ccc30bb",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "members_df = pd.read_csv(\"members.csv\", index_col=0)\n",
+ "current_members_df = pd.read_csv(\"current_members.csv\", index_col=0)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "id": "d316e23c-bbb0-4b55-ac7e-baf2a03f02aa",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " first_name | \n",
+ " last_name | \n",
+ " image_path | \n",
+ " cover_image_path | \n",
+ " introduction | \n",
+ " full_name | \n",
+ " github_handle | \n",
+ " linkedin | \n",
+ " email | \n",
+ " nick_name | \n",
+ " website | \n",
+ " orcid | \n",
+ " twitter_handle | \n",
+ " linkedin_handle | \n",
+ " ads | \n",
+ " academic_role | \n",
+ " current_project_title | \n",
+ "
\n",
+ " \n",
+ " | id | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | gracie_tvrdik | \n",
+ " Gracie | \n",
+ " Tvrdik | \n",
+ " media/images/gracie.jpg | \n",
+ " media/images/cover.jpg | \n",
+ " I am an undergraduate student at Bowling Green... | \n",
+ " Gracie Tvrdik | \n",
+ " gracietv | \n",
+ " www.linkedin.com/in/grayson-tvrdik-34b7872a7 | \n",
+ " graysontvrdik1@gmail.com | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " REU student | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | josh_shields | \n",
+ " Joshua | \n",
+ " Shields | \n",
+ " media/images/josh_photo.jpg | \n",
+ " media/images/cover.jpg | \n",
+ " Josh is a senior graduate student in astrophys... | \n",
+ " Josh Shields | \n",
+ " jvshields | \n",
+ " NaN | \n",
+ " shield90@msu.edu | \n",
+ " Josh | \n",
+ " https://jvshields.github.io/ | \n",
+ " 0000-0002-1560-5286 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " Graduate Student | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | anirban_dutta | \n",
+ " Anirban | \n",
+ " Dutta | \n",
+ " media/images/anirban_dutta.jpg | \n",
+ " media/images/cover.jpg | \n",
+ " Hi there! This is Anirban. | \n",
+ " Anirban Dutta | \n",
+ " Knights-Templars | \n",
+ " NaN | \n",
+ " anirbaniamdutta@gmail.com | \n",
+ " NaN | \n",
+ " https://sites.google.com/view/anirbaniamdutta | \n",
+ " 0000-0002-7708-3831 | \n",
+ " Anirban29Dutta | \n",
+ " anirban-dutta-6a0377238 | \n",
+ " NaN | \n",
+ " Postdoctoral Researcher | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " first_name last_name image_path \\\n",
+ "id \n",
+ "gracie_tvrdik Gracie Tvrdik media/images/gracie.jpg \n",
+ "josh_shields Joshua Shields media/images/josh_photo.jpg \n",
+ "anirban_dutta Anirban Dutta media/images/anirban_dutta.jpg \n",
+ "\n",
+ " cover_image_path \\\n",
+ "id \n",
+ "gracie_tvrdik media/images/cover.jpg \n",
+ "josh_shields media/images/cover.jpg \n",
+ "anirban_dutta media/images/cover.jpg \n",
+ "\n",
+ " introduction \\\n",
+ "id \n",
+ "gracie_tvrdik I am an undergraduate student at Bowling Green... \n",
+ "josh_shields Josh is a senior graduate student in astrophys... \n",
+ "anirban_dutta Hi there! This is Anirban. \n",
+ "\n",
+ " full_name github_handle \\\n",
+ "id \n",
+ "gracie_tvrdik Gracie Tvrdik gracietv \n",
+ "josh_shields Josh Shields jvshields \n",
+ "anirban_dutta Anirban Dutta Knights-Templars \n",
+ "\n",
+ " linkedin \\\n",
+ "id \n",
+ "gracie_tvrdik www.linkedin.com/in/grayson-tvrdik-34b7872a7 \n",
+ "josh_shields NaN \n",
+ "anirban_dutta NaN \n",
+ "\n",
+ " email nick_name \\\n",
+ "id \n",
+ "gracie_tvrdik graysontvrdik1@gmail.com NaN \n",
+ "josh_shields shield90@msu.edu Josh \n",
+ "anirban_dutta anirbaniamdutta@gmail.com NaN \n",
+ "\n",
+ " website \\\n",
+ "id \n",
+ "gracie_tvrdik NaN \n",
+ "josh_shields https://jvshields.github.io/ \n",
+ "anirban_dutta https://sites.google.com/view/anirbaniamdutta \n",
+ "\n",
+ " orcid twitter_handle linkedin_handle \\\n",
+ "id \n",
+ "gracie_tvrdik NaN NaN NaN \n",
+ "josh_shields 0000-0002-1560-5286 NaN NaN \n",
+ "anirban_dutta 0000-0002-7708-3831 Anirban29Dutta anirban-dutta-6a0377238 \n",
+ "\n",
+ " ads academic_role current_project_title \n",
+ "id \n",
+ "gracie_tvrdik NaN REU student NaN \n",
+ "josh_shields NaN Graduate Student NaN \n",
+ "anirban_dutta NaN Postdoctoral Researcher NaN "
+ ]
+ },
+ "execution_count": 10,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "members_df.head(3)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "id": "3669510d-d4a8-4afe-b3e0-1689c53d2cfe",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " current_role | \n",
+ " first_name | \n",
+ " last_name | \n",
+ " image_path | \n",
+ " cover_image_path | \n",
+ " introduction | \n",
+ " full_name | \n",
+ " github_handle | \n",
+ " linkedin | \n",
+ " email | \n",
+ " nick_name | \n",
+ " website | \n",
+ " orcid | \n",
+ " twitter_handle | \n",
+ " linkedin_handle | \n",
+ " ads | \n",
+ " current_project_title | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | wolfgang_kerzendorf | \n",
+ " Professor | \n",
+ " Wolfgang | \n",
+ " Kerzendorf | \n",
+ " media/images/wolfgang.jpg | \n",
+ " media/images/cover.jpg | \n",
+ " I am an astrophysicist deeply intrigued by nuc... | \n",
+ " Wolfgang Kerzendorf | \n",
+ " wkerzendorf | \n",
+ " NaN | \n",
+ " wkerzend@msu.edu | \n",
+ " NaN | \n",
+ " https://wolfgangkerzendorf.com | \n",
+ " 0000-0002-0479-7235 | \n",
+ " wkerzendorf | \n",
+ " wolfgang-kerzendorf-598a0466 | \n",
+ " NaN | \n",
+ " Supernovae & Computational Metaresearch | \n",
+ "
\n",
+ " \n",
+ " | connor_mcclellan | \n",
+ " Postdoctoral Researcher | \n",
+ " Connor | \n",
+ " McClellan | \n",
+ " media/images/profile.png | \n",
+ " media/images/cover.jpg | \n",
+ " I joined the TARDIS group in 2025 as a post-do... | \n",
+ " Connor McClellan | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | jing_lu | \n",
+ " Postdoctoral Researcher | \n",
+ " Jing | \n",
+ " Lu | \n",
+ " media/images/jing.jpg | \n",
+ " media/images/cover.jpg | \n",
+ " I will be joining TARDIS group in summer 2023 ... | \n",
+ " Jing Lu | \n",
+ " DeerWhale | \n",
+ " NaN | \n",
+ " lujingeve158@gmail.com | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0000-0002-3900-1452 | \n",
+ " NaN | \n",
+ " jing-lu-bb89211bb | \n",
+ " NaN | \n",
+ " Explore the hidden Helium in Type Ic Supernovae | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " current_role first_name last_name \\\n",
+ "wolfgang_kerzendorf Professor Wolfgang Kerzendorf \n",
+ "connor_mcclellan Postdoctoral Researcher Connor McClellan \n",
+ "jing_lu Postdoctoral Researcher Jing Lu \n",
+ "\n",
+ " image_path cover_image_path \\\n",
+ "wolfgang_kerzendorf media/images/wolfgang.jpg media/images/cover.jpg \n",
+ "connor_mcclellan media/images/profile.png media/images/cover.jpg \n",
+ "jing_lu media/images/jing.jpg media/images/cover.jpg \n",
+ "\n",
+ " introduction \\\n",
+ "wolfgang_kerzendorf I am an astrophysicist deeply intrigued by nuc... \n",
+ "connor_mcclellan I joined the TARDIS group in 2025 as a post-do... \n",
+ "jing_lu I will be joining TARDIS group in summer 2023 ... \n",
+ "\n",
+ " full_name github_handle linkedin \\\n",
+ "wolfgang_kerzendorf Wolfgang Kerzendorf wkerzendorf NaN \n",
+ "connor_mcclellan Connor McClellan NaN NaN \n",
+ "jing_lu Jing Lu DeerWhale NaN \n",
+ "\n",
+ " email nick_name \\\n",
+ "wolfgang_kerzendorf wkerzend@msu.edu NaN \n",
+ "connor_mcclellan NaN NaN \n",
+ "jing_lu lujingeve158@gmail.com NaN \n",
+ "\n",
+ " website orcid \\\n",
+ "wolfgang_kerzendorf https://wolfgangkerzendorf.com 0000-0002-0479-7235 \n",
+ "connor_mcclellan NaN NaN \n",
+ "jing_lu NaN 0000-0002-3900-1452 \n",
+ "\n",
+ " twitter_handle linkedin_handle ads \\\n",
+ "wolfgang_kerzendorf wkerzendorf wolfgang-kerzendorf-598a0466 NaN \n",
+ "connor_mcclellan NaN NaN NaN \n",
+ "jing_lu NaN jing-lu-bb89211bb NaN \n",
+ "\n",
+ " current_project_title \n",
+ "wolfgang_kerzendorf Supernovae & Computational Metaresearch \n",
+ "connor_mcclellan NaN \n",
+ "jing_lu Explore the hidden Helium in Type Ic Supernovae "
+ ]
+ },
+ "execution_count": 11,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "current_members_df.head(3)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "id": "ffc002c3-a837-4d83-a455-cf468fa0611a",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "article_loader = ArticleDataLoader(\n",
+ " ARTICLE_DIR_PATH,\n",
+ " ARTICLE_IMAGE_DESTINATION_DIR,\n",
+ " members_df,\n",
+ " current_members_df\n",
+ ")\n",
+ "article_loader.load_all_articles()\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "id": "13ce104f-a74d-4d2c-bb3a-3a057c877403",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " title | \n",
+ " author_id | \n",
+ " display | \n",
+ " date | \n",
+ " category | \n",
+ " tags | \n",
+ " platforms | \n",
+ " short_description | \n",
+ " cover_image | \n",
+ " content | \n",
+ " people_involved_ids | \n",
+ " links | \n",
+ " twitter | \n",
+ " cover_image_height | \n",
+ " cover_image_width | \n",
+ " research_id | \n",
+ " image_name | \n",
+ "
\n",
+ " \n",
+ " | article_id | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | geonintern_international_ben | \n",
+ " Unveiling Earth's Secrets with AI: Our Undergr... | \n",
+ " benjamin_mellon | \n",
+ " True | \n",
+ " 2024-03-18 | \n",
+ " News | \n",
+ " [undergraduate, internship] | \n",
+ " [kg, dti] | \n",
+ " This upcoming August, Benjamin Mellon and fell... | \n",
+ " website_files/images/article_content/2BCAFnorw... | \n",
+ " {'1_para': 'This upcoming August, Benjamin Mel... | \n",
+ " [benjamin_mellon] | \n",
+ " {} | \n",
+ " None | \n",
+ " 330px | \n",
+ " 520px | \n",
+ " NaN | \n",
+ " 2BCAFnorway_geo.jpg | \n",
+ "
\n",
+ " \n",
+ " | reu_student_announcement | \n",
+ " Summer REU Students Join Kerzendorf Group | \n",
+ " richard_dow | \n",
+ " True | \n",
+ " 2023-06-23 | \n",
+ " News | \n",
+ " [New Team Member, undergraduate] | \n",
+ " [kg] | \n",
+ " Two undergraduate research assistants have joi... | \n",
+ " website_files/images/article_content/nsflogo.jpg | \n",
+ " {'1_para': 'Tripp Dow and Iliomar Rodriguez Ra... | \n",
+ " [richard_dow, iliomar_rodriguez_ramos] | \n",
+ " {} | \n",
+ " None | \n",
+ " 330px | \n",
+ " 520px | \n",
+ " NaN | \n",
+ " nsflogo.jpg | \n",
+ "
\n",
+ " \n",
+ " | prur_conference | \n",
+ " Peer Review Under Review - Workshop at Europea... | \n",
+ " vicente_amado | \n",
+ " True | \n",
+ " 2023-02-12 | \n",
+ " News | \n",
+ " [Metascience, Conference] | \n",
+ " [dti, kg] | \n",
+ " DeepThought Initiative and collaborators organ... | \n",
+ " website_files/images/article_content/img_PRUR.png | \n",
+ " {'1_para': 'Wolfgang Kerzendorf and collaborat... | \n",
+ " [vicente_amado, wolfgang_kerzendorf] | \n",
+ " {'NASA ADS': 'https://ui.adsabs.harvard.edu/ab... | \n",
+ " None | \n",
+ " 330px | \n",
+ " 520px | \n",
+ " NaN | \n",
+ " img_PRUR.png | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " title \\\n",
+ "article_id \n",
+ "geonintern_international_ben Unveiling Earth's Secrets with AI: Our Undergr... \n",
+ "reu_student_announcement Summer REU Students Join Kerzendorf Group \n",
+ "prur_conference Peer Review Under Review - Workshop at Europea... \n",
+ "\n",
+ " author_id display date category \\\n",
+ "article_id \n",
+ "geonintern_international_ben benjamin_mellon True 2024-03-18 News \n",
+ "reu_student_announcement richard_dow True 2023-06-23 News \n",
+ "prur_conference vicente_amado True 2023-02-12 News \n",
+ "\n",
+ " tags platforms \\\n",
+ "article_id \n",
+ "geonintern_international_ben [undergraduate, internship] [kg, dti] \n",
+ "reu_student_announcement [New Team Member, undergraduate] [kg] \n",
+ "prur_conference [Metascience, Conference] [dti, kg] \n",
+ "\n",
+ " short_description \\\n",
+ "article_id \n",
+ "geonintern_international_ben This upcoming August, Benjamin Mellon and fell... \n",
+ "reu_student_announcement Two undergraduate research assistants have joi... \n",
+ "prur_conference DeepThought Initiative and collaborators organ... \n",
+ "\n",
+ " cover_image \\\n",
+ "article_id \n",
+ "geonintern_international_ben website_files/images/article_content/2BCAFnorw... \n",
+ "reu_student_announcement website_files/images/article_content/nsflogo.jpg \n",
+ "prur_conference website_files/images/article_content/img_PRUR.png \n",
+ "\n",
+ " content \\\n",
+ "article_id \n",
+ "geonintern_international_ben {'1_para': 'This upcoming August, Benjamin Mel... \n",
+ "reu_student_announcement {'1_para': 'Tripp Dow and Iliomar Rodriguez Ra... \n",
+ "prur_conference {'1_para': 'Wolfgang Kerzendorf and collaborat... \n",
+ "\n",
+ " people_involved_ids \\\n",
+ "article_id \n",
+ "geonintern_international_ben [benjamin_mellon] \n",
+ "reu_student_announcement [richard_dow, iliomar_rodriguez_ramos] \n",
+ "prur_conference [vicente_amado, wolfgang_kerzendorf] \n",
+ "\n",
+ " links \\\n",
+ "article_id \n",
+ "geonintern_international_ben {} \n",
+ "reu_student_announcement {} \n",
+ "prur_conference {'NASA ADS': 'https://ui.adsabs.harvard.edu/ab... \n",
+ "\n",
+ " twitter cover_image_height cover_image_width \\\n",
+ "article_id \n",
+ "geonintern_international_ben None 330px 520px \n",
+ "reu_student_announcement None 330px 520px \n",
+ "prur_conference None 330px 520px \n",
+ "\n",
+ " research_id image_name \n",
+ "article_id \n",
+ "geonintern_international_ben NaN 2BCAFnorway_geo.jpg \n",
+ "reu_student_announcement NaN nsflogo.jpg \n",
+ "prur_conference NaN img_PRUR.png "
+ ]
+ },
+ "execution_count": 12,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "article_loader.articles_df.head(3)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "id": "5b1f488d-f94c-418c-a31c-7d6d53091cfd",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " title | \n",
+ " author_id | \n",
+ " display | \n",
+ " date | \n",
+ " category | \n",
+ " tags | \n",
+ " platforms | \n",
+ " short_description | \n",
+ " cover_image | \n",
+ " content | \n",
+ " people_involved_ids | \n",
+ " links | \n",
+ " twitter | \n",
+ " cover_image_height | \n",
+ " cover_image_width | \n",
+ " research_id | \n",
+ " image_name | \n",
+ "
\n",
+ " \n",
+ " | article_id | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | stardis_release | \n",
+ " Introducing STARDIS - An Open and Modular Stel... | \n",
+ " josh_shields | \n",
+ " True | \n",
+ " 2025-08-28 | \n",
+ " News | \n",
+ " [paper, Astrophysics] | \n",
+ " [kg, tardis] | \n",
+ " We introduce STARDIS, a new open-source Python... | \n",
+ " website_files/images/article_content/halpha_so... | \n",
+ " {'1_para': 'We are excited to announce the rel... | \n",
+ " [josh_shields, wolfgang_kerzendorf, ryan_grone... | \n",
+ " {'manuscript': 'https://iopscience.iop.org/art... | \n",
+ " | \n",
+ " 330px | \n",
+ " 520px | \n",
+ " NaN | \n",
+ " halpha_sol.png | \n",
+ "
\n",
+ " \n",
+ " | tardis_summer_school_25 | \n",
+ " TARDIS Summer School 2025: Explosive Transient... | \n",
+ " josh_shields | \n",
+ " True | \n",
+ " 2025-08-15 | \n",
+ " News | \n",
+ " [Education, Summer School, TARDIS, Radiative T... | \n",
+ " [kg, tardis] | \n",
+ " We hosted a week-long summer school where 14 p... | \n",
+ " website_files/images/article_content/cachedIma... | \n",
+ " {'1_para': 'We successfully hosted the TARDIS ... | \n",
+ " [josh_shields, wolfgang_kerzendorf, jing_lu, a... | \n",
+ " {} | \n",
+ " | \n",
+ " 330px | \n",
+ " 520px | \n",
+ " NaN | \n",
+ " cachedImage.PNG | \n",
+ "
\n",
+ " \n",
+ " | thesis_defense_deeksha | \n",
+ " Deeksha Mohanty Defends Master's Thesis on Enh... | \n",
+ " deeksha_mohanty | \n",
+ " True | \n",
+ " 2025-07-07 | \n",
+ " News | \n",
+ " [Master's Thesis, talk] | \n",
+ " [kg, tardis] | \n",
+ " Deeksha Mohanty successfully defended her mast... | \n",
+ " website_files/images/article_content/defense_b... | \n",
+ " {'1_para': 'We congratulate Deeksha Mohanty on... | \n",
+ " [deeksha_mohanty] | \n",
+ " {} | \n",
+ " None | \n",
+ " 390px | \n",
+ " 520px | \n",
+ " NaN | \n",
+ " defense_before.jpeg | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " title \\\n",
+ "article_id \n",
+ "stardis_release Introducing STARDIS - An Open and Modular Stel... \n",
+ "tardis_summer_school_25 TARDIS Summer School 2025: Explosive Transient... \n",
+ "thesis_defense_deeksha Deeksha Mohanty Defends Master's Thesis on Enh... \n",
+ "\n",
+ " author_id display date category \\\n",
+ "article_id \n",
+ "stardis_release josh_shields True 2025-08-28 News \n",
+ "tardis_summer_school_25 josh_shields True 2025-08-15 News \n",
+ "thesis_defense_deeksha deeksha_mohanty True 2025-07-07 News \n",
+ "\n",
+ " tags \\\n",
+ "article_id \n",
+ "stardis_release [paper, Astrophysics] \n",
+ "tardis_summer_school_25 [Education, Summer School, TARDIS, Radiative T... \n",
+ "thesis_defense_deeksha [Master's Thesis, talk] \n",
+ "\n",
+ " platforms \\\n",
+ "article_id \n",
+ "stardis_release [kg, tardis] \n",
+ "tardis_summer_school_25 [kg, tardis] \n",
+ "thesis_defense_deeksha [kg, tardis] \n",
+ "\n",
+ " short_description \\\n",
+ "article_id \n",
+ "stardis_release We introduce STARDIS, a new open-source Python... \n",
+ "tardis_summer_school_25 We hosted a week-long summer school where 14 p... \n",
+ "thesis_defense_deeksha Deeksha Mohanty successfully defended her mast... \n",
+ "\n",
+ " cover_image \\\n",
+ "article_id \n",
+ "stardis_release website_files/images/article_content/halpha_so... \n",
+ "tardis_summer_school_25 website_files/images/article_content/cachedIma... \n",
+ "thesis_defense_deeksha website_files/images/article_content/defense_b... \n",
+ "\n",
+ " content \\\n",
+ "article_id \n",
+ "stardis_release {'1_para': 'We are excited to announce the rel... \n",
+ "tardis_summer_school_25 {'1_para': 'We successfully hosted the TARDIS ... \n",
+ "thesis_defense_deeksha {'1_para': 'We congratulate Deeksha Mohanty on... \n",
+ "\n",
+ " people_involved_ids \\\n",
+ "article_id \n",
+ "stardis_release [josh_shields, wolfgang_kerzendorf, ryan_grone... \n",
+ "tardis_summer_school_25 [josh_shields, wolfgang_kerzendorf, jing_lu, a... \n",
+ "thesis_defense_deeksha [deeksha_mohanty] \n",
+ "\n",
+ " links \\\n",
+ "article_id \n",
+ "stardis_release {'manuscript': 'https://iopscience.iop.org/art... \n",
+ "tardis_summer_school_25 {} \n",
+ "thesis_defense_deeksha {} \n",
+ "\n",
+ " twitter cover_image_height cover_image_width \\\n",
+ "article_id \n",
+ "stardis_release 330px 520px \n",
+ "tardis_summer_school_25 330px 520px \n",
+ "thesis_defense_deeksha None 390px 520px \n",
+ "\n",
+ " research_id image_name \n",
+ "article_id \n",
+ "stardis_release NaN halpha_sol.png \n",
+ "tardis_summer_school_25 NaN cachedImage.PNG \n",
+ "thesis_defense_deeksha NaN defense_before.jpeg "
+ ]
+ },
+ "execution_count": 13,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "article_loader.news_df.head(3)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "id": "08700e38-47ff-49d6-9354-34d9d1146644",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " title | \n",
+ " author_id | \n",
+ " display | \n",
+ " date | \n",
+ " category | \n",
+ " tags | \n",
+ " platforms | \n",
+ " short_description | \n",
+ " cover_image | \n",
+ " content | \n",
+ " people_involved_ids | \n",
+ " links | \n",
+ " twitter | \n",
+ " cover_image_height | \n",
+ " cover_image_width | \n",
+ " research_id | \n",
+ " image_name | \n",
+ "
\n",
+ " \n",
+ " | article_id | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | midsure22_poster_bea | \n",
+ " MIDSURE 2022 | \n",
+ " bea_lu | \n",
+ " True | \n",
+ " 2022-07-22 | \n",
+ " Computational Metascience | \n",
+ " [research] | \n",
+ " [kg, dti] | \n",
+ " Poster presentation at the Mid-Michigan Sympos... | \n",
+ " website_files/images/article_content/bea_midsu... | \n",
+ " {'1_para': 'Abstract: Interdisciplinary scient... | \n",
+ " [bea_lu, vicente_amado, wolfgang_kerzendorf] | \n",
+ " {} | \n",
+ " None | \n",
+ " 330px | \n",
+ " 520px | \n",
+ " NaN | \n",
+ " bea_midsure_poster.jpg | \n",
+ "
\n",
+ " \n",
+ " | uuraf21_poster_vicente | \n",
+ " MSU UURAF 2021 | \n",
+ " vicente_amado | \n",
+ " True | \n",
+ " 2021-04-19 | \n",
+ " Computational Metascience | \n",
+ " [research] | \n",
+ " [kg, dti] | \n",
+ " Poster presentation for MSU's University Under... | \n",
+ " website_files/images/article_content/MAST_Post... | \n",
+ " {'1_para': 'Abstract: The modern scientific co... | \n",
+ " [vicente_amado, wolfgang_kerzendorf, jack_o_br... | \n",
+ " {} | \n",
+ " None | \n",
+ " 330px | \n",
+ " 520px | \n",
+ " NaN | \n",
+ " MAST_Poster.jpg | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " title author_id display date \\\n",
+ "article_id \n",
+ "midsure22_poster_bea MIDSURE 2022 bea_lu True 2022-07-22 \n",
+ "uuraf21_poster_vicente MSU UURAF 2021 vicente_amado True 2021-04-19 \n",
+ "\n",
+ " category tags platforms \\\n",
+ "article_id \n",
+ "midsure22_poster_bea Computational Metascience [research] [kg, dti] \n",
+ "uuraf21_poster_vicente Computational Metascience [research] [kg, dti] \n",
+ "\n",
+ " short_description \\\n",
+ "article_id \n",
+ "midsure22_poster_bea Poster presentation at the Mid-Michigan Sympos... \n",
+ "uuraf21_poster_vicente Poster presentation for MSU's University Under... \n",
+ "\n",
+ " cover_image \\\n",
+ "article_id \n",
+ "midsure22_poster_bea website_files/images/article_content/bea_midsu... \n",
+ "uuraf21_poster_vicente website_files/images/article_content/MAST_Post... \n",
+ "\n",
+ " content \\\n",
+ "article_id \n",
+ "midsure22_poster_bea {'1_para': 'Abstract: Interdisciplinary scient... \n",
+ "uuraf21_poster_vicente {'1_para': 'Abstract: The modern scientific co... \n",
+ "\n",
+ " people_involved_ids \\\n",
+ "article_id \n",
+ "midsure22_poster_bea [bea_lu, vicente_amado, wolfgang_kerzendorf] \n",
+ "uuraf21_poster_vicente [vicente_amado, wolfgang_kerzendorf, jack_o_br... \n",
+ "\n",
+ " links twitter cover_image_height cover_image_width \\\n",
+ "article_id \n",
+ "midsure22_poster_bea {} None 330px 520px \n",
+ "uuraf21_poster_vicente {} None 330px 520px \n",
+ "\n",
+ " research_id image_name \n",
+ "article_id \n",
+ "midsure22_poster_bea NaN bea_midsure_poster.jpg \n",
+ "uuraf21_poster_vicente NaN MAST_Poster.jpg "
+ ]
+ },
+ "execution_count": 15,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "article_loader.research_df"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "id": "6c80e21f-b016-46ce-9242-453e90ad192a",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "article_loader.articles_df.to_csv(\"articles.csv\")\n",
+ "article_loader.news_df.to_csv(\"news.csv\")\n",
+ "article_loader.research_df.to_csv(\"research.csv\")\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "29c2640e-2024-4ac8-8251-ba66450f4359",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.13.5"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/notebooks/create_htmls.ipynb b/notebooks/create_htmls.ipynb
index 355848f..3775210 100644
--- a/notebooks/create_htmls.ipynb
+++ b/notebooks/create_htmls.ipynb
@@ -1,1500 +1,480 @@
{
"cells": [
- {
- "attachments": {},
- "cell_type": "markdown",
- "metadata": {
- "jp-MarkdownHeadingCollapsed": true
- },
- "source": [
- "### This notebook consist of code for creating the html files for the website each time data is updated."
- ]
- },
- {
- "attachments": {},
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# Set-up"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Importing classes"
- ]
- },
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {
- "ExecuteTime": {
- "end_time": "2023-11-21T07:07:36.145701234Z",
- "start_time": "2023-11-21T07:07:35.993568325Z"
- }
- },
+ "execution_count": 1,
+ "id": "788cfd21-41e7-4af6-9c39-61bc422e98d5",
+ "metadata": {},
"outputs": [],
"source": [
+ "import ast\n",
"import json\n",
"import pandas as pd\n",
- "from jinja2 import Environment, FileSystemLoader\n",
"from pathlib import Path\n",
"import shutil\n",
- "from datetime import datetime, date\n",
- "import numpy as np\n",
- "from PIL import Image"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Defining paths"
+ "from PIL import Image\n",
+ "from jinja2 import Environment, FileSystemLoader\n"
]
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {
- "ExecuteTime": {
- "end_time": "2023-11-21T07:07:36.276371007Z",
- "start_time": "2023-11-21T07:07:36.010519700Z"
- }
- },
+ "execution_count": 2,
+ "id": "25047e15-0a70-4d44-921a-edbda7e3b938",
+ "metadata": {},
"outputs": [],
"source": [
+ "# Constants\n",
+ "CSV_DIR_PATH = Path(\".\")\n",
"GROUP_DATA_DIR = Path(\"../../group-data\")\n",
+ "HOSTING_PATH = GROUP_DATA_DIR.parent / \"kerzendorf-lab.github.io\"\n",
"TEMPLATE_DIR_PATH = GROUP_DATA_DIR.parent / \"groupwebsite_generator\" / \"templates\"\n",
"WEBSITE_DATA_PATH = GROUP_DATA_DIR / \"website_data/\"\n",
- "HOSTING_PATH = GROUP_DATA_DIR.parent / \"kerzendorf-lab.github.io\"\n",
- "ARTICLE_DIR_PATH = Path(\"../../research_news/articles\")\n",
- "ARTICLE_IMAGE_DESTINATION_DIR = (HOSTING_PATH / \"website_files\" / \"images\" / \"article_content\")\n",
- "MEMBERS_DIR_PATH = GROUP_DATA_DIR / \"members/\"\n",
+ "GALLERY_CONTENT_SOURCE = WEBSITE_DATA_PATH / \"content\" / \"gallery\"\n",
+ "SOURCE_ASSETS = GROUP_DATA_DIR.parent / \"groupwebsite_generator\" / \"assets\"\n",
"SUB_RESEARCH_PATH = HOSTING_PATH / \"sub_research\"\n",
"OPPORTUNITIES_PATH = WEBSITE_DATA_PATH / \"content\" / \"opportunities.json\"\n",
- "ROLE_HIERARCHY_PATH = WEBSITE_DATA_PATH / \"role_hierarchy.json\"\n",
- "\n",
- "GENERAL_TAGS = [\n",
- " \"Paper\",\n",
- " \"Poster\", \n",
- " \"Talk\",\n",
- " \"Award\",\n",
- " \"New Team Member\",\n",
- " \"PhD\",\n",
- " \"Conference\",\n",
- " \"Undergraduate\",\n",
- " \"Event\",\n",
- " \"Achievement\"\n",
- "]\n",
"\n",
- "# Define tag colors mapping\n",
"TAG_COLORS = {\n",
- " 'paper': '#FF6B6B', # Coral red\n",
- " 'poster': '#4ECDC4', # Turquoise\n",
- " 'talk': '#45B7D1', # Light blue\n",
- " 'award': '#96CEB4', # Sage green\n",
- " 'new team member': '#FFBE0B', # Golden yellow\n",
- " 'phd': '#9B5DE5', # Purple\n",
- " 'conference': '#FF006E', # Pink\n",
- " 'undergraduate': '#8338EC', # Violet\n",
- " 'event': '#3A86FF', # Royal blue\n",
- " 'achievement': '#FB5607', # Orange\n",
- " 'astrophysics': '#2EC4B6', # Teal\n",
- " 'machine learning': '#FF9F1C', # Light orange\n",
- " 'software': '#E71D36', # Bright red\n",
- " 'research': '#011627', # Dark blue\n",
- " 'news': '#41EAD4' # Cyan\n",
+ " 'paper': '#FF6B6B',\n",
+ " 'poster': '#4ECDC4',\n",
+ " 'talk': '#45B7D1',\n",
+ " 'award': '#96CEB4',\n",
+ " 'new team member': '#FFBE0B',\n",
+ " 'phd': '#9B5DE5',\n",
+ " 'conference': '#FF006E',\n",
+ " 'undergraduate': '#8338EC',\n",
+ " 'event': '#3A86FF',\n",
+ " 'achievement': '#FB5607',\n",
+ " 'astrophysics': '#2EC4B6',\n",
+ " 'machine learning': '#FF9F1C',\n",
+ " 'software': '#E71D36',\n",
+ " 'research': '#011627',\n",
+ " 'news': '#41EAD4'\n",
"}"
]
},
{
"cell_type": "markdown",
- "metadata": {
- "jp-MarkdownHeadingCollapsed": true
- },
- "source": [
- "Setting up jinja environment"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "ExecuteTime": {
- "end_time": "2023-11-21T07:07:36.311056453Z",
- "start_time": "2023-11-21T07:07:36.038221785Z"
- }
- },
- "outputs": [],
+ "id": "4f3ee0ad-abd4-4603-a275-2ffad7469a12",
+ "metadata": {},
"source": [
- "# Function to create proper HTML file names by replacing spaces with underscores\n",
- "def page_link(a):\n",
- " \"\"\"Return the HTML file name after replacing blank spaces(\" \") with underscores(\"-\")\"\"\"\n",
- " return a.replace(\" \", \"_\") if \" \" in a else a\n",
- "\n",
- "# Function to get tag color, returns a default if tag not in mapping\n",
- "def get_tag_color(tag):\n",
- " \"\"\"Get color for a specific tag, with fallback to default\"\"\"\n",
- " tag = tag.lower()\n",
- " return TAG_COLORS.get(tag, '#6c757d') # Default gray if tag not found\n",
- "\n"
+ "# Set up Jinja2 environment"
]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 3,
+ "id": "83b7c7e4-fd73-46eb-ab6e-70b179e2dd66",
"metadata": {},
"outputs": [],
"source": [
+ "# Set up the Jinja2 environment\n",
"environment = Environment(\n",
" loader=FileSystemLoader(TEMPLATE_DIR_PATH), extensions=[\"jinja2.ext.loopcontrols\", \"jinja2.ext.do\"]\n",
")\n",
- "environment.globals[\"page_link\"] = page_link\n",
- "# Add tag colors to jinja environment globals\n",
- "environment.globals['tag_colors'] = TAG_COLORS\n",
- "environment.globals['get_tag_color'] = get_tag_color"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# Data Processing Setup"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Data Processing Parameters"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "ExecuteTime": {
- "end_time": "2023-11-21T07:07:36.308005773Z",
- "start_time": "2023-11-21T07:07:36.024555660Z"
- }
- },
- "outputs": [],
- "source": [
- "# Needed columns for articles\n",
- "ARTICLE_METADATA_FIELDS = [\n",
- " \"article_id\",\n",
- " \"category\",\n",
- " \"date\",\n",
- " \"tags\",\n",
- " \"title\",\n",
- " \"cover_image\",\n",
- " \"short_description\"\n",
- "]\n",
- "# Groups and institution used in filtering data\n",
- "GROUP_FILTER = [\"DTI\", \"TARDIS\", \"ICER\", \"kerzendorf\"]\n",
- "INSTITUTION_FILTER = \"Michigan State University\"\n",
"\n",
- "# Map roles to standardized roles for consistency\n",
- "ROLE_MAP = {\n",
- " \"Assistant Professor\": \"Professor\",\n",
- " \"Professorial Assistant\": \"Undergraduate Student\",\n",
- " \"Visiting Researcher\": \"Postdoctoral Researcher\"\n",
- "}\n",
+ "# Helper Functions\n",
+ "def page_link(a):\n",
+ " \"\"\"Return the HTML file name after replacing blank spaces with underscores\"\"\"\n",
+ " return a.replace(\" \", \"_\") if \" \" in a else a\n",
+ "def get_tag_color(tag):\n",
+ " \"\"\"Get color for a specific tag, with fallback to default\"\"\"\n",
+ " return TAG_COLORS.get(tag.lower(), '#6c757d')\n",
"\n",
- "# Map degrees to standardized academic levels\n",
- "DEGREE_MAP = {\n",
- " \"Masters\": \"Graduate Student\",\n",
- " \"PhD\": \"Postdoctorate\", # if end_date is present\n",
- " \"Bachelors\": \"Undergraduate Student\",\n",
- "}\n",
+ "environment.globals[\"page_link\"] = page_link\n",
+ "environment.globals['tag_colors'] = TAG_COLORS\n",
+ "environment.globals['get_tag_color'] = get_tag_color\n",
"\n",
- "INDIVIDUAL_MEMBER_SECTION_MAP = {\n",
- " \"education\": \"Education\",\n",
- " \"experiences\": \"Experience\",\n",
- " \"projects\": \"Projects\",\n",
- " \"awards\": \"Awards & Recognition\",\n",
- " \"outreach\": \"Outreach Programs\",\n",
- "}"
+ "def create_page(template, html, **kwargs):\n",
+ " \"\"\"Create an HTML page using a Jinja2 template and save it to a specified path\"\"\"\n",
+ " page_template = environment.get_template(template)\n",
+ " template_level = html.count(\"/\")\n",
+ " page_html_path = HOSTING_PATH / html\n",
+ " page_html_path.parent.mkdir(parents=True, exist_ok=True)\n",
+ " page_content = page_template.render(TEMPLATE_LEVEL=template_level, **kwargs)\n",
+ " with open(page_html_path, mode=\"w\", encoding=\"utf-8\") as page:\n",
+ " page.write(page_content)"
]
},
{
"cell_type": "markdown",
+ "id": "d2784b7b-9456-4fd3-92a4-baf46a5a09d5",
"metadata": {},
"source": [
- "# Functions for Data Handling"
+ "# Read Data"
]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 4,
+ "id": "00852806-a096-462c-be87-2ad74af2869e",
"metadata": {},
"outputs": [],
"source": [
- "def loading_website_data(file_to_load):\n",
- " \"\"\"\n",
- " Load data from JSON files specified in a list of file names.\n",
- "\n",
- " Parameters:\n",
- " ----------\n",
- " json_data_list : list of str\n",
- " A list of file names (without extension) to load as JSON.\n",
+ "# Load member and article data from CSVs\n",
+ "members_df = pd.read_csv(CSV_DIR_PATH / \"members.csv\", index_col=0)\n",
+ "education_df = pd.read_csv(CSV_DIR_PATH / \"education.csv\", index_col=0)\n",
+ "experiences_df = pd.read_csv(CSV_DIR_PATH / \"experiences.csv\", index_col=0)\n",
+ "projects_df = pd.read_csv(CSV_DIR_PATH / \"projects.csv\", index_col=0)\n",
+ "awards_df = pd.read_csv(CSV_DIR_PATH / \"awards.csv\", index_col=0)\n",
+ "outreach_df = pd.read_csv(CSV_DIR_PATH / \"outreach.csv\", index_col=0)\n",
+ "documents_df = pd.read_csv(CSV_DIR_PATH / \"documents.csv\", index_col=0)\n",
"\n",
- " Returns:\n",
- " -------\n",
- " dict\n",
- " A dictionary where keys are file names and values are the corresponding JSON data.\n",
- "\n",
- " Raises:\n",
- " ------\n",
- " FileNotFoundError:\n",
- " If a specified file does not exist.\n",
- " json.JSONDecodeError:\n",
- " If there's an issue decoding the JSON content from a file.\n",
- "\n",
- " \"\"\"\n",
- " loaded_data = {}\n",
- " file_matches = WEBSITE_DATA_PATH/ f\"{file_to_load}.json\"\n",
- " if file_matches:\n",
- " try:\n",
- " with open(file_matches, \"r\") as json_file:\n",
- " loaded_data = json.load(json_file)\n",
- " except json.JSONDecodeError:\n",
- " print(f\"Error decoding JSON in '{file_matches}'.\")\n",
- " else:\n",
- " print(f\"File '{file_to_load}.json' not found.\")\n",
- "\n",
- " return loaded_data"
+ "current_members_with_info = pd.read_csv(CSV_DIR_PATH / \"current_members.csv\", index_col=0)\n",
+ "# Replace NaN with empty string for current_project_title to avoid displaying \"nan\"\n",
+ "current_members_with_info['current_project_title'] = current_members_with_info['current_project_title'].fillna('')\n",
+ "alumni_members_with_info = pd.read_csv(CSV_DIR_PATH / \"alumni_members.csv\", index_col=0)\n",
+ "articles_df = pd.read_csv(CSV_DIR_PATH / \"articles.csv\", index_col=0, parse_dates=['date'])\n",
+ "news_df = pd.read_csv(CSV_DIR_PATH / \"news.csv\", index_col=0, parse_dates=['date'])\n",
+ "research_df = pd.read_csv(CSV_DIR_PATH / \"research.csv\", index_col=0, parse_dates=['date'])\n",
+ "\n"
]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 5,
+ "id": "8489c0c4-db14-4437-926e-a038f3dc3db7",
"metadata": {},
"outputs": [],
"source": [
- "def read_member_data_jsons(file_to_read):\n",
- " member_data_list = []\n",
- " member_data_df = pd.DataFrame([])\n",
- " for single_info_file_path in MEMBERS_DIR_PATH.glob(\"*/info.json\"):\n",
- " with open(single_info_file_path, \"r\") as f_info:\n",
- " member_data = json.load(f_info)\n",
- " member_unique_id = member_data[\"id\"]\n",
- " file_to_read_path = single_info_file_path.parent / \"jsons\" / file_to_read\n",
+ "# Parse dates for dataframes with date columns\n",
+ "for df in [education_df, experiences_df, projects_df, outreach_df]:\n",
+ " if 'start_date' in df.columns:\n",
+ " df['start_date'] = pd.to_datetime(df['start_date'])\n",
+ " if 'end_date' in df.columns:\n",
+ " df['end_date'] = pd.to_datetime(df['end_date'])\n",
"\n",
- " if file_to_read_path.exists():\n",
- " with file_to_read_path.open(\"r\") as f_data:\n",
- " member_other_data = json.load(f_data)\n",
- " for entry in member_other_data:\n",
- " entry[\"id\"] = member_unique_id\n",
- " member_data_list.append(\n",
- " pd.DataFrame(member_other_data)\n",
- " )\n",
- " # else:\n",
- " # data_path_in_kl = KERZENDORF_GROUP_DATA / \"members\" / member_unique_id / \"jsons\" / file_to_read\n",
- " # if data_path_in_kl.exists():\n",
- " # with data_path_in_kl.open(\"r\") as data_file:\n",
- " # member_other_data_kl = json.load(data_file)\n",
- " # for entry in member_other_data_kl:\n",
- " # entry[\"id\"] = member_unique_id\n",
- " # member_data_list.append(\n",
- " # pd.DataFrame(member_other_data_kl)\n",
- " # )\n",
- "\n",
- " if member_data_list:\n",
- " member_data_df = pd.concat(\n",
- " member_data_list, ignore_index=True\n",
- " )\n",
- " member_data_df.set_index(\"id\", inplace=True)\n",
- "\n",
- " return member_data_df"
+ "# Awards may also carry a 'date' column, so it is handled separately\n",
+ "if 'date' in awards_df.columns:\n",
+ " awards_df['date'] = pd.to_datetime(awards_df['date'])\n",
+ "if 'start_date' in awards_df.columns:\n",
+ " awards_df['start_date'] = pd.to_datetime(awards_df['start_date'])\n",
+ "if 'end_date' in awards_df.columns:\n",
+ " awards_df['end_date'] = pd.to_datetime(awards_df['end_date'])"
]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 6,
+ "id": "83d6e833-c0f7-4818-a362-a8e36d580614",
"metadata": {},
"outputs": [],
"source": [
- "def set_new_image_path(source_dir, old_image_path):\n",
- " article_image_path = source_dir.parent / \"media\" / \"images\"\n",
- " image_source = article_image_path / old_image_path.name\n",
- " image_destination = ARTICLE_IMAGE_DESTINATION_DIR / old_image_path.name\n",
- "\n",
- " # Create destination directory if it doesn't exist\n",
- " image_destination.parent.mkdir(parents=True, exist_ok=True)\n",
- "\n",
- " website_files_index = image_destination.parts.index(\"website_files\")\n",
- " new_image_path = Path(*image_destination.parts[website_files_index:])\n",
- " shutil.copy2(image_source, image_destination)\n",
- " return str(new_image_path)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# DataFrame Creation and Processing"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Creating dataframes for articles which can be updated further "
+ "# to_csv stringified the dict-valued 'content' and 'links' columns; parse them back into Python objects\n",
+ "for df in [articles_df, news_df, research_df]:\n",
+ " df['content'] = df['content'].apply(ast.literal_eval)\n",
+ " df['links'] = df['links'].apply(ast.literal_eval)\n"
]
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {
- "ExecuteTime": {
- "end_time": "2023-11-21T07:07:36.355136270Z",
- "start_time": "2023-11-21T07:07:36.080422459Z"
- }
- },
+ "execution_count": 7,
+ "id": "8b9d6812-991d-4072-aaa2-b9bc185ee83f",
+ "metadata": {},
"outputs": [],
"source": [
- "# Reading all articles\n",
- "article_content_list = []\n",
- "today = date.today()\n",
- "for content_file_name in ARTICLE_DIR_PATH.rglob('info.json'):\n",
- " with open(content_file_name, \"r\") as fcontent:\n",
- " article_content = json.load(fcontent)\n",
- " today_datetime = datetime.combine(today, datetime.min.time())\n",
- " article_date = datetime.strptime(article_content[\"date\"], \"%m-%d-%Y\")\n",
- " if \"kg\" in article_content[\"platforms\"] and article_date <= today_datetime:\n",
- " image_path = Path(article_content[\"cover_image\"])\n",
- " article_content[\"cover_image\"] = set_new_image_path(content_file_name, image_path)\n",
- " for content_key, content_value in article_content[\"content\"].items():\n",
- " if \"img\" in content_key:\n",
- " new_content_value = set_new_image_path(content_file_name, Path(content_value))\n",
- " article_content[\"content\"][content_key] = new_content_value\n",
- " article_content_list.append(article_content)\n",
- "article_content_df = pd.DataFrame(article_content_list)\n",
- "\n",
- "article_content_df[\"date\"] = pd.to_datetime(\n",
- " article_content_df[\"date\"], format=\"%m-%d-%Y\"\n",
- ")\n",
- "\n",
- "article_content_df[\"cover_image_height\"] = (\n",
- " article_content_df[\"cover_image_height\"].fillna(\"330px\").replace(\"\", \"330px\")\n",
- ")\n",
- "article_content_df[\"cover_image_width\"] = (\n",
- " article_content_df[\"cover_image_width\"].fillna(\"520px\").replace(\"\", \"520px\")\n",
- ")\n",
+ "# Group dataframes\n",
+ "def group_df(df):\n",
+ " \"\"\"Group dataframe by index and convert to nested dict format for templates\"\"\"\n",
+ " return df.fillna(\"\").groupby(level=0).apply(lambda x: x.to_dict('records')).to_frame('info').to_dict('index')\n",
"\n",
- "#THis line is only for kerzendorf lab and is not needed on dti\n",
- "article_content_df[\"category\"] = article_content_df[\"category\"].replace(\n",
- " \"Overview\", \"Computational Metascience\"\n",
- ")\n",
"\n",
- "article_content_df['image_name'] = article_content_df['cover_image'].apply(lambda x: Path(x).name)"
+ "education = group_df(education_df)\n",
+ "experience = group_df(experiences_df)\n",
+ "projects = group_df(projects_df)\n",
+ "awards = group_df(awards_df)\n",
+ "outreach = group_df(outreach_df)\n",
+ "documents = group_df(documents_df)"
]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 8,
+ "id": "9b2c0b5c-a413-4484-ad35-ef104801679b",
"metadata": {},
"outputs": [],
"source": [
- "news_content_df = article_content_df[\n",
- " (article_content_df[\"category\"] == \"News\")\n",
- " | (\n",
- " article_content_df[\"tags\"].apply(\n",
- " lambda x: \"news\" in x if isinstance(x, list) else False\n",
- " )\n",
- " )\n",
- "].sort_values(by=[\"date\"], ascending=[False])\n",
- "\n",
- "research_content_df = article_content_df[\n",
- " article_content_df[\"category\"] != \"News\"\n",
- "].sort_values(by=[\"category\", \"date\"], ascending=[True, False])"
+ "# Set up socials and dicts\n",
+ "# Load social fields from schema\n",
+ "social_schema_path = GROUP_DATA_DIR / \"schemas/members/social_links.json\"\n",
+ "social_schema = json.loads(social_schema_path.read_text())\n",
+ "social_cols = list(social_schema['properties'].keys())\n",
+ "socials = members_df[social_cols].fillna('').to_dict('index')"
]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 9,
+ "id": "8f1b6484-021f-4df7-8b30-ddbf5d350fbe",
"metadata": {},
"outputs": [],
"source": [
- "info_json_list = []\n",
- "for single_info_file_path in MEMBERS_DIR_PATH.glob(\"*/info.json\"):\n",
- " with open(single_info_file_path, \"r\") as f_info:\n",
- " member_data = json.load(f_info)\n",
- " # if len(member_data.keys()) == 1:\n",
- " # info_json_path = (\n",
- " # KERZENDORF_GROUP_DATA / \"members\" / member_data[\"id\"] / \"info.json\"\n",
- " # )\n",
- " # member_images_dir = HOSTING_PATH / \"members\" / member_data[\"id\"] / \"media\"\n",
- " # with open(info_json_path, \"r\") as f_info_kl:\n",
- " # member_data_from_kl = json.load(f_info_kl)\n",
- " # member_images_dir_source = (\n",
- " # KERZENDORF_GROUP_DATA / \"members\" / member_data[\"id\"] / \"media\"\n",
- " # )\n",
- "\n",
- " # shutil.copytree(member_images_dir_source, member_images_dir, dirs_exist_ok=True)\n",
- " # info_json_list.append(member_data_from_kl)\n",
- " # else:\n",
- " info_json_list.append(member_data)\n",
- "info_json_df = pd.DataFrame(info_json_list)\n",
- "info_json_df.set_index(\"id\", inplace=True)\n",
- "info_json_df[\"full_name\"] = info_json_df.apply(\n",
- " lambda row: (\n",
- " row[\"nick_name\"] + \" \" + row[\"last_name\"]\n",
- " if pd.notna(row[\"nick_name\"])\n",
- " else row[\"first_name\"] + \" \" + row[\"last_name\"]\n",
- " ),\n",
- " axis=1,\n",
- ")\n",
- "info_json_dict = info_json_df.to_dict(\"index\")"
+ "all_members_dict = members_df.to_dict(\"index\")\n",
+ "all_articles_dict = {\n",
+ " aid: {**data, 'article_id': aid}\n",
+ " for aid, data in articles_df.to_dict(\"index\").items()\n",
+ "}\n"
]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 10,
+ "id": "22ab9d8b-7bc2-4e85-9801-2d6414106978",
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Created 34 individual member pages\n"
+ ]
+ }
+ ],
"source": [
- "exp_df = read_member_data_jsons(\"experiences.json\")[\n",
- " [\n",
- " \"role\",\n",
- " \"start_date\",\n",
- " \"end_date\",\n",
- " \"institution\",\n",
- " \"group\",\n",
- " ]\n",
- "]\n",
- "edu_df = read_member_data_jsons(\"education.json\")[\n",
- " [\"start_date\", \"end_date\", \"institution\", \"subject\", \"degree\"]\n",
- "]"
+ "general = json.loads((WEBSITE_DATA_PATH / \"general.json\").read_text())\n",
+ "\n",
+ "for person_id, person_data in members_df.iterrows():\n",
+ " create_page(\n",
+ " \"individual_person.html.j2\",\n",
+ " f\"members/{person_id}/{person_id}.html\",\n",
+ " general=general,\n",
+ " member_id=person_id,\n",
+ " member_data=person_data,\n",
+ " socials=socials,\n",
+ " documents=documents,\n",
+ " education=education,\n",
+ " experience=experience,\n",
+ " projects=projects,\n",
+ " awards=awards,\n",
+ " outreach=outreach,\n",
+ " content=all_articles_dict,\n",
+ " )\n",
+ "print(f\"Created {len(members_df)} individual member pages\")\n"
]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 11,
+ "id": "c7e55573-9b43-44e2-b214-88fd870724ee",
"metadata": {},
"outputs": [],
"source": [
- "edu_df['end_date'] = pd.to_datetime(edu_df['end_date'], format='%Y-%m-%d')\n",
- "edu_df['start_date'] = pd.to_datetime(edu_df['start_date'], format='%Y-%m-%d')\n",
+ "# Copy assets and load JSON files\n",
+ "shutil.copytree(SOURCE_ASSETS, HOSTING_PATH / \"assets\", dirs_exist_ok=True)\n",
"\n",
- "def most_recent_row(group):\n",
- " sorted_group = group.sort_values(by=['start_date', 'end_date'], ascending=[False, True])\n",
- " return sorted_group.iloc[0:1] # Force single row\n",
+ "homepage = json.loads((WEBSITE_DATA_PATH / \"homepage.json\").read_text())\n",
+ "contact = json.loads((WEBSITE_DATA_PATH / \"contact.json\").read_text())\n",
+ "support = json.loads((WEBSITE_DATA_PATH / \"support.json\").read_text())\n",
+ "research = json.loads((WEBSITE_DATA_PATH / \"research_categories.json\").read_text())\n",
"\n",
- " \n",
- "edu_df_most_recent = (\n",
- " edu_df.groupby(\"id\").apply(most_recent_row).droplevel(0)\n",
- ")\n",
+ "# Get recent content for homepage\n",
+ "recent_content_df = articles_df.sort_values(\n",
+ " [\"category\", \"date\"], ascending=[True, False]\n",
+ ").groupby(\"category\").head(1)\n",
"\n",
"\n",
- "edu_df_most_recent['academic_role'] = \"\"\n",
- "for edu_mem_id, edu_mem_value in edu_df_most_recent.iterrows():\n",
- " if edu_mem_value['institution'] == INSTITUTION_FILTER:\n",
- " if edu_mem_value['degree'] == \"Bachelors\":\n",
- " edu_df_most_recent.at[edu_mem_id, 'academic_role'] = \"Undergraduate Student\"\n",
- " elif edu_mem_value['degree'] in [\"PhD\", \"Masters\"]:\n",
- " edu_df_most_recent.at[edu_mem_id, 'academic_role'] = \"Graduate Student\"\n",
- "edu_df_most_recent_diff_suffix = edu_df_most_recent.add_suffix(\"_edu\")"
+ "# Create homepage\n",
+ "create_page(\n",
+ " \"homepage.html.j2\",\n",
+ " \"index.html\",\n",
+ " general=general,\n",
+ " homepage=homepage,\n",
+ " recent_content=recent_content_df.reset_index().to_dict(orient=\"records\"),\n",
+ ")\n"
]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 12,
+ "id": "bf471fde-5a05-4ddd-9ae8-038f6f35e9be",
"metadata": {},
"outputs": [],
"source": [
- "social_link_list = []\n",
- "for single_member_file_path in MEMBERS_DIR_PATH.rglob(\"social_links.json\"):\n",
- " with open(single_member_file_path, \"r\") as fname:\n",
- " member_social_link = json.load(fname)\n",
- " info_json_file_path = single_member_file_path.parent.parent / \"info.json\"\n",
- " with open(info_json_file_path, \"r\") as file_info:\n",
- " member_info_data = json.load(file_info)\n",
- " mem_id = member_info_data[\"id\"]\n",
- " member_social_link[\"id\"] = mem_id\n",
- " social_link_list.append(member_social_link)\n",
- "social_links_df = pd.DataFrame(social_link_list)\n",
- "social_links_df.set_index(\"id\", inplace=True)\n",
- "social_links_df.fillna(\"\", inplace=True)"
+ "\n",
+ "# Create current members page\n",
+ "create_page(\n",
+ " \"current_members.html.j2\",\n",
+ " \"current_members.html\",\n",
+ " general=general,\n",
+ " current_members=current_members_with_info,\n",
+ " socials=socials\n",
+ ")\n"
]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 13,
+ "id": "daa3b00e-9fa3-4468-b42f-fa0fbbd88d00",
"metadata": {},
"outputs": [],
"source": [
- "recent_content = article_content_df.sort_values(\n",
- " by=[\"category\", \"date\"], ascending=[True, False]\n",
- ")\n",
- "# Get the first row for each category using groupby and head\n",
- "recent_content = recent_content.groupby(\"category\").head(1).copy()"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# Page Creation"
+ "# Create alumni page\n",
+ "create_page(\n",
+ " \"alumni_members.html.j2\",\n",
+ " \"alumni_members.html\",\n",
+ " general=general,\n",
+ " alumni_members=alumni_members_with_info,\n",
+ ")"
]
},
{
- "cell_type": "markdown",
+ "cell_type": "code",
+ "execution_count": 14,
+ "id": "4a638ad6-aad1-4b3a-b3db-46b8f3f36aed",
"metadata": {},
+ "outputs": [],
"source": [
- "Function to create a page"
+ "# Create contact page\n",
+ "create_page(\n",
+ " \"contact.html.j2\",\n",
+ " \"Contact.html\",\n",
+ " general=general,\n",
+ " contact=contact\n",
+ ")"
]
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {
- "ExecuteTime": {
- "end_time": "2023-11-21T07:07:36.337418487Z",
- "start_time": "2023-11-21T07:07:36.065742781Z"
- }
- },
+ "execution_count": 15,
+ "id": "04895fd5-cc5e-4238-bfdb-30377b5d8f5d",
+ "metadata": {},
"outputs": [],
"source": [
- "def create_page(template, html, **kwargs):\n",
- " \"\"\"\n",
- " Create an HTML page using a Jinja2 template and save it to a specified path.\n",
"\n",
- " Parameters:\n",
- " ----------\n",
- " template : str\n",
- " The filename of the Jinja2 template to be used.\n",
- " html : str\n",
- " The filename of the HTML file to be generated.\n",
- " **kwargs : dict\n",
- " Additional keyword arguments to be passed to the Jinja2 template for rendering.\n",
- "\n",
- " Returns:\n",
- " -------\n",
- " None\n",
- "\n",
- " \"\"\"\n",
- " page_template = environment.get_template(template)\n",
- " template_level = html.count(\"/\")\n",
- " page_html_path = HOSTING_PATH / html\n",
- " page_html_path.parent.mkdir(parents=True, exist_ok=True)\n",
- " page_content = page_template.render(TEMPLATE_LEVEL=template_level, **kwargs)\n",
- " with open(page_html_path, mode=\"w\", encoding=\"utf-8\") as page:\n",
- " page.write(page_content)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# Processing List Of JSON files"
+ "# Create support page\n",
+ "create_page(\n",
+ " \"support.html.j2\",\n",
+ " \"Support.html\",\n",
+ " general=general,\n",
+ " support=support\n",
+ ")\n"
]
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {
- "ExecuteTime": {
- "end_time": "2023-11-21T07:07:36.392640967Z",
- "start_time": "2023-11-21T07:07:36.099540795Z"
- }
- },
- "outputs": [],
- "source": [
- "# Function Call\n",
- "general = loading_website_data(\"general\")\n",
- "homepage = loading_website_data(\"homepage\")\n",
- "contact = loading_website_data(\"contact\")\n",
- "research = loading_website_data(\"research_categories\")\n",
- "support = loading_website_data(\"support\")"
- ]
- },
- {
- "attachments": {},
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# Homepage"
- ]
- },
- {
- "cell_type": "markdown",
+ "execution_count": 16,
+ "id": "505c0a4f-9b47-4499-9dc4-22d54b36a125",
"metadata": {},
- "source": [
- "Storing selected columns for Homepage only"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "ExecuteTime": {
- "end_time": "2023-11-21T07:07:36.460321141Z",
- "start_time": "2023-11-21T07:07:36.164866903Z"
- }
- },
"outputs": [],
"source": [
+ "# Create research page\n",
"create_page(\n",
- " \"homepage.html.j2\",\n",
- " \"index.html\",\n",
+ " \"research.html.j2\",\n",
+ " \"Research.html\",\n",
" general=general,\n",
- " homepage=homepage,\n",
- " recent_content=recent_content.to_dict(orient=\"records\"),\n",
+ " content=research_df.reset_index(),\n",
+ " research=research,\n",
+ " current_members=all_members_dict,\n",
")"
]
},
- {
- "attachments": {},
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# Current Members Page"
- ]
- },
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {
- "ExecuteTime": {
- "end_time": "2023-11-21T07:07:36.460567962Z",
- "start_time": "2023-11-21T07:07:36.165051029Z"
- }
- },
+ "execution_count": 17,
+ "id": "006d5615-5c19-4cc2-ada0-c86535bf2118",
+ "metadata": {},
"outputs": [],
"source": [
- "exp_df['end_date'] = pd.to_datetime(exp_df['end_date'], format='%Y-%m-%d')\n",
- "exp_df['start_date'] = pd.to_datetime(exp_df['start_date'], format='%Y-%m-%d')\n",
- "exp_df = exp_df.fillna(\"\")\n",
- "filtered_exp_df = exp_df[(exp_df[\"end_date\"].isna()) | (exp_df[\"end_date\"].dt.date >= datetime.now().date())]\n",
- "\n",
- "def most_recent_row(group):\n",
- " sorted_group = group.sort_values(by=['start_date', 'end_date'], ascending=[False, True])\n",
- " # Filter the sorted group by the condition that the group name is in GROUP_FILTER\n",
- " relevant_group = sorted_group[sorted_group['group'].str.contains('|'.join(GROUP_FILTER))]\n",
- " # Return the most recent relevant experience\n",
- " return relevant_group.iloc[0:1] if len(relevant_group) > 0 else sorted_group.iloc[0:1]\n",
- " \n",
- " \n",
- "filtered_exp_df_most_recent = exp_df.groupby(\"id\").apply(most_recent_row).droplevel(0)\n",
- "exp_df_most_recent = exp_df.groupby(\"id\").apply(most_recent_row).droplevel(0)\n",
- "exp_df_most_recent_diff_suffix = exp_df_most_recent.add_suffix('_exp')"
+ "# Create sub_research directory\n",
+ "SUB_RESEARCH_PATH.mkdir(parents=True, exist_ok=True)"
]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 18,
+ "id": "17654174-2dfc-4446-9298-aa5f791c3c6b",
"metadata": {},
"outputs": [],
"source": [
- "merged_edu_exp_df = exp_df_most_recent_diff_suffix.merge(edu_df_most_recent_diff_suffix, on='id', how='outer')"
+ "\n",
+ "# Create category pages\n",
+ "for category in research_df[\"category\"].unique():\n",
+ " create_page(\n",
+ " \"sub_research_frontpage.html.j2\",\n",
+ " f\"sub_research/{page_link(category.lower())}.html\",\n",
+ " general=general,\n",
+ " research=research,\n",
+ " content=research_df.reset_index(),\n",
+ " category=category,\n",
+ " current_members=all_members_dict,\n",
+ " )\n"
]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 19,
+ "id": "48dd8fd1-8983-4a3f-8883-4249409733f1",
"metadata": {},
"outputs": [],
"source": [
- "merged_edu_exp_df"
+ "\n",
+ "# Create individual research pages\n",
+ "for article_id, ind_research_values in research_df.iterrows():\n",
+ " destination_research_path = f\"sub_research/{page_link(ind_research_values.category.lower())}/{page_link(article_id.lower())}.html\"\n",
+ " if ind_research_values['category'] == \"Software\":\n",
+ " destination_research_path = f\"sub_research/{page_link(article_id.lower())}.html\"\n",
+ "\n",
+ " folder_path = SUB_RESEARCH_PATH / page_link(ind_research_values.category.lower())\n",
+ " folder_path.mkdir(parents=True, exist_ok=True)\n",
+ " create_page(\n",
+ " \"research_page_no_twitter.html.j2\",\n",
+ " destination_research_path,\n",
+ " general=general,\n",
+ " content=ind_research_values,\n",
+ " member_data=all_members_dict,\n",
+ " article_id=article_id,\n",
+ " )\n"
]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 20,
+ "id": "f532fcfd-f0b3-4b79-9e08-0db138ad3d4c",
"metadata": {},
"outputs": [],
"source": [
- "merged_edu_exp_df['isCurrent'] = False\n",
- "merged_edu_exp_df['current_role'] = \"\"\n",
- "for merged_mem_id, merged_mem_value in merged_edu_exp_df.iterrows():\n",
- " if merged_mem_value['institution_edu'] == INSTITUTION_FILTER:\n",
- " if pd.isna(merged_mem_value['end_date_edu']) or merged_mem_value['end_date_edu'] >= datetime.now():\n",
- " merged_edu_exp_df.at[merged_mem_id, 'isCurrent'] = True\n",
- " if pd.notna(merged_mem_value['end_date_exp']):\n",
- " merged_edu_exp_df.at[merged_mem_id, 'isCurrent'] = False\n",
- " acad_role = merged_mem_value.get('academic_role_edu')\n",
- " if acad_role:\n",
- " merged_edu_exp_df.at[merged_mem_id, 'current_role'] = merged_mem_value[\"academic_role_edu\"]\n",
- " else:\n",
- " merged_edu_exp_df.at[merged_mem_id, 'isCurrent'] = False\n",
- " acad_role = merged_mem_value.get('academic_role_edu')\n",
- " if acad_role:\n",
- " merged_edu_exp_df.at[merged_mem_id, 'current_role'] = merged_mem_value[\"academic_role_edu\"]\n",
- " else:\n",
- " merged_edu_exp_df.at[merged_mem_id, 'current_role'] = merged_mem_value[\"role_exp\"]\n",
- " elif merged_mem_value['group_exp'] in GROUP_FILTER and (pd.isna(merged_mem_value['end_date_exp']) or merged_mem_value['end_date_exp'] >= datetime.now()):\n",
- " merged_edu_exp_df.at[merged_mem_id, 'isCurrent'] = True\n",
- " merged_edu_exp_df.at[merged_mem_id, 'current_role'] = merged_mem_value[\"role_exp\"]\n",
- " else:\n",
- " merged_edu_exp_df.at[merged_mem_id, 'isCurrent'] = False\n",
- " acad_role = merged_mem_value.get('academic_role_edu')\n",
- " if acad_role:\n",
- " merged_edu_exp_df.at[merged_mem_id, 'current_role'] = merged_mem_value[\"academic_role_edu\"]\n",
- " else:\n",
- " merged_edu_exp_df.at[merged_mem_id, 'current_role'] = merged_mem_value[\"role_exp\"]\n",
- "merged_edu_exp_df['current_role'] = merged_edu_exp_df['current_role'].replace(ROLE_MAP)"
+ "\n",
+ "# Create news page\n",
+ "create_page(\n",
+ " \"news.html.j2\",\n",
+ " \"News.html\",\n",
+ " general=general,\n",
+ " content=news_df.reset_index(),\n",
+ " category=\"News\",\n",
+ " member_data=all_members_dict,\n",
+ ")\n",
+ "\n",
+ "# Create individual news pages\n",
+ "news_dict_list = news_df.reset_index().to_dict('records')\n",
+ "for news_item in news_dict_list:\n",
+ " create_page(\n",
+ " \"news_page_no_twitter.html.j2\",\n",
+ " f\"news/{page_link(news_item['article_id'].lower())}.html\",\n",
+ " general=general,\n",
+ " content=news_item,\n",
+ " member_data=all_members_dict,\n",
+ " category=\"News\"\n",
+ " )\n"
]
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "current_member_df = merged_edu_exp_df[merged_edu_exp_df['isCurrent'] == True][[\"current_role\"]]\n",
- "current_member_df_with_info = pd.merge(current_member_df, info_json_df, on='id', how='inner')"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "alumni_member_df = merged_edu_exp_df[merged_edu_exp_df['isCurrent'] == False][[\"current_role\"]]\n",
- "alumni_member_df_with_info = pd.merge(alumni_member_df, info_json_df, on='id', how='inner')[['current_role', 'full_name']]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "projects_df = read_member_data_jsons(\"projects.json\").sort_values(\n",
- " by=[\"end_date\"], ascending=False\n",
- ")\n",
- "projects_df['end_date'] = pd.to_datetime(projects_df['end_date'], format='%Y-%m-%d')\n",
- "projects_df['start_date'] = pd.to_datetime(projects_df['start_date'], format='%Y-%m-%d')\n",
- "projects_df.fillna(\"\", inplace=True)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "for mem_key, mem_value in current_member_df.iterrows():\n",
- " if mem_key in projects_df.index:\n",
- " mem_projects = projects_df.loc[mem_key]\n",
- " if not mem_projects.empty:\n",
- " if isinstance(mem_projects, pd.Series):\n",
- " current_project_title = mem_projects[\"project_title\"]\n",
- " else:\n",
- " current_project_title = mem_projects.iloc[0][\"project_title\"]\n",
- " else:\n",
- " current_project_title = \"\"\n",
- " current_member_df_with_info.loc[mem_key, \"current_project_title\"] = current_project_title"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "current_member_df"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Function to sort the members on basis of their roles"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "with open(ROLE_HIERARCHY_PATH, \"r\") as file_name: \n",
- " role_hierarchy = json.load(file_name)\n",
- "current_member_df_with_info['rank'] = current_member_df_with_info['current_role'].map(role_hierarchy)\n",
- "\n",
- "current_member_df_with_info = current_member_df_with_info.sort_values(by='rank')\n",
- "current_member_df_with_info = current_member_df_with_info.drop(columns='rank')\n",
- "current_member_df_with_info[['current_role', 'full_name', 'image_path', 'cover_image_path','current_project_title']]"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Current Members Page"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "create_page(\n",
- " \"current_members.html.j2\",\n",
- " \"current_members.html\",\n",
- " general=general,\n",
- " current_members=current_member_df_with_info,\n",
- " socials=social_links_df.to_dict(\"index\")\n",
- ")"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Alumni Members Page"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "create_page(\n",
- " \"alumni_members.html.j2\",\n",
- " \"alumni_members.html\",\n",
- " general=general,\n",
- " alumni_members=alumni_member_df_with_info,\n",
- ")"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Individual People Page"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "def group_df(df):\n",
- " new_df = (df.fillna(\"\").groupby(\"id\")\n",
- " .apply(lambda x: x.to_dict(orient=\"records\"))\n",
- " .reset_index(name=\"info\")\n",
- " .set_index(\"id\")\n",
- " .to_dict(orient=\"index\"))\n",
- " return new_df"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "document_df = read_member_data_jsons(\"documents.json\")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "outreach_df = read_member_data_jsons(\"outreach.json\")\n",
- "if not outreach_df.empty:\n",
- " outreach_grouped = group_df(outreach_df)\n",
- "else:\n",
- " outreach_grouped = {}"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "awards_df = read_member_data_jsons(\"awards.json\")\n",
- "awards_grouped = group_df(awards_df)\n",
- "\n",
- "exp_grouped = group_df(exp_df)\n",
- "edu_grouped = group_df(edu_df)\n",
- "projects_grouped = group_df(projects_df)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "info_json_df.fillna(\"\", inplace=True)\n",
- "info_json_df[\"academic_role\"] = \"\"\n",
- "info_json_df[\"current_project_title\"] = \"\"\n",
- "for member_id, member_data in info_json_df.iterrows():\n",
- " if member_id in current_member_df_with_info.index:\n",
- " # Handle case where member has duplicate entries (returns Series)\n",
- " current_role_value = current_member_df_with_info.loc[member_id, \"current_role\"]\n",
- " if isinstance(current_role_value, pd.Series):\n",
- " current_role_value = current_role_value.iloc[0]\n",
- " \n",
- " current_project_value = current_member_df_with_info.loc[member_id, \"current_project_title\"]\n",
- " if isinstance(current_project_value, pd.Series):\n",
- " current_project_value = current_project_value.iloc[0]\n",
- " \n",
- " info_json_df.at[member_id, \"academic_role\"] = current_role_value\n",
- " info_json_df.at[member_id, \"current_project_title\"] = current_project_value\n",
- " elif member_id in alumni_member_df.index:\n",
- " role_value = alumni_member_df.loc[member_id, \"current_role\"]\n",
- " # Handle case where member has duplicate entries (returns Series)\n",
- " if isinstance(role_value, pd.Series):\n",
- " role_value = role_value.iloc[0]\n",
- " # Convert NaN to empty string\n",
- " if pd.isna(role_value):\n",
- " role_value = \"\"\n",
- " info_json_df.at[member_id, \"academic_role\"] = role_value\n",
- "alumni_member_df.replace(\"nan\", np.nan, inplace=True)\n",
- "alumni_member_df.fillna(\"\", inplace=True)\n",
- "current_member_df_with_info.fillna(\"\", inplace=True)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "for person_id, person_data in info_json_df.iterrows():\n",
- " create_page(\n",
- " \"individual_person.html.j2\",\n",
- " f\"members/{person_id}/{person_id}.html\",\n",
- " general=general,\n",
- " member_id=person_id,\n",
- " member_data=person_data,\n",
- " socials=social_links_df.to_dict(\"index\"),\n",
- " documents=document_df.to_dict(\"index\"),\n",
- " education=edu_grouped,\n",
- " experience=exp_grouped,\n",
- " projects=projects_grouped,\n",
- " awards=awards_grouped,\n",
- " outreach=outreach_grouped,\n",
- " section_headings=INDIVIDUAL_MEMBER_SECTION_MAP,\n",
- " content=article_content_df.to_dict(\"index\"),\n",
- " )"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# Contact Page"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "ExecuteTime": {
- "end_time": "2023-11-21T07:07:36.569018310Z",
- "start_time": "2023-11-21T07:07:36.457030906Z"
- }
- },
- "outputs": [],
- "source": [
- "create_page(\n",
- " \"contact.html.j2\",\n",
- " \"Contact.html\",\n",
- " general=general,\n",
- " contact=contact\n",
- ")"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# Support Page"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "ExecuteTime": {
- "end_time": "2023-11-21T07:07:36.588932886Z",
- "start_time": "2023-11-21T07:07:36.457249500Z"
- }
- },
- "outputs": [],
- "source": [
- "create_page(\n",
- " \"support.html.j2\",\n",
- " \"Support.html\",\n",
- " general=general,\n",
- " support=support\n",
- ")"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# Research Front Page"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "For adding more columns in dataframe to render front pages and individual article pages"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "ExecuteTime": {
- "end_time": "2023-11-21T07:07:36.589247191Z",
- "start_time": "2023-11-21T07:07:36.501093779Z"
- }
- },
- "outputs": [],
- "source": [
- "create_page(\n",
- " \"research.html.j2\",\n",
- " \"Research.html\",\n",
- " general=general,\n",
- " content=research_content_df,\n",
- " research=research,\n",
- " current_members=info_json_dict,\n",
- ")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "ExecuteTime": {
- "end_time": "2023-11-21T07:07:36.589414282Z",
- "start_time": "2023-11-21T07:07:36.544920583Z"
- }
- },
- "outputs": [],
- "source": [
- "SUB_RESEARCH_PATH.mkdir(parents=True, exist_ok=True)\n",
- "\n",
- "for category in article_content_df.loc[\n",
- " article_content_df.category != \"News\", \"category\"\n",
- "].unique():\n",
- " create_page(\n",
- " \"sub_research_frontpage.html.j2\",\n",
- " f\"sub_research/{page_link(category.lower())}.html\",\n",
- " general=general,\n",
- " research=research,\n",
- " content=research_content_df,\n",
- " category=category,\n",
- " current_members=info_json_dict,\n",
- " )"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "ExecuteTime": {
- "end_time": "2023-11-21T07:07:36.589414282Z",
- "start_time": "2023-11-21T07:07:36.544920583Z"
- }
- },
- "source": [
- "Individual Research Page\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "ExecuteTime": {
- "end_time": "2023-11-21T07:07:36.589414282Z",
- "start_time": "2023-11-21T07:07:36.544920583Z"
- }
- },
- "outputs": [],
- "source": [
- "for ind_research_keys, ind_research_values in research_content_df.iterrows():\n",
- " destination_research_path = f\"sub_research/{page_link(ind_research_values.category.lower())}/{page_link(ind_research_values.article_id.lower())}.html\"\n",
- " if ind_research_values['category'] == \"Software\":\n",
- " destination_research_path = f\"sub_research/{page_link(ind_research_values.article_id.lower())}.html\"\n",
- "\n",
- " folder_path = SUB_RESEARCH_PATH / page_link(ind_research_values.category.lower())\n",
- " folder_path.mkdir(parents=True, exist_ok=True)\n",
- " create_page(\n",
- " \"research_page_no_twitter.html.j2\",\n",
- " destination_research_path,\n",
- " general=general,\n",
- " content=ind_research_values,\n",
- " member_data=info_json_dict,\n",
- " article_id=ind_research_values[\"article_id\"],\n",
- " )"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# News Page"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "import re\n",
- "\n",
- "def urlize_content(content):\n",
- " \"\"\"\n",
- " Replaces IDs wrapped in [] with corresponding names from an existing DataFrame,\n",
- " and wraps the names in anchor tags.\n",
- "\n",
- " Args:\n",
- " content (str): The text content containing IDs in square brackets.\n",
- "\n",
- " Returns:\n",
- " str: The updated content with IDs replaced by anchor tags.\n",
- " \"\"\"\n",
- "\n",
- " def replace_id(match):\n",
- " id_to_fetch= match.group(1)\n",
- " replace_string=\"\"\n",
- " if id_to_fetch in info_json_df.index:\n",
- " name = info_json_df.loc[id_to_fetch, 'full_name']\n",
- " if id_to_fetch in current_member_df_with_info.index:\n",
- " replace_string =f'{name}'\n",
- " else:\n",
- " replace_string = name\n",
- " else:\n",
- " replace_string = id_to_fetch.replace('_', ' ').title()\n",
- "\n",
- " return replace_string\n",
- "\n",
- " urlized_content = re.sub(r'\\[(\\w+)\\]', replace_id, content)\n",
- "\n",
- " return urlized_content\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "for index, row in news_content_df.iterrows():\n",
- " content = row['content']\n",
- " for content_key in content:\n",
- " if \"para\" in content_key:\n",
- " content[content_key] = urlize_content(content[content_key])"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "ExecuteTime": {
- "end_time": "2023-11-21T07:10:29.508008736Z",
- "start_time": "2023-11-21T07:10:29.418263240Z"
- }
- },
- "outputs": [],
- "source": [
- "create_page(\n",
- " \"news.html.j2\",\n",
- " \"News.html\",\n",
- " general=general,\n",
- " content=news_content_df,\n",
- " category=\"News\",\n",
- " member_data=info_json_dict,\n",
- ")"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Individual News Page"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "for ind_news_keys, ind_news_values in news_content_df.iterrows():\n",
- " folder_path = HOSTING_PATH / \"news\" / page_link(ind_news_values.article_id.lower())\n",
- " create_page(\n",
- " \"news_page_no_twitter.html.j2\",\n",
- " f\"news/{page_link(ind_news_values.article_id.lower())}.html\",\n",
- " general=general,\n",
- " content=ind_news_values,\n",
- " member_data=info_json_dict,\n",
- " category=\"News\"\n",
- " )"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# Join Us Page"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
+ "execution_count": 21,
+ "id": "c5df40b7-acb7-47c7-9a87-54cb6327373b",
"metadata": {},
"outputs": [],
"source": [
+ "# Create join us page\n",
"with open(OPPORTUNITIES_PATH, 'r') as f_opp:\n",
- " OPPORTUNITIES = json.load(f_opp)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
+ " opportunities = json.load(f_opp)\n",
+ "\n",
"create_page(\n",
" \"join_us.html.j2\",\n",
" \"Join_Us.html\",\n",
" general=general,\n",
- " opportunities=OPPORTUNITIES\n",
- ")"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# New Research"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# all_research_data = []\n",
- "# for json_file in RESEARCH_CONTENT_SOURCE.rglob(\"info.json\"):\n",
- "# sub_research = []\n",
- "# relative_path = json_file.relative_to(RESEARCH_CONTENT_SOURCE.parent).with_suffix(\"\")\n",
- "# for sub_dir in json_file.parent.iterdir():\n",
- "# if sub_dir.is_dir():\n",
- "# if sub_dir.name != \"media\":\n",
- "# sub_dir_name = sub_dir.name\n",
- "# sub_research.append(sub_dir_name)\n",
- "# else:\n",
- "# dest_path = HOSTING_PATH / relative_path.parent\n",
- "# shutil.copytree(sub_dir, dest_path / \"media\", dirs_exist_ok=True)\n",
- " \n",
- "# # Parse the JSON file\n",
- "# with open(json_file, \"r\") as f_research:\n",
- "# data = json.load(f_research)\n",
- "# if 'research_id' in data:\n",
- "# data['sub_research'] = sub_research\n",
- "# data['url'] = f\"{relative_path}.html\"\n",
- "# all_research_data.append(data)\n",
- "# all_research_df = pd.DataFrame(all_research_data)\n",
- "# indexed_research_df = all_research_df.set_index('research_id')\n",
- "# # Fill all NaN values with empty strings in the DataFrame\n",
- "# indexed_research_df = indexed_research_df.fillna(\"\")\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# d = {}\n",
- "\n",
- "# for index, article in article_content_df.iterrows():\n",
- "# res_articles, news_articles = [], []\n",
- "# if pd.notna(article[\"research_id\"]):\n",
- "# res_id = article[\"research_id\"]\n",
- "# article_id = article[\"article_id\"]\n",
- "# if article['category'] == 'Research':\n",
- "# res_articles.append((article_id, article['date']))\n",
- "# if article['category'] == 'News':\n",
- "# news_articles.append((article_id, article['date']))\n",
- "\n",
- "# if res_id not in d:\n",
- "# d[res_id] = {\"res_articles\": [], \"news_articles\": []}\n",
- "# d[res_id][\"res_articles\"].extend(res_articles)\n",
- "# d[res_id][\"news_articles\"].extend(news_articles)\n",
- "\n",
- "# def get_aggregated_articles(research_id, visited=None):\n",
- "# if visited is None:\n",
- "# visited = set()\n",
- "\n",
- "# # Avoid processing the same research_id multiple times\n",
- "# if research_id in visited:\n",
- "# return {\"res_articles\": [], \"news_articles\": []}\n",
- " \n",
- "# visited.add(research_id)\n",
- "\n",
- "# # Start with articles for the current research_id\n",
- "# aggregated_articles = d.get(research_id, {\"res_articles\": [], \"news_articles\": []}).copy()\n",
- "\n",
- "# # Get sub-research IDs from `indexed_research_df`\n",
- "# sub_researches = indexed_research_df.loc[research_id, \"sub_research\"] if research_id in indexed_research_df.index else []\n",
- "# if isinstance(sub_researches, list) and len(sub_researches) > 0:\n",
- "# for sub_research in sub_researches:\n",
- "# sub_articles = get_aggregated_articles(sub_research, visited)\n",
- "# aggregated_articles[\"res_articles\"].extend(sub_articles[\"res_articles\"])\n",
- "# aggregated_articles[\"news_articles\"].extend(sub_articles[\"news_articles\"])\n",
- "\n",
- "# return aggregated_articles\n",
- "\n",
- "# f = {}\n",
- "# for research_index in indexed_research_df.index:\n",
- "# f[research_index] = get_aggregated_articles(research_index)\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# def sort_articles(articles):\n",
- "# # Sort by date in descending order\n",
- "# sorted_articles = sorted(articles, key=lambda x: x[1], reverse=True)\n",
- "# # Extract only article IDs\n",
- "# return [article[0] for article in sorted_articles]\n",
- "\n",
- "# # Update `f` with sorted articles\n",
- "# for research_index in f:\n",
- "# f[research_index][\"res_articles\"] = sort_articles(f[research_index][\"res_articles\"])\n",
- "# f[research_index][\"news_articles\"] = sort_articles(f[research_index][\"news_articles\"])\n",
- "\n",
- "# # Add sorted articles to `indexed_research_df`\n",
- "# indexed_research_df[\"res_articles\"] = indexed_research_df.index.map(\n",
- "# lambda idx: f.get(idx, {}).get(\"res_articles\", [])\n",
- "# )\n",
- "# indexed_research_df[\"news_articles\"] = indexed_research_df.index.map(\n",
- "# lambda idx: f.get(idx, {}).get(\"news_articles\", [])\n",
- "# )\n",
- "\n",
- "# # Display the updated DataFrame\n",
- "# indexed_research_df"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# indexed_article_df = article_content_df.set_index('article_id', inplace=False)\n",
- "# for index, research in indexed_research_df.iterrows():\n",
- "# create_page(\n",
- "# \"sub_research_frontpage.html.j2\",\n",
- "# research['url'],\n",
- "# general=general,\n",
- "# data=research,\n",
- "# current_research_id=index,\n",
- "# indexed_research_df=indexed_research_df,\n",
- "# indexed_article_df=indexed_article_df,\n",
- "# member_data=info_json_dict\n",
- "# )"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Gallery page"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "GALLERY_CONTENT_SOURCE = WEBSITE_DATA_PATH / \"content\" / \"gallery\"\n",
- "events = []\n",
- "\n",
- "for event_file in GALLERY_CONTENT_SOURCE.rglob(\"info.json\"):\n",
- " with open(event_file, \"r\") as f_event:\n",
- " event_data = json.load(f_event)\n",
- " \n",
- " if \"date\" in event_data:\n",
- " event_data[\"date\"] = pd.to_datetime(event_data[\"date\"])\n",
- " event_id = event_data.get(\"event_id\", \"unknown_event\") # Default if event_id is missing\n",
- " \n",
- " # Define new destination path using event_id\n",
- " dest_image_dir = HOSTING_PATH / \"website_files\" / \"images\" / \"gallery\" / event_id / \"media\" / \"images\"\n",
- " \n",
- " # Create destination directory if it doesn't exist\n",
- " dest_image_dir.mkdir(parents=True, exist_ok=True)\n",
- " \n",
- " # Copy images directory to the structured destination\n",
- " source_image_dir = event_file.parent / \"media\" / \"images\"\n",
- " if source_image_dir.exists():\n",
- " shutil.copytree(source_image_dir, dest_image_dir, dirs_exist_ok=True)\n",
- "\n",
- " # # Update image paths in event data to use website path\n",
- " # for key in event_data:\n",
- " # if isinstance(event_data[key], str) and \"images\" in event_data[key]:\n",
- " # event_data[key] = str(Path(\"website_files\") / \"images\" / \"gallery\" / Path(event_data[key]).name)\n",
- " for image in event_data.get(\"images\", []):\n",
- " image_path = GALLERY_CONTENT_SOURCE / event_id / image[\"image_path\"]\n",
- " with Image.open(image_path) as img:\n",
- " width, height = img.size\n",
- " new_width = int(width * 0.7) # Reduce by 30%\n",
- " new_height = int(height * 0.7) # Reduce by 30%\n",
- "\n",
- " image[\"scaled_width\"] = new_width\n",
- " image[\"scaled_height\"] = new_height\n",
- " events.append(event_data)\n",
- "\n",
- "create_page(\n",
- " \"gallery.html.j2\",\n",
- " \"Gallery.html\",\n",
- " general=general,\n",
- " member_data=info_json_dict,\n",
- " events=events\n",
+ " opportunities=opportunities\n",
")"
]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Copy assets"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "source_assets = GROUP_DATA_DIR.parent / \"groupwebsite_generator\" / \"assets\"\n",
- "shutil.copytree(source_assets, HOSTING_PATH / \"assets\", dirs_exist_ok=True)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": []
}
],
"metadata": {
@@ -1517,5 +497,5 @@
}
},
"nbformat": 4,
- "nbformat_minor": 4
+ "nbformat_minor": 5
}
diff --git a/notebooks/members.ipynb b/notebooks/members.ipynb
new file mode 100644
index 0000000..efd970f
--- /dev/null
+++ b/notebooks/members.ipynb
@@ -0,0 +1,611 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "b9379b22-dbcf-48a4-8a82-db05404d12ec",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "\n",
+ "import json\n",
+ "import pandas as pd\n",
+ "from pathlib import Path\n",
+ "from datetime import datetime\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "9e91e830-4d98-4a57-b412-4c1e237437f1",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "\n",
+ "# Constants\n",
+ "GROUP_DATA_DIR = Path(\"../../group-data\")\n",
+ "MEMBERS_DIR_PATH = GROUP_DATA_DIR / \"members/\"\n",
+ "WEBSITE_DATA_PATH = GROUP_DATA_DIR / \"website_data/\"\n",
+ "ROLE_HIERARCHY_PATH = WEBSITE_DATA_PATH / \"role_hierarchy.json\"\n",
+ "\n",
+ "GROUP_FILTER = [\"DTI\", \"TARDIS\", \"kerzendorf\"]\n",
+ "INSTITUTION_FILTER = \"Michigan State University\"\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "8e5497ad-454d-4fb6-a90d-78a7cbade567",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "\n",
+ "class MemberDataLoader:\n",
+ " def __init__(self, members_dir: Path = MEMBERS_DIR_PATH):\n",
+ " self.members_dir = members_dir\n",
+ "\n",
+ " def _load_records(self, jsons_dir, filename, member_id):\n",
+ " path = jsons_dir / filename\n",
+ " if not path.exists():\n",
+ " return []\n",
+ " records = json.loads(path.read_text())\n",
+ " for record in records:\n",
+ " record['member_id'] = member_id\n",
+ " return records\n",
+ "\n",
+ " def _parse_dates(self, records, date_fields, member_id=None):\n",
+ " for record in records:\n",
+ " for field in date_fields:\n",
+ " if field not in record:\n",
+ " continue\n",
+ "\n",
+ " if not record[field]:\n",
+ " record[field] = pd.NaT\n",
+ " continue\n",
+ "\n",
+ " try:\n",
+ " record[field] = pd.to_datetime(record[field])\n",
+ " except ValueError as e:\n",
+ " if member_id:\n",
+ " print(e, member_id)\n",
+ " return records\n",
+ "\n",
+ " def load_all_data(self):\n",
+ " data_types = ['education', 'experiences', 'projects', 'awards', 'outreach', 'documents', 'posters', 'publications']\n",
+ " data_config = {dt: f\"{dt}.json\" for dt in data_types}\n",
+ " data = {key: [] for key in data_config}\n",
+ "\n",
+ " start_end_dates = ['education.json', 'experiences.json', 'projects.json', 'outreach.json']\n",
+ " single_date_with_errors = ['publications.json']\n",
+ " dual_date_format = ['awards.json']\n",
+ "\n",
+ " members_data = []\n",
+ "\n",
+ " for member_dir in self.members_dir.glob(\"*\"):\n",
+ " info_path = member_dir / \"info.json\"\n",
+ " member_info = json.loads(info_path.read_text())\n",
+ " member_id = member_info[\"id\"]\n",
+ "\n",
+ " full_name = (\n",
+ " f\"{member_info.get('nick_name', member_info.get('first_name', ''))} {member_info.get('last_name', '')}\"\n",
+ " if member_info.get('nick_name')\n",
+ " else f\"{member_info.get('first_name', '')} {member_info.get('last_name', '')}\"\n",
+ " )\n",
+ " member_info['full_name'] = full_name.strip()\n",
+ "\n",
+ " jsons_dir = member_dir / \"jsons\"\n",
+ "\n",
+ " social_path = jsons_dir / \"social_links.json\"\n",
+ " if social_path.exists():\n",
+ " social_data = json.loads(social_path.read_text())\n",
+ " member_info.update(social_data)\n",
+ "\n",
+ " members_data.append(member_info)\n",
+ "\n",
+ " for key, filename in data_config.items():\n",
+ " records = self._load_records(jsons_dir, filename, member_id)\n",
+ " if filename in start_end_dates:\n",
+ " records = self._parse_dates(records, ['start_date', 'end_date'])\n",
+ " elif filename in single_date_with_errors:\n",
+ " records = self._parse_dates(records, ['date'], member_id)\n",
+ " elif filename in dual_date_format:\n",
+ " records = self._parse_dates(records, ['date', 'start_date', 'end_date'], member_id)\n",
+ " data[key].extend(records)\n",
+ "\n",
+ " members_df = pd.DataFrame(members_data).set_index('id')\n",
+ " self.members_df = members_df\n",
+ "\n",
+ " for key in data:\n",
+ " df = pd.DataFrame(data[key]).set_index('member_id')\n",
+ " setattr(self, f\"{key}_df\", df)\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "id": "db3ee435-4a70-4f62-aed4-7f0ef64cd4fc",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "\n",
+ "class CurrentMemberProcessor:\n",
+ " def __init__(self, members_df, education_df, experiences_df, projects_df):\n",
+ " self.members_df = members_df\n",
+ " self.education_df = education_df\n",
+ " self.experiences_df = experiences_df\n",
+ " self.projects_df = projects_df\n",
+ "\n",
+ " with open(ROLE_HIERARCHY_PATH, \"r\") as file_name:\n",
+ " self.role_hierarchy = json.load(file_name)\n",
+ "\n",
+ " def process_education(self):\n",
+ " \"\"\"Get most recent education and determine academic role\"\"\"\n",
+ " def most_recent_row(group):\n",
+ " sorted_group = group.sort_values(by=['start_date', 'end_date'], ascending=[False, True])\n",
+ " return sorted_group.iloc[0:1]\n",
+ "\n",
+ " self.edu_most_recent = self.education_df.groupby(level=0).apply(most_recent_row).droplevel(0)\n",
+ "\n",
+ " self.edu_most_recent['academic_role'] = \"\"\n",
+ " msu_mask = self.edu_most_recent['institution'] == INSTITUTION_FILTER\n",
+ " bachelors_mask = msu_mask & (self.edu_most_recent['degree'] == \"Bachelors\")\n",
+ " grad_mask = msu_mask & (self.edu_most_recent['degree'].isin([\"PhD\", \"Masters\"]))\n",
+ "\n",
+ " self.edu_most_recent.loc[bachelors_mask, 'academic_role'] = \"Undergraduate Student\"\n",
+ " self.edu_most_recent.loc[grad_mask, 'academic_role'] = \"Graduate Student\"\n",
+ "\n",
+ " def process_experiences(self):\n",
+ " \"\"\"Get most recent experience per member\"\"\"\n",
+ " self.experiences_df = self.experiences_df.fillna(\"\")\n",
+ "\n",
+ " def most_recent_row(group):\n",
+ " sorted_group = group.sort_values(by=['start_date', 'end_date'], ascending=[False, True])\n",
+ " relevant_group = sorted_group[sorted_group['group'].str.contains('|'.join(GROUP_FILTER))]\n",
+ " return relevant_group.iloc[0:1] if not relevant_group.empty else sorted_group.iloc[0:1]\n",
+ "\n",
+ " self.exp_most_recent = self.experiences_df.groupby(level=0).apply(most_recent_row).droplevel(0)\n",
+ "\n",
+ " def _merge_edu_exp(self):\n",
+ " \"\"\"Merge education and experience dataframes\"\"\"\n",
+ " exp_suffixed = self.exp_most_recent.add_suffix('_exp')\n",
+ " edu_suffixed = self.edu_most_recent.add_suffix('_edu')\n",
+ " return exp_suffixed.merge(edu_suffixed, left_index=True, right_index=True, how='outer')\n",
+ "\n",
+ " def _determine_status_and_role(self, row):\n",
+ " \"\"\"Determine if member is current and their role\"\"\"\n",
+ " if row['institution_edu'] == INSTITUTION_FILTER:\n",
+ " is_current_edu = pd.isna(row['end_date_edu']) or row['end_date_edu'] >= datetime.now()\n",
+ " has_ended_exp = pd.notna(row['end_date_exp'])\n",
+ " is_current = is_current_edu and not has_ended_exp\n",
+ "\n",
+ " if row['academic_role_edu']:\n",
+ " current_role = row['academic_role_edu']\n",
+ " else:\n",
+ " current_role = row['role_exp']\n",
+ "\n",
+ " return pd.Series({'isCurrent': is_current, 'current_role': current_role})\n",
+ " elif row['group_exp'] in GROUP_FILTER and (pd.isna(row['end_date_exp']) or row['end_date_exp'] >= datetime.now()):\n",
+ " return pd.Series({'isCurrent': True, 'current_role': row['role_exp']})\n",
+ " else:\n",
+ " current_role = row['academic_role_edu'] if row['academic_role_edu'] else row['role_exp']\n",
+ " return pd.Series({'isCurrent': False, 'current_role': current_role})\n",
+ "\n",
+ " def _add_projects(self, df):\n",
+ " \"\"\"Add current project titles to members\"\"\"\n",
+ " df[\"current_project_title\"] = \"\"\n",
+ "\n",
+ " common_members = df.index.intersection(self.projects_df.index)\n",
+ " projects_first = self.projects_df.loc[common_members].groupby(level=0).first()\n",
+ " df.loc[common_members, \"current_project_title\"] = projects_first[\"project_title\"]\n",
+ "\n",
+ " def _sort_by_hierarchy(self, df):\n",
+ " \"\"\"Sort members by role hierarchy\"\"\"\n",
+ " df['rank'] = df['current_role'].map(self.role_hierarchy)\n",
+ " df = df.sort_values(by='rank')\n",
+ " return df.drop(columns='rank')\n",
+ "\n",
+ " def merge_and_determine_status(self):\n",
+ " \"\"\"Merge edu/exp and determine current vs alumni status\"\"\"\n",
+ " merged = self._merge_edu_exp()\n",
+ " status_role = merged.apply(self._determine_status_and_role, axis=1)\n",
+ " merged = pd.concat([merged, status_role], axis=1)\n",
+ "\n",
+ " self.current_members = merged[merged['isCurrent']][[\"current_role\"]]\n",
+ " self.alumni_members = merged[~merged['isCurrent']][[\"current_role\"]]\n",
+ "\n",
+ " self.current_members_with_info = pd.merge(self.current_members, self.members_df, left_index=True, right_index=True, how='inner')\n",
+ " self.alumni_members_with_info = pd.merge(self.alumni_members, self.members_df, left_index=True, right_index=True, how='inner')[['current_role', 'full_name']]\n",
+ "\n",
+ " self._add_projects(self.current_members_with_info)\n",
+ " self.current_members_with_info = self._sort_by_hierarchy(self.current_members_with_info)\n",
+ "\n",
+ " def process(self):\n",
+ " \"\"\"Run full pipeline\"\"\"\n",
+ " self.process_education()\n",
+ " self.process_experiences()\n",
+ " self.merge_and_determine_status()\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 21,
+ "id": "54df076c-712d-4ce4-ae1f-0432802421f1",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Members: 34\n",
+ "Education records: 44\n",
+ "Experiences records: 53\n",
+ "Projects records: 31\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Load member data\n",
+ "loader = MemberDataLoader()\n",
+ "loader.load_all_data()\n",
+ "\n",
+ "print(f\"Members: {len(loader.members_df)}\")\n",
+ "print(f\"Education records: {len(loader.education_df)}\")\n",
+ "print(f\"Experiences records: {len(loader.experiences_df)}\")\n",
+ "print(f\"Projects records: {len(loader.projects_df)}\")\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 22,
+ "id": "5312ec3d-4e35-4925-9e0a-e9d68060b71a",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " first_name | \n",
+ " last_name | \n",
+ " image_path | \n",
+ " cover_image_path | \n",
+ " introduction | \n",
+ " full_name | \n",
+ " github_handle | \n",
+ " linkedin | \n",
+ " email | \n",
+ " nick_name | \n",
+ " website | \n",
+ " orcid | \n",
+ " twitter_handle | \n",
+ " linkedin_handle | \n",
+ " ads | \n",
+ "
\n",
+ " \n",
+ " | id | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | gracie_tvrdik | \n",
+ " Gracie | \n",
+ " Tvrdik | \n",
+ " media/images/gracie.jpg | \n",
+ " media/images/cover.jpg | \n",
+ " I am an undergraduate student at Bowling Green... | \n",
+ " Gracie Tvrdik | \n",
+ " gracietv | \n",
+ " www.linkedin.com/in/grayson-tvrdik-34b7872a7 | \n",
+ " graysontvrdik1@gmail.com | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | josh_shields | \n",
+ " Joshua | \n",
+ " Shields | \n",
+ " media/images/josh_photo.jpg | \n",
+ " media/images/cover.jpg | \n",
+ " Josh is a senior graduate student in astrophys... | \n",
+ " Josh Shields | \n",
+ " jvshields | \n",
+ " NaN | \n",
+ " shield90@msu.edu | \n",
+ " Josh | \n",
+ " https://jvshields.github.io/ | \n",
+ " 0000-0002-1560-5286 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | anirban_dutta | \n",
+ " Anirban | \n",
+ " Dutta | \n",
+ " media/images/anirban_dutta.jpg | \n",
+ " media/images/cover.jpg | \n",
+ " Hi there! This is Anirban. | \n",
+ " Anirban Dutta | \n",
+ " Knights-Templars | \n",
+ " NaN | \n",
+ " anirbaniamdutta@gmail.com | \n",
+ " NaN | \n",
+ " https://sites.google.com/view/anirbaniamdutta | \n",
+ " 0000-0002-7708-3831 | \n",
+ " Anirban29Dutta | \n",
+ " anirban-dutta-6a0377238 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | erin_visser | \n",
+ " Erin | \n",
+ " Visser | \n",
+ " media/images/erin_visser_website_pic.jpg | \n",
+ " media/images/cover.jpg | \n",
+ " NaN | \n",
+ " Erin Visser | \n",
+ " erinvisser | \n",
+ " NaN | \n",
+ " visserer@msu.edu | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0009-0001-8470-275X | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | abhinav_ohri | \n",
+ " Abhinav | \n",
+ " Ohri | \n",
+ " media/images/abhinav_ohri.jpg | \n",
+ " media/images/cover.jpg | \n",
+ " Hi there! This is Abhinav. | \n",
+ " Abhinav Ohri | \n",
+ " KasukabeDefenceForce | \n",
+ " NaN | \n",
+ " abhinavohri13@gmail.com | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " first_name last_name image_path \\\n",
+ "id \n",
+ "gracie_tvrdik Gracie Tvrdik media/images/gracie.jpg \n",
+ "josh_shields Joshua Shields media/images/josh_photo.jpg \n",
+ "anirban_dutta Anirban Dutta media/images/anirban_dutta.jpg \n",
+ "erin_visser Erin Visser media/images/erin_visser_website_pic.jpg \n",
+ "abhinav_ohri Abhinav Ohri media/images/abhinav_ohri.jpg \n",
+ "\n",
+ " cover_image_path \\\n",
+ "id \n",
+ "gracie_tvrdik media/images/cover.jpg \n",
+ "josh_shields media/images/cover.jpg \n",
+ "anirban_dutta media/images/cover.jpg \n",
+ "erin_visser media/images/cover.jpg \n",
+ "abhinav_ohri media/images/cover.jpg \n",
+ "\n",
+ " introduction \\\n",
+ "id \n",
+ "gracie_tvrdik I am an undergraduate student at Bowling Green... \n",
+ "josh_shields Josh is a senior graduate student in astrophys... \n",
+ "anirban_dutta Hi there! This is Anirban. \n",
+ "erin_visser NaN \n",
+ "abhinav_ohri Hi there! This is Abhinav. \n",
+ "\n",
+ " full_name github_handle \\\n",
+ "id \n",
+ "gracie_tvrdik Gracie Tvrdik gracietv \n",
+ "josh_shields Josh Shields jvshields \n",
+ "anirban_dutta Anirban Dutta Knights-Templars \n",
+ "erin_visser Erin Visser erinvisser \n",
+ "abhinav_ohri Abhinav Ohri KasukabeDefenceForce \n",
+ "\n",
+ " linkedin \\\n",
+ "id \n",
+ "gracie_tvrdik www.linkedin.com/in/grayson-tvrdik-34b7872a7 \n",
+ "josh_shields NaN \n",
+ "anirban_dutta NaN \n",
+ "erin_visser NaN \n",
+ "abhinav_ohri NaN \n",
+ "\n",
+ " email nick_name \\\n",
+ "id \n",
+ "gracie_tvrdik graysontvrdik1@gmail.com NaN \n",
+ "josh_shields shield90@msu.edu Josh \n",
+ "anirban_dutta anirbaniamdutta@gmail.com NaN \n",
+ "erin_visser visserer@msu.edu NaN \n",
+ "abhinav_ohri abhinavohri13@gmail.com NaN \n",
+ "\n",
+ " website \\\n",
+ "id \n",
+ "gracie_tvrdik NaN \n",
+ "josh_shields https://jvshields.github.io/ \n",
+ "anirban_dutta https://sites.google.com/view/anirbaniamdutta \n",
+ "erin_visser NaN \n",
+ "abhinav_ohri NaN \n",
+ "\n",
+ " orcid twitter_handle linkedin_handle \\\n",
+ "id \n",
+ "gracie_tvrdik NaN NaN NaN \n",
+ "josh_shields 0000-0002-1560-5286 NaN NaN \n",
+ "anirban_dutta 0000-0002-7708-3831 Anirban29Dutta anirban-dutta-6a0377238 \n",
+ "erin_visser 0009-0001-8470-275X NaN NaN \n",
+ "abhinav_ohri NaN NaN NaN \n",
+ "\n",
+ " ads \n",
+ "id \n",
+ "gracie_tvrdik NaN \n",
+ "josh_shields NaN \n",
+ "anirban_dutta NaN \n",
+ "erin_visser NaN \n",
+ "abhinav_ohri NaN "
+ ]
+ },
+ "execution_count": 22,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "loader.members_df.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "8b3545f7-65ef-45ae-93fd-433381fbbe5a",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "4f9ef05e-fe14-41c4-9389-131d9a66f299",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 23,
+ "id": "6d7f9710-9ba9-466a-8290-13c52c9e07c7",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "\n",
+ "# Process current and alumni members\n",
+ "processor = CurrentMemberProcessor(loader.members_df, loader.education_df, loader.experiences_df, loader.projects_df)\n",
+ "processor.process()\n",
+ "\n",
+ "# Add academic role and project info to members_df\n",
+ "loader.members_df[\"academic_role\"] = \"\"\n",
+ "loader.members_df[\"current_project_title\"] = \"\"\n",
+ "\n",
+ "loader.members_df.loc[processor.current_members_with_info.index, \"academic_role\"] = processor.current_members_with_info[\"current_role\"]\n",
+ "loader.members_df.loc[processor.current_members_with_info.index, \"current_project_title\"] = processor.current_members_with_info[\"current_project_title\"]\n",
+ "\n",
+ "alumni_only = processor.alumni_members_with_info.index.difference(processor.current_members_with_info.index)\n",
+ "loader.members_df.loc[alumni_only, \"academic_role\"] = processor.alumni_members_with_info.loc[alumni_only, \"current_role\"]\n",
+ "\n",
+ "processor.alumni_members_with_info = processor.alumni_members_with_info.replace(\"nan\", pd.NA).fillna(\"\")\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 24,
+ "id": "21c8fe04-8b7e-49ab-9742-d56738fb3eb2",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Saved members.csv (34 members)\n",
+ "Saved current_members.csv (10 current members)\n",
+ "Saved alumni_members.csv (24 alumni members)\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Save to CSV\n",
+ "loader.members_df.to_csv(\"members.csv\")\n",
+ "processor.current_members_with_info.to_csv(\"current_members.csv\")\n",
+ "processor.alumni_members_with_info.to_csv(\"alumni_members.csv\")\n",
+ "loader.education_df.to_csv(\"education.csv\")\n",
+ "loader.experiences_df.to_csv(\"experiences.csv\")\n",
+ "loader.projects_df.to_csv(\"projects.csv\")\n",
+ "loader.awards_df.to_csv(\"awards.csv\")\n",
+ "loader.outreach_df.to_csv(\"outreach.csv\")\n",
+ "loader.documents_df.to_csv(\"documents.csv\")\n",
+ "\n",
+ "print(f\"Saved members.csv ({len(loader.members_df)} members)\")\n",
+ "print(f\"Saved current_members.csv ({len(processor.current_members_with_info)} current members)\")\n",
+ "print(f\"Saved alumni_members.csv ({len(processor.alumni_members_with_info)} alumni members)\")\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "95f09b05-33b3-467f-ba70-2f352a19c259",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "4f788225-0603-4c85-91db-9a499ec3d52d",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.13.5"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/templates/individual_person.html.j2 b/templates/individual_person.html.j2
index 82d1082..f86cb87 100644
--- a/templates/individual_person.html.j2
+++ b/templates/individual_person.html.j2
@@ -1,6 +1,14 @@
{% extends 'base.html.j2' %}
{% from 'macros.html.j2' import format_years %}
+{% set section_headings = {
+ "education": "Education",
+ "experiences": "Experience",
+ "projects": "Projects",
+ "awards": "Awards & Recognition",
+ "outreach": "Outreach Programs"
+} %}
+
{% block title %}
{{member_data["full_name"]}} | {{ general.website_title }}
{% endblock %}
diff --git a/templates/macros.html.j2 b/templates/macros.html.j2
index 11c2156..1621678 100644
--- a/templates/macros.html.j2
+++ b/templates/macros.html.j2
@@ -11,14 +11,11 @@
{%- endmacro -%}
{%- macro format_years(row) -%}
- {%- if row.get("start_date") is not none -%}
- {{ row["start_date"].year }} -
- {%- if row.get("end_date") is not none -%}
- {{ row["end_date"].year }}
- {%- else -%}
- Present
- {%- endif -%}
- {%- elif row.get("date") is string -%}
- {{ row["date"][:4] }}
+ {%- if row.get("start_date") is not none and row["start_date"]|string != 'NaT' -%}
+ {{ row["start_date"].year }} - {{ row["end_date"].year if (row.get("end_date") is not none and row["end_date"]|string != 'NaT') else "Present" }}
+ {%- elif row.get("end_date") is not none and row["end_date"]|string != 'NaT' -%}
+ {{ row["end_date"].year }}
+ {%- elif row.get("date") is not none and row["date"]|string != 'NaT' -%}
+ {{ row["date"].year }}
{%- endif -%}
{%- endmacro -%}
\ No newline at end of file