From 05ba7fce1901b3e8d3da880d8f564fbb46be5ea3 Mon Sep 17 00:00:00 2001
From: QuanMPhm <qmpham2019@gmail.com>
Date: Thu, 18 Apr 2024 14:22:33 -0400
Subject: [PATCH] Added pre-commit linting check

---
 .github/workflows/pre-commit.yaml  |  16 ++
 .pre-commit-config.yaml            |  16 ++
 README.md                          |   2 +-
 process_report/institute_map.json  |   2 +-
 process_report/process_report.py   | 144 +++++++-----
 process_report/tests/unit_tests.py | 366 ++++++++++++++++++-----------
 requirements.txt                   |   2 +-
 7 files changed, 349 insertions(+), 199 deletions(-)
 create mode 100644 .github/workflows/pre-commit.yaml
 create mode 100644 .pre-commit-config.yaml

diff --git a/.github/workflows/pre-commit.yaml b/.github/workflows/pre-commit.yaml
new file mode 100644
index 0000000..b16bd03
--- /dev/null
+++ b/.github/workflows/pre-commit.yaml
@@ -0,0 +1,16 @@
+
+name: pre-commit
+
+on:
+  push:
+    branches: [ main ]
+  pull_request:
+    branches: [ main ]
+
+jobs:
+  pre-commit:
+    runs-on: ubuntu-latest
+    steps:
+    - uses: actions/checkout@v3
+    - uses: actions/setup-python@v3
+    - uses: pre-commit/action@v3.0.1
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 0000000..907cd60
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,16 @@
+repos:
+  - repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v4.5.0
+    hooks:
+      - id: trailing-whitespace
+      - id: check-merge-conflict
+      - id: end-of-file-fixer
+      - id: check-added-large-files
+      - id: check-case-conflict
+      - id: detect-private-key
+
+  - repo: https://github.com/astral-sh/ruff-pre-commit
+    rev: v0.2.1
+    hooks:
+      - id: ruff
+      - id: ruff-format
diff --git a/README.md b/README.md
index 8e1ce38..c4ebc66 100644
--- a/README.md
+++ b/README.md
@@ -69,5 +69,5 @@ In this example, `project foo` will not be billed for September 2023 and August
 
 ## Combine CSVs
 
-This script also combines the 3 separate Invoice data CSVs into 1 Invoice CSV. It combines 
+This script also combines the 3 separate Invoice data CSVs into 1 Invoice CSV. It combines
 OpenShift SU, OpenStack SU, and Storage SU data.
diff --git a/process_report/institute_map.json b/process_report/institute_map.json
index cd2d60d..be959af 100644
--- a/process_report/institute_map.json
+++ b/process_report/institute_map.json
@@ -12,7 +12,7 @@
     "bidmc.harvard.edu"     : "Beth Israel Deaconess Medical Center",
     "fas.harvard.edu"       : "Harvard University",
     "cga.harvard.edu"       : "Harvard University",
-    "iq.harvard.edu"        : "Harvard University", 
+    "iq.harvard.edu"        : "Harvard University",
     "hks.harvard.edu"       : "Harvard University",
     "hsph.harvard.edu"      : "Harvard University",
     "seas.harvard.edu"      : "Harvard University",
diff --git a/process_report/process_report.py b/process_report/process_report.py
index 4e3d55b..b2d2dd9 100644
--- a/process_report/process_report.py
+++ b/process_report/process_report.py
@@ -7,35 +7,35 @@
 
 
 ### Invoice field names
-INVOICE_DATE_FIELD = 'Invoice Month'
-PROJECT_FIELD = 'Project - Allocation'
-PROJECT_ID_FIELD = 'Project - Allocation ID'
-PI_FIELD = 'Manager (PI)'
-INVOICE_EMAIL_FIELD = 'Invoice Email'
-INVOICE_ADDRESS_FIELD = 'Invoice Address'
-INSTITUTION_FIELD = 'Institution'
-INSTITUTION_ID_FIELD = 'Institution - Specific Code'
-SU_HOURS_FIELD = 'SU Hours (GBhr or SUhr)'
-SU_TYPE_FIELD = 'SU Type'
-COST_FIELD = 'Cost'
-CREDIT_FIELD = 'Credit'
-CREDIT_CODE_FIELD = 'Credit Code'
-BALANCE_FIELD = 'Balance'
+INVOICE_DATE_FIELD = "Invoice Month"
+PROJECT_FIELD = "Project - Allocation"
+PROJECT_ID_FIELD = "Project - Allocation ID"
+PI_FIELD = "Manager (PI)"
+INVOICE_EMAIL_FIELD = "Invoice Email"
+INVOICE_ADDRESS_FIELD = "Invoice Address"
+INSTITUTION_FIELD = "Institution"
+INSTITUTION_ID_FIELD = "Institution - Specific Code"
+SU_HOURS_FIELD = "SU Hours (GBhr or SUhr)"
+SU_TYPE_FIELD = "SU Type"
+COST_FIELD = "Cost"
+CREDIT_FIELD = "Credit"
+CREDIT_CODE_FIELD = "Credit Code"
+BALANCE_FIELD = "Balance"
 ###
 
 
 def get_institution_from_pi(institute_map, pi_uname):
-    institution_key = pi_uname.split('@')[-1]
-    institution_name = institute_map.get(institution_key, '')
+    institution_key = pi_uname.split("@")[-1]
+    institution_name = institute_map.get(institution_key, "")
 
-    if institution_name == '':
+    if institution_name == "":
         print(f"Warning: PI name {pi_uname} does not match any institution!")
-    
+
     return institution_name
 
 
 def load_institute_map() -> dict:
-    with open('institute_map.json', 'r') as f:
+    with open("institute_map.json", "r") as f:
         institute_map = json.load(f)
 
     return institute_map
@@ -46,18 +46,18 @@ def load_old_pis(old_pi_file):
 
     try:
         with open(old_pi_file) as f:
-            for pi_info in f: 
-                pi, first_month = pi_info.strip().split(',')
+            for pi_info in f:
+                pi, first_month = pi_info.strip().split(",")
                 old_pi_dict[pi] = first_month
     except FileNotFoundError:
-        print('Applying credit 0002 failed. Old PI file does not exist')
+        print("Applying credit 0002 failed. Old PI file does not exist")
         sys.exit(1)
-    
+
     return old_pi_dict
 
 
 def is_old_pi(old_pi_dict, pi, invoice_month):
-    if pi in old_pi_dict and old_pi_dict[pi] != invoice_month: 
+    if pi in old_pi_dict and old_pi_dict[pi] != invoice_month:
         return True
     return False
 
@@ -97,24 +97,24 @@ def main():
         "--output-folder",
         required=False,
         default="pi_invoices",
-        help="Name of output folder containing pi-specific invoice csvs"
+        help="Name of output folder containing pi-specific invoice csvs",
     )
     parser.add_argument(
         "--HU-invoice-file",
         required=False,
         default="HU_only.csv",
-        help="Name of output csv for HU invoices"
+        help="Name of output csv for HU invoices",
     )
     parser.add_argument(
         "--HU-BU-invoice-file",
         required=False,
         default="HU_BU.csv",
-        help="Name of output csv for HU and BU invoices"
+        help="Name of output csv for HU and BU invoices",
     )
     parser.add_argument(
         "--old-pi-file",
         required=False,
-        help="Name of csv file listing previously billed PIs"
+        help="Name of csv file listing previously billed PIs",
     )
     args = parser.parse_args()
     merged_dataframe = merge_csv(args.csv_files)
@@ -167,7 +167,7 @@ def get_invoice_date(dataframe):
     be the same for every row.
     """
     invoice_date_str = dataframe[INVOICE_DATE_FIELD][0]
-    invoice_date = pandas.to_datetime(invoice_date_str, format='%Y-%m')
+    invoice_date = pandas.to_datetime(invoice_date_str, format="%Y-%m")
     return invoice_date
 
 
@@ -176,16 +176,22 @@ def timed_projects(timed_projects_file, invoice_date):
     dataframe = pandas.read_csv(timed_projects_file)
 
     # convert to pandas timestamp objects
-    dataframe['Start Date'] = pandas.to_datetime(dataframe['Start Date'], format="%Y-%m")
-    dataframe['End Date'] = pandas.to_datetime(dataframe['End Date'], format="%Y-%m")
+    dataframe["Start Date"] = pandas.to_datetime(
+        dataframe["Start Date"], format="%Y-%m"
+    )
+    dataframe["End Date"] = pandas.to_datetime(dataframe["End Date"], format="%Y-%m")
 
-    mask = (dataframe['Start Date'] <= invoice_date) & (invoice_date <= dataframe['End Date'])
-    return dataframe[mask]['Project'].to_list()
+    mask = (dataframe["Start Date"] <= invoice_date) & (
+        invoice_date <= dataframe["End Date"]
+    )
+    return dataframe[mask]["Project"].to_list()
 
 
 def remove_non_billables(dataframe, pi, projects):
     """Removes projects and PIs that should not be billed from the dataframe"""
-    filtered_dataframe = dataframe[~dataframe[PI_FIELD].isin(pi) & ~dataframe[PROJECT_FIELD].isin(projects)]
+    filtered_dataframe = dataframe[
+        ~dataframe[PI_FIELD].isin(pi) & ~dataframe[PROJECT_FIELD].isin(projects)
+    ]
     return filtered_dataframe
 
 
@@ -194,14 +200,16 @@ def remove_billables(dataframe, pi, projects, output_file):
 
     So this *keeps* the projects/pis that should not be billed.
     """
-    filtered_dataframe = dataframe[dataframe[PI_FIELD].isin(pi) | dataframe[PROJECT_FIELD].isin(projects)]
+    filtered_dataframe = dataframe[
+        dataframe[PI_FIELD].isin(pi) | dataframe[PROJECT_FIELD].isin(projects)
+    ]
     filtered_dataframe.to_csv(output_file, index=False)
 
 
 def validate_pi_names(dataframe):
     invalid_pi_projects = dataframe[pandas.isna(dataframe[PI_FIELD])]
     for i, row in invalid_pi_projects.iterrows():
-        print(f'Warning: Project {row[PROJECT_FIELD]} has empty PI field')
+        print(f"Warning: Project {row[PROJECT_FIELD]} has empty PI field")
     dataframe = dataframe[~pandas.isna(dataframe[PI_FIELD])]
 
     return dataframe
@@ -219,11 +227,13 @@ def export_pi_billables(dataframe: pandas.DataFrame, output_folder):
     pi_list = dataframe[PI_FIELD].unique()
 
     for pi in pi_list:
-        if pandas.isna(pi): 
+        if pandas.isna(pi):
             continue
         pi_projects = dataframe[dataframe[PI_FIELD] == pi]
         pi_instituition = pi_projects[INSTITUTION_FIELD].iat[0]
-        pi_projects.to_csv(output_folder + f"/{pi_instituition}_{pi}_{invoice_month}.csv")
+        pi_projects.to_csv(
+            output_folder + f"/{pi_instituition}_{pi}_{invoice_month}.csv"
+        )
 
 
 def apply_credits_new_pi(dataframe, old_pi_file):
@@ -251,24 +261,24 @@ def apply_credits_new_pi(dataframe, old_pi_file):
                 project_cost = row[COST_FIELD]
                 applied_credit = min(project_cost, remaining_credit)
 
-                dataframe.at[i, CREDIT_FIELD] =  applied_credit
+                dataframe.at[i, CREDIT_FIELD] = applied_credit
                 dataframe.at[i, CREDIT_CODE_FIELD] = new_pi_credit_code
                 dataframe.at[i, BALANCE_FIELD] = row[COST_FIELD] - applied_credit
                 remaining_credit -= applied_credit
 
                 if remaining_credit == 0:
                     break
-    
+
     return dataframe
 
 
 def add_institution(dataframe: pandas.DataFrame):
     """Determine every PI's institution name, logging any PI whose institution cannot be determined
-    This is performed by `get_institution_from_pi()`, which tries to match the PI's username to 
+    This is performed by `get_institution_from_pi()`, which tries to match the PI's username to
     a list of known institution email domains (i.e bu.edu), or to several edge cases (i.e rudolph) if
     the username is not an email address.
-    
-    Exact matches are then mapped to the corresponding institution name. 
+
+    Exact matches are then mapped to the corresponding institution name.
 
     I.e "foo@bu.edu" would match with "bu.edu", which maps to the instition name "Boston University"
 
@@ -277,42 +287,50 @@ def add_institution(dataframe: pandas.DataFrame):
     institute_map = load_institute_map()
     for i, row in dataframe.iterrows():
         pi_name = row[PI_FIELD]
-        if pandas.isna(pi_name): 
+        if pandas.isna(pi_name):
             print(f"Project {row[PROJECT_FIELD]} has no PI")
-        else: 
-            dataframe.at[i, INSTITUTION_FIELD] = get_institution_from_pi(institute_map, pi_name)
+        else:
+            dataframe.at[i, INSTITUTION_FIELD] = get_institution_from_pi(
+                institute_map, pi_name
+            )
 
     return dataframe
 
 
 def export_HU_only(dataframe, output_file):
-    HU_projects = dataframe[dataframe[INSTITUTION_FIELD] == 'Harvard University']
+    HU_projects = dataframe[dataframe[INSTITUTION_FIELD] == "Harvard University"]
     HU_projects.to_csv(output_file)
 
 
 def export_HU_BU(dataframe, output_file):
-    HU_BU_projects = dataframe[(dataframe[INSTITUTION_FIELD] == 'Harvard University') | 
-                               (dataframe[INSTITUTION_FIELD] == 'Boston University')]
-    HU_BU_projects.to_csv(output_file)        
+    HU_BU_projects = dataframe[
+        (dataframe[INSTITUTION_FIELD] == "Harvard University")
+        | (dataframe[INSTITUTION_FIELD] == "Boston University")
+    ]
+    HU_BU_projects.to_csv(output_file)
 
 
 def export_lenovo(dataframe: pandas.DataFrame, output_file=None):
+    lenovo_file_name = (
+        output_file or f"Lenovo_{dataframe[INVOICE_DATE_FIELD].iat[0]}.csv"
+    )
 
-    lenovo_file_name = output_file or f'Lenovo_{dataframe[INVOICE_DATE_FIELD].iat[0]}.csv'
-
-    LENOVO_SU_TYPES = ['OpenShift GPUA100SXM4', 'OpenStack GPUA100SXM4']
+    LENOVO_SU_TYPES = ["OpenShift GPUA100SXM4", "OpenStack GPUA100SXM4"]
     SU_CHARGE_MULTIPLIER = 1
 
-    lenovo_df = dataframe[dataframe[SU_TYPE_FIELD].isin(LENOVO_SU_TYPES)][[
-            INVOICE_DATE_FIELD, 
-            PROJECT_FIELD, 
-            INSTITUTION_FIELD, 
-            SU_HOURS_FIELD, 
-            SU_TYPE_FIELD]]
-    
-    lenovo_df.rename(columns={SU_HOURS_FIELD: 'SU Hours'}, inplace=True)
-    lenovo_df.insert(len(lenovo_df.columns), 'SU Charge', SU_CHARGE_MULTIPLIER)
-    lenovo_df['Charge'] = lenovo_df['SU Hours'] * lenovo_df['SU Charge']
+    lenovo_df = dataframe[dataframe[SU_TYPE_FIELD].isin(LENOVO_SU_TYPES)][
+        [
+            INVOICE_DATE_FIELD,
+            PROJECT_FIELD,
+            INSTITUTION_FIELD,
+            SU_HOURS_FIELD,
+            SU_TYPE_FIELD,
+        ]
+    ]
+
+    lenovo_df.rename(columns={SU_HOURS_FIELD: "SU Hours"}, inplace=True)
+    lenovo_df.insert(len(lenovo_df.columns), "SU Charge", SU_CHARGE_MULTIPLIER)
+    lenovo_df["Charge"] = lenovo_df["SU Hours"] * lenovo_df["SU Charge"]
     lenovo_df.to_csv(lenovo_file_name)
 
 
diff --git a/process_report/tests/unit_tests.py b/process_report/tests/unit_tests.py
index 35676f1..8f0c381 100644
--- a/process_report/tests/unit_tests.py
+++ b/process_report/tests/unit_tests.py
@@ -1,5 +1,4 @@
 from unittest import TestCase
-from unittest import skipIf
 import tempfile
 import pandas
 import os
@@ -8,10 +7,11 @@
 
 from process_report import process_report
 
+
 class TestGetInvoiceDate(TestCase):
     def test_get_invoice_date(self):
         # The month in sample data is not the same
-        data = {'Invoice Month': ['2023-01', '2023-02', '2023-03']}
+        data = {"Invoice Month": ["2023-01", "2023-02", "2023-03"]}
         dataframe = pandas.DataFrame(data)
 
         invoice_date = process_report.get_invoice_date(dataframe)
@@ -19,26 +19,27 @@ def test_get_invoice_date(self):
         self.assertIsInstance(invoice_date, pandas.Timestamp)
 
         # Assert that the invoice_date is the first item
-        expected_date = pandas.Timestamp('2023-01')
+        expected_date = pandas.Timestamp("2023-01")
         self.assertEqual(invoice_date, expected_date)
 
 
 class TestTimedProjects(TestCase):
     def setUp(self):
-
         # Without the dedent method, our data will have leading spaces which
         # messes up the first key. Also the '\' is imporant to ignore the first
         # new line we added so it's more readable in code.
-        self.csv_data = dedent("""\
+        self.csv_data = dedent(
+            """\
         Project,Start Date,End Date
         ProjectA,2022-09,2023-08
         ProjectB,2022-09,2023-09
         ProjectC,2023-09,2024-08
         ProjectD,2022-09,2024-08
-        """)
-        self.invoice_date = pandas.Timestamp('2023-09')
+        """
+        )
+        self.invoice_date = pandas.Timestamp("2023-09")
 
-        self.csv_file = tempfile.NamedTemporaryFile(delete=False, mode='w')
+        self.csv_file = tempfile.NamedTemporaryFile(delete=False, mode="w")
         self.csv_file.write(self.csv_data)
         self.csv_file.close()
 
@@ -46,24 +47,31 @@ def tearDown(self):
         os.remove(self.csv_file.name)
 
     def test_timed_projects(self):
-        excluded_projects = process_report.timed_projects(self.csv_file.name, self.invoice_date)
+        excluded_projects = process_report.timed_projects(
+            self.csv_file.name, self.invoice_date
+        )
 
-        expected_projects = ['ProjectB', 'ProjectC', 'ProjectD']
+        expected_projects = ["ProjectB", "ProjectC", "ProjectD"]
         self.assertEqual(excluded_projects, expected_projects)
 
 
 class TestRemoveNonBillables(TestCase):
     def setUp(self):
-
         data = {
-            'Manager (PI)': ['PI1', 'PI2', 'PI3', 'PI4', 'PI5'],
-            'Project - Allocation': ['ProjectA', 'ProjectB', 'ProjectC', 'ProjectD', 'ProjectE'],
-            'Untouch Data Column': ['DataA', 'DataB', 'DataC', 'DataD', 'DataE']
+            "Manager (PI)": ["PI1", "PI2", "PI3", "PI4", "PI5"],
+            "Project - Allocation": [
+                "ProjectA",
+                "ProjectB",
+                "ProjectC",
+                "ProjectD",
+                "ProjectE",
+            ],
+            "Untouch Data Column": ["DataA", "DataB", "DataC", "DataD", "DataE"],
         }
         self.dataframe = pandas.DataFrame(data)
 
-        self.pi_to_exclude = ['PI2', 'PI3']
-        self.projects_to_exclude = ['ProjectB', 'ProjectD']
+        self.pi_to_exclude = ["PI2", "PI3"]
+        self.projects_to_exclude = ["ProjectB", "ProjectD"]
 
         self.output_file = tempfile.NamedTemporaryFile(delete=False)
         self.output_file2 = tempfile.NamedTemporaryFile(delete=False)
@@ -73,54 +81,67 @@ def tearDown(self):
         os.remove(self.output_file2.name)
 
     def test_remove_non_billables(self):
-        billables_df = process_report.remove_non_billables(self.dataframe, self.pi_to_exclude, self.projects_to_exclude)
+        billables_df = process_report.remove_non_billables(
+            self.dataframe, self.pi_to_exclude, self.projects_to_exclude
+        )
         process_report.export_billables(billables_df, self.output_file.name)
 
         result_df = pandas.read_csv(self.output_file.name)
 
-        self.assertNotIn('PI2', result_df['Manager (PI)'].tolist())
-        self.assertNotIn('PI3', result_df['Manager (PI)'].tolist())
-        self.assertNotIn('PI4', result_df['Manager (PI)'].tolist()) # indirect because ProjectD was removed
-        self.assertNotIn('ProjectB', result_df['Project - Allocation'].tolist())
-        self.assertNotIn('ProjectC', result_df['Project - Allocation'].tolist()) # indirect because PI3 was removed
-        self.assertNotIn('ProjectD', result_df['Project - Allocation'].tolist())
-
-        self.assertIn('PI1', result_df['Manager (PI)'].tolist())
-        self.assertIn('PI5', result_df['Manager (PI)'].tolist())
-        self.assertIn('ProjectA', result_df['Project - Allocation'].tolist())
-        self.assertIn('ProjectE', result_df['Project - Allocation'].tolist())
+        self.assertNotIn("PI2", result_df["Manager (PI)"].tolist())
+        self.assertNotIn("PI3", result_df["Manager (PI)"].tolist())
+        self.assertNotIn(
+            "PI4", result_df["Manager (PI)"].tolist()
+        )  # indirect because ProjectD was removed
+        self.assertNotIn("ProjectB", result_df["Project - Allocation"].tolist())
+        self.assertNotIn(
+            "ProjectC", result_df["Project - Allocation"].tolist()
+        )  # indirect because PI3 was removed
+        self.assertNotIn("ProjectD", result_df["Project - Allocation"].tolist())
+
+        self.assertIn("PI1", result_df["Manager (PI)"].tolist())
+        self.assertIn("PI5", result_df["Manager (PI)"].tolist())
+        self.assertIn("ProjectA", result_df["Project - Allocation"].tolist())
+        self.assertIn("ProjectE", result_df["Project - Allocation"].tolist())
 
     def test_remove_billables(self):
-        process_report.remove_billables(self.dataframe, self.pi_to_exclude, self.projects_to_exclude, self.output_file2.name)
+        process_report.remove_billables(
+            self.dataframe,
+            self.pi_to_exclude,
+            self.projects_to_exclude,
+            self.output_file2.name,
+        )
 
         result_df = pandas.read_csv(self.output_file2.name)
 
-        self.assertIn('PI2', result_df['Manager (PI)'].tolist())
-        self.assertIn('PI3', result_df['Manager (PI)'].tolist())
-        self.assertIn('PI4', result_df['Manager (PI)'].tolist())
-        self.assertIn('ProjectB', result_df['Project - Allocation'].tolist())
-        self.assertIn('ProjectC', result_df['Project - Allocation'].tolist())
-        self.assertIn('ProjectD', result_df['Project - Allocation'].tolist())
+        self.assertIn("PI2", result_df["Manager (PI)"].tolist())
+        self.assertIn("PI3", result_df["Manager (PI)"].tolist())
+        self.assertIn("PI4", result_df["Manager (PI)"].tolist())
+        self.assertIn("ProjectB", result_df["Project - Allocation"].tolist())
+        self.assertIn("ProjectC", result_df["Project - Allocation"].tolist())
+        self.assertIn("ProjectD", result_df["Project - Allocation"].tolist())
 
-        self.assertNotIn('PI1', result_df['Manager (PI)'].tolist())
-        self.assertNotIn('PI5', result_df['Manager (PI)'].tolist())
-        self.assertNotIn('ProjectA', result_df['Project - Allocation'].tolist())
-        self.assertNotIn('ProjectE', result_df['Project - Allocation'].tolist())
+        self.assertNotIn("PI1", result_df["Manager (PI)"].tolist())
+        self.assertNotIn("PI5", result_df["Manager (PI)"].tolist())
+        self.assertNotIn("ProjectA", result_df["Project - Allocation"].tolist())
+        self.assertNotIn("ProjectE", result_df["Project - Allocation"].tolist())
 
 
 class TestMergeCSV(TestCase):
     def setUp(self):
-        self.header = ['ID', 'Name', 'Age']
+        self.header = ["ID", "Name", "Age"]
         self.data = [
-            [1, 'Alice', 25],
-            [2, 'Bob', 30],
-            [3, 'Charlie', 28],
+            [1, "Alice", 25],
+            [2, "Bob", 30],
+            [3, "Charlie", 28],
         ]
 
         self.csv_files = []
 
         for _ in range(3):
-            csv_file = tempfile.NamedTemporaryFile(delete=False, mode='w', suffix='.csv')
+            csv_file = tempfile.NamedTemporaryFile(
+                delete=False, mode="w", suffix=".csv"
+            )
             self.csv_files.append(csv_file)
             dataframe = pandas.DataFrame(self.data, columns=self.header)
             dataframe.to_csv(csv_file, index=False)
@@ -131,10 +152,14 @@ def tearDown(self):
             os.remove(csv_file.name)
 
     def test_merge_csv(self):
-        merged_dataframe = process_report.merge_csv([csv_file.name for csv_file in self.csv_files])
+        merged_dataframe = process_report.merge_csv(
+            [csv_file.name for csv_file in self.csv_files]
+        )
 
         expected_rows = len(self.data) * 3
-        self.assertEqual(len(merged_dataframe), expected_rows) # `len` for a pandas dataframe excludes the header row
+        self.assertEqual(
+            len(merged_dataframe), expected_rows
+        )  # `len` for a pandas dataframe excludes the header row
 
         # Assert that the headers in the merged DataFrame match the expected headers
         self.assertListEqual(merged_dataframe.columns.tolist(), self.header)
@@ -142,13 +167,18 @@ def test_merge_csv(self):
 
 class TestExportPICSV(TestCase):
     def setUp(self):
-
         data = {
-            'Invoice Month': ['2023-01','2023-01','2023-01','2023-01','2023-01'],
-            'Manager (PI)': ['PI1', 'PI1', 'PI1', 'PI2', 'PI2'],
-            'Institution': ['BU', 'BU', 'BU', 'HU', 'HU'],
-            'Project - Allocation': ['ProjectA', 'ProjectB', 'ProjectC', 'ProjectD', 'ProjectE'],
-            'Untouch Data Column': ['DataA', 'DataB', 'DataC', 'DataD', 'DataE']
+            "Invoice Month": ["2023-01", "2023-01", "2023-01", "2023-01", "2023-01"],
+            "Manager (PI)": ["PI1", "PI1", "PI1", "PI2", "PI2"],
+            "Institution": ["BU", "BU", "BU", "HU", "HU"],
+            "Project - Allocation": [
+                "ProjectA",
+                "ProjectB",
+                "ProjectC",
+                "ProjectD",
+                "ProjectE",
+            ],
+            "Untouch Data Column": ["DataA", "DataB", "DataC", "DataD", "DataE"],
         }
         self.dataframe = pandas.DataFrame(data)
 
@@ -160,70 +190,101 @@ def test_export_pi(self):
         pi_csv_2 = f'{self.dataframe["Institution"][3]}_{self.dataframe["Manager (PI)"][3]}_{self.dataframe["Invoice Month"][3]}.csv'
         self.assertIn(pi_csv_1, os.listdir(output_dir.name))
         self.assertIn(pi_csv_2, os.listdir(output_dir.name))
-        self.assertEqual(len(os.listdir(output_dir.name)), len(self.dataframe['Manager (PI)'].unique()))
+        self.assertEqual(
+            len(os.listdir(output_dir.name)),
+            len(self.dataframe["Manager (PI)"].unique()),
+        )
 
-        pi_df = pandas.read_csv(output_dir.name + '/' + pi_csv_1)
-        self.assertEqual(len(pi_df['Manager (PI)'].unique()), 1)
-        self.assertEqual(pi_df['Manager (PI)'].unique()[0], self.dataframe['Manager (PI)'][0])
+        pi_df = pandas.read_csv(output_dir.name + "/" + pi_csv_1)
+        self.assertEqual(len(pi_df["Manager (PI)"].unique()), 1)
+        self.assertEqual(
+            pi_df["Manager (PI)"].unique()[0], self.dataframe["Manager (PI)"][0]
+        )
 
-        self.assertIn('ProjectA', pi_df['Project - Allocation'].tolist())
-        self.assertIn('ProjectB', pi_df['Project - Allocation'].tolist())
-        self.assertIn('ProjectC', pi_df['Project - Allocation'].tolist())
+        self.assertIn("ProjectA", pi_df["Project - Allocation"].tolist())
+        self.assertIn("ProjectB", pi_df["Project - Allocation"].tolist())
+        self.assertIn("ProjectC", pi_df["Project - Allocation"].tolist())
 
-        pi_df = pandas.read_csv(output_dir.name + '/' + pi_csv_2)
-        self.assertEqual(len(pi_df['Manager (PI)'].unique()), 1)
-        self.assertEqual(pi_df['Manager (PI)'].unique()[0], self.dataframe['Manager (PI)'][3])
+        pi_df = pandas.read_csv(output_dir.name + "/" + pi_csv_2)
+        self.assertEqual(len(pi_df["Manager (PI)"].unique()), 1)
+        self.assertEqual(
+            pi_df["Manager (PI)"].unique()[0], self.dataframe["Manager (PI)"][3]
+        )
 
-        self.assertIn('ProjectD', pi_df['Project - Allocation'].tolist())
-        self.assertIn('ProjectE', pi_df['Project - Allocation'].tolist())
-        self.assertNotIn('ProjectA', pi_df['Project - Allocation'].tolist())
-        self.assertNotIn('ProjectB', pi_df['Project - Allocation'].tolist())
-        self.assertNotIn('ProjectC', pi_df['Project - Allocation'].tolist())
+        self.assertIn("ProjectD", pi_df["Project - Allocation"].tolist())
+        self.assertIn("ProjectE", pi_df["Project - Allocation"].tolist())
+        self.assertNotIn("ProjectA", pi_df["Project - Allocation"].tolist())
+        self.assertNotIn("ProjectB", pi_df["Project - Allocation"].tolist())
+        self.assertNotIn("ProjectC", pi_df["Project - Allocation"].tolist())
 
 
 class TestGetInstitute(TestCase):
     def test_get_pi_institution(self):
-
         institute_map = {
-            "harvard.edu"           : "Harvard University",
-            "bu.edu"                : "Boston University",
-            "bentley.edu"           : "Bentley",
-            "mclean.harvard.edu"    : "McLean Hospital",
-            "meei.harvard.edu"      : "Massachusetts Eye & Ear",
-            "dfci.harvard.edu"      : "Dana-Farber Cancer Institute",
-            "northeastern.edu"      : "Northeastern University",
+            "harvard.edu": "Harvard University",
+            "bu.edu": "Boston University",
+            "bentley.edu": "Bentley",
+            "mclean.harvard.edu": "McLean Hospital",
+            "meei.harvard.edu": "Massachusetts Eye & Ear",
+            "dfci.harvard.edu": "Dana-Farber Cancer Institute",
+            "northeastern.edu": "Northeastern University",
         }
-        
+
         self.assertEqual(
-            process_report.get_institution_from_pi(institute_map, "quanmp@bu.edu"), "Boston University"
+            process_report.get_institution_from_pi(institute_map, "quanmp@bu.edu"),
+            "Boston University",
         )
         self.assertEqual(
-            process_report.get_institution_from_pi(institute_map, "c@mclean.harvard.edu"), "McLean Hospital"
+            process_report.get_institution_from_pi(
+                institute_map, "c@mclean.harvard.edu"
+            ),
+            "McLean Hospital",
         )
         self.assertEqual(
-            process_report.get_institution_from_pi(institute_map, "b@harvard.edu"), "Harvard University"
+            process_report.get_institution_from_pi(institute_map, "b@harvard.edu"),
+            "Harvard University",
         )
         self.assertEqual(
             process_report.get_institution_from_pi(institute_map, "fake"), ""
         )
         self.assertEqual(
-            process_report.get_institution_from_pi(institute_map, "pi@northeastern.edu"), "Northeastern University"
+            process_report.get_institution_from_pi(
+                institute_map, "pi@northeastern.edu"
+            ),
+            "Northeastern University",
         )
 
 
 class TestCredit0002(TestCase):
     def setUp(self):
-
         data = {
-            'Invoice Month': ['2024-03','2024-03','2024-03','2024-03','2024-03','2024-03'],
-            'Manager (PI)': ['PI1', 'PI1', 'PI2', 'PI3', 'PI4', 'PI4'],
-            'Project - Allocation': ['ProjectA', 'ProjectB', 'ProjectC', 'ProjectD', 'ProjectE', 'ProjectF'],
-            'Cost': [10, 100, 10000, 5000, 800, 1000]
+            "Invoice Month": [
+                "2024-03",
+                "2024-03",
+                "2024-03",
+                "2024-03",
+                "2024-03",
+                "2024-03",
+            ],
+            "Manager (PI)": ["PI1", "PI1", "PI2", "PI3", "PI4", "PI4"],
+            "Project - Allocation": [
+                "ProjectA",
+                "ProjectB",
+                "ProjectC",
+                "ProjectD",
+                "ProjectE",
+                "ProjectF",
+            ],
+            "Cost": [10, 100, 10000, 5000, 800, 1000],
         }
         self.dataframe = pandas.DataFrame(data)
-        old_pi = ['PI2,2023-09', 'PI3,2024-02', 'PI4,2024-03'] # Case with old and new pi in pi file
-        old_pi_file = tempfile.NamedTemporaryFile(delete=False, mode='w', suffix='.csv')
-        for pi in old_pi: 
+        old_pi = [
+            "PI2,2023-09",
+            "PI3,2024-02",
+            "PI4,2024-03",
+        ]  # Case with old and new pi in pi file
+        old_pi_file = tempfile.NamedTemporaryFile(delete=False, mode="w", suffix=".csv")
+        for pi in old_pi:
             old_pi_file.write(pi + "\n")
         self.old_pi_file = old_pi_file.name
 
@@ -231,84 +292,123 @@ def tearDown(self):
         os.remove(self.old_pi_file)
 
     def test_apply_credit_0002(self):
-        dataframe = process_report.apply_credits_new_pi(self.dataframe, self.old_pi_file)
+        dataframe = process_report.apply_credits_new_pi(
+            self.dataframe, self.old_pi_file
+        )
 
-        self.assertTrue('Credit' in dataframe)
-        self.assertTrue('Credit Code' in dataframe)
-        self.assertTrue('Balance' in dataframe)
+        self.assertTrue("Credit" in dataframe)
+        self.assertTrue("Credit Code" in dataframe)
+        self.assertTrue("Balance" in dataframe)
 
-        non_credited_project = dataframe[pandas.isna(dataframe['Credit Code'])]
-        credited_projects = dataframe[dataframe['Credit Code'] == '0002']
+        non_credited_project = dataframe[pandas.isna(dataframe["Credit Code"])]
+        credited_projects = dataframe[dataframe["Credit Code"] == "0002"]
 
         self.assertEqual(2, len(non_credited_project))
-        self.assertEqual(non_credited_project.loc[2, 'Cost'], non_credited_project.loc[2, 'Balance'])
-        self.assertEqual(non_credited_project.loc[3, 'Cost'], non_credited_project.loc[3, 'Balance'])
-
+        self.assertEqual(
+            non_credited_project.loc[2, "Cost"], non_credited_project.loc[2, "Balance"]
+        )
+        self.assertEqual(
+            non_credited_project.loc[3, "Cost"], non_credited_project.loc[3, "Balance"]
+        )
 
         self.assertEqual(4, len(credited_projects.index))
-        self.assertTrue('PI2' not in credited_projects['Manager (PI)'].unique())
-        self.assertTrue('PI3' not in credited_projects['Manager (PI)'].unique())
+        self.assertTrue("PI2" not in credited_projects["Manager (PI)"].unique())
+        self.assertTrue("PI3" not in credited_projects["Manager (PI)"].unique())
 
-        self.assertEqual(10, credited_projects.loc[0, 'Credit'])
-        self.assertEqual(100, credited_projects.loc[1, 'Credit'])
-        self.assertEqual(800, credited_projects.loc[4, 'Credit'])
-        self.assertEqual(200, credited_projects.loc[5, 'Credit'])
+        self.assertEqual(10, credited_projects.loc[0, "Credit"])
+        self.assertEqual(100, credited_projects.loc[1, "Credit"])
+        self.assertEqual(800, credited_projects.loc[4, "Credit"])
+        self.assertEqual(200, credited_projects.loc[5, "Credit"])
 
-        self.assertEqual(0, credited_projects.loc[0, 'Balance'])
-        self.assertEqual(0, credited_projects.loc[1, 'Balance'])
-        self.assertEqual(0, credited_projects.loc[4, 'Balance'])
-        self.assertEqual(800, credited_projects.loc[5, 'Balance'])
+        self.assertEqual(0, credited_projects.loc[0, "Balance"])
+        self.assertEqual(0, credited_projects.loc[1, "Balance"])
+        self.assertEqual(0, credited_projects.loc[4, "Balance"])
+        self.assertEqual(800, credited_projects.loc[5, "Balance"])
 
 
 class TestValidateBillables(TestCase):
-
     def setUp(self):
-
         data = {
-            'Manager (PI)': ['PI1', math.nan, 'PI1', 'PI2', 'PI2'],
-            'Project - Allocation': ['ProjectA', 'ProjectB', 'ProjectC', 'ProjectD', 'ProjectE'],
+            "Manager (PI)": ["PI1", math.nan, "PI1", "PI2", "PI2"],
+            "Project - Allocation": [
+                "ProjectA",
+                "ProjectB",
+                "ProjectC",
+                "ProjectD",
+                "ProjectE",
+            ],
         }
         self.dataframe = pandas.DataFrame(data)
 
     def test_validate_billables(self):
-        self.assertEqual(1, len(self.dataframe[pandas.isna(self.dataframe['Manager (PI)'])]))
+        self.assertEqual(
+            1, len(self.dataframe[pandas.isna(self.dataframe["Manager (PI)"])])
+        )
         validated_df = process_report.validate_pi_names(self.dataframe)
-        self.assertEqual(0, len(validated_df[pandas.isna(validated_df['Manager (PI)'])]))
+        self.assertEqual(
+            0, len(validated_df[pandas.isna(validated_df["Manager (PI)"])])
+        )
 
 
 class TestExportLenovo(TestCase):
     def setUp(self):
-
         data = {
-            'Invoice Month': ['2023-01','2023-01','2023-01','2023-01','2023-01', '2023-01'],
-            'Project - Allocation': ['ProjectA', 'ProjectB', 'ProjectC', 'ProjectD', 'ProjectE', 'ProjectF'],
-            'Institution': ['A', 'B', 'C', 'D', 'E', 'F'],
-            'SU Hours (GBhr or SUhr)': [1, 10, 100, 4, 432, 10],
-            'SU Type': ['OpenShift GPUA100SXM4', 'OpenShift GPUA100', 'OpenShift GPUA100SXM4', 'OpenStack GPUA100SXM4', 'OpenStack CPU', 'OpenStack GPUK80']
+            "Invoice Month": [
+                "2023-01",
+                "2023-01",
+                "2023-01",
+                "2023-01",
+                "2023-01",
+                "2023-01",
+            ],
+            "Project - Allocation": [
+                "ProjectA",
+                "ProjectB",
+                "ProjectC",
+                "ProjectD",
+                "ProjectE",
+                "ProjectF",
+            ],
+            "Institution": ["A", "B", "C", "D", "E", "F"],
+            "SU Hours (GBhr or SUhr)": [1, 10, 100, 4, 432, 10],
+            "SU Type": [
+                "OpenShift GPUA100SXM4",
+                "OpenShift GPUA100",
+                "OpenShift GPUA100SXM4",
+                "OpenStack GPUA100SXM4",
+                "OpenStack CPU",
+                "OpenStack GPUK80",
+            ],
         }
         self.dataframe = pandas.DataFrame(data)
 
-        output_file = tempfile.NamedTemporaryFile(delete=False, mode='w', suffix='.csv')
+        output_file = tempfile.NamedTemporaryFile(delete=False, mode="w", suffix=".csv")
         self.output_file = output_file.name
 
     def tearDown(self):
         os.remove(self.output_file)
 
-
     def test_apply_credit_0002(self):
         process_report.export_lenovo(self.dataframe, self.output_file)
         output_df = pandas.read_csv(self.output_file)
 
-        self.assertTrue(set([
-            process_report.INVOICE_DATE_FIELD, 
-            process_report.PROJECT_FIELD, 
-            process_report.INSTITUTION_FIELD, 
-            process_report.SU_TYPE_FIELD,
-            'SU Hours', 
-            'SU Charge',
-            'Charge',
-        ]).issubset(output_df))
-        
+        self.assertTrue(
+            set(
+                [
+                    process_report.INVOICE_DATE_FIELD,
+                    process_report.PROJECT_FIELD,
+                    process_report.INSTITUTION_FIELD,
+                    process_report.SU_TYPE_FIELD,
+                    "SU Hours",
+                    "SU Charge",
+                    "Charge",
+                ]
+            ).issubset(output_df)
+        )
+
         for i, row in output_df.iterrows():
-            self.assertIn(row[process_report.SU_TYPE_FIELD], ['OpenShift GPUA100SXM4', 'OpenStack GPUA100SXM4'])
-            self.assertEqual(row['Charge'], row['SU Charge'] * row['SU Hours'])
+            self.assertIn(
+                row[process_report.SU_TYPE_FIELD],
+                ["OpenShift GPUA100SXM4", "OpenStack GPUA100SXM4"],
+            )
+            self.assertEqual(row["Charge"], row["SU Charge"] * row["SU Hours"])
diff --git a/requirements.txt b/requirements.txt
index 1411a4a..fb6c7ed 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1 +1 @@
-pandas
\ No newline at end of file
+pandas