Since all current discounts have been implemented as processors,

the `DiscountInvoice` class is now removed. No invoice class will now call `_prepare()` or `_process()`. `BUSubsidyProcessor`, which handles processing for the BU subsidy, sets `IS_DISCOUNT_BY_NERC` to `False` because the subsidy is not provided by NERC. Because of this, `BU Balance` indicates the money which BU (not the PI they are subsidizing) owes to the MGHPCC. The test cases for the BU Subsidy have been refactored to be more robust and readable. The `Project` field has been added to `invoice.py`.
CCI-MOC · Nov 19, 2024 · c81622d · c81622d
1 parent b9d8086
commit c81622d
Show file tree

Hide file tree

Showing 7 changed files with 211 additions and 251 deletions.
diff --git a/process_report/invoices/bu_internal_invoice.py b/process_report/invoices/bu_internal_invoice.py
@@ -1,12 +1,10 @@
 from dataclasses import dataclass
-from decimal import Decimal
 
 import process_report.invoices.invoice as invoice
-import process_report.invoices.discount_invoice as discount_invoice
 
 
 @dataclass
-class BUInternalInvoice(discount_invoice.DiscountInvoice):
+class BUInternalInvoice(invoice.Invoice):
     """
     This invoice operates on data processed by these Processors:
     - ValidateBillablePIsProcessor
@@ -20,39 +18,32 @@ class BUInternalInvoice(discount_invoice.DiscountInvoice):
         invoice.COST_FIELD,
         invoice.CREDIT_FIELD,
         invoice.SUBSIDY_FIELD,
-        invoice.BALANCE_FIELD,
+        invoice.PI_BALANCE_FIELD,
     ]
 
-    subsidy_amount: int
-
-    def _prepare(self):
-        def get_project(row):
-            project_alloc = row[invoice.PROJECT_FIELD]
-            if project_alloc.rfind("-") == -1:
-                return project_alloc
-            else:
-                return project_alloc[: project_alloc.rfind("-")]
+    exported_columns_map = {invoice.PI_BALANCE_FIELD: "Balance"}
 
+    def _prepare_export(self):
         self.data = self.data[
             self.data[invoice.IS_BILLABLE_FIELD] & ~self.data[invoice.MISSING_PI_FIELD]
         ]
         self.data = self.data[
             self.data[invoice.INSTITUTION_FIELD] == "Boston University"
-        ].copy()
-        self.data["Project"] = self.data.apply(get_project, axis=1)
-        self.data[invoice.SUBSIDY_FIELD] = Decimal(0)
-
-    def _process(self):
-        data_summed_projects = self._sum_project_allocations(self.data)
-        self.data = self._apply_subsidy(data_summed_projects, self.subsidy_amount)
+        ]
+        self.data = self._sum_project_allocations(self.data)
 
     def _sum_project_allocations(self, dataframe):
         """A project may have multiple allocations, and therefore multiple rows
         in the raw invoices. For BU-Internal invoice, we only want 1 row for
         each unique project, summing up its allocations' costs"""
         project_list = dataframe["Project"].unique()
         data_no_dup = dataframe.drop_duplicates("Project", inplace=False)
-        sum_fields = [invoice.COST_FIELD, invoice.CREDIT_FIELD, invoice.BALANCE_FIELD]
+        sum_fields = [
+            invoice.COST_FIELD,
+            invoice.CREDIT_FIELD,
+            invoice.SUBSIDY_FIELD,
+            invoice.PI_BALANCE_FIELD,
+        ]
         for project in project_list:
             project_mask = dataframe["Project"] == project
             no_dup_project_mask = data_no_dup["Project"] == project
@@ -61,18 +52,3 @@ def _sum_project_allocations(self, dataframe):
             data_no_dup.loc[no_dup_project_mask, sum_fields] = sum_fields_sums
 
         return data_no_dup
-
-    def _apply_subsidy(self, dataframe, subsidy_amount):
-        pi_list = dataframe[invoice.PI_FIELD].unique()
-
-        for pi in pi_list:
-            pi_projects = dataframe[dataframe[invoice.PI_FIELD] == pi]
-            self.apply_flat_discount(
-                dataframe,
-                pi_projects,
-                subsidy_amount,
-                invoice.SUBSIDY_FIELD,
-                invoice.BALANCE_FIELD,
-            )
-
-        return dataframe
diff --git a/process_report/invoices/discount_invoice.py b/process_report/invoices/discount_invoice.py
diff --git a/process_report/invoices/invoice.py b/process_report/invoices/invoice.py
@@ -37,6 +37,7 @@
 IS_BILLABLE_FIELD = "Is Billable"
 MISSING_PI_FIELD = "Missing PI"
 PI_BALANCE_FIELD = "PI Balance"
+PROJECT_NAME_FIELD = "Project"
 ###
 
 

diff --git a/process_report/process_report.py b/process_report/process_report.py
@@ -21,6 +21,7 @@
     lenovo_processor,
     validate_billable_pi_processor,
     new_pi_credit_processor,
+    bu_subsidy_processor,
 )
 
 ### PI file field names
@@ -242,7 +243,12 @@ def main():
     )
     new_pi_credit_proc.process()
 
-    processed_data = new_pi_credit_proc.data
+    bu_subsidy_proc = bu_subsidy_processor.BUSubsidyProcessor(
+        "", invoice_month, new_pi_credit_proc.data.copy(), args.BU_subsidy_amount
+    )
+    bu_subsidy_proc.process()
+
+    processed_data = bu_subsidy_proc.data
 
     ### Initialize invoices
 
@@ -280,7 +286,6 @@ def main():
         name=args.BU_invoice_file,
         invoice_month=invoice_month,
         data=processed_data.copy(),
-        subsidy_amount=args.BU_subsidy_amount,
     )
 
     pi_inv = pi_specific_invoice.PIInvoice(

diff --git a/process_report/processors/bu_subsidy_processor.py b/process_report/processors/bu_subsidy_processor.py
@@ -7,6 +7,8 @@
 
 @dataclass
 class BUSubsidyProcessor(discount_processor.DiscountProcessor):
+    IS_DISCOUNT_BY_NERC = False
+
     subsidy_amount: int
 
     def _prepare(self):
@@ -17,43 +19,35 @@ def get_project(row):
             else:
                 return project_alloc[: project_alloc.rfind("-")]
 
-        self.data = self.data[
-            self.data[invoice.IS_BILLABLE_FIELD] & ~self.data[invoice.MISSING_PI_FIELD]
-        ]
-        self.data = self.data[
-            self.data[invoice.INSTITUTION_FIELD] == "Boston University"
-        ].copy()
-        self.data["Project"] = self.data.apply(get_project, axis=1)
+        self.data[invoice.PROJECT_NAME_FIELD] = self.data.apply(get_project, axis=1)
         self.data[invoice.SUBSIDY_FIELD] = Decimal(0)
 
     def _process(self):
-        data_summed_projects = self._sum_project_allocations(self.data)
-        self.data = self._apply_subsidy(data_summed_projects, self.subsidy_amount)
+        self.data = self._apply_subsidy(self.data, self.subsidy_amount)
 
-    def _sum_project_allocations(self, dataframe):
-        """A project may have multiple allocations, and therefore multiple rows
-        in the raw invoices. For BU-Internal invoice, we only want 1 row for
-        each unique project, summing up its allocations' costs"""
-        project_list = dataframe["Project"].unique()
-        data_no_dup = dataframe.drop_duplicates("Project", inplace=False)
-        sum_fields = [invoice.COST_FIELD, invoice.CREDIT_FIELD, invoice.BALANCE_FIELD]
-        for project in project_list:
-            project_mask = dataframe["Project"] == project
-            no_dup_project_mask = data_no_dup["Project"] == project
-
-            sum_fields_sums = dataframe[project_mask][sum_fields].sum().values
-            data_no_dup.loc[no_dup_project_mask, sum_fields] = sum_fields_sums
+    @staticmethod
+    def _get_subsidy_eligible_projects(data):
+        filtered_data = data[
+            data[invoice.IS_BILLABLE_FIELD] & ~data[invoice.MISSING_PI_FIELD]
+        ]
+        filtered_data = filtered_data[
+            filtered_data[invoice.INSTITUTION_FIELD] == "Boston University"
+        ].copy()
 
-        return data_no_dup
+        return filtered_data
 
     def _apply_subsidy(self, dataframe, subsidy_amount):
-        pi_list = dataframe[invoice.PI_FIELD].unique()
+        subsidy_eligible_projects = self._get_subsidy_eligible_projects(dataframe)
+        pi_list = subsidy_eligible_projects[invoice.PI_FIELD].unique()
 
         for pi in pi_list:
-            pi_projects = dataframe[dataframe[invoice.PI_FIELD] == pi]
+            pi_projects = subsidy_eligible_projects[
+                subsidy_eligible_projects[invoice.PI_FIELD] == pi
+            ]
             self.apply_flat_discount(
                 dataframe,
                 pi_projects,
+                invoice.PI_BALANCE_FIELD,
                 subsidy_amount,
                 invoice.SUBSIDY_FIELD,
                 invoice.BALANCE_FIELD,