From 8fc326359f90962b35932dba35c8c7464abe1675 Mon Sep 17 00:00:00 2001
From: Colin Smith <colin.smith@wisc.edu>
Date: Tue, 17 Sep 2024 16:50:05 -0700
Subject: [PATCH] test: address Pandas warnings in workbook annotation

Resolve pandas warnings seen with `test_annotate_workbook` by updating
`annotate_workbook` in `src/spinneret/annotator.py`, to prevent
unexpected behavior. Specifically:

- Avoid chained-assignment warnings by modifying a copy of the row and
  then assigning that copy back to the workbook.
- Read every workbook column as a string (`dtype=str`) to avoid
  type-related warnings.
---
 src/spinneret/annotator.py | 43 +++++++++++++++++++++++++-------------
 1 file changed, 28 insertions(+), 15 deletions(-)

diff --git a/src/spinneret/annotator.py b/src/spinneret/annotator.py
index 1b9c3a2..5adb8a0 100644
--- a/src/spinneret/annotator.py
+++ b/src/spinneret/annotator.py
@@ -123,19 +123,20 @@ def annotate_workbook(workbook_path: str, output_path: str) -> None:
     print(f"Annotating workbook {workbook_path}")
 
     # Load the workbook and EML for processing
-    wb = pd.read_csv(workbook_path, sep="\t", encoding="utf-8")
+    wb = pd.read_csv(workbook_path, sep="\t", encoding="utf-8", dtype=str)
 
     # Iterate over workbook rows and annotate
     wb_additional_rows = pd.DataFrame(columns=wb.columns)
     for index, row in wb.iterrows():
 
-        # Adding standard predicates based on the subject element name
-        if row["element"] == "dataset":
-            wb.loc[index, "predicate"] = "is about"
-            wb.loc[index, "predicate_id"] = "http://purl.obolibrary.org/obo/IAO_0000136"
-        elif row["element"] == "attribute":
-            wb.loc[index, "predicate"] = "contains measurements of type"
-            wb.loc[index, "predicate_id"] = (
+        # Operate on a copy of the row to avoid warnings
+        modified_row = row.copy()
+        if modified_row["element"] == "dataset":
+            modified_row["predicate"] = "is about"
+            modified_row["predicate_id"] = "http://purl.obolibrary.org/obo/IAO_0000136"
+        elif modified_row["element"] == "attribute":
+            modified_row["predicate"] = "contains measurements of type"
+            modified_row["predicate_id"] = (
                 "http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType"
             )
 
@@ -160,16 +161,28 @@ def annotate_workbook(workbook_path: str, output_path: str) -> None:
         # Add annotations to the workbook. Add first annotation to row then the
         # remainder to a separate data frame to be appended at the end.
         if annotation:
-            wb.loc[index, "object"] = annotation[0]["label"]
-            wb.loc[index, "object_id"] = annotation[0]["uri"]
-            wb.loc[index, "author"] = "BioPortal Annotator"
-            wb.loc[index, "date"] = pd.Timestamp.now()
+            modified_row["object"] = annotation[0]["label"]
+            modified_row["object_id"] = annotation[0]["uri"]
+            modified_row["author"] = "BioPortal Annotator"
+            modified_row["date"] = pd.Timestamp.now()
+            wb.loc[index] = modified_row  # Update the row in the workbook
         if len(annotation) > 1:
             for item in annotation[1:]:
                 # Create row
-                new_row = wb.loc[index]
-                new_row.loc["object"] = item["label"]
-                new_row.loc["object_id"] = item["uri"]
+                new_row = row.copy()
+                if new_row["element"] == "dataset":
+                    new_row["predicate"] = "is about"
+                    new_row["predicate_id"] = (
+                        "http://purl.obolibrary.org/obo/IAO_0000136"
+                    )
+                elif new_row["element"] == "attribute":
+                    new_row["predicate"] = "contains measurements of type"
+                    new_row["predicate_id"] = (
+                        "http://ecoinformatics.org/oboe/oboe.1.2/"
+                        "oboe-core.owl#containsMeasurementsOfType"
+                    )
+                new_row["object"] = item["label"]
+                new_row["object_id"] = item["uri"]
                 new_row["author"] = "BioPortal Annotator"
                 new_row["date"] = pd.Timestamp.now()
                 # Append row to additional rows df