Merge pull request #7 from ArtesiaWater/dev

Release 0.3.0
ArtesiaWater · Oct 25, 2022 · 4ff0483 · 4ff0483
2 parents 85bd553 + 3d810f1
commit 4ff0483
Show file tree

Hide file tree

Showing 6 changed files with 224 additions and 81 deletions.
diff --git a/traval/detector.py b/traval/detector.py
@@ -91,7 +91,7 @@ def _validate_input_series(series):
             dtype = series.dtypes
             name = series.name
         elif isinstance(series, pd.DataFrame):
-            dtype = series.dtypes[0]
+            dtype = series.dtypes.values[0]
             name = series.columns[0]
         else:
             raise TypeError(
@@ -458,6 +458,77 @@ def get_corrections_dataframe(self):
         df.columns = list(self.ruleset.rules.keys())
         return df
 
+    def get_corrections_comparison(self, truth=None):
+        """Compare traval's corrections with those in a 'truth' series.
+
+        Parameters
+        ----------
+        truth : pd.DataFrame, optional
+            two-column DataFrame; rows where the first column is NaN are
+            treated as corrections. The columns are labelled
+            "truth_series" and "truth_comment" in the result. If None,
+            `self.truth` is used.
+
+        Returns
+        -------
+        df : pd.DataFrame
+            the original series, the traval result, the traval comments,
+            the truth columns and a "comparison_label" column, joined
+            column-wise on their indices.
+
+        Raises
+        ------
+        ValueError
+            if `truth` is None and `self.truth` is None as well.
+        """
+        # Only fall back to (and only complain about) self.truth when no
+        # truth was passed; an explicitly supplied truth is always used.
+        if truth is None:
+            truth = self.truth
+        if truth is None:
+            raise ValueError("Supply a time series for 'truth'!")
+
+        # Relabel a copy so neither the caller's DataFrame nor self.truth
+        # gets its columns renamed as a side effect.
+        truth = truth.copy()
+        truth.columns = ["truth_series", "truth_comment"]
+
+        comments_traval = self.get_comment_series()
+        comments_traval.name = "traval_comment"
+
+        mask_truth_corrections = truth.iloc[:, 0].isna()
+        comments_truth = truth.loc[mask_truth_corrections]
+
+        # use the comparison stored under the last key of self.comparisons
+        k = list(self.comparisons.keys())[-1]
+        comparison = self.comparisons[k].comparison_series()
+        translate = {
+            -1: "Value modified",
+            0: "Flagged in both",
+            1: "Only flagged in 'truth' series",
+            2: "Only flagged in 'traval' series",
+            -9999: "NaN in both"
+        }
+        comparison = comparison.apply(lambda v: translate[v])
+        comparison.name = "comparison_label"
+
+        # union of the traval comment index and the truth rows whose first
+        # column is NaN; the other inputs are restricted to these rows
+        raw_index = (comments_traval.index
+                     .union(comments_truth.index))
+
+        traval_series = self.get_final_result()
+        traval_series.name = "traval_series"
+
+        df = pd.concat([
+            self.series.loc[raw_index.intersection(self.series.index)],
+            traval_series.loc[raw_index.intersection(traval_series.index)],
+            comments_traval,
+            truth.loc[raw_index.intersection(truth.index)],
+            comparison.loc[raw_index.intersection(comparison.index)]
+        ], axis=1)
+
+        return df
+
     def plot_overview(self, mark_suspects=True, **kwargs):
         """Plot timeseries with flagged values per applied rule.
 

diff --git a/traval/params.py b/traval/params.py
@@ -162,10 +162,12 @@ def from_csv(cls, csvfile):
         for i, (v, t) in params.loc[:, ["value", "dtype"]].iterrows():
             if t == "float":
                 v = float(v)
-            if t == "int":
+            elif t == "int":
                 v = int(v)
-            if t == "str":
+            elif t == "str":
                 continue  # already str
+            elif t == "NoneType":
+                v = None
             params.loc[i, "value"] = v
         params.drop(columns=['dtype'], inplace=True)
         parameters, defaults = cls._split_df(params)

diff --git a/traval/plots.py b/traval/plots.py
@@ -140,7 +140,7 @@ def plot_series_comparison(self, mark_unique=True, mark_different=True,
         plot_labels = [i.get_label() for i in plot_handles]
         ax.legend(plot_handles, plot_labels, loc="best",
                   ncol=int(np.ceil(len(plot_handles) / 2.)))
-        ax.grid(b=True)
+        ax.grid(visible=True)
         fig.tight_layout()
         return ax
 
@@ -272,7 +272,7 @@ def plot_validation_result(self, ax=None):
         plot_labels = [i.get_label() for i in plot_handles]
         ax.legend(plot_handles, plot_labels, loc=(0, 1), markerscale=1.25,
                   ncol=len(plot_handles), frameon=False)
-        ax.grid(b=True)
+        ax.grid(visible=True)
         fig.tight_layout()
 
         return ax
@@ -336,7 +336,7 @@ def roc_plot(tpr, fpr, labels, colors=None, ax=None,
 
     ax.set_xlim(0, 1)
     ax.set_ylim(0, 1)
-    ax.grid(b=True)
+    ax.grid(visible=True)
     ax.legend(loc="lower right")
     ax.set_ylabel("True Positive Rate (sensitivity)")
     ax.set_xlabel("False Positive Rate (1-specificity)")
@@ -410,7 +410,7 @@ def det_plot(fpr, fnr, labels, ax=None, **kwargs):
     ax.set_yticks(tick_locations)
     ax.set_yticklabels(tick_labels)
     ax.set_ylim(-3, 3)
-    ax.grid(b=True)
+    ax.grid(visible=True)
 
     ax.set_title("detection error tradeoff plot")