NOAA-OWP · ZahraGhahremani · Jul 19, 2025 · Jul 19, 2025 · Aug 20, 2025 · Aug 20, 2025
diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md
@@ -1,6 +1,15 @@
 All notable changes to this project will be documented in this file.
 We follow the [Semantic Versioning 2.0.0](http://semver.org/) format.
 
+## v4.8.x.x - 2025-08-20 - [PR#1603]([https://github.com/NOAA-OWP/inundation-mapping/pull/1603])
+
+This PR fixes issue with box plot generation and introduces a new function to compare two FIM outputs.
+
+### Changes
+`tools/rating_curve_comparison.py` : changes as described above.
+
+<br/>
+
 ## v4.8.10.0 - 2025-07-30 - [PR#1561]([https://github.com/NOAA-OWP/inundation-mapping/pull/1554])
 
 Some minor tweaks that should help the performance of lofi. Also included are a handful of small bugfixes and some cleanup of the CLI defaults.

diff --git a/tools/rating_curve_comparison.py b/tools/rating_curve_comparison.py
@@ -117,6 +117,9 @@ def generate_rating_curve_metrics(args):
     if not elev_table.empty:
         hydrotable = pd.DataFrame()
         for branch in elev_table.levpa_id.unique():
+            # Skip branch 0 (process only GMS branches)
+            # if branch == '0':
+            #     continue
             branch_elev_table = elev_table.loc[elev_table.levpa_id == branch].copy()
             # branch_elev_table = elev_table.loc[(elev_table.levpa_id == branch) & (elev_table.location_id.notnull())].copy()
             branch_hydrotable = pd.read_csv(
@@ -1135,11 +1138,30 @@ def create_static_gpkg(output_dir, output_gpkg, agg_recurr_stats_table, gages_gp
     sns.histplot(ax=ax[0, 0], y='nrmse', data=usgs_gages, binwidth=0.2, binrange=(0, 10))
     sns.countplot(ax=ax[1, 0], y='mean_abs_y_diff_ft', data=usgs_gages)
     sns.countplot(ax=ax[1, 1], y='mean_y_diff_ft', data=usgs_gages)
-    sns.boxplot(
-        ax=ax[0, 1], data=usgs_gages[['2', '5', '10', '25', '50', 'action', 'minor', 'moderate', 'major']]
-    )
-    ax[0, 1].set(ylim=(-12, 12))
-
+    # Define the full set of columns we want for the boxplot
+    boxplot_cols = ['2', '5', '10', '25', '50', 'action', 'minor', 'moderate', 'major']
+    # Find which of these columns exist in the df and have data
+    cols_to_plot = [col for col in boxplot_cols if col in usgs_gages.columns and usgs_gages[col].notna().any]
+    # Create the boxplot if there are cols with data to plot
+    data_is_plottable = False
+    if cols_to_plot:
+        if usgs_gages[cols_to_plot].notna().any().any():
+            data_is_plottable = True
+    if data_is_plottable:
+        sns.boxplot(ax=ax[0, 1], data=usgs_gages[cols_to_plot])
+        ax[0, 1].set(ylim=(-12, 12))
+    else:
+        logging.warning("Cannot create a boxplot because no data is available.")
+        ax[0, 1].text(
+            0.5,
+            0.5,
+            'No data for boxplot',
+            horizontalalignment='center',
+            verticalalignment='center',
+            transform=ax[0, 1].transAxes,
+        )
+        ax[0, 1].set_yticks([])
+        ax[0, 1].set_xticks([])
     fig.tight_layout()
     fig.savefig(join(output_dir, f'{output_gpkg}_summary_plots.png'.replace('.gpkg', '')))
 
@@ -1172,8 +1194,16 @@ def calculate_rc_diff(rc):
         .reset_index()
         .pivot(index='location_id', columns='recurr_interval', values='yhat_minus_y')
     )
+    # Define columns that are expected in the final output
+    expected_cols = ['2', '5', '10', '25', '50', 'action', 'minor', 'moderate', 'major']
+
+    # Check if the column exists. If not, add it and fill with NaN
+    # This prevents a KeyError if a HUC is missing certain recurrence intervals.
+    for col in expected_cols:
+        if col not in rc_unmelt.columns:
+            rc_unmelt[col] = np.nan
     # Reorder columns
-    rc_unmelt = rc_unmelt[['2', '5', '10', '25', '50', 'action', 'minor', 'moderate', 'major']]
+    rc_unmelt = rc_unmelt[expected_cols]
 
     return rc_unmelt
 
@@ -1213,8 +1243,9 @@ def evaluate_results(sierra_results=[], labels=[], save_location=''):
         df['_version'] = label
 
     # Combine all dataframes into one
-    all_results = sierra_results[0]
-    all_results = pd.concat([all_results, sierra_results[1:]])
+    # all_results = sierra_results[0]
+    # all_results = pd.concat([all_results, sierra_results[1:]])
+    all_results = pd.concat(sierra_results, ignore_index=True)
 
     # Melt results for boxplotting
     all_results_melted = all_results.melt(
@@ -1236,6 +1267,119 @@ def evaluate_results(sierra_results=[], labels=[], save_location=''):
     plt.savefig(save_location)
 
 
+def generate_version_comparison(old_stats_path, new_stats_path, output_path, old_label, new_label):
+    """
+    Generates a 2x2 comparison plots for two FIM versions.
+
+    Args:
+        old_stats_path (str): path to the statistics file for the old version.
+        new_stats_path (str): path to the statistics file for the new version.
+        output_path (str): path to save the image.
+        old_label (str): label for the old version (e.g., 'FIMv5')
+        new_label (str): label for the new version (e.g., 'FIMv6')
+    """
+
+    logging.info('Generating evaluation plots...')
+    try:
+        old_stats = pd.read_csv(old_stats_path, dtype={'location_id': str})
+        new_stats = pd.read_csv(new_stats_path, dtype={'location_id': str})
+    except FileNotFoundError as e:
+        logging.error(f"Error: {e}")
+        return
+    # Metrics to compare
+    metrics = ['nrmse', 'mean_abs_y_diff_ft', 'percent_bias']
+
+    comparison_df = pd.merge(
+        old_stats[['location_id'] + metrics],
+        new_stats[['location_id'] + metrics],
+        on='location_id',
+        suffixes=('_old', '_new'),
+    )
+    comparison_df = comparison_df.replace([float('inf'), -float('inf')], pd.NA)
+    comparison_df = comparison_df.dropna(subset=['nrmse_old', 'nrmse_new'])
+
+    logging.info(f"found {len(comparison_df)} common gages to compare.")
+    if len(comparison_df) == 0:
+        logging.warning('No common gages found between the two versions.')
+        return
+
+    fig, axes = plt.subplots(2, 2, figsize=(16, 14))
+    fig.suptitle(f"FIM Version Comparison: {old_label} vs. {new_label}", fontsize=16, y=0.95)
+    # Scatter plot for NRMSE
+    ax1 = axes[0, 0]
+    sns.scatterplot(data=comparison_df, x='nrmse_old', y='nrmse_new', alpha=0.6, ax=ax1, s=20)
+    max_val_nrmse = max(comparison_df['nrmse_old'].max(), comparison_df['nrmse_new'].max())
+    if np.isfinite((max_val_nrmse)):
+        ax1.plot([0, max_val_nrmse], [0, max_val_nrmse], color='red', linestyle='--')
+    ax1.set_title('NRMSE Comparison', fontsize=14)
+    ax1.set_xlabel(f"NRMSE - {old_label}")
+    ax1.set_ylabel(f"NRMSE - {new_label}")
+    ax1.grid(True)
+    # ax1.legend()
+    ax1.set_aspect('equal', adjustable='box')
+    max_lim = comparison_df['nrmse_new'].quantile(0.75) + 5
+    ax1.set_ylim(0, max_lim)
+    ax1.set_xlim(0, max_lim)
+    # Boxplot for NRMSE
+    ax2 = axes[0, 1]
+    nrmse_melted = comparison_df.melt(
+        id_vars='location_id', value_vars=['nrmse_old', 'nrmse_new'], var_name='version', value_name='nrmse'
+    )
+    nrmse_melted['version'] = nrmse_melted['version'].replace(
+        {'nrmse_old': old_label, 'nrmse_new': new_label}
+    )
+    sns.boxplot(data=nrmse_melted, x='version', y='nrmse', ax=ax2)
+    ax2.set_title('NRMSE Distribution', fontsize=14)
+    ax2.set_xlabel('')
+    ax2.set_ylabel('NRMSE')
+    ax2.grid(axis='y')
+    max_lim2 = comparison_df['nrmse_new'].quantile(0.75) + 10
+    ax2.set_ylim(0, max_lim2)
+
+    # Boxplot for mean absolute error
+    ax3 = axes[1, 0]
+    mae_melted = comparison_df.melt(
+        id_vars='location_id',
+        value_vars=['mean_abs_y_diff_ft_old', 'mean_abs_y_diff_ft_new'],
+        var_name='version',
+        value_name='mae',
+    )
+    mae_melted['version'] = mae_melted['version'].replace(
+        {'mean_abs_y_diff_ft_old': old_label, 'mean_abs_y_diff_ft_new': new_label}
+    )
+    sns.boxplot(data=mae_melted, x='version', y='mae', ax=ax3)
+    ax3.set_title('Mean Absolute Error Distribution', fontsize=14)
+    ax3.set_xlabel('')
+    ax3.set_ylabel('Mean Absolute Error (ft)')
+    ax3.grid(axis='y')
+    max_lim3 = mae_melted['mae'].quantile(0.75) + 10
+    ax3.set_ylim(0, max_lim3)
+
+    # Boxplot for percent bias
+    ax4 = axes[1, 1]
+    bias_melted = comparison_df.melt(
+        id_vars='location_id',
+        value_vars=['percent_bias_old', 'percent_bias_new'],
+        var_name='version',
+        value_name='bias',
+    )
+    bias_melted['version'] = bias_melted['version'].replace(
+        {'percent_bias_old': old_label, 'percent_bias_new': new_label}
+    )
+    sns.boxplot(data=bias_melted, x='version', y='bias', ax=ax4)
+    ax4.set_title('Percent Bias Distribution', fontsize=14)
+    ax4.set_xlabel('')
+    ax4.set_ylabel('Percent Bias (%)')
+    ax4.grid(axis='y')
+    max_lim4 = bias_melted['bias'].quantile(0.75) + 10
+    ax4.set_ylim(0, max_lim4)
+
+    # Save
+    plt.tight_layout(rect=[0, 0.3, 1, 0.93])
+    plt.savefig(output_path, dpi=150)
+    plt.close()
+
+
 if __name__ == '__main__':
 
     """
@@ -1247,6 +1391,15 @@ def evaluate_results(sierra_results=[], labels=[], save_location=''):
         -flows /data/inputs/rating_curve/nwm_recur_flows/
         -stages /data/inputs/usgs_gages/usgs_stage_discharge_cms_{date}.csv
         -j 40
+
+    ** Note: To generate the comparison plots, run the script on the first version. For the second vrsion, run it as follows:
+        -fim_dir data/previous_fim/hand_4_8_7_2/
+        -output_dir data/fim_performance/hand_4_8_7_2/rating_curve_comparison/
+        -gages /data/inputs/usgs_gages/usgs_rating_curves_{date}.csv
+        -flows /data/inputs/rating_curve/nwm_recur_flows/
+        -stages /data/inputs/usgs_gages/usgs_stage_discharge_cms_{date}.csv
+        -comp data/fim_performance/hand_4_5_8_0/rating_curve_comparison/agg_nwm_recurr_flow_elev_stats_location_id.csv "FIM5" "FIM6"
+        -j 40
     """
 
     parser = argparse.ArgumentParser(
@@ -1296,6 +1449,17 @@ def evaluate_results(sierra_results=[], labels=[], save_location=''):
         nargs=2,
         action='append',
     )
+    parser.add_argument(
+        '-comp',
+        '--compare_plots',
+        help='Create comparison plots. '
+        'Expects 3 arguments: 1. path to the statistics file of other version for comparison, '
+        '2. ta label for the other version, and 3. a label for the current version being run',
+        required=False,
+        nargs=3,
+        metavar=('other_csv_path', 'other_label', 'current_label'),
+    )
+
     parser.add_argument('-s', '--single-plot', help='Create single plots', action='store_true')
     args = vars(parser.parse_args())
 
@@ -1440,6 +1604,33 @@ def evaluate_results(sierra_results=[], labels=[], save_location=''):
             evaluate_results(
                 sierra_test_dfs, sierra_test_labels, join(output_dir, 'Sierra_Test_Eval_boxplot.png')
             )
+
+        # Generates comparison plots
+        if args['compare_plots']:
+            # The stats for the current run are aggregated in the output_dir
+            current_stats_filename = os.path.join(
+                output_dir,
+                f"agg_nwm_recurr_flow_elev_stats_{'_'.join(stat_groups) if stat_groups else 'location_id'}.csv",
+            )
+            # Unpach arguments
+            other_version_csv = args['compare_plots'][0]
+            other_version_label = args['compare_plots'][1]
+            current_version_label = args['compare_plots'][2]
+            # Define output path
+            plots_output_path = os.path.join(output_dir, 'FIM_Comparison_Stats.png')
+
+            if os.path.exists(current_stats_filename):
+                generate_version_comparison(
+                    old_stats_path=other_version_csv,
+                    new_stats_path=current_stats_filename,
+                    output_path=plots_output_path,
+                    old_label=other_version_label,
+                    new_label=current_version_label,
+                )
+            else:
+                logging.error(
+                    f"Could not find current stats file to generate comparison plots: {current_stats_filename}"
+                )
     except Exception:
         logging.info("-- Exception")
         print("")