added colormap, create equation table

Gossty · Gossty · commit d5996f14654c · 2025-01-07T11:09:05.000-08:00
diff --git a/qiita_db/meta_util.py b/qiita_db/meta_util.py
@@ -593,7 +593,7 @@ def update_resource_allocation_redis(active=True):
                 if len(df) == 0:
                     continue
 
-                fig, axs = resource_allocation_plot(df, cname, sname, col_name)
+                fig, axs = resource_allocation_plot(df, col_name)
                 titles = [0, 0]
                 images = [0, 0]
 
@@ -605,21 +605,19 @@ def update_resource_allocation_redis(active=True):
                     # only time
                     new_fig = plt.figure()
                     new_ax = new_fig.add_subplot(111)
-
-                    scatter_data = ax.collections[0]
-                    new_ax.scatter(scatter_data.get_offsets()[:, 0],
-                                   scatter_data.get_offsets()[:, 1],
-                                   s=scatter_data.get_sizes(), label="data")
-
                     line = ax.lines[0]
                     new_ax.plot(line.get_xdata(), line.get_ydata(),
                                 linewidth=1, color='orange')
-
-                    if len(ax.collections) > 1:
-                        failure_data = ax.collections[1]
-                        new_ax.scatter(failure_data.get_offsets()[:, 0],
-                                       failure_data.get_offsets()[:, 1],
-                                       color='red', s=3, label="failures")
+                    
+                    handles, labels = ax.get_legend_handles_labels()
+                    for handle, label, scatter_data in zip(handles,
+                                                           labels,
+                                                           ax.collections):
+                        color = handle.get_facecolor()
+                        new_ax.scatter(scatter_data.get_offsets()[:, 0],
+                                       scatter_data.get_offsets()[:, 1],
+                                       s=scatter_data.get_sizes(), label=label,
+                                       color=color)
 
                     new_ax.set_xscale('log')
                     new_ax.set_yscale('log')
diff --git a/qiita_db/support_files/patches/93.sql b/qiita_db/support_files/patches/93.sql
@@ -62,3 +62,11 @@ CREATE INDEX IF NOT EXISTS processing_job_command_parameters_payload ON qiita.pr
 -- Addding contraints for the slurm_reservation column
 ALTER TABLE qiita.analysis DROP CONSTRAINT IF EXISTS analysis_slurm_reservation_valid_chars;
 ALTER TABLE qiita.analysis ADD CONSTRAINT analysis_slurm_reservation_valid_chars CHECK ( slurm_reservation ~ '^[a-zA-Z0-9_]*$' );
+
+-- Jan 7, 2025
+-- Adding a table for formulas for resource allocations (re-runnable patch)
+CREATE TABLE IF NOT EXISTS qiita.allocation_equations (
+  equation_id     SERIAL PRIMARY KEY,
+  equation_name   TEXT NOT NULL,
+  expression      TEXT NOT NULL
+);
diff --git a/qiita_db/test/test_util.py b/qiita_db/test/test_util.py
@@ -1327,8 +1327,7 @@ def setUp(self):
 
     def test_plot_return(self):
         # check the plot returns correct objects
-        fig1, axs1 = qdb.util.resource_allocation_plot(
-            self.df, self.cname, self.sname, self.col_name)
+        fig1, axs1 = qdb.util.resource_allocation_plot(self.df, self.col_name)
         self.assertIsInstance(
             fig1, Figure,
             "Returned object fig1 is not a Matplotlib Figure")
@@ -1345,13 +1344,12 @@ def test_minimize_const(self):
         fig, axs = plt.subplots(ncols=2, figsize=(10, 4), sharey=False)
 
         bm, options = qdb.util._resource_allocation_plot_helper(
-            self.df, axs[0], self.cname, self.sname, 'MaxRSSRaw',
-            qdb.util.MODELS_MEM, self.col_name)
+            self.df, axs[0], 'MaxRSSRaw', qdb.util.MODELS_MEM, self.col_name)
         # check that the algorithm chooses correct model for MaxRSSRaw and
         # has 0 failures
         k, a, b = options.x
-        failures_df = qdb.util._resource_allocation_failures(
-            self.df, k, a, b, bm, self.col_name, 'MaxRSSRaw')
+        failures_df = qdb.util._resource_allocation_success_failures(
+            self.df, k, a, b, bm, self.col_name, 'MaxRSSRaw')[-1]
         failures = failures_df.shape[0]
         self.assertEqual(bm, qdb.util.mem_model3,
                          msg=f"""Best memory model
@@ -1367,11 +1365,10 @@ def test_minimize_const(self):
         # check that the algorithm chooses correct model for ElapsedRaw and
         # has 1 failure
         bm, options = qdb.util._resource_allocation_plot_helper(
-            self.df, axs[1], self.cname, self.sname, 'ElapsedRaw',
-            qdb.util.MODELS_TIME, self.col_name)
+            self.df, axs[1], 'ElapsedRaw', qdb.util.MODELS_TIME, self.col_name)
         k, a, b = options.x
-        failures_df = qdb.util._resource_allocation_failures(
-            self.df, k, a, b, bm, self.col_name, 'ElapsedRaw')
+        failures_df = qdb.util._resource_allocation_success_failures(
+            self.df, k, a, b, bm, self.col_name, 'ElapsedRaw')[-1]
         failures = failures_df.shape[0]
 
         self.assertEqual(bm, qdb.util.time_model1,
diff --git a/qiita_db/util.py b/qiita_db/util.py
@@ -74,6 +74,7 @@
 from email.mime.text import MIMEText
 
 import matplotlib.pyplot as plt
+from matplotlib import colormaps
 import numpy as np
 import pandas as pd
 from io import StringIO
@@ -2363,17 +2364,13 @@ def send_email(to, subject, body):
         smtp.close()
 
 
-def resource_allocation_plot(df, cname, sname, col_name):
+def resource_allocation_plot(df, col_name):
     """Builds resource allocation plot for given filename and jobs
 
     Parameters
     ----------
     file : str, required
         Builds plot for the specified file name. Usually provided as tsv.gz
-    cname: str, required
-        Specified job type
-    sname: str, required
-        Specified job sub type.
     col_name: str, required
         Specifies x axis for the graph
 
@@ -2392,12 +2389,12 @@ def resource_allocation_plot(df, cname, sname, col_name):
     ax = axs[0]
     # models for memory
     _resource_allocation_plot_helper(
-        df, ax, cname, sname, "MaxRSSRaw", MODELS_MEM, col_name)
+        df, ax, "MaxRSSRaw",  MODELS_MEM, col_name)
 
     ax = axs[1]
     # models for time
     _resource_allocation_plot_helper(
-        df, ax, cname, sname, "ElapsedRaw", MODELS_TIME, col_name)
+        df, ax, "ElapsedRaw",  MODELS_TIME, col_name)
 
     return fig, axs
 
@@ -2442,7 +2439,7 @@ def retrieve_resource_data(cname, sname, version, columns):
 
 
 def _resource_allocation_plot_helper(
-        df, ax, cname, sname, curr, models, col_name):
+        df, ax, curr, models, col_name):
     """Helper function for resource allocation plot. Builds plot for MaxRSSRaw
     and ElapsedRaw
 
@@ -2459,14 +2456,14 @@ def _resource_allocation_plot_helper(
     col_name: str, required
         Specifies x axis for the graph
     curr: str, required
-        Either MaxRSSRaw or ElapsedRaw
+        Either MaxRSSRaw or ElapsedRaw (y axis)
     models: list, required
         List of functions that will be used for visualization
 
     """
 
     x_data, y_data = df[col_name], df[curr]
-    ax.scatter(x_data, y_data, s=2, label="data")
+    # points are scattered further below, split into failures and successes
     d = dict()
     for index, row in df.iterrows():
         x_value = row[col_name]
@@ -2518,13 +2515,21 @@ def _resource_allocation_plot_helper(
         str(timedelta(seconds=round(cmin_value, 2))).rstrip('0').rstrip('.')
 
     x_plot = np.array(df[col_name])
-    failures_df = _resource_allocation_failures(
+    success_df, failures_df = _resource_allocation_success_failures(
         df, k, a, b, best_model, col_name, curr)
     failures = failures_df.shape[0]
-
     ax.scatter(failures_df[col_name], failures_df[curr], color='red', s=3,
                label="failures")
-
+    success_df = success_df.assign(
+        node_name=success_df['node_name'].fillna('unknown'))
+    slurm_hosts = list(success_df['node_name'].unique())
+    cmap = colormaps.get_cmap('Accent').resampled(max(len(slurm_hosts), 1))
+    # max(..., 1): with a single host `len - 1` is 0 and would divide by zero
+    denom = max(len(slurm_hosts) - 1, 1)
+    for i, host in enumerate(slurm_hosts):
+        host_df = success_df[success_df['node_name'] == host]
+        ax.scatter(host_df[col_name], host_df[curr], color=cmap(i / denom),
+                   s=3, label=host)
     ax.set_title(
                  f'k||a||b: {k}||{a}||{b}\n'
                  f'model: {get_model_name(best_model)}\n'
@@ -2590,8 +2595,10 @@ def _resource_allocation_calculate(
             options = minimize(_resource_allocation_custom_loss, init,
                                args=(x, y, model, middle))
             k, a, b = options.x
-            failures_df = _resource_allocation_failures(
-                df, k, a, b, model, col_name, type_)
+            # important: here we take the 2nd (last) value of tuple since
+            # the helper function returns success, then failures.
+            failures_df = _resource_allocation_success_failures(
+                df, k, a, b, model, col_name, type_)[-1]
             y_plot = model(x, k, a, b)
             if not any(y_plot):
                 continue
@@ -2676,9 +2683,9 @@ def _resource_allocation_custom_loss(params, x, y, model, p):
     return np.mean(weighted_residuals)
 
 
-def _resource_allocation_failures(df, k, a, b, model, col_name, type_):
+def _resource_allocation_success_failures(df, k, a, b, model, col_name, type_):
     """Helper function for resource allocation plot. Creates a dataframe with
-    failures.
+    successes and failures given current model.
 
     Parameters
     ----------
@@ -2699,14 +2706,19 @@ def _resource_allocation_failures(df, k, a, b, model, col_name, type_):
 
     Returns
     ----------
-    pandas.Dataframe
-        Dataframe containing failures for current type.
+    tuple with:
+        pandas.Dataframe
+            Dataframe containing successes for current type.
+        pandas.Dataframe
+            Dataframe containing failures for current type.
     """
 
     x_plot = np.array(df[col_name])
     df[f'c{type_}'] = model(x_plot, k, a, b)
+    success_df = df[df[type_] <= df[f'c{type_}']]
     failures_df = df[df[type_] > df[f'c{type_}']]
-    return failures_df
+    
+    return (success_df, failures_df)
 
 
 def MaxRSS_helper(x):
diff --git a/qiita_pet/templates/resources.html b/qiita_pet/templates/resources.html
@@ -114,19 +114,29 @@ <h3>Generated on: {{time}} </h3>
         toggleDataVisibility(true);
     {% end %}
 
-  const commandsConst = JSON.parse(`{% raw commands %}`);
-  const softwareSelect = document.getElementById('software');
-  const versionSelect = document.getElementById('version');
-  const commandSelect = document.getElementById('command');
-
-  // Populate software options
-  for (const software in commandsConst) {
-      const option = document.createElement('option');
-      option.value = software;
-      option.textContent = software;
-      softwareSelect.appendChild(option);
-  }
+    const commandsConst = JSON.parse(`{% raw commands %}`);
+    const softwareSelect = document.getElementById('software');
+    const versionSelect = document.getElementById('version');
+    const commandSelect = document.getElementById('command');
+
+    // Populate software options
+    for (const software in commandsConst) {
+        const option = document.createElement('option');
+        option.value = software;
+        option.textContent = software;
+        softwareSelect.appendChild(option);
+    }
 
+    // Auto-select the sole real option (non-empty value) of a <select>, if any
+    function autoSelectIfSingle(selectElem) {
+        const candidates = [...selectElem.options].filter(o => o.value !== "");
+        if (candidates.length !== 1) {
+            return;
+        }
+        selectElem.value = candidates[0].value;
+        // Fire a bubbling change event so the dependent select gets populated
+        selectElem.dispatchEvent(new Event('change', { bubbles: true }));
+    }
 
   function populateVersions(software) {
       versionSelect.innerHTML = '<option value="">Select Version</option>';
@@ -140,9 +150,10 @@ <h3>Generated on: {{time}} </h3>
               versionSelect.appendChild(option);
           }
       }
+      // Auto-select if only one version available
+      autoSelectIfSingle(versionSelect);
   }
 
-
   function populateCommands(software, version) {
       commandSelect.innerHTML = '<option value="">Select Command</option>';
 
@@ -154,6 +165,9 @@ <h3>Generated on: {{time}} </h3>
               commandSelect.appendChild(option);
           });
       }
+
+      // Auto-select if only one command available
+      autoSelectIfSingle(commandSelect);
   }
 
   function sendPostRequest(software, version, command) {
@@ -224,8 +238,7 @@ <h3>Generated on: {{time}} </h3>
             }
 
             bootstrapAlert("Data updated successfully", "success", 2200);
-        }
-        else if (response.status === "no_data") {
+        } else if (response.status === "no_data") {
             toggleDataVisibility(false);
             $('#default-message').html('<h3>No data available for the selected options.</h3>');
             bootstrapAlert("No data available", "info", 2200);
@@ -264,5 +277,9 @@ <h3>Generated on: {{time}} </h3>
           sendPostRequest(selectedSoftware, selectedVersion, selectedCommand);
       }
   });
+
+    // Attempt auto-select after initial population of software
+    autoSelectIfSingle(softwareSelect);
+
 </script>
 {% end %}