7474from email .mime .text import MIMEText
7575
7676import matplotlib .pyplot as plt
77+ from matplotlib import colormaps
7778import numpy as np
7879import pandas as pd
7980from io import StringIO
@@ -2363,17 +2364,13 @@ def send_email(to, subject, body):
23632364 smtp .close ()
23642365
23652366
2366- def resource_allocation_plot (df , cname , sname , col_name ):
2367+ def resource_allocation_plot (df , col_name ):
23672368 """Builds resource allocation plots (memory and time) for the given jobs dataframe
23682369
23692370 Parameters
23702371 ----------
23712372 df : pandas.DataFrame, required
23722373 Job data to plot; read through the col_name, MaxRSSRaw and ElapsedRaw columns
2373- cname: str, required
2374- Specified job type
2375- sname: str, required
2376- Specified job sub type.
23772374 col_name: str, required
23782375 Specifies x axis for the graph
23792376
@@ -2392,12 +2389,12 @@ def resource_allocation_plot(df, cname, sname, col_name):
23922389 ax = axs [0 ]
23932390 # models for memory
23942391 _resource_allocation_plot_helper (
2395- df , ax , cname , sname , "MaxRSSRaw" , MODELS_MEM , col_name )
2392+ df , ax , "MaxRSSRaw" , MODELS_MEM , col_name )
23962393
23972394 ax = axs [1 ]
23982395 # models for time
23992396 _resource_allocation_plot_helper (
2400- df , ax , cname , sname , "ElapsedRaw" , MODELS_TIME , col_name )
2397+ df , ax , "ElapsedRaw" , MODELS_TIME , col_name )
24012398
24022399 return fig , axs
24032400
@@ -2442,7 +2439,7 @@ def retrieve_resource_data(cname, sname, version, columns):
24422439
24432440
24442441def _resource_allocation_plot_helper (
2445- df , ax , cname , sname , curr , models , col_name ):
2442+ df , ax , curr , models , col_name ):
24462443 """Helper function for resource allocation plot. Builds plot for MaxRSSRaw
24472444 and ElapsedRaw
24482445
@@ -2459,14 +2456,14 @@ def _resource_allocation_plot_helper(
24592456 col_name: str, required
24602457 Specifies x axis for the graph
24612458 curr: str, required
2462- Either MaxRSSRaw or ElapsedRaw
2459+ Either MaxRSSRaw or ElapsedRaw (y axis)
24632460 models: list, required
24642461 List of functions that will be used for visualization
24652462
24662463 """
24672464
24682465 x_data , y_data = df [col_name ], df [curr ]
2469- ax .scatter (x_data , y_data , s = 2 , label = "data" )
2466+ # ax.scatter(x_data, y_data, s=2, label="data")
24702467 d = dict ()
24712468 for index , row in df .iterrows ():
24722469 x_value = row [col_name ]
@@ -2518,13 +2515,21 @@ def _resource_allocation_plot_helper(
25182515 str (timedelta (seconds = round (cmin_value , 2 ))).rstrip ('0' ).rstrip ('.' )
25192516
25202517 x_plot = np .array (df [col_name ])
2521- failures_df = _resource_allocation_failures (
2518+ success_df , failures_df = _resource_allocation_success_failures (
25222519 df , k , a , b , best_model , col_name , curr )
25232520 failures = failures_df .shape [0 ]
2524-
25252521 ax .scatter (failures_df [col_name ], failures_df [curr ], color = 'red' , s = 3 ,
25262522 label = "failures" )
2527-
2523+ success_df ['node_name' ] = success_df ['node_name' ].fillna ('unknown' )
2524+ slurm_hosts = set (success_df ['node_name' ].tolist ())
2525+ cmap = colormaps .get_cmap ('Accent' ).resampled (len (slurm_hosts ))
2526+ colors = [cmap (
2527+ i / (len (slurm_hosts ) - 1 )) for i in range (len (slurm_hosts ))]
2528+
2529+ for i , host in enumerate (slurm_hosts ):
2530+ host_df = success_df [success_df ['node_name' ] == host ]
2531+ ax .scatter (host_df [col_name ], host_df [curr ], color = colors [i ], s = 3 ,
2532+ label = host )
25282533 ax .set_title (
25292534 f'k||a||b: { k } ||{ a } ||{ b } \n '
25302535 f'model: { get_model_name (best_model )} \n '
@@ -2590,8 +2595,10 @@ def _resource_allocation_calculate(
25902595 options = minimize (_resource_allocation_custom_loss , init ,
25912596 args = (x , y , model , middle ))
25922597 k , a , b = options .x
2593- failures_df = _resource_allocation_failures (
2594- df , k , a , b , model , col_name , type_ )
2598+ # important: here we take the 2nd (last) value of tuple since
2599+ # the helper function returns success, then failures.
2600+ failures_df = _resource_allocation_success_failures (
2601+ df , k , a , b , model , col_name , type_ )[- 1 ]
25952602 y_plot = model (x , k , a , b )
25962603 if not any (y_plot ):
25972604 continue
@@ -2676,9 +2683,9 @@ def _resource_allocation_custom_loss(params, x, y, model, p):
26762683 return np .mean (weighted_residuals )
26772684
26782685
2679- def _resource_allocation_failures (df , k , a , b , model , col_name , type_ ):
2686+ def _resource_allocation_success_failures (df , k , a , b , model , col_name , type_ ):
26802687 """Helper function for resource allocation plot. Creates a dataframe with
2681- failures.
2688+ successes and failures given current model .
26822689
26832690 Parameters
26842691 ----------
@@ -2699,14 +2706,19 @@ def _resource_allocation_failures(df, k, a, b, model, col_name, type_):
26992706
27002707 Returns
27012708 ----------
2702- pandas.Dataframe
2703- Dataframe containing failures for current type.
2709+ tuple with:
2710+ pandas.Dataframe
2711+ Dataframe containing successes for current type.
2712+ pandas.Dataframe
2713+ Dataframe containing failures for current type.
27042714 """
27052715
27062716 x_plot = np .array (df [col_name ])
27072717 df [f'c{ type_ } ' ] = model (x_plot , k , a , b )
2718+ success_df = df [df [type_ ] <= df [f'c{ type_ } ' ]]
27082719 failures_df = df [df [type_ ] > df [f'c{ type_ } ' ]]
2709- return failures_df
2720+
2721+ return (success_df , failures_df )
27102722
27112723
27122724def MaxRSS_helper (x ):
0 commit comments