Skip to content

Commit 2cb3f37

Browse files
committed
Updates to @antgonza comments
1 parent 82c95e1 commit 2cb3f37

File tree

3 files changed

+62
-116
lines changed

3 files changed

+62
-116
lines changed

notebooks/resource-allocation/upload_df.py

Lines changed: 0 additions & 72 deletions
This file was deleted.

qiita_db/test/test_util.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1343,7 +1343,7 @@ def test_minimize_const(self):
13431343
self.df[self.col_name] = self.df.samples * self.df['columns']
13441344
fig, axs = plt.subplots(ncols=2, figsize=(10, 4), sharey=False)
13451345

1346-
mem_models, time_models = qdb.util._retrieve_equations()
1346+
mem_models, time_models = qdb.util.retrieve_equations()
13471347
bm_name, bm, options = qdb.util._resource_allocation_plot_helper(
13481348
self.df, axs[0], 'MaxRSSRaw', mem_models, self.col_name)
13491349
# check that the algorithm chooses correct model for MaxRSSRaw and
@@ -1357,7 +1357,7 @@ def test_minimize_const(self):
13571357
msg=f"""Best memory model
13581358
doesn't match
13591359
{bm_name} != 'mem_model4'""")
1360-
self.assertEqual(bm, mem_models['mem_model4'],
1360+
self.assertEqual(bm, mem_models['mem_model4']['equation'],
13611361
msg=f"""Best memory model
13621362
doesn't match
13631363
Coefficients:{k} {a} {b}
@@ -1377,7 +1377,7 @@ def test_minimize_const(self):
13771377
doesn't match
13781378
{bm_name} != 'time_model4'""")
13791379

1380-
self.assertEqual(bm, time_models[bm_name],
1380+
self.assertEqual(bm, time_models[bm_name]['equation'],
13811381
msg=f"""Best time model
13821382
doesn't match
13831383
Coefficients:{k} {a} {b}

qiita_db/util.py

Lines changed: 59 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -82,27 +82,6 @@
8282
from scipy.optimize import minimize
8383

8484

85-
def get_model_name(model):
86-
if model == 'mem_model1':
87-
return "k * log(x) + x * a + b"
88-
elif model == 'mem_model2':
89-
return "k * log(x) + b * log(x)^2 + a"
90-
elif model == 'mem_model3':
91-
return "k * log(x) + b * log(x)^2 + a * log(x)^3"
92-
elif model == 'mem_model4':
93-
return "k * log(x) + b * log(x)^2 + a * log(x)^2.5"
94-
elif model == 'time_model1':
95-
return "a + b + log(x) * k"
96-
elif model == 'time_model2':
97-
return "a + b * x + log(x) * k"
98-
elif model == 'time_model3':
99-
return "a + b * log(x)^2 + log(x) * k"
100-
elif model == 'time_model4':
101-
return "a * log(x)^3 + b * log(x)^2 + log(x) * k"
102-
else:
103-
return "Unknown model"
104-
105-
10685
def scrub_data(s):
10786
r"""Scrubs data fields of characters not allowed by PostgreSQL
10887
@@ -2369,7 +2348,7 @@ def resource_allocation_plot(df, col_name):
23692348
fig, axs = plt.subplots(ncols=2, figsize=(10, 4), sharey=False)
23702349

23712350
ax = axs[0]
2372-
mem_models, time_models = _retrieve_equations()
2351+
mem_models, time_models = retrieve_equations()
23732352

23742353
# models for memory
23752354
_resource_allocation_plot_helper(
@@ -2382,9 +2361,9 @@ def resource_allocation_plot(df, col_name):
23822361
return fig, axs
23832362

23842363

2385-
def _retrieve_equations():
2364+
def retrieve_equations():
23862365
'''
2387-
Helepr function for resource_allocation_plot.
2366+
Helper function for resource_allocation_plot.
23882367
Retrieves equations from db. Creates dictionary for memory and time models.
23892368
23902369
Returns
@@ -2397,16 +2376,40 @@ def _retrieve_equations():
23972376
'''
23982377
memory_models = {}
23992378
time_models = {}
2379+
res = []
24002380
with qdb.sql_connection.TRN:
24012381
sql = ''' SELECT * FROM qiita.allocation_equations; '''
24022382
qdb.sql_connection.TRN.add(sql)
24032383
res = qdb.sql_connection.TRN.execute_fetchindex()
2404-
for models in res:
2405-
if 'mem' in models[1]:
2406-
memory_models[models[1]] = lambda x, k, a, b: eval(models[2])
2407-
else:
2408-
time_models[models[1]] = lambda x, k, a, b: eval(models[2])
2409-
return (memory_models, time_models)
2384+
for models in res:
2385+
model_name = "Unknown model"
2386+
if models[1] == 'mem_model1':
2387+
model_name = "k * log(x) + x * a + b"
2388+
elif models[1] == 'mem_model2':
2389+
model_name = "k * log(x) + b * log(x)^2 + a"
2390+
elif models[1] == 'mem_model3':
2391+
model_name = "k * log(x) + b * log(x)^2 + a * log(x)^3"
2392+
elif models[1] == 'mem_model4':
2393+
model_name = "k * log(x) + b * log(x)^2 + a * log(x)^2.5"
2394+
elif models[1] == 'time_model1':
2395+
model_name = "a + b + log(x) * k"
2396+
elif models[1] == 'time_model2':
2397+
model_name = "a + b * x + log(x) * k"
2398+
elif models[1] == 'time_model3':
2399+
model_name = "a + b * log(x)^2 + log(x) * k"
2400+
elif models[1] == 'time_model4':
2401+
model_name = "a * log(x)^3 + b * log(x)^2 + log(x) * k"
2402+
if 'mem' in models[1]:
2403+
memory_models[models[1]] = {
2404+
"equation_name": model_name,
2405+
"equation": lambda x, k, a, b: eval(models[2])
2406+
}
2407+
else:
2408+
time_models[models[1]] = {
2409+
"equation_name": model_name,
2410+
"equation": lambda x, k, a, b: eval(models[2])
2411+
}
2412+
return (memory_models, time_models)
24102413

24112414

24122415
def retrieve_resource_data(cname, sname, version, columns):
@@ -2483,9 +2486,20 @@ def _resource_allocation_plot_helper(
24832486
Specifies x axis for the graph
24842487
curr: str, required
24852488
Either MaxRSSRaw or ElapsedRaw (y axis)
2486-
models: dictionary, required
2487-
Dictionary of functions that will be used for visualization
2489+
models: dictionary, required. Follows this structure
2490+
equation_name: string
2491+
Human readable representation of the equation
2492+
equation: Python lambda function
2493+
Lambda function representing equation to optimize
24882494
2495+
Returns
2496+
-------
2497+
best_model_name: string
2498+
the name of the best model from the table
2499+
best_model: function
2500+
best fitting function for the current dictionary models
2501+
options: object
2502+
object containing constants for the best model (e.g. k, a, b in kx+b*a)
24892503
"""
24902504

24912505
x_data, y_data = df[col_name], df[curr]
@@ -2560,7 +2574,7 @@ def _resource_allocation_plot_helper(
25602574
label=host)
25612575
ax.set_title(
25622576
f'k||a||b: {k}||{a}||{b}\n'
2563-
f'model: {get_model_name(best_model_name)}\n'
2577+
f'model: {models[best_model_name]["equation_name"]}\n'
25642578
f'real: {mini} || {maxi}\n'
25652579
f'calculated: {cmin} || {cmax}\n'
25662580
f'failures: {failures}')
@@ -2583,8 +2597,11 @@ def _resource_allocation_calculate(
25832597
current type (e.g. MaxRSSRaw)
25842598
col_name: str, required
25852599
Specifies x axis for the graph
2586-
models: dictionary, required
2587-
Dictionary of functions that will be used for visualization
2600+
models: dictionary, required. Follows this structure
2601+
equation_name: string
2602+
Human readable representation of the equation
2603+
equation: Python lambda function
2604+
Lambda function representing equation to optimize
25882605
depth: int, required
25892606
Maximum number of iterations in binary search
25902607
tolerance: int, required,
@@ -2607,6 +2624,7 @@ def _resource_allocation_calculate(
26072624
best_failures = np.inf
26082625
best_max = np.inf
26092626
for model_name, model in models.items():
2627+
model_equation = model['equation']
26102628
# start values for binary search, where sl is left, sr is right
26112629
# penalty weight must be positive & non-zero, hence, sl >= 1.
26122630
# the upper bound for error can be an arbitrary large number
@@ -2624,13 +2642,13 @@ def _resource_allocation_calculate(
26242642
while left < right and cnt < depth:
26252643
middle = (left + right) // 2
26262644
options = minimize(_resource_allocation_custom_loss, init,
2627-
args=(x, y, model, middle))
2645+
args=(x, y, model_equation, middle))
26282646
k, a, b = options.x
26292647
# important: here we take the 2nd (last) value of tuple since
26302648
# the helper function returns success, then failures.
26312649
failures_df = _resource_allocation_success_failures(
2632-
df, k, a, b, model, col_name, type_)[-1]
2633-
y_plot = model(x, k, a, b)
2650+
df, k, a, b, model_equation, col_name, type_)[-1]
2651+
y_plot = model_equation(x, k, a, b)
26342652
if not any(y_plot):
26352653
continue
26362654
cmax = max(y_plot)
@@ -2678,7 +2696,7 @@ def _resource_allocation_calculate(
26782696
best_failures = prev_failures
26792697
best_max = min_max
26802698
best_model_name = model_name
2681-
best_model = model
2699+
best_model = model_equation
26822700
best_result = res
26832701
return best_model_name, best_model, best_result
26842702

@@ -2695,8 +2713,8 @@ def _resource_allocation_custom_loss(params, x, y, model, p):
26952713
Represents x data for the function calculation
26962714
y: pandas.Series (pandas column), required
26972715
Represents y data for the function calculation
2698-
models: list, required
2699-
List of functions that will be used for visualization
2716+
model: Python function
2717+
Lambda function representing current equation
27002718
p: int, required
27012719
Penalty weight for custom loss function
27022720

0 commit comments

Comments
 (0)