Skip to content

Commit 43c3cf2

Browse files
authored
Merge pull request #244 from sunya-ch/ci
Update kepler-model-db link, serve model by spec, kepler-model-db integration CI, visualize power curve
2 parents 0f6f131 + bd812c2 commit 43c3cf2

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

42 files changed

+764
-199
lines changed

.github/workflows/integration-test.yml

+10-27
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,10 @@ on:
2222
description: 'Kepler image tag'
2323
required: true
2424
type: string
25+
additional_opts:
26+
description: 'additional deployment opts'
27+
required: true
28+
type: string
2529

2630
env:
2731
BASE_IMAGE: ${{ inputs.image_repo }}/kepler_model_server_base:${{ inputs.image_tag }}
@@ -65,45 +69,24 @@ jobs:
6569
curl -s "https://raw.githubusercontent.com/kubernetes-sigs/kustomize/master/hack/install_kustomize.sh" | bash
6670
chmod +x kustomize
6771
mv kustomize /usr/local/bin/
68-
- name: test deploying kepler with only estimator
69-
run: |
70-
make deploy
71-
make e2e-test
72-
make cleanup
73-
env:
74-
OPTS: "ESTIMATOR"
75-
- name: test deploying kepler with only server
76-
run: |
77-
make deploy
78-
make e2e-test
79-
make cleanup
80-
env:
81-
OPTS: "SERVER"
82-
- name: test deploying kepler with estimator and model server
83-
run: |
84-
make deploy
85-
make e2e-test
86-
make cleanup
87-
env:
88-
OPTS: "ESTIMATOR SERVER"
89-
- name: test deploying dummy kepler with only estimator
72+
- name: test deploying with only estimator
9073
run: |
9174
make deploy
9275
make e2e-test
9376
make cleanup
9477
env:
95-
OPTS: "ESTIMATOR TEST"
96-
- name: test deploying dummy kepler with only server
78+
OPTS: "ESTIMATOR${{ inputs.additional_opts }}"
79+
- name: test deploying with only server
9780
run: |
9881
make deploy
9982
make e2e-test
10083
make cleanup
10184
env:
102-
OPTS: "SERVER TEST"
103-
- name: test deploying dummy kepler with estimator and model server
85+
OPTS: "SERVER${{ inputs.additional_opts }}"
86+
- name: test deploying with estimator and model server
10487
run: |
10588
make deploy
10689
make e2e-test
10790
make cleanup
10891
env:
109-
OPTS: "ESTIMATOR SERVER TEST"
92+
OPTS: "ESTIMATOR SERVER${{ inputs.additional_opts }}"

.github/workflows/pr.yml

+27-2
Original file line numberDiff line numberDiff line change
@@ -165,7 +165,7 @@ jobs:
165165
image_tag: ${{ needs.check-branch.outputs.tag }}
166166
pipeline_name: std_v0.7
167167

168-
integration-test:
168+
integration-test-internal-only:
169169
needs: [check-secret, check-branch, check-change, base-image]
170170
if: always()
171171
uses: ./.github/workflows/integration-test.yml
@@ -174,4 +174,29 @@ jobs:
174174
docker_secret: ${{ needs.check-secret.outputs.docker-secret }}
175175
image_repo: ${{ vars.IMAGE_REPO || 'docker.io/library' }}
176176
image_tag: ${{ needs.check-branch.outputs.tag }}
177-
kepler_tag: release-0.7.7
177+
kepler_tag: release-0.7.7
178+
additional_opts: " TEST"
179+
180+
# Calls the reusable integration-test workflow with an empty additional_opts, so that workflow's
# OPTS values ("ESTIMATOR", "SERVER", "ESTIMATOR SERVER") are passed to `make deploy` without any suffix.
integration-test-with-exporter:
181+
needs: [check-secret, check-branch, check-change, base-image]
182+
if: always()
183+
uses: ./.github/workflows/integration-test.yml
184+
with:
185+
base_change: ${{ needs.check-change.outputs.base }}
186+
docker_secret: ${{ needs.check-secret.outputs.docker-secret }}
187+
image_repo: ${{ vars.IMAGE_REPO || 'docker.io/library' }}
188+
image_tag: ${{ needs.check-branch.outputs.tag }}
189+
kepler_tag: release-0.7.7
190+
additional_opts: ""
191+
192+
# Calls the reusable integration-test workflow with additional_opts " DB"; the leading space matters
# because the value is concatenated directly onto OPTS (e.g. "ESTIMATOR" + " DB" -> "ESTIMATOR DB").
integration-test-with-exporter-and-db:
193+
needs: [check-secret, check-branch, check-change, base-image]
194+
if: always()
195+
uses: ./.github/workflows/integration-test.yml
196+
with:
197+
base_change: ${{ needs.check-change.outputs.base }}
198+
docker_secret: ${{ needs.check-secret.outputs.docker-secret }}
199+
image_repo: ${{ vars.IMAGE_REPO || 'docker.io/library' }}
200+
image_tag: ${{ needs.check-branch.outputs.tag }}
201+
kepler_tag: release-0.7.7
202+
additional_opts: " DB"

.github/workflows/unit-test.yml

+1
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ jobs:
3232
run: make test-pipeline
3333
- name: Test model server
3434
run: make test-model-server
35+
timeout-minutes: 5
3536
- name: Test estimator
3637
run: make test-estimator
3738
timeout-minutes: 5

Makefile

+2-2
Original file line numberDiff line numberDiff line change
@@ -48,8 +48,8 @@ test-estimator: run-estimator run-collector-client clean-estimator
4848

4949
# test estimator --> model-server
5050
# Start the model-server container (test HTTP server first, model server 10s later) and block
# until the container log contains "Serving".
# The log is read through $(CTR_CMD) so the rule works with whichever container CLI started the
# container, and 2>&1 folds the log's stderr side into the pipe so grep sees the whole log.
run-model-server:
	$(CTR_CMD) run -d --platform linux/amd64 -e "MODEL_TOPURL=http://localhost:8110" -v ${MODEL_PATH}:/mnt/models -p 8100:8100 --name model-server $(TEST_IMAGE) /bin/bash -c "python3.8 tests/http_server.py & sleep 10 && python3.8 src/server/model_server.py"
	while ! $(CTR_CMD) logs model-server 2>&1 | grep -q Serving; do echo "waiting for model-server to serve"; sleep 5; done
5353

5454
run-estimator-client:
5555
$(CTR_CMD) exec model-server /bin/bash -c "python3.8 -u ./tests/estimator_model_request_test.py"

README.md

+2-1
Original file line numberDiff line numberDiff line change
@@ -75,8 +75,9 @@ Compatible version: python 3.8
7575
|Test case|Command|
7676
|---|---|
7777
|[Training pipeline](./tests/README.md#pipeline)|python -u ./tests/pipeline_test.py|
78-
|[Model server](./tests/README.md#estimator-model-request-to-model-server)|Terminal 1: python src/server/model_server.py <br>Terminal 2: python -u tests/estimator_model_request_test.py|
78+
|[Model server](./tests/README.md#estimator-model-request-to-model-server)|Terminal 1: export MODEL_PATH=$(pwd)/tests/models;python src/server/model_server.py <br>Terminal 2: python -u tests/estimator_model_request_test.py|
7979
|[Estimator](./tests/README.md#estimator-power-request-from-collector)|Terminal 1: python src/estimate/estimator.py<br>Terminal 2: python -u tests/estimator_power_request_test.py|
80+
|Estimator with Model Server|Terminal 1: export MODEL_PATH=$(pwd)/tests/models;python src/server/model_server.py <br>Terminal 2: export MODEL_SERVER_URL=http://localhost:8100;export MODEL_SERVER_ENABLE=true;python -u src/estimate/estimator.py<br>Terminal 3: python -u tests/estimator_power_request_test.py|
8081
|[Offline Trainer](./tests/README.md#offline-trainer)|Terminal 1: python src/train/offline_trainer.py<br>Terminal 2: python -u tests/offline_trainer_test.py|
8182

8283
For more test information, check [here](./tests/).

cmd/README.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,7 @@ Use kepler model server function as a standalone docker container.
8585
5.3. Plot prediction result on specific trainer model and feature group (`estimate`)
8686

8787
```bash
88-
docker run --rm -v "$(pwd)/data":/data quay.io/sustainable_computing_io/kepler_model_server:v0.7 plot --target-data estimate -i output_kepler_query --model-name GradientBoostingRegressorTrainer_1 --feature-group BPFOnly
88+
docker run --rm -v "$(pwd)/data":/data quay.io/sustainable_computing_io/kepler_model_server:v0.7 plot --target-data estimate -i output_kepler_query --model-name GradientBoostingRegressorTrainer_0 --feature-group BPFOnly
8989
```
9090

9191
5.4. Plot prediction error comparison among feature group and trainer model (`error`)

cmd/cmd_plot.py

+111
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,11 @@
99
from util.prom_types import TIMESTAMP_COL
1010
from util import PowerSourceMap
1111

12+
from util.train_types import FeatureGroup, ModelOutputType, weight_support_trainers
13+
from util.loader import load_metadata, load_scaler, get_model_group_path
14+
from train.profiler.node_type_index import NodeTypeIndexCollection
15+
from estimate import load_model
16+
markers = ['o', 's', '^', 'v', '<', '>', 'p', 'P', '*', 'x', '+', '|', '_']
1217

1318
def ts_plot(data, cols, title, output_folder, name, labels=None, subtitles=None, ylabel=None):
1419
plot_height = 3
@@ -147,4 +152,110 @@ def metadata_plot(args, energy_source, metadata_df, output_folder, name):
147152
plt.legend(frameon=False)
148153
filename = os.path.join(output_folder, name + ".png")
149154
fig.savefig(filename)
155+
plt.close()
156+
157+
def power_curve_plot(args, data_path, energy_source, output_folder, name):
    """Plot predicted-power curves for the node types of one pipeline.

    Reads ``args.pipeline_name`` and ``args.output_type``, collects the node
    types indexed under ``<data_path>/<pipeline_name>``, loads one model per
    node type through ``_load_all_models`` and passes them to ``_plot_models``.
    Nothing is plotted when no model could be loaded.
    """
    pipeline_name = args.pipeline_name
    # the node type index is read from the pipeline folder directly under data_path
    collection = NodeTypeIndexCollection(os.path.join(data_path, pipeline_name))
    node_types = sorted(collection.node_type_index.keys())
    models, _, cpu_ms_max = _load_all_models(
        model_toppath=data_path,
        output_type=ModelOutputType[args.output_type],
        name=pipeline_name,
        node_types=node_types,
        energy_source=energy_source,
    )
    if not models:
        return
    _plot_models(models, cpu_ms_max, energy_source, output_folder, name)
167+
168+
def _get_model(model_toppath, trainer, model_node_type, output_type, name, energy_source):
    """Load the BPFOnly model ``<trainer>_<model_node_type>`` together with its metadata.

    Returns:
        ``(model, metadata, cpu_ms_max)`` where ``cpu_ms_max`` is the first
        entry of the model scaler's ``max_abs_``.  When no metadata can be
        loaded, the scaler is not read and the result is ``(model, None, None)``.
    """
    model_dirname = "{}_{}".format(trainer, model_node_type)
    model_path = os.path.join(
        get_model_group_path(model_toppath, output_type, FeatureGroup.BPFOnly, energy_source, name),
        model_dirname,
    )
    model = load_model(model_path)
    metadata = load_metadata(model_path)
    if metadata is not None:
        # NOTE(review): index 0 is presumably the CPU-time feature of the scaler - confirm against the trainer
        return model, metadata, load_scaler(model_path).max_abs_[0]
    return model, None, None
180+
181+
def _load_all_models(model_toppath, output_type, name, node_types, energy_source):
    """Pick, for every node type, the weight-supporting trainer's model with the lowest MAE.

    Args:
        model_toppath: top folder passed on to ``_get_model``.
        output_type: model output type passed on to ``_get_model``.
        name: pipeline name passed on to ``_get_model``.
        node_types: iterable of node types to look up.
        energy_source: energy source passed on to ``_get_model``.

    Returns:
        Three dicts keyed by node type: the selected model, its metadata and
        its ``cpu_ms_max``.  Node types for which no trainer has metadata are
        absent from all three.
    """
    models_dict = dict()
    metadata_dict = dict()
    cpu_ms_max_dict = dict()
    for model_node_type in node_types:
        min_mae = None
        for trainer in weight_support_trainers:
            model, metadata, cpu_ms_max = _get_model(model_toppath, trainer, model_node_type, output_type=output_type, name=name, energy_source=energy_source)
            if metadata is None:
                continue
            if min_mae is None or metadata["mae"] < min_mae:
                min_mae = metadata["mae"]
                models_dict[model_node_type] = model
                metadata_dict[model_node_type] = metadata
                # keep the range of the model that is actually selected; recording it for every
                # trainer would leave the value of whichever trainer happened to be visited last
                cpu_ms_max_dict[model_node_type] = cpu_ms_max
    return models_dict, metadata_dict, cpu_ms_max_dict
196+
197+
def _plot_models(models, cpu_ms_max, energy_source, output_folder, name, max_plot=15, cpu_time_bin_num=10, sample_num=20):
    """Draw predicted power against the main BPF feature for every node-type model.

    Node types are grouped by histogram bin of their ``cpu_ms_max`` so curves
    with a similar x-range share an axes; each axes holds at most ``max_plot``
    curves.  The figure is saved to ``<output_folder>/<name>.png``.

    Args:
        models: dict of node type -> model exposing ``estimator.features`` and ``append_prediction``.
        cpu_ms_max: dict of node type -> upper bound of the sampled range of the first feature.
        energy_source: ``"acpi"`` or ``"intel_rapl"``; selects the predicted column to plot
            (any other key raises ``KeyError``).
        output_folder: folder the figure is written to.
        name: file name of the figure without the ``.png`` extension.
        max_plot: maximum number of curves drawn on one axes.
        cpu_time_bin_num: one more histogram bin is used for every ``cpu_time_bin_num`` node types.
        sample_num: number of sample points per curve.
    """
    from util.train_types import BPF_FEATURES
    import numpy as np
    import pandas as pd
    import seaborn as sns
    sns.set_palette("Paired")

    import matplotlib.pyplot as plt

    if not models:
        # nothing to draw; a figure with zero rows cannot be created
        return

    main_feature_col = BPF_FEATURES[0]
    predicted_col = {
        "acpi": "default_platform_power",
        "intel_rapl": "default_package_power"
    }

    num_bins = len(cpu_ms_max)//cpu_time_bin_num + 1
    nobin = num_bins == 1
    values = np.array(list(cpu_ms_max.values()))
    _, bins = np.histogram(values, bins=num_bins)
    # np.digitize can return 0..len(bins) (the maximum lands on len(bins)), hence the extra slot
    bin_size = len(bins) + 1 if not nobin else 1
    data_with_prediction_list = [[] for _ in range(bin_size)]

    num_cols = min(3, bin_size)

    for node_type, model in models.items():
        # generate data from scaler: sweep the first feature, keep the second at zero
        xs = np.column_stack((np.linspace(0, cpu_ms_max[node_type], sample_num), np.zeros(sample_num)))
        data = pd.DataFrame(xs, columns=model.estimator.features)
        _, data_with_prediction = model.append_prediction(data)
        if nobin:
            bin_index = 0
        else:
            bin_index = np.digitize([cpu_ms_max[node_type]], bins)[0]
        data_with_prediction_list[bin_index].append((node_type, data_with_prediction))

    total_graphs = 0
    for data_with_predictions in data_with_prediction_list:
        total_graphs += int(np.ceil(len(data_with_predictions) / max_plot))
    num_rows = int(np.ceil(total_graphs/num_cols))

    # squeeze=False keeps axes two-dimensional even for a single row or column,
    # so the [row][col] lookup below is valid for every grid shape
    fig, axes = plt.subplots(num_rows, num_cols, figsize=(int(6*num_cols), int(5*num_rows)), squeeze=False)
    axes_index = 0
    for data_with_predictions in data_with_prediction_list:
        # one axes per chunk of max_plot curves; this matches how total_graphs was counted,
        # so axes_index can never run past the grid
        for start in range(0, len(data_with_predictions), max_plot):
            ax = axes[axes_index//num_cols][axes_index%num_cols]
            chunk = data_with_predictions[start:start + max_plot]
            for position, (node_type, data_with_prediction) in enumerate(chunk):
                # markers are cycled independently of the curves-per-axes limit
                marker = markers[position % len(markers)]
                sns.lineplot(data=data_with_prediction, x=main_feature_col, y=predicted_col[energy_source], label="type={}".format(node_type), marker=marker, ax=ax)
            ax.set_ylabel("Predicted power (W)")
            axes_index += 1
    filename = os.path.join(output_folder, name + ".png")
    plt.tight_layout()
    fig.savefig(filename)
    plt.close(fig)

cmd/main.py

+26-14
Original file line numberDiff line numberDiff line change
@@ -18,16 +18,16 @@
1818
from util.prom_types import metric_prefix as KEPLER_METRIC_PREFIX, prom_responses_to_results, TIMESTAMP_COL, feature_to_query, update_thirdparty_metrics, node_info_column
1919
from util.extract_types import get_expected_power_columns
2020
from util.train_types import ModelOutputType, FeatureGroups, is_single_source_feature_group, all_feature_groups, default_trainers
21-
from util.loader import load_json, DEFAULT_PIPELINE, load_pipeline_metadata, get_pipeline_path, get_model_group_path, list_pipelines, list_model_names, load_metadata, load_csv, get_preprocess_folder, get_general_filename, load_machine_spec
22-
from util.saver import save_json, save_csv, save_train_args, _pipeline_model_metadata_filename
21+
from util.loader import default_train_output_pipeline, load_json, load_pipeline_metadata, get_pipeline_path, get_model_group_path, list_pipelines, list_model_names, load_metadata, load_csv, get_preprocess_folder, get_general_filename, load_machine_spec
22+
from util.saver import save_json, save_csv, save_train_args, _pipeline_model_metadata_filename, _power_curve_filename
2323
from util.config import ERROR_KEY, model_toppath
2424
from util import get_valid_feature_group_from_queries, PowerSourceMap
2525
from train.prom.prom_query import _range_queries
2626
from train.exporter import exporter
2727
from train import load_class
2828
from train.profiler.node_type_index import NodeTypeIndexCollection, NodeTypeSpec, generate_spec
2929

30-
from cmd_plot import ts_plot, feature_power_plot, summary_plot, metadata_plot
30+
from cmd_plot import ts_plot, feature_power_plot, summary_plot, metadata_plot, power_curve_plot
3131
from cmd_util import extract_time, save_query_results, get_validate_df, summary_validation, get_extractor, check_ot_fg, get_pipeline, assert_train, get_isolator, UTC_OFFSET_TIMEDELTA
3232

3333
import threading
@@ -215,7 +215,7 @@ def isolate(args):
215215
extracted_data, power_labels = extract(args)
216216
if extracted_data is None or power_labels is None:
217217
return None
218-
pipeline_name = DEFAULT_PIPELINE if not args.pipeline_name else args.pipeline_name
218+
pipeline_name = default_train_output_pipeline if not args.pipeline_name else args.pipeline_name
219219
isolator = get_isolator(data_path, args.isolator, args.profile, pipeline_name, args.target_hints, args.bg_hints, args.abs_pipeline_name)
220220
isolated_data = isolator.isolate(extracted_data, label_cols=power_labels, energy_source=args.energy_source)
221221
if args.output:
@@ -247,7 +247,7 @@ def isolate_from_data(args):
247247
energy_components = PowerSourceMap[args.energy_source]
248248
extracted_data = load_csv(data_path, "extracted_" + args.input)
249249
power_columns = get_expected_power_columns(energy_components=energy_components)
250-
pipeline_name = DEFAULT_PIPELINE if not args.pipeline_name else args.pipeline_name
250+
pipeline_name = default_train_output_pipeline if not args.pipeline_name else args.pipeline_name
251251
isolator = get_isolator(data_path, args.isolator, args.profile, pipeline_name, args.target_hints, args.bg_hints, args.abs_pipeline_name)
252252
isolated_data = isolator.isolate(extracted_data, label_cols=power_columns, energy_source=args.energy_source)
253253
if args.output:
@@ -365,7 +365,7 @@ def train(args):
365365
elif PROM_THIRDPARTY_METRICS != [""]:
366366
update_thirdparty_metrics(PROM_THIRDPARTY_METRICS)
367367

368-
pipeline_name = DEFAULT_PIPELINE
368+
pipeline_name = default_train_output_pipeline
369369
if args.pipeline_name:
370370
pipeline_name = args.pipeline_name
371371

@@ -599,14 +599,16 @@ def estimate(args):
599599
- `estimate` passes all arguments to `estimate` function, and plots the predicted time series and correlation between usage and power metrics
600600
- `error` passes all arguments to `estimate` function, and plots the summary of prediction error
601601
- `metadata` plot pipeline metadata
602+
- `power_curve` plot power curve
603+
- --input : specify related path for pipeline metadata
602604
- --energy-source : specify target energy sources (use comma(,) as delimiter)
603605
- --extractor : specify extractor to get preprocessed data of AbsPower model linked to the input data
604606
- --isolator : specify isolator to get preprocessed data of DynPower model linked to the input data
605607
- --pipeline_name : specify pipeline name
606608
"""
607609

608610
def plot(args):
609-
pipeline_name = DEFAULT_PIPELINE if not args.pipeline_name else args.pipeline_name
611+
pipeline_name = default_train_output_pipeline if not args.pipeline_name else args.pipeline_name
610612
pipeline_path = get_pipeline_path(data_path, pipeline_name=pipeline_name)
611613
if not args.target_data:
612614
print("must give target data via --target-data to plot.")
@@ -691,9 +693,13 @@ def plot(args):
691693
elif args.target_data == "metadata":
692694
for energy_source in energy_sources:
693695
data_filename = _pipeline_model_metadata_filename(energy_source, ot.name)
694-
pipeline_path = get_pipeline_path(data_path, pipeline_name=pipeline_name)
695-
model_metadata_df = load_pipeline_metadata(pipeline_path, energy_source, ot.name)
696+
model_metadata_df = load_pipeline_metadata(args.input, energy_source, ot.name)
696697
metadata_plot(args, energy_source, model_metadata_df, output_folder, data_filename)
698+
elif args.target_data == "power_curve":
699+
for energy_source in energy_sources:
700+
data_filename = _power_curve_filename(energy_source, ot.name)
701+
model_metadata_df = load_pipeline_metadata(args.input, energy_source, ot.name)
702+
power_curve_plot(args, data_path, energy_source, output_folder, data_filename)
697703

698704
"""
699705
export
@@ -709,6 +715,7 @@ def plot(args):
709715
- custom benchmark in json with `startTimeUTC` and `endTimeUTC` data
710716
- --collect-date : specify collection time manually in UTC
711717
- --input : specify kepler query response file (output of `query` function) - optional
718+
- --zip : specify whether to zip pipeline
712719
"""
713720

714721
def export(args):
@@ -742,14 +749,18 @@ def export(args):
742749
pipeline_path = get_pipeline_path(data_path, pipeline_name=pipeline_name)
743750

744751
local_export_path = exporter.export(data_path, pipeline_path, output_path, publisher=args.publisher, collect_date=collect_date, inputs=inputs)
745-
args.target_data = "metadata"
746752

753+
args.input = local_export_path
747754
args.output = local_export_path
748-
args.output_type = "AbsPower"
749755
args.energy_source = ",".join(PowerSourceMap.keys())
750-
plot(args)
751-
args.output_type = "DynPower"
752-
plot(args)
756+
for target_data in ["metadata", "power_curve"]:
757+
for ot in ModelOutputType:
758+
args.target_data = target_data
759+
args.output_type = ot.name
760+
plot(args)
761+
if args.zip:
762+
import shutil
763+
shutil.make_archive(local_export_path, 'zip', local_export_path)
753764

754765
"""
755766
plot_scenario
@@ -886,6 +897,7 @@ def plot_scenario(args):
886897
parser.add_argument("--publisher", type=str, help="Specify github account of model publisher")
887898
parser.add_argument("--include-raw", type=bool, help="Include raw query data")
888899
parser.add_argument("--collect-date", type=str, help="Specify collect date directly")
900+
# argparse's type=bool calls bool() on the raw text, so every non-empty value (even "False") became True;
# parse the text explicitly so that "--zip false" really disables zipping.
parser.add_argument("--zip", type=lambda value: str(value).strip().lower() in ("1", "true", "yes", "y"), help="Specify whether to zip pipeline", default=False)
889901

890902
parser.add_argument("--id", type=str, help="specify machine id")
891903

0 commit comments

Comments
 (0)