From 27f5b7dcb05aefdab9b762175d538931face0aba Mon Sep 17 00:00:00 2001 From: Radu Nicolae Date: Fri, 25 Oct 2024 08:21:49 +0200 Subject: [PATCH] M3SA - Multi-Meta-Model Simulation Analyzer (#251) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * (feat) demo files are now ignored * integrating m3sa changes with opendc * gitignore ignores demo * m3sa linked, tested, works 🎉🎆 * linting & checks fully pass * m3sa documentation (re...)added * package.json added, a potentail solution for Build Docker Images workflow * (fix) opendc-m3sa renamed to opendc-experiments-m3sa * (feat) Model is now a dataclass * (fix) package and package-lock reverted as before the PR, now they mirror the opendc master branch * (fix) Experiments renamed to experiment * branch updated with changes from master branch * trying to fix the build docker image failed workflow * trying to fix the build docker image failed workflow * All simulation are now run with a single CPU and single MemoryUnit. multi CPUs are combined into one. This is for performance and explainability. (#255) (#37) Co-authored-by: Dante Niewenhuis * All simulation are now run with a single CPU and single MemoryUnit. multi CPUs are combined into one. This is for performance and explainability. (#255) (#38) Co-authored-by: Dante Niewenhuis * All simulation are now run with a single CPU and single MemoryUnit. multi CPUs are combined into one. This is for performance and explainability. 
(#255) (#39) Co-authored-by: Dante Niewenhuis * [TEMP](feat) m3saCli decoupled from experimentCli * spotless and minor refactoring * (feat)[TEMP] decoupling m3sa from experiment * spotless applied * documentation resolved * requirements.txt added * path to M3SA is now provided as a parameter to M3SACLI * spotless applied * (fix) python environment variables solved, output analysis folder solved * documentation changed and matching the master branch doc * package-lock reverted * package-lock reverted --------- Co-authored-by: Dante Niewenhuis --- .gitignore | 3 + .../opendc-experiments-base/build.gradle.kts | 1 - .../base/scenario/ExperimentReader.kt | 1 - .../base/scenario/specs/ExperimentSpec.kt | 1 + .../opendc-experiments-m3sa/build.gradle.kts | 59 +++ .../src/main/kotlin/M3saAnalyzer.kt | 55 ++ .../opendc/experiments/m3sa/runner/M3SACli.kt | 85 +++ .../experiments/m3sa/runner/M3SARunner.kt | 58 ++ .../m3sa/scenario/M3SAFactories.kt | 38 ++ .../src/main/python/accuracy_evaluator.py | 114 ++++ .../src/main/python/input_parser.py | 135 +++++ .../src/main/python/main.py | 20 + .../src/main/python/models/MetaModel.py | 214 ++++++++ .../src/main/python/models/Model.py | 70 +++ .../src/main/python/models/MultiModel.py | 501 ++++++++++++++++++ .../src/main/python/requirements.txt | 4 + .../src/main/python/simulator_specifics.py | 14 + .../src/main/python/utils.py | 25 + settings.gradle.kts | 2 + site/docs/documentation/Input/M3SA.md | 92 ++++ site/docs/documentation/Input/M3SASchema.md | 115 ++++ .../documentation/Input/ScenarioSchema.md | 2 +- .../tutorials/M3SA-integration-tutorial.mdx | 188 +++++++ .../tutorials/cloud-capacity-planning.mdx | 1 + 24 files changed, 1795 insertions(+), 3 deletions(-) create mode 100644 opendc-experiments/opendc-experiments-m3sa/build.gradle.kts create mode 100644 opendc-experiments/opendc-experiments-m3sa/src/main/kotlin/M3saAnalyzer.kt create mode 100644 
opendc-experiments/opendc-experiments-m3sa/src/main/kotlin/org/opendc/experiments/m3sa/runner/M3SACli.kt create mode 100644 opendc-experiments/opendc-experiments-m3sa/src/main/kotlin/org/opendc/experiments/m3sa/runner/M3SARunner.kt create mode 100644 opendc-experiments/opendc-experiments-m3sa/src/main/kotlin/org/opendc/experiments/m3sa/scenario/M3SAFactories.kt create mode 100644 opendc-experiments/opendc-experiments-m3sa/src/main/python/accuracy_evaluator.py create mode 100644 opendc-experiments/opendc-experiments-m3sa/src/main/python/input_parser.py create mode 100644 opendc-experiments/opendc-experiments-m3sa/src/main/python/main.py create mode 100644 opendc-experiments/opendc-experiments-m3sa/src/main/python/models/MetaModel.py create mode 100644 opendc-experiments/opendc-experiments-m3sa/src/main/python/models/Model.py create mode 100644 opendc-experiments/opendc-experiments-m3sa/src/main/python/models/MultiModel.py create mode 100644 opendc-experiments/opendc-experiments-m3sa/src/main/python/requirements.txt create mode 100644 opendc-experiments/opendc-experiments-m3sa/src/main/python/simulator_specifics.py create mode 100644 opendc-experiments/opendc-experiments-m3sa/src/main/python/utils.py create mode 100644 site/docs/documentation/Input/M3SA.md create mode 100644 site/docs/documentation/Input/M3SASchema.md create mode 100644 site/docs/tutorials/M3SA-integration-tutorial.mdx diff --git a/.gitignore b/.gitignore index 1d06ddc00..127f7090a 100644 --- a/.gitignore +++ b/.gitignore @@ -115,3 +115,6 @@ gradle-app.setting # Cache of project .gradletasknamecache + +# Demo files +demo/* diff --git a/opendc-experiments/opendc-experiments-base/build.gradle.kts b/opendc-experiments/opendc-experiments-base/build.gradle.kts index a7bf69e06..98ec67232 100644 --- a/opendc-experiments/opendc-experiments-base/build.gradle.kts +++ b/opendc-experiments/opendc-experiments-base/build.gradle.kts @@ -37,7 +37,6 @@ dependencies { api(projects.opendcCompute.opendcComputeSimulator) 
implementation(libs.clikt) - implementation("org.jetbrains.kotlinx:kotlinx-serialization-json:1.6.0") implementation(libs.progressbar) implementation(project(mapOf("path" to ":opendc-simulator:opendc-simulator-core"))) diff --git a/opendc-experiments/opendc-experiments-base/src/main/kotlin/org/opendc/experiments/base/scenario/ExperimentReader.kt b/opendc-experiments/opendc-experiments-base/src/main/kotlin/org/opendc/experiments/base/scenario/ExperimentReader.kt index 00b57602c..160bd7837 100644 --- a/opendc-experiments/opendc-experiments-base/src/main/kotlin/org/opendc/experiments/base/scenario/ExperimentReader.kt +++ b/opendc-experiments/opendc-experiments-base/src/main/kotlin/org/opendc/experiments/base/scenario/ExperimentReader.kt @@ -33,7 +33,6 @@ import java.nio.file.Path import kotlin.io.path.inputStream public class ExperimentReader { -// private val jsonReader = Json { serializersModule = failureModule } private val jsonReader = Json public fun read(file: File): ExperimentSpec = read(file.inputStream()) diff --git a/opendc-experiments/opendc-experiments-base/src/main/kotlin/org/opendc/experiments/base/scenario/specs/ExperimentSpec.kt b/opendc-experiments/opendc-experiments-base/src/main/kotlin/org/opendc/experiments/base/scenario/specs/ExperimentSpec.kt index 67d45e89c..60fcf51ae 100644 --- a/opendc-experiments/opendc-experiments-base/src/main/kotlin/org/opendc/experiments/base/scenario/specs/ExperimentSpec.kt +++ b/opendc-experiments/opendc-experiments-base/src/main/kotlin/org/opendc/experiments/base/scenario/specs/ExperimentSpec.kt @@ -42,6 +42,7 @@ import java.util.UUID * @property computeExportConfig configures which parquet columns are to * be included in the output files. 
*/ + @Serializable public data class ExperimentSpec( var id: Int = -1, diff --git a/opendc-experiments/opendc-experiments-m3sa/build.gradle.kts b/opendc-experiments/opendc-experiments-m3sa/build.gradle.kts new file mode 100644 index 000000000..6530390b5 --- /dev/null +++ b/opendc-experiments/opendc-experiments-m3sa/build.gradle.kts @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2020 AtLarge Research + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +description = "Multi-Meta-Model Simulation Analysis (M3SA) used across OpenDC modules" + +// Build configuration +plugins { + `kotlin-library-conventions` +} + +dependencies { + api(libs.kotlinx.coroutines) + + testImplementation(projects.opendcSimulator.opendcSimulatorCore) + api(projects.opendcCompute.opendcComputeService) + api(projects.opendcCompute.opendcComputeSimulator) + + implementation(libs.clikt) + + implementation(project(":opendc-experiments:opendc-experiments-base")) + implementation("org.jetbrains.kotlinx:kotlinx-serialization-json:1.6.0") + implementation(libs.progressbar) + implementation(project(mapOf("path" to ":opendc-simulator:opendc-simulator-core"))) + + implementation(project(mapOf("path" to ":opendc-compute:opendc-compute-workload"))) + implementation(project(mapOf("path" to ":opendc-compute:opendc-compute-telemetry"))) + implementation(project(mapOf("path" to ":opendc-compute:opendc-compute-topology"))) + implementation(project(mapOf("path" to ":opendc-compute:opendc-compute-carbon"))) + implementation(project(mapOf("path" to ":opendc-compute:opendc-compute-failure"))) + + runtimeOnly(libs.log4j.core) + runtimeOnly(libs.log4j.slf4j) +} + +sourceSets { + main { + kotlin.srcDirs("src/main/kotlin", "src/main/python") + resources.srcDir("src/main/resources") + } +} diff --git a/opendc-experiments/opendc-experiments-m3sa/src/main/kotlin/M3saAnalyzer.kt b/opendc-experiments/opendc-experiments-m3sa/src/main/kotlin/M3saAnalyzer.kt new file mode 100644 index 000000000..6b8f8422b --- /dev/null +++ b/opendc-experiments/opendc-experiments-m3sa/src/main/kotlin/M3saAnalyzer.kt @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2024 AtLarge Research + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, 
and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +import kotlin.io.path.Path + +/** + * This constant variable should be changed depending on the root folder that is being run. + * PATH_TO_PYTHON_MAIN should point to the main python file, ran when the analysis starts. 
+ */ + +public val ANALYSIS_SCRIPTS_DIRECTORY: String = "./opendc-experiments/opendc-experiments-m3sa/src/main/python" +public val ABSOLUTE_SCRIPT_PATH: String = + Path("$ANALYSIS_SCRIPTS_DIRECTORY/main.py").toAbsolutePath().normalize().toString() +public val SCRIPT_LANGUAGE: String = Path("$ANALYSIS_SCRIPTS_DIRECTORY/venv/bin/python3").toAbsolutePath().normalize().toString() + +public fun m3saAnalyze( + outputFolderPath: String, + m3saSetupPath: String, +) { + val process = + ProcessBuilder( + SCRIPT_LANGUAGE, + ABSOLUTE_SCRIPT_PATH, + outputFolderPath, + m3saSetupPath, + ).directory(Path(ANALYSIS_SCRIPTS_DIRECTORY).toFile()) + .start() + + val exitCode = process.waitFor() + if (exitCode == 0) { + println("[M3SA says] M3SA operation(s) completed successfully.") + } else { + val errors = process.errorStream.bufferedReader().readText() + println("[M3SA says] Exit code $exitCode; Error(s): $errors") + } +} diff --git a/opendc-experiments/opendc-experiments-m3sa/src/main/kotlin/org/opendc/experiments/m3sa/runner/M3SACli.kt b/opendc-experiments/opendc-experiments-m3sa/src/main/kotlin/org/opendc/experiments/m3sa/runner/M3SACli.kt new file mode 100644 index 000000000..43597ff5c --- /dev/null +++ b/opendc-experiments/opendc-experiments-m3sa/src/main/kotlin/org/opendc/experiments/m3sa/runner/M3SACli.kt @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2022 AtLarge Research + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +@file:JvmName("M3SACli") + +package org.opendc.experiments.base.runner + +import com.github.ajalt.clikt.core.CliktCommand +import com.github.ajalt.clikt.parameters.options.default +import com.github.ajalt.clikt.parameters.options.defaultLazy +import com.github.ajalt.clikt.parameters.options.option +import com.github.ajalt.clikt.parameters.types.file +import com.github.ajalt.clikt.parameters.types.int +import m3saAnalyze +import org.opendc.experiments.base.scenario.getExperiment +import org.opendc.experiments.m3sa.scenario.getOutputFolder +import java.io.File + +/** + * Main entrypoint of the application. + */ +public fun main(args: Array): Unit = M3SACommand().main(args) + +/** + * Represents the command for the Scenario experiments. + */ +internal class M3SACommand : CliktCommand(name = "experiment") { + /** + * The path to the environment directory. + */ + private val scenarioPath by option("--experiment-path", help = "path to experiment file") + .file(canBeDir = false, canBeFile = true) + .defaultLazy { File("resources/experiment.json") } + + /** + * The number of threads to use for parallelism. 
+ */ + private val parallelism by option("-p", "--parallelism", help = "number of worker threads") + .int() + .default(Runtime.getRuntime().availableProcessors() - 1) + + private val m3saPath by option("-m", "--m3sa-setup-path", help = "path to m3sa setup file") + .file(canBeDir = false, canBeFile = true) + .defaultLazy { File("") } + + override fun run() { + println("The provided m3saPath is $m3saPath") + + val experiment = getExperiment(scenarioPath) + runExperiment(experiment, parallelism) + + if (m3saPath.toString().isNotEmpty()) { + m3saAnalyze( + outputFolderPath = getOutputFolder(scenarioPath), + m3saSetupPath = m3saPath.toString(), + ) + } else { + println( + "\n" + + "===================================================\n" + + "|M3SA path is not provided. Skipping M3SA analysis.|\n" + + "===================================================", + ) + } + } +} diff --git a/opendc-experiments/opendc-experiments-m3sa/src/main/kotlin/org/opendc/experiments/m3sa/runner/M3SARunner.kt b/opendc-experiments/opendc-experiments-m3sa/src/main/kotlin/org/opendc/experiments/m3sa/runner/M3SARunner.kt new file mode 100644 index 000000000..9a61ad175 --- /dev/null +++ b/opendc-experiments/opendc-experiments-m3sa/src/main/kotlin/org/opendc/experiments/m3sa/runner/M3SARunner.kt @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2024 AtLarge Research + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +@file:JvmName("M3saCliKt") + +package org.opendc.experiments.base.runner + +import org.opendc.experiments.base.scenario.Scenario +import java.util.concurrent.ForkJoinPool + +/** + * Run scenario when no pool is available for parallel execution + * + * @param experiment The scenarios to run + * @param parallelism The number of scenarios that can be run in parallel + */ +public fun runExperiment( + experiment: List, + parallelism: Int, +) { + val ansiReset = "\u001B[0m" + val ansiGreen = "\u001B[32m" + val ansiBlue = "\u001B[34m" + + setupOutputFolderStructure(experiment[0].outputFolder) + + for (scenario in experiment) { + val pool = ForkJoinPool(parallelism) + println( + "\n\n$ansiGreen================================================================================$ansiReset", + ) + println("$ansiBlue Running scenario: ${scenario.name} $ansiReset") + println("$ansiGreen================================================================================$ansiReset") + runScenario( + scenario, + pool, + ) + } +} diff --git a/opendc-experiments/opendc-experiments-m3sa/src/main/kotlin/org/opendc/experiments/m3sa/scenario/M3SAFactories.kt b/opendc-experiments/opendc-experiments-m3sa/src/main/kotlin/org/opendc/experiments/m3sa/scenario/M3SAFactories.kt new file mode 100644 index 000000000..a4df40e1f --- /dev/null +++ b/opendc-experiments/opendc-experiments-m3sa/src/main/kotlin/org/opendc/experiments/m3sa/scenario/M3SAFactories.kt @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2024 AtLarge 
Research + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +package org.opendc.experiments.m3sa.scenario + +import org.opendc.experiments.base.scenario.ExperimentReader +import java.io.File + +private val experimentReader = ExperimentReader() + +/** + * Returns a list of Scenarios from a given file path (input). + * + * @param filePath The path to the file containing the scenario specifications. + * @return A list of Scenarios. 
+ */ +public fun getOutputFolder(file: File): String { + return experimentReader.read(file).outputFolder + "/outputs" +} diff --git a/opendc-experiments/opendc-experiments-m3sa/src/main/python/accuracy_evaluator.py b/opendc-experiments/opendc-experiments-m3sa/src/main/python/accuracy_evaluator.py new file mode 100644 index 000000000..463f69e6d --- /dev/null +++ b/opendc-experiments/opendc-experiments-m3sa/src/main/python/accuracy_evaluator.py @@ -0,0 +1,114 @@ +import numpy as np + +from models.MetaModel import MetaModel + + +def accuracy_evaluator( + real_data, + multi_model, + compute_mape=True, + compute_nad=True, + compute_rmsle=True, + rmsle_hyperparameter=0.5, + only_metamodel=False +): + """ + :param real_data: the real-world data of the simulation + :param multi_model: the Multi-Model, containing individual models (possibly also a Meta-Model, with id=101) + :param MAPE: whether to calculate Mean Absolute Percentage Error (MAPE) + :param NAD: whether to calculate Normalized Absolute Differences (NAD) + :param RMSLE: whether to calculate Root Mean Square Logarithmic Error (RMSLE) + :param rmsle_hyperparameter: the hyperparameter that balances the ration underestimations:overestimations + - default is 0.5 (balanced penalty) + - < 0.5: more penalty for overestimations + - > 0.5: more penalty for underestimations + e.g., RMSLE_hyperparameter=0.3 -> 30% penalty for overestimations, 70% penalty for underestimations (3:7 ratio) + :return: None, but prints the accuracy metrics + """ + + meta_model = MetaModel(multimodel=multi_model) + multi_model.models.append(meta_model.meta_model) # metamodel + # multi_model.models.append(Model(raw_host_data=real_data, id=-1, path=None)) # real-world data + + with open(multi_model.output_folder_path + "/accuracy_report.txt", "a") as f: + f.write("====================================\n") + f.write("Accuracy Report, against ground truth\n") + + for model in multi_model.models: + if only_metamodel and model.id != 101: + continue + + 
if model.id == -1: + f.write("Real-World data") + elif model.id == 101: + f.write( + f"Meta-Model, meta-function: {multi_model.user_input['meta_function']}, window_size: {meta_model.multi_model.window_size}") + else: + f.write(f"Model {model.id}") + + simulation_data = model.raw_sim_data + min_len = min(len(real_data), len(simulation_data)) + real_data = real_data[:min_len] + simulation_data = simulation_data[:min_len] + if compute_mape: + accuracy_mape = mape( + real_data=real_data, + simulation_data=simulation_data + ) + f.write(f"\nMean Absolute Percentage Error (MAPE): {accuracy_mape}%") + + if compute_nad: + accuracy_nad = nad( + real_data=real_data, + simulation_data=simulation_data + ) + f.write(f"\nNormalized Absolute Differences (NAD): {accuracy_nad}%") + + if compute_rmsle: + accuracy_rmsle = rmsle( + real_data=real_data, + simulation_data=simulation_data, + alpha=rmsle_hyperparameter + ) + f.write( + f"\nRoot Mean Square Logarithmic Error (RMSLE), alpha={rmsle_hyperparameter}:{accuracy_rmsle}\n\n") + + f.write("====================================\n") + + +def mape(real_data, simulation_data): + """ + Calculate Mean Absolute Percentage Error (MAPE) + :param real_data: Array of real values + :param simulation_data: Array of simulated values + :return: MAPE value + """ + real_data = np.array(real_data) + simulation_data = np.array(simulation_data) + return round(np.mean(np.abs((real_data - simulation_data) / real_data)) * 100, 3) + + +def nad(real_data, simulation_data): + """ + Calculate Normalized Absolute Differences (NAD) + :param real_data: Array of real values + :param simulation_data: Array of simulated values + :return: NAD value + """ + real_data = np.array(real_data) + simulation_data = np.array(simulation_data) + return round(np.sum(np.abs(real_data - simulation_data)) / np.sum(real_data) * 100, 3) + + +def rmsle(real_data, simulation_data, alpha=0.5): + """ + Calculate Root Mean Square Logarithmic Error (RMSLE) with an adjustable alpha 
parameter + :param real_data: Array of real values + :param simulation_data: Array of simulated values + :param alpha: Hyperparameter that balances the penalty between underestimations and overestimations + :return: RMSLE value + """ + real_data = np.array(real_data) + simulation_data = np.array(simulation_data) + log_diff = alpha * np.log(real_data) - (1 - alpha) * np.log(simulation_data) + return round(np.sqrt(np.mean(log_diff ** 2)) * 100, 3) diff --git a/opendc-experiments/opendc-experiments-m3sa/src/main/python/input_parser.py b/opendc-experiments/opendc-experiments-m3sa/src/main/python/input_parser.py new file mode 100644 index 000000000..cb1bc2b9f --- /dev/null +++ b/opendc-experiments/opendc-experiments-m3sa/src/main/python/input_parser.py @@ -0,0 +1,135 @@ +import json +import os +import sys +import warnings + + +def read_input(path=""): + """ + Reads and processes the input JSON file from the specified path. Validates the input path, + ensures the file exists, and decodes the JSON content. Switches to the project root directory + before returning the parsed input. + + :param path: The relative path to the input JSON file. + :type path: str + :raises ValueError: If the input path is not provided, file does not exist, or JSON decoding fails. + :return: Parsed JSON content. + :rtype: dict + :side effect: Changes the working directory to the project root. 
+ """ + if not path: + raise ValueError("No input path provided.") + + path = path.strip().strip(',') + + project_root = find_root_dir() + if not project_root: + raise ValueError("Project root not found.") + + full_path = os.path.join(project_root, path) + + if not os.path.exists(full_path): + raise ValueError(f"File does not exist: {full_path}") + + try: + with open(full_path, 'r') as raw_json: + input_json = json.load(raw_json) + except json.JSONDecodeError: + raise ValueError("Failed to decode JSON.") + except IOError: + raise ValueError("MultiModel's parser says: Error opening file.") + + switch_to_root_dir() + + # Validate and apply defaults + input_json = parse_input(input_json) + return input_json + + +def parse_input(input_json): + """ + Validates and applies default values to the input JSON content. Ensures required fields are present + and raises warnings or errors for missing or invalid values. + + :param input_json: The input JSON content. + :type input_json: dict + :raises ValueError: If required fields are missing or invalid values are provided. + :return: Validated and processed JSON content with defaults applied. 
+ :rtype: dict + """ + + DEFAULTS = { + "multimodel": True, + "metamodel": False, + "window_size": 1, + "window_function": "mean", + "meta_function": "mean", + "samples_per_minute": 0, + "current_unit": "", + "unit_scaling_magnitude": 1, + "plot_type": "time_series", + "plot_title": "", + "x_label": "", + "y_label": "", + "seed": 0, + "y_ticks_count": None, + "x_ticks_count": None, + "y_min": None, + "y_max": None, + "x_min": None, + "x_max": None, + } + + # Apply default values where not specified + for key, default_value in DEFAULTS.items(): + if key not in input_json: + input_json[key] = default_value + + # Special handling for required fields without default values + if "metric" not in input_json: + raise ValueError("Required field 'metric' is missing.") + + if ("meta_function" not in input_json) and input_json["metamodel"]: + raise ValueError("Required field 'meta_function' is missing. Please select between 'mean' and 'median'. Alternatively," + "disable metamodel in the config file.") + + if input_json["meta_function"] not in ["mean", "median", "meta_equation1", "equation2", "equation3"]: + raise ValueError("Invalid value for meta_function. Please select between 'mean', 'median', !!!!!!!to be updated in the end!!!!!!!!.") + + # raise a warning + if not input_json["multimodel"] and input_json["metamodel"]: + warnings.warn("Warning: Cannot have a Meta-Model without a Multi-Model. No computation made.") + + return input_json + + +def find_root_dir(): + """ + Searches for the project root directory by looking for a 'README.md' file in the current + and parent directories. + + :return: The path to the project root directory if found, otherwise None. 
+ :rtype: str or None + """ + current_dir = os.path.dirname(os.path.abspath(__file__)) + root = os.path.abspath(os.sep) + while current_dir and current_dir != root: + if os.path.exists(os.path.join(current_dir, 'README.md')): + return current_dir + current_dir = os.path.dirname(current_dir) + return None + + +def switch_to_root_dir(): + """ + Switches the current working directory to the project root directory. Exits the program if the + root directory is not found. + + :side effect: Changes the current working directory or exits the program. + """ + root_dir = find_root_dir() + if root_dir: + os.chdir(root_dir) + else: + print("Failed to switch to root directory.") + sys.exit(1) diff --git a/opendc-experiments/opendc-experiments-m3sa/src/main/python/main.py b/opendc-experiments/opendc-experiments-m3sa/src/main/python/main.py new file mode 100644 index 000000000..11ee836de --- /dev/null +++ b/opendc-experiments/opendc-experiments-m3sa/src/main/python/main.py @@ -0,0 +1,20 @@ +from os import sys + +from input_parser import read_input +from models.MetaModel import MetaModel +from models.MultiModel import MultiModel + + +def main(): + multimodel = MultiModel( + user_input=read_input(sys.argv[2]), + path=sys.argv[1], + ) + + multimodel.generate_plot() + + MetaModel(multimodel) + + +if __name__ == "__main__": + main() diff --git a/opendc-experiments/opendc-experiments-m3sa/src/main/python/models/MetaModel.py b/opendc-experiments/opendc-experiments-m3sa/src/main/python/models/MetaModel.py new file mode 100644 index 000000000..49930d254 --- /dev/null +++ b/opendc-experiments/opendc-experiments-m3sa/src/main/python/models/MetaModel.py @@ -0,0 +1,214 @@ +import numpy as np +import os +import pandas as pd + +from .Model import Model + + +class MetaModel: + """ + A class that aggregates results from multiple simulation models based on user-defined functions, producing + consolidated outputs for analysis. 
+ + Attributes: + multi_model (MultiModel): The container of models whose results are aggregated. + meta_model (Model): Model instance that stores aggregated results. + meta_function (function): Function used to calculate aggregated data. + min_raw_model_len (int): Minimum length of raw data arrays across all models. + min_processed_model_len (int): Minimum length of processed data arrays across all models. + number_of_models (int): Number of models being aggregated. + function_map (dict): Mapping of aggregation function names to function implementations. + """ + + META_MODEL_ID = -101 + + def __init__(self, multimodel, meta_function=None): + """ + Initializes the Metamodel with a MultiModel instance and prepares aggregation functions based on configuration. + + :param multimodel: MultiModel instance containing the models to aggregate. + :raise ValueError: If metamodel functionality is not enabled in the configuration. + """ + if not multimodel.user_input.get('metamodel', False): + raise ValueError("Metamodel is not enabled in the config file") + + self.function_map = { + 'mean': self.mean, + 'median': self.median, + 'meta_equation1': self.meta_equation1, + } + + self.multi_model = multimodel + self.meta_model = Model( + raw_sim_data=[], + id=self.META_MODEL_ID, + path=self.multi_model.output_folder_path + ) + + if meta_function is not None: + self.meta_function = meta_function + else: + self.meta_function = self.function_map.get(multimodel.user_input['meta_function'], self.mean) + + self.min_raw_model_len = min([len(model.raw_sim_data) for model in self.multi_model.models]) + self.min_processed_model_len = min([len(model.processed_sim_data) for model in self.multi_model.models]) + self.number_of_models = len(self.multi_model.models) + self.compute() + self.output() + + def output(self): + """ + Generates outputs by plotting the aggregated results and exporting the metamodel data to a file. + :return: None + :side effect: Outputs data to files and generates plots. 
+ """ + self.plot() + self.output_metamodel() + + def compute(self): + """ + Computes aggregated data based on the specified plot type from the configuration. + :raise ValueError: If an unsupported plot type is specified in the configuration. + """ + if self.multi_model.plot_type == 'time_series': + self.compute_time_series() + elif self.multi_model.plot_type == 'cumulative': + self.compute_cumulative() + elif self.multi_model.plot_type == 'cumulative_time_series': + self.compute_cumulative_time_series() + else: + raise ValueError("Invalid plot type in config file") + + def plot(self): + """ + Plots the aggregated data according to the specified plot type from the configuration. + :raise ValueError: If an unsupported plot type is specified. + """ + if self.multi_model.plot_type == 'time_series': + self.plot_time_series() + elif self.multi_model.plot_type == 'cumulative': + self.plot_cumulative() + elif self.multi_model.plot_type == 'cumulative_time_series': + self.plot_cumulative_time_series() + + else: + raise ValueError("Invalid plot type in config file") + + def compute_time_series(self): + """ + Aggregates time series data across models using the specified aggregation function. + :return: None + :side effect: Updates the meta_model's processed data with aggregated results. + """ + for i in range(0, self.min_processed_model_len): + data_entries = [] + for j in range(self.number_of_models): + data_entries.append(self.multi_model.models[j].processed_sim_data[i]) + self.meta_model.processed_sim_data.append(self.meta_function(data_entries)) + self.meta_model.raw_sim_data = self.meta_model.processed_sim_data + + def plot_time_series(self): + """ + Generates a time series plot of the aggregated data. + :return: None + :side effect: Displays a time series plot using the multi_model's plotting capabilities. 
+ """ + self.multi_model.models.append(self.meta_model) + self.multi_model.generate_plot() + + def compute_cumulative(self): + """ + Aggregates cumulative data entries across all models. + :return: None + :side effect: Updates the meta_model's cumulative data with aggregated results. + """ + + for i in range(0, self.min_raw_model_len): + data_entries = [] + for j in range(self.number_of_models): + sim_data = self.multi_model.models[j].raw_sim_data + ith_element = sim_data[i] + data_entries.append(ith_element) + self.meta_model.cumulated += self.mean(data_entries) + self.meta_model.cumulated = round(self.meta_model.cumulated, 2) + + def plot_cumulative(self): + """ + Generates a cumulative plot of the aggregated data. + :return: None + :side effect: Displays a cumulative plot using the multi_model's plotting capabilities. + """ + self.multi_model.models.append(self.meta_model) + self.multi_model.generate_plot() + + def compute_cumulative_time_series(self): + """ + Aggregates cumulative time series data entries across models using the specified aggregation function. + :return: None + :side effect: Updates the meta_model's processed data with cumulative aggregated results. + """ + for i in range(0, self.min_processed_model_len): + data_entries = [] + for j in range(self.number_of_models): + data_entries.append(self.multi_model.models[j].processed_sim_data[i]) + self.meta_model.processed_sim_data.append(self.meta_function(data_entries)) + + def plot_cumulative_time_series(self): + """ + Generates a cumulative time series plot of the aggregated data. + :return: None + :side effect: Displays a cumulative time series plot using the multi_model's plotting capabilities. + """ + self.multi_model.models.append(self.meta_model) + self.multi_model.generate_plot() + + def output_metamodel(self): + """ + Exports the processed sim data of the metamodel to a parquet file for further analysis or record keeping. 
+ :return: None + :side effect: Writes data to a parquet file at the specified directory path. + """ + directory_path = os.path.join(self.multi_model.output_folder_path, "raw-output/metamodel/seed=0") + os.makedirs(directory_path, exist_ok=True) + current_path = os.path.join(directory_path, f"{self.multi_model.metric}.parquet") + df = pd.DataFrame({'processed_sim_data': self.meta_model.processed_sim_data}) + df.to_parquet(current_path, index=False) + + def mean(self, chunks): + """ + Calculates the mean of a list of numerical data. + + :param chunks (list): The data over which to calculate the mean. + :return: float: The mean of the provided data. + """ + return np.mean(chunks) + + def median(self, chunks): + """ + Calculates the median of a list of numerical data. + + :param chunks (list): The data over which to calculate the median. + :return: float: The median of the provided data. + """ + return np.median(chunks) + + def meta_equation1(self, chunks): + """ + Calculates a weighted mean where the weights are inversely proportional to the absolute difference from the median value. + :param chunks (list): Data chunks from which to calculate the weighted mean. + :return: float: The calculated weighted mean. 
"""
A model is the output of a simulator run. It contains the data the simulator produced under a certain topology,
seed, workload, datacenter configuration, etc. A model is used by the analyzer as part of the MultiModel class and,
further, by the MetaModel class.
"""
import json
from dataclasses import dataclass, field


@dataclass
class Model:
    """
    Represents a single simulation output together with the metadata describing how it was produced.

    Attributes:
        path (str): Base path in which 'trackr.json' is looked up.
        raw_sim_data (list): Raw data from the simulator output.
        id (int): Identifier of the model.
        processed_sim_data (list): Data derived from raw_sim_data (filled by MultiModel / MetaModel).
        cumulative_time_series_values (list): Cumulative values used for cumulative time series plots.
        cumulated (float): Cumulative total of the model.
        experiment_name (str): Name of the experiment, read from 'trackr.json'.
        margins_of_error (list): Error margins associated with the data.
        topologies (list): Topologies used during the simulation, read from 'trackr.json'.
        workloads (list): Workloads applied during the simulation, read from 'trackr.json'.
        allocation_policies (list): Allocation policies used, read from 'trackr.json'.
        carbon_trace_paths (list): Carbon trace paths, read from 'trackr.json'.

    Usage:
        Instantiate with the raw data, an id and a base path; call parse_trackr() afterwards to load the
        experiment metadata.
    """

    path: str
    raw_sim_data: list
    id: int
    processed_sim_data: list = field(default_factory=list)
    cumulative_time_series_values: list = field(default_factory=list)
    cumulated: float = 0.0
    experiment_name: str = ""
    margins_of_error: list = field(default_factory=list)
    topologies: list = field(default_factory=list)
    workloads: list = field(default_factory=list)
    allocation_policies: list = field(default_factory=list)
    carbon_trace_paths: list = field(default_factory=list)

    def parse_trackr(self):
        """
        Reads '<path>/trackr.json' and copies the metadata of the entry belonging to this model's id into the
        model. Fields missing from the entry (or a missing entry) leave the defaults ("" / []).

        :return: None
        :side effect: Updates experiment_name, topologies, workloads, allocation_policies, carbon_trace_paths.
        :raises FileNotFoundError: If the 'trackr.json' file does not exist at the specified path.
        :raises json.JSONDecodeError: If there is an error parsing the JSON data.
        """
        trackr_path = self.path + "/trackr.json"
        with open(trackr_path) as f:
            trackr = json.load(f)

        entry = {}
        if isinstance(trackr, dict):
            # JSON object keys are always decoded as strings, so the integer id has to be looked up as
            # str(id); the raw id is kept as a fallback for backward compatibility.
            entry = trackr.get(str(self.id), trackr.get(self.id, {}))
        elif isinstance(trackr, list):
            # NOTE(review): presumably trackr.json may also be an array of objects carrying an "id" field —
            # confirm against the simulator's writer.
            entry = next(
                (item for item in trackr if isinstance(item, dict) and item.get('id') == self.id),
                {}
            )
        if not isinstance(entry, dict):
            entry = {}

        self.experiment_name = entry.get('name', "")
        self.topologies = entry.get('topologies', [])
        self.workloads = entry.get('workloads', [])
        self.allocation_policies = entry.get('allocationPolicies', [])
        self.carbon_trace_paths = entry.get('carbonTracePaths', [])
import matplotlib.pyplot as plt
import numpy as np
import os
import pyarrow.parquet as pq
import time
from matplotlib.ticker import MaxNLocator, FuncFormatter

from simulator_specifics import *
from .MetaModel import MetaModel
from .Model import Model


def is_meta_model(model):
    """
    Tell whether a model is the meta-model, which is recognised purely by its id (MetaModel.META_MODEL_ID).

    Args:
        model (Model): The model to check.

    Returns:
        bool: True if the model's id equals MetaModel.META_MODEL_ID, False otherwise.
    """
    candidate_id = model.id
    return candidate_id == MetaModel.META_MODEL_ID


class MultiModel:
    """
    Loads the output of several simulation models, aggregates it according to the user configuration and
    produces plots and an analysis file.

    Attributes:
        user_input (dict): Configuration dictionary with the user settings.
        path (str): Base directory (relative to the working directory) holding outputs and analysis results.
        window_size (int): Size of the aggregation window.
        models (list of Model): The loaded models.
        metric (str): The metric to analyze and plot.
        measure_unit (str): Unit of the metric, including the metric prefix chosen by the user.
        output_folder_path (str): Folder where output files are saved.
        raw_output_path (str): Folder where the raw simulation data is stored.
        analysis_file_path (str): File where analysis results are recorded.
        plot_type (str): 'time_series', 'cumulative', or 'cumulative_time_series'.
        plot_title (str): Title of the plot.
        x_label (str), y_label (str): Axis labels of the plot.
        x_min (float), x_max (float), y_min (float), y_max (float): Optional axis limits.

    Usage:
        Instantiate with the user settings, an output path and optionally a window size, then call
        `generate_plot`.
    """

    def __init__(self, user_input, path, window_size=-1):
        """
        Stores the configuration, resets all state to its defaults and loads and pre-aggregates the models.

        :param user_input (dict): Configurations and settings from the user.
        :param path (str): Path where output and analysis will be stored.
        :param window_size (int): The size of the window to aggregate data; uses user input if -1.
        :return: None
        """
        # Taken first so the reported computing time covers the whole lifetime of the object.
        self.starting_time = time.time()

        self.user_input = user_input
        self.path = path
        self.models = []

        # State that is filled in later by parse_user_input / set_paths / plotting.
        for unset_attribute in (
            "end_time", "workload_time",
            "metric", "measure_unit",
            "folder_path", "output_folder_path", "raw_output_path", "analysis_file_path",
            "plot_type", "plot_title", "x_label", "y_label",
            "x_min", "x_max", "y_min", "y_max", "plot_path",
        ):
            setattr(self, unset_attribute, None)

        # Defaults, overwritten by parse_user_input / init_models.
        self.unit_scaling = 1
        self.window_size = -1
        self.window_function = "median"
        self.max_model_len = 0
        self.seed = 0

        self.parse_user_input(window_size)
        self.set_paths()
        self.init_models()

        self.compute_windowed_aggregation()
+ :return: None + """ + + self.starting_time = time.time() + self.end_time = None + self.workload_time = None + + self.user_input = user_input + + self.metric = None + self.measure_unit = None + self.path = path + self.models = [] + + self.folder_path = None + self.output_folder_path = None + self.raw_output_path = None + self.analysis_file_path = None + self.unit_scaling = 1 + self.window_size = -1 + self.window_function = "median" + self.max_model_len = 0 + self.seed = 0 + + self.plot_type = None + self.plot_title = None + self.x_label = None + self.y_label = None + self.x_min = None + self.x_max = None + self.y_min = None + self.y_max = None + self.plot_path = None + + self.parse_user_input(window_size) + self.set_paths() + self.init_models() + + self.compute_windowed_aggregation() + + def parse_user_input(self, window_size): + """ + Parses and sets attributes based on user input. + + :param window_size (int): Specified window size for data aggregation, defaults to user_input if -1. + :return: None + """ + if window_size == -1: + self.window_size = self.user_input["window_size"] + else: + self.window_size = window_size + self.metric = self.user_input["metric"] + self.measure_unit = self.adjust_unit() + self.window_function = self.user_input["window_function"] + self.seed = self.user_input["seed"] + + self.plot_type = self.user_input["plot_type"] + self.plot_title = self.user_input["plot_title"] + if self.user_input["x_label"] == "": + self.x_label = "Samples" + else: + self.x_label = self.user_input["x_label"] + + if self.user_input["y_label"] == "": + self.y_label = self.metric + " [" + self.measure_unit + "]" + else: + self.y_label = self.user_input["y_label"] + + self.y_min = self.user_input["y_min"] + self.y_max = self.user_input["y_max"] + self.x_min = self.user_input["x_min"] + self.x_max = self.user_input["x_max"] + + def adjust_unit(self): + """ + Adjusts the unit of measurement according to the scaling magnitude specified by the user. 
+ This method translates the given measurement scale into a scientifically accepted metric prefix. + + :return str: The metric prefixed by the appropriate scale (e.g., 'kWh' for kilo-watt-hour if the scale is 3). + :raise ValueError: If the unit scaling magnitude provided by the user is not within the accepted range of scaling factors. + """ + prefixes = ['n', 'μ', 'm', '', 'k', 'M', 'G', 'T'] + scaling_factors = [-9, -6, -3, 1, 3, 6, 9] + given_metric = self.user_input["current_unit"] + self.unit_scaling = self.user_input["unit_scaling_magnitude"] + + if self.unit_scaling not in scaling_factors: + raise ValueError( + "Unit scaling factor not found. Please enter a valid unit from [-9, -6, -3, 1, 3, 6, 9].") + + if self.unit_scaling == 1: + return given_metric + + for i in range(len(scaling_factors)): + if self.unit_scaling == scaling_factors[i]: + self.unit_scaling = 10 ** self.unit_scaling + result = prefixes[i] + given_metric + return result + + def set_paths(self): + """ + Configures and initializes the directory paths for output and analysis based on the base directory provided. + This method sets paths for the raw output and detailed analysis results, ensuring directories are created if + they do not already exist, and prepares a base file for capturing analytical summaries. + + :return: None + :side effect: Creates necessary directories and files for output and analysis. + """ + self.output_folder_path = os.getcwd() + "/" + self.path + self.raw_output_path = os.getcwd() + "/" + self.path + "/raw-output" + self.analysis_file_path = os.getcwd() + "/" + self.path + "/simulation-analysis/" + os.makedirs(self.analysis_file_path, exist_ok=True) + self.analysis_file_path = os.path.join(self.analysis_file_path, "analysis.txt") + if not os.path.exists(self.analysis_file_path): + with open(self.analysis_file_path, "w") as f: + f.write("Analysis file created.\n") + + def init_models(self): + """ + Initializes models from the simulation output stored in Parquet files. 
This method reads each Parquet file, + processes the relevant data, and initializes Model instances which are stored in the model list. + + :return: None + :raise ValueError: If the unit scaling has not been set prior to model initialization. + """ + model_id = 0 + + for simulation_folder in os.listdir(self.raw_output_path): + if simulation_folder == "metamodel": + continue + path_of_parquet_file = f"{self.raw_output_path}/{simulation_folder}/seed={self.seed}/{SIMULATION_DATA_FILE}.parquet" + parquet_file = pq.read_table(path_of_parquet_file).to_pandas() + raw = parquet_file.select_dtypes(include=[np.number]).groupby("timestamp") + raw = raw[self.metric].sum().values + + if self.unit_scaling is None: + raise ValueError("Unit scaling factor is not set. Please ensure it is set correctly.") + + raw = np.divide(raw, self.unit_scaling) + + if self.user_input["samples_per_minute"] > 0: + MINUTES_IN_DAY = 1440 + self.workload_time = len(raw) * self.user_input["samples_per_minute"] / MINUTES_IN_DAY + + model = Model(raw_sim_data=raw, id=model_id, path=self.output_folder_path) + self.models.append(model) + model_id += 1 + + self.max_model_len = min([len(model.raw_sim_data) for model in self.models]) + + def compute_windowed_aggregation(self): + """ + Applies a windowed aggregation function to each model's dataset. This method is typically used for smoothing + or reducing data granularity. It involves segmenting the dataset into windows of specified size and applying + an aggregation function to each segment. + + :return: None + :side effect: Modifies each model's processed_sim_data attribute to contain aggregated data. + """ + if self.plot_type != "cumulative": + for model in self.models: + numeric_values = model.raw_sim_data + model.processed_sim_data = self.mean_of_chunks(numeric_values, self.window_size) + + def generate_plot(self): + """ + Creates and saves plots based on the processed data from multiple models. 
This method determines + the type of plot to generate based on user input and invokes the appropriate plotting function. + + The plotting options supported are 'time_series', 'cumulative', and 'cumulative_time_series'. + Depending on the type specified, this method delegates to specific plot-generating functions. + + :return: None + :raises ValueError: If the plot type specified is not recognized or supported by the system. + :side effect: + - Generates and saves a plot to the file system. + - Updates the plot attributes based on the generated plot. + - Displays the plot on the matplotlib figure canvas. + """ + plt.figure(figsize=(12, 10)) + plt.xticks(size=22) + plt.yticks(size=22) + plt.ylabel(self.y_label, size=26) + plt.xlabel(self.x_label, size=26) + plt.title(self.plot_title, size=26) + plt.grid() + + formatter = FuncFormatter(lambda x, _: '{:,}'.format(int(x)) if x >= 1000 else int(x)) + ax = plt.gca() + ax.xaxis.set_major_formatter(formatter) + # ax.yaxis.set_major_formatter(formatter) yaxis has formatting issues - to solve in a future iteration + + if self.user_input['x_ticks_count'] is not None: + ax = plt.gca() + ax.xaxis.set_major_locator(MaxNLocator(self.user_input['x_ticks_count'])) + + if self.user_input['y_ticks_count'] is not None: + ax = plt.gca() + ax.yaxis.set_major_locator(MaxNLocator(self.user_input['y_ticks_count'])) + + self.set_x_axis_lim() + self.set_y_axis_lim() + + if self.plot_type == "time_series": + self.generate_time_series_plot() + elif self.plot_type == "cumulative": + self.generate_cumulative_plot() + elif self.plot_type == "cumulative_time_series": + self.generate_cumulative_time_series_plot() + else: + raise ValueError( + "Plot type not recognized. Please enter a valid plot type. The plot can be either " + "'time_series', 'cumulative', or 'cumulative_time_series'." 
+ ) + + plt.tight_layout() + plt.subplots_adjust(right=0.85) + plt.legend(fontsize=12, bbox_to_anchor=(1, 1)) + self.save_plot() + self.output_stats() + + def generate_time_series_plot(self): + """ + Plots time series data for each model. This function iterates over each model, applies the defined + windowing function to smooth the data, and plots the resulting series. + + :return: None + :side effect: Plots are displayed on the matplotlib figure canvas. + """ + for model in self.models: + label = "Meta-Model" if is_meta_model(model) else "Model " + str(model.id) + if is_meta_model(model): + repeated_means = np.repeat(means, self.window_size)[:len(model.processed_sim_data) * self.window_size] + plt.plot( + repeated_means, + drawstyle='steps-mid', + label=label, + color="red", + linestyle="--", + marker="o", + markevery=max(1, len(repeated_means) // 50), + linewidth=2 + ) + else: + means = self.mean_of_chunks(model.raw_sim_data, self.window_size) + repeated_means = np.repeat(means, self.window_size)[:len(model.raw_sim_data)] + plt.plot(repeated_means, drawstyle='steps-mid', label=label) + + def generate_cumulative_plot(self): + """ + Generates a horizontal bar chart showing cumulative data for each model. This function + aggregates total values per model and displays them in a bar chart, providing a visual + comparison of total values across models. + + :return: None + :side effect: Plots are displayed on the matplotlib figure canvas. 
+ """ + plt.xlim(self.get_cumulative_limits(model_sums=self.sum_models_entries())) + plt.ylabel("Model ID", size=20) + plt.xlabel("Total " + self.metric + " [" + self.measure_unit + "]") + plt.yticks(range(len(self.models)), [model.id for model in self.models]) + plt.grid(False) + + cumulated_energies = self.sum_models_entries() + for i, model in enumerate(self.models): + label = "Meta-Model" if is_meta_model(model) else "Model " + str(model.id) + if is_meta_model(model): + plt.barh(label=label, y=i, width=cumulated_energies[i], color="red") + else: + plt.barh(label=label, y=i, width=cumulated_energies[i]) + plt.text(cumulated_energies[i], i, str(cumulated_energies[i]), ha='left', va='center', size=26) + + def generate_cumulative_time_series_plot(self): + """ + Generates a plot showing the cumulative data over time for each model. This visual representation is + useful for analyzing trends and the accumulation of values over time. + + :return: None + :side effect: Displays the cumulative data over time on the matplotlib figure canvas. + """ + self.compute_cumulative_time_series() + + for model in self.models: + if is_meta_model(model): + cumulative_repeated = np.repeat(model.cumulative_time_series_values, self.window_size)[ + :len(model.processed_sim_data) * self.window_size] + plt.plot( + cumulative_repeated, + drawstyle='steps-mid', + label=("Meta-Model"), + color="red", + linestyle="--", + marker="o", + markevery=max(1, len(cumulative_repeated) // 10), + linewidth=3 + ) + else: + cumulative_repeated = np.repeat(model.cumulative_time_series_values, self.window_size)[ + :len(model.raw_sim_data)] + plt.plot(cumulative_repeated, drawstyle='steps-mid', label=("Model " + str(model.id))) + + def compute_cumulative_time_series(self): + """ + Computes the cumulative sum of processed data over time for each model, storing the result for use in plotting. 
+ + :return: None + :side effect: Updates each model's 'cumulative_time_series_values' attribute with the cumulative sums. + """ + for model in self.models: + cumulative_array = [] + _sum = 0 + for value in model.processed_sim_data: + _sum += value + cumulative_array.append(_sum * self.window_size) + model.cumulative_time_series_values = cumulative_array + + def save_plot(self): + """ + Saves the current plot to a PDF file in the specified directory, constructing the file path from the + plot attributes and ensuring that the directory exists before saving. + + :return: None + :side effect: Creates or overwrites a PDF file containing the plot in the designated folder. + """ + folder_prefix = self.output_folder_path + "/simulation-analysis/" + self.metric + "/" + self.plot_path = folder_prefix + self.plot_type + "_plot_multimodel_metric=" + self.metric + "_window=" + str( + self.window_size) + ".pdf" + plt.savefig(self.plot_path) + + def set_x_axis_lim(self): + """ + Sets the x-axis limits for the plot based on user-defined minimum and maximum values. If values + are not specified, the axis limits will default to encompassing all data points. + + :return: None + :side effect: Adjusts the x-axis limits of the current matplotlib plot. + """ + if self.x_min is not None: + plt.xlim(left=self.x_min) + + if self.x_max is not None: + plt.xlim(right=self.x_max) + + def set_y_axis_lim(self): + """ + Dynamically sets the y-axis limits to be slightly larger than the range of the data, enhancing + the readability of the plot by ensuring all data points are comfortably within the view. + + :return: None + :side effect: Adjusts the y-axis limits of the current matplotlib plot. + """ + if self.y_min is not None: + plt.ylim(bottom=self.y_min) + if self.y_max is not None: + plt.ylim(top=self.y_max) + + def sum_models_entries(self): + """ + Computes the total values from each model for use in cumulative plotting. 
This method aggregates + the data across all models and prepares it for cumulative display. + + :return: List of summed values for each model, useful for plotting and analysis. + """ + models_sums = [] + for (i, model) in enumerate(self.models): + if is_meta_model(model): + models_sums.append(model.cumulated) + else: + cumulated_energy = model.raw_sim_data.sum() + cumulated_energy = round(cumulated_energy, 2) + models_sums.append(cumulated_energy) + + return models_sums + + def output_stats(self): + """ + Records and writes detailed simulation statistics to an analysis file. This includes time stamps, + performance metrics, and other relevant details. + + :return: None + :side effect: Appends detailed simulation statistics to an existing file for record-keeping and analysis. + """ + self.end_time = time.time() + with open(self.analysis_file_path, "a") as f: + f.write("\n\n========================================\n") + f.write("Simulation made at " + time.strftime("%Y-%m-%d %H:%M:%S") + "\n") + f.write("Metric: " + self.metric + "\n") + f.write("Unit: " + self.measure_unit + "\n") + f.write("Window size: " + str(self.window_size) + "\n") + f.write("Sample count in raw sim data: " + str(self.max_model_len) + "\n") + f.write("Computing time " + str(round(self.end_time - self.starting_time, 1)) + "s\n") + if (self.user_input["samples_per_minute"] > 0): + f.write("Workload time: " + str(round(self.workload_time, 2)) + " days\n") + f.write("Plot path" + self.plot_path + "\n") + f.write("========================================\n") + + def mean_of_chunks(self, np_array, window_size): + """ + Calculates the mean of data within each chunk for a given array. This method helps in smoothing the data by + averaging over specified 'window_size' segments. + + :param np_array (np.array): Array of numerical data to be chunked and averaged. + :param window_size (int): The size of each segment to average over. + :return: np.array: An array of mean values for each chunk. 
+ :side effect: None + """ + if window_size == 1: + return np_array + + chunks = [np_array[i:i + window_size] for i in range(0, len(np_array), window_size)] + means = [np.mean(chunk) for chunk in chunks] + return np.array(means) + + def get_cumulative_limits(self, model_sums): + """ + Calculates the appropriate x-axis limits for cumulative plots based on the summarized data from each model. + + :param model_sums (list of float): The total values for each model. + :return: tuple: A tuple containing the minimum and maximum x-axis limits. + """ + axis_min = min(model_sums) * 0.9 + axis_max = max(model_sums) * 1.1 + + if self.user_input["x_min"] is not None: + axis_min = self.user_input["x_min"] + if self.user_input["x_max"] is not None: + axis_max = self.user_input["x_max"] + + return [axis_min * 0.9, axis_max * 1.1] diff --git a/opendc-experiments/opendc-experiments-m3sa/src/main/python/requirements.txt b/opendc-experiments/opendc-experiments-m3sa/src/main/python/requirements.txt new file mode 100644 index 000000000..cbd229858 --- /dev/null +++ b/opendc-experiments/opendc-experiments-m3sa/src/main/python/requirements.txt @@ -0,0 +1,4 @@ +matplotlib==3.8.4 +numpy==2.1.1 +pandas==2.2.2 +pyarrow==16.1.0 diff --git a/opendc-experiments/opendc-experiments-m3sa/src/main/python/simulator_specifics.py b/opendc-experiments/opendc-experiments-m3sa/src/main/python/simulator_specifics.py new file mode 100644 index 000000000..4e1c36e1f --- /dev/null +++ b/opendc-experiments/opendc-experiments-m3sa/src/main/python/simulator_specifics.py @@ -0,0 +1,14 @@ +""" +This file is the integration layer of the M3SA tool upon any (ICT) simulator. + +The system will use the elements from this file in the analysis / meta-simulation process. +""" + +""" +SIMULATION_DATA_FILE (str): The name of the file containing the simulation data. Enter only the name, not the path, not +the extension. The data file must be parquet format. 
+ +✅ Good: "host", "simulation_data", "cats_predictions" +❌ Wrong: "host.json", "opendc/folder_x/folder_y/data" +""" +SIMULATION_DATA_FILE = "host" # opendc outputs in file host.parquet diff --git a/opendc-experiments/opendc-experiments-m3sa/src/main/python/utils.py b/opendc-experiments/opendc-experiments-m3sa/src/main/python/utils.py new file mode 100644 index 000000000..fd4fec2e7 --- /dev/null +++ b/opendc-experiments/opendc-experiments-m3sa/src/main/python/utils.py @@ -0,0 +1,25 @@ +import sys + +""" +Constants for the main.py file +""" + +SIMULATION_ANALYSIS_FOLDER_NAME = 'simulation-analysis' +EMISSIONS_ANALYSIS_FOLDER_NAME = 'carbon_emission' +ENERGY_ANALYSIS_FOLDER_NAME = 'power_draw' + +""" +Utility functions +""" + + +def clean_analysis_file(metric): + analysis_file_path = SIMULATION_ANALYSIS_FOLDER_NAME + "/" + if metric == "power_draw": + analysis_file_path += ENERGY_ANALYSIS_FOLDER_NAME + else: + analysis_file_path += EMISSIONS_ANALYSIS_FOLDER_NAME + analysis_file_path += "/analysis.txt" + + with open(analysis_file_path, "w") as f: + f.write("") diff --git a/settings.gradle.kts b/settings.gradle.kts index d01d12617..e079346b1 100644 --- a/settings.gradle.kts +++ b/settings.gradle.kts @@ -21,6 +21,7 @@ */ rootProject.name = "opendc" + include(":opendc-common") include(":opendc-compute:opendc-compute-api") include(":opendc-compute:opendc-compute-carbon") @@ -40,6 +41,7 @@ include(":opendc-experiments:opendc-experiments-workflow") include(":opendc-experiments:opendc-experiments-faas") include(":opendc-experiments:opendc-experiments-scenario") include(":opendc-experiments:opendc-experiments-tf20") +include(":opendc-experiments:opendc-experiments-m3sa") include(":opendc-web:opendc-web-proto") include(":opendc-web:opendc-web-server") include(":opendc-web:opendc-web-client") diff --git a/site/docs/documentation/Input/M3SA.md b/site/docs/documentation/Input/M3SA.md new file mode 100644 index 000000000..6c97d2077 --- /dev/null +++ 
b/site/docs/documentation/Input/M3SA.md @@ -0,0 +1,92 @@ +M3SA is set up using a JSON file. The Multi-Model is a top layer applied on top of the +simulator, +capable of combining the predictions of multiple models into a single tool. The Meta-Model is a model generated from the +Multi-Model, and predicts using the predictions of individual models. + +The Multi-Model's properties can be set using a JSON file. The JSON file must be linked to the scenario file and is +required +to follow the structure below. + +## Schema + +The schema for the M3SA setup file is provided in [schema](M3SASchema.md). +In the following section, we describe the different components of the schema. + +### General Structure + +| Variable | Type | Required? | Default | Possible Answers | Description | +|------------------------|---------|-----------|---------------|-------------------------------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| multimodel | boolean | no | true | true, false | Whether or not to build a Multi-Model. If set to false, a Meta-Model will not be computed either. | +| metamodel | boolean | no | true | true, false | Whether to build a Meta-Model. | +| metric | string | yes | N/A | N/A | What metric to be analyzed from the computed files. | +| current_unit | string | no | "" | any string (e.g., "CO2", "Wh") | The international system unit of the metric to be analyzed, without prefixes. e.g., "W" for Watt is ok, "kW" is not. | +| unit_scaling_magnitude | integer | no | 10 | -9, -6, -3, 1, 3, 6, 9 | The scaling factor to be applied to the metric (10^-9, 10^-6, 10^-3, 10^3, 10^6, 10^9). For no scaling, input 1. | +| window_size | integer | no | 1 | any positive, non-zero, integer | The size of the window, used for aggregating the chunks.
| +| window_function | string | no | "mean" | "mean", "median" | The function used by the window for aggregating the chunks (e.g., for "mean", the window will compute the mean of the samples). | +| meta_function | string | no | "mean" | "mean", "median" | The function used by the Meta-Model to be generated. For "mean", the Meta-Model takes the mean of the individual models, at the granularity established by the window-size. | +| samples_per_minute | double | no | N/A | any positive, non-zero, double | The number of samples per minute, in the prediction data (simulator export rate). e.g., "0.2" means 1 sample every 5 minutes, "20" means 20 samples per minute, or 1 sample every 3 seconds. | +| seed | integer | no | 0 | any integer >= 0 | The seed of the simulation. This must correspond to the seed from the output folder (from seed=x). | +| plot_type | string | no | "time_series" | "time_series", "cumulative", "cumulative_time_series" | The type of the plot, generated by the Multi-Model and Meta-Model. | +| plot_title | string | no | "" | any string | The title of the plot. | +| x_ticks_count | integer | no | None | any integer, larger than 0 | The number of ticks on x-axis. | +| y_ticks_count | integer | no | None | any integer, larger than 0 | The number of ticks on y-axis. | +| x_label | string | no | "Time" | any string | The label for the x-axis of the plot. | +| y_label | string | no | "Metric Unit" | any string | The label for the y-axis of the plot. | +| y_min | double | no | None | any positive, non-zero, double | The minimum value for the vertical axis of the plot. | +| y_max | double | no | None | any positive, non-zero, double | The maximum value for the vertical axis of the plot. | +| x_min | double | no | None | any positive, non-zero, double | The minimum value for the horizontal axis of the plot. | +| x_max | double | no | None | any positive, non-zero, double | The maximum value for the horizontal axis of the plot.
| + +## Examples + +In the following section, we discuss several examples of M3SA setup files. Any setup file can be verified +using the JSON schema defined in [schema](M3SASchema.md). + +### Simple + +The simplest M3SA setup that can be provided to OpenDC is shown below: + +```json +{ + "metric": "power_draw" +} +``` + +This configuration creates a Multi-Model and Meta-Model on the power_draw. All the other parameters are handled by the +default values, towards reducing the complexity of the setup. + +### Complex + +A more complex M3SA setup, where the user has more control over the generated output, is shown below: + +```json +{ + "multimodel": true, + "metamodel": false, + "metric": "carbon_emission", + "window_size": 10, + "window_function": "median", + "meta_function": "mean", + "samples_per_minute": 0.2, + "unit_scaling_magnitude": 1000, + "current_unit": "gCO2", + "seed": 0, + "plot_type": "cumulative_time_series", + "plot_title": "Carbon Emission Prediction", + "x_label": "Time [days]", + "y_label": "Carbon Emission [gCO2/kWh]", + "x_min": 0, + "x_max": 200, + "y_min": 500, + "y_max": 1000, + "x_ticks_count": 3, + "y_ticks_count": 3 +} +``` + +This configuration creates a Multi-Model and a Meta-Model which predicts the carbon_emission. The window size is 10, and +the aggregation function (for the window) is median. The Meta-Model function is mean. The data has been exported at a +rate of 0.2 samples per minute (i.e., a sample every 5 minutes). The plot type is cumulative_time_series, which starts +from a y-axis value of 500 and goes up to 1000. Therefore, the Multi-Model and the Meta-Model will show only +the values greater than y_min (500) and smaller than y_max (1000). Also, the x-axis will start from 0 and go up to 200, +with 3 ticks on the x-axis and 3 ticks on the y-axis.
diff --git a/site/docs/documentation/Input/M3SASchema.md b/site/docs/documentation/Input/M3SASchema.md new file mode 100644 index 000000000..5a3503ca3 --- /dev/null +++ b/site/docs/documentation/Input/M3SASchema.md @@ -0,0 +1,115 @@ +Below is the schema for the MultiMetaModel JSON file. This schema can be used to validate a MultiMetaModel setup file. +A setup file can be validated using a JSON schema validator, such as https://www.jsonschemavalidator.net/. + +```json +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "properties": { + "multimodel": { + "type": "boolean", + "default": true, + "description": "Whether or not to build a Multi-Model. If set to false, a Meta-Model will not be computed either." + }, + "metamodel": { + "type": "boolean", + "default": true, + "description": "Whether to build a Meta-Model." + }, + "metric": { + "type": "string", + "description": "What metric to be analyzed from the computed files." + }, + "current_unit": { + "type": "string", + "default": "", + "description": "The international system unit of the metric to be analyzed, without prefixes. e.g., 'W' for Watt is ok, 'kW' is not." + }, + "unit_scaling_magnitude": { + "type": "integer", + "default": 10, + "enum": [-9, -6, -3, 1, 3, 6, 9], + "description": "The scaling factor to be applied to the metric (10^-9, 10^-6, 10^-3, 10^3, 10^6, 10^9). For no scaling, input 1." + }, + "seed": { + "type": "integer", + "default": 0, + "minimum": 0, + "description": "The seed of the simulation. This must correspond to the seed from the output folder (from seed=x)." + }, + "window_size": { + "type": "integer", + "default": 1, + "minimum": 1, + "description": "The size of the window, used for aggregating the chunks." + }, + "window_function": { + "type": "string", + "default": "mean", + "enum": ["mean", "median"], + "description": "The function used by the window for aggregating the chunks (e.g., for 'mean', the window will compute the mean of the samples)."
+ }, + "meta_function": { + "type": "string", + "default": "mean", + "enum": ["mean", "median"], + "description": "The function used by the Meta-Model to be generated. For 'mean', the Meta-Model takes the mean of the individual models, at the granularity established by the window-size." + }, + "samples_per_minute": { + "type": "number", + "minimum": 0.0001, + "description": "The number of samples per minute, in the prediction data (simulator export rate). e.g., '0.2' means 1 sample every 5 minutes, '20' means 20 samples per minute, or 1 sample every 3 seconds." + }, + "plot_type": { + "type": "string", + "default": "time_series", + "enum": ["time_series", "cumulative", "cumulative_time_series"], + "description": "The type of the plot, generated by the Multi-Model and Meta-Model." + }, + "plot_title": { + "type": "string", + "default": "", + "description": "The title of the plot." + }, + "x_label": { + "type": "string", + "default": "Time", + "description": "The label for the x-axis of the plot." + }, + "y_label": { + "type": "string", + "default": "Metric Unit", + "description": "The label for the y-axis of the plot." + }, + "y_min": { + "type": "number", + "description": "The minimum value for the vertical axis of the plot." + }, + "y_max": { + "type": "number", + "description": "The maximum value for the vertical axis of the plot." + }, + "x_min": { + "type": "number", + "description": "The minimum value for the horizontal axis of the plot." + }, + "x_max": { + "type": "number", + "description": "The maximum value for the horizontal axis of the plot." + }, + "x_ticks_count": { + "type": "integer", + "minimum": 1, + "description": "The number of ticks on x-axis." + }, + "y_ticks_count": { + "type": "integer", + "minimum": 1, + "description": "The number of ticks on y-axis." 
+ } + }, + "required": [ + "metric" + ] +} +``` diff --git a/site/docs/documentation/Input/ScenarioSchema.md b/site/docs/documentation/Input/ScenarioSchema.md index bd800fd71..78ec55f71 100644 --- a/site/docs/documentation/Input/ScenarioSchema.md +++ b/site/docs/documentation/Input/ScenarioSchema.md @@ -75,7 +75,7 @@ A scenario file can be validated using a JSON schema validator, such as https:// "required": [ "topologies", "workloads", - "allocationPolicies", + "allocationPolicies" ] } ``` diff --git a/site/docs/tutorials/M3SA-integration-tutorial.mdx b/site/docs/tutorials/M3SA-integration-tutorial.mdx new file mode 100644 index 000000000..c09011c7d --- /dev/null +++ b/site/docs/tutorials/M3SA-integration-tutorial.mdx @@ -0,0 +1,188 @@ +--- +sidebar_position: 2 +title: M3SA Integration +hide_title: true +sidebar_label: M3SA Integration +description: M3SA Integration +--- + +# M3SA integration tutorial + +M3SA is a tool able to perform "Multi-Meta-Model Simulation Analysis". The tool is designed to analyze the output of +simulations by leveraging predictions, and to generate Multi-Model graphs, novel models, and more. M3SA can integrate with any +simulation infrastructure, as long as the integration steps are followed. + +We build our tool towards performance, scalability, and **universality**. In this document, we present the steps to +integrate our tool into your simulation infrastructure. + +If you are using OpenDC, none of the adaptation steps are necessary, yet they can be useful to understand the structure +of the tool. Step 3 is still necessary. + +## Step 1: Adapt the simulator output folder structure + +The first step is to adapt the I/O of your simulation to the format of our tool.
The output folder structure should have +the +following format: + +``` +[1] ── {simulation-folder-name} 📁 🔧 +[2] ├── inputs 📁 🔒 +[3] │ └── {m3sa-config-file}.json 📄 🔧 +[4] │ └── {other input files / folders} 🔧 +[5] ├── outputs 📁 🔒 +[6] │ ├── raw-output 📁 🔒 +[7] │ │ ├── 0 📁 🔒 +[8] │ │ │ └── seed={your_seed}🔒 +[9] │ │ │ └── {simulation_data_file}.parquet 📄 🔧 +[10] │ │ │ └── {any other files / folders} ⚪ +[11] │ │ ├── 1 📁 ⚪ 🔒 +[12] │ │ │ └── seed={your_seed} 📁 ⚪ 🔒 +[13] │ │ │ └── {simulation_data_file}.parquet 📄 ⚪ 🔧 +[14] │ │ │ └── {any other files / folders} ⚪󠁪 +[15] │ │ ├── metamodel 📁 ⚪ +[16] │ │ └── seed={your_seed} 📁 ⚪ +[17] │ │ └── {your_metric_name}.parquet 📄 ⚪ +[18] │ │ └── {any other files / folders} ⚪ +[19] │ └── {any other files / folders} 📁 ⚪ +[20]| └──{any other files / folders} 📁 ⚪ +``` + +📄 = file
+📁 = folder
+🔒 = fixed, the name of the folder/file must be the same.
+🔧 = flexible, the name of the folder/file can differ. However, the item must be present.
+⚪ = optional and flexible. The item can be absent.
+ +- [1] = the name of the analyzed folder. +- [2] = the _inputs_ folder, containing various inputs / configuration files. +- [3] = the configuration file for M3SA, flexible naming, but needs to be a JSON file +- [4],[10],[14],[18],[19],[20] = any other input files or folders. +- [5] = the _outputs_ folder, containing the raw-output. It can contain any other files or folders, besides the raw-output +folder. +After running a simulation, a "simulation-analysis" folder will also be generated in this folder. +- [6] = raw-output folder, containing the raw output of the simulation. +- [7],[11] = the IDs of the models. Must always start from zero. Possible values are 0, 1, 2 ... n, and "metamodel". The +id +of "metamodel" is reserved for the Meta-Model. Any simulation data in the respective folder will be treated as +Meta-Model data. +- [8],[12] = the seed of the simulation. The seed must be the same for [8], [12], and any other equivalent +folders. +- [9],[13] = the file in which the simulation data is stored. The name of the file can differ, but it must be a parquet +file. +- [15] = the Meta-Model folder, optional. If the folder is present, its data will be treated as Meta-Model data. +- [16] = the Meta-Model seed folder. The seed must be the same as the seed of the simulation. +- [17] = the Meta-Model output. The name of the file is of the type ```{your_metric_name}.parquet```. For example, if +you analyze CO2 emissions, the file will be named ```co2_emissions.parquet```. + +--- + +## Step 2: Adapt the simulation file format + +The simulator data file must be a 🪵 _parquet_ 🪵 file. + +The file must contain (at least) the columns: + +- timestamp: the timestamp, in milliseconds, of the data point (e.g., 30000, 60000, 90000) - the time unit is flexible. +- {metric_name}: the value of the metric at the given timestamp. This is the metric analyzed (e.g., CO2_emissions, +energy_usage).
+ +e.g., if you are analyzing the CO2 emissions of a datacenter, for a time period of 5 minutes, and the data is sampled +every 30 seconds, the file will look like this: + +| timestamp | co2_emissions | +|-----------|---------------| +| 30000 | 31.2 | +| 60000 | 31.4 | +| 90000 | 28.5 | +| 120000 | 31.8 | +| 150000 | 51.5 | +| 180000 | 51.2 | +| 210000 | 51.4 | +| 240000 | 21.5 | +| 270000 | 21.8 | +| 300000 | 21.2 | + +--- + +## Step 3: Running M3SA + +### 3.1 Set up the Simulator Specifics + +Update the simulation data file name ([9], [13], [17] from Step 1), in the +file ```simulator_specifics.py```, from ```opendc-experiments/opendc-experiments-m3sa/src/main/python/simulator_specifics.py```. + +### 3.2 Set up the Python program arguments + +### Arguments for Main.py Setup +Main.py takes two arguments: + +1. Argument 1 is the path to the output directory where M3SA output files will be stored. +2. Argument 2 is the path to the input file that contains the configuration of M3SA. + +e.g., + +```json +"simulation-123/outputs/" "simulation-123/inputs/m3sa-configurator.json" +``` + +### 3.3 Working directory Main.py Setup + +Make sure to set the working directory to the directory where the main.py file is located. + +e.g., + +``` +/your/path/to-analyzer/src/main/python +``` + +If you are using OpenDC, you can set the working directory to the following path: + +``` +/your/path/opendc/opendc-experiments/opendc-experiments-m3sa/src/main/python +``` + +--- + +## Optional: Step 4: Simulate and analyze, with one click + +The simulation and analysis can be executed as a single command; if no errors are encountered, from the user's +perspective, +this operation is atomic. We integrated M3SA into OpenDC to facilitate this process. + +To further integrate M3SA into any simulation infrastructure, M3SA needs to be called from +the simulation infrastructure, and provided with the following running setup: + +1. script language: Python +2. argument 1: the path of the output directory, in which M3SA output files will be stored +3.
argument 2: the path of the input file, containing the configuration of M3SA +4. other language-specific setup + +For example, the integration of the M3SA into OpenDC can be found +in ```Analyzr.kt``` from ```opendc-analyze/src/main/kotlin/Analyzr.kt```. +Below, we provide a snippet of the code: + +```kotlin +val ANALYSIS_SCRIPTS_DIRECTORY: String = "./opendc-analyze/src/main/python" +val ABSOLUTE_SCRIPT_PATH: String = + Path("$ANALYSIS_SCRIPTS_DIRECTORY/main.py").toAbsolutePath().normalize().toString() +val SCRIPT_LANGUAGE: String = "python3" + +fun analyzeResults(outputFolderPath: String, analyzerSetupPath: String) { + val process = ProcessBuilder( + SCRIPT_LANGUAGE, + ABSOLUTE_SCRIPT_PATH, + outputFolderPath, // argument 1 + analyzerSetupPath // argument 2 + ) + .directory(Path(ANALYSIS_SCRIPTS_DIRECTORY).toFile()) + .start() + + val exitCode = process.waitFor() + if (exitCode == 0) { + println("[Analyzr.kt says] Analysis completed successfully.") + } else { + val errors = process.errorStream.bufferedReader().readText() + println("[Analyzr.kt says] Exit code ${exitCode}; Error(s): $errors") + } +} +``` diff --git a/site/docs/tutorials/cloud-capacity-planning.mdx b/site/docs/tutorials/cloud-capacity-planning.mdx index a55c6a20c..df9cb5661 100644 --- a/site/docs/tutorials/cloud-capacity-planning.mdx +++ b/site/docs/tutorials/cloud-capacity-planning.mdx @@ -3,6 +3,7 @@ sidebar_position: 1 title: Cloud Capacity Planning hide_title: true sidebar_label: Cloud Capacity Planning +description: Cloud Capacity Planning --- # Cloud Capacity Planning Tutorial