From 5047e4a25a0814f96852882f02c4017e1d5f81e7 Mon Sep 17 00:00:00 2001 From: Dante Niewenhuis Date: Thu, 12 Sep 2024 15:32:47 +0200 Subject: [PATCH] Added max number of failures (#254) * Added a max failure for tasks. If tasks fail more times, they get cancelled * Added maxNumFailures to the frontend * Updated tests --- .../kotlin/org/opendc/compute/api/Task.kt | 5 ++++ .../failure/models/TraceBasedFailureModel.kt | 8 +++--- .../compute/service/ComputeService.java | 26 ++++++++++++++++--- .../opendc/compute/service/ServiceTask.java | 24 ++++++++++++----- .../compute/service/ComputeServiceTest.kt | 2 +- .../ComputeServiceProvisioningStep.kt | 2 ++ .../simulator/provisioner/ComputeSteps.kt | 3 ++- .../opendc/compute/simulator/SimHostTest.kt | 1 + .../compute/telemetry/ComputeMetricReader.kt | 25 +++++++++++------- .../export/parquet/DfltTaskExportColumns.kt | 16 +++++++++--- .../telemetry/table/TaskTableReader.kt | 8 +++++- .../base/runner/ScenarioReplayer.kt | 7 ----- .../experiments/base/runner/ScenarioRunner.kt | 1 + .../base/scenario/ExperimentFactories.kt | 13 +++++----- .../experiments/base/scenario/Scenario.kt | 13 +++++----- .../base/scenario/specs/ExperimentSpec.kt | 8 ++++-- .../base/scenario/specs/ScenarioSpec.kt | 1 + 17 files changed, 111 insertions(+), 52 deletions(-) diff --git a/opendc-compute/opendc-compute-api/src/main/kotlin/org/opendc/compute/api/Task.kt b/opendc-compute/opendc-compute-api/src/main/kotlin/org/opendc/compute/api/Task.kt index c9b0aeb35..23f2cb919 100644 --- a/opendc-compute/opendc-compute-api/src/main/kotlin/org/opendc/compute/api/Task.kt +++ b/opendc-compute/opendc-compute-api/src/main/kotlin/org/opendc/compute/api/Task.kt @@ -43,6 +43,11 @@ public interface Task : Resource { */ public val state: TaskState + /** + * The number of times a Task has been stopped due to failures + */ + public val numFailures: Int + /** * The most recent moment in time when the task was launched. */ diff --git a/opendc-compute/opendc-compute-failure/src/main/kotlin/org/opendc/compute/failure/models/TraceBasedFailureModel.kt b/opendc-compute/opendc-compute-failure/src/main/kotlin/org/opendc/compute/failure/models/TraceBasedFailureModel.kt index 28acd49a3..f1ff09e9f 100644 --- a/opendc-compute/opendc-compute-failure/src/main/kotlin/org/opendc/compute/failure/models/TraceBasedFailureModel.kt +++ b/opendc-compute/opendc-compute-failure/src/main/kotlin/org/opendc/compute/failure/models/TraceBasedFailureModel.kt @@ -23,7 +23,6 @@ package org.opendc.compute.failure.models import kotlinx.coroutines.delay -import kotlinx.coroutines.launch import org.opendc.compute.service.ComputeService import org.opendc.trace.Trace import org.opendc.trace.conv.FAILURE_DURATION @@ -79,12 +78,11 @@ public class TraceBasedFailureModel( override suspend fun runInjector() { do { for (failure in failureList) { - delay(failure.failureInterval - clock.millis()) + delay(failure.failureInterval) val victims = victimSelector.select(hosts, failure.failureIntensity) - scope.launch { - fault.apply(victims, failure.failureDuration) - } + + fault.apply(victims, failure.failureDuration) } } while (repeat) } diff --git a/opendc-compute/opendc-compute-service/src/main/java/org/opendc/compute/service/ComputeService.java b/opendc-compute/opendc-compute-service/src/main/java/org/opendc/compute/service/ComputeService.java index a64f6a4e3..ad01ee579 100644 --- a/opendc-compute/opendc-compute-service/src/main/java/org/opendc/compute/service/ComputeService.java +++ b/opendc-compute/opendc-compute-service/src/main/java/org/opendc/compute/service/ComputeService.java @@ -82,6 +82,8 @@ public final class ComputeService implements AutoCloseable { */ private final SplittableRandom random = new SplittableRandom(0); + private final int maxNumFailures; + /** * A flag to indicate that the service is closed. */ @@ -162,6 +164,7 @@ public void onStateChanged(@NotNull Host host, @NotNull Task task, @NotNull Task serviceTask.setState(newState); + // TODO: move the removal of tasks when max Failures are reached to here if (newState == TaskState.TERMINATED || newState == TaskState.DELETED || newState == TaskState.ERROR) { LOGGER.info("task {} {} {} finished", task.getUid(), task.getName(), task.getFlavor()); @@ -196,10 +199,11 @@ public void onStateChanged(@NotNull Host host, @NotNull Task task, @NotNull Task /** * Construct a {@link ComputeService} instance. */ - ComputeService(Dispatcher dispatcher, ComputeScheduler scheduler, Duration quantum) { + ComputeService(Dispatcher dispatcher, ComputeScheduler scheduler, Duration quantum, int maxNumFailures) { this.clock = dispatcher.getTimeSource(); this.scheduler = scheduler; this.pacer = new Pacer(dispatcher, quantum.toMillis(), (time) -> doSchedule()); + this.maxNumFailures = maxNumFailures; } /** @@ -365,8 +369,16 @@ private void doSchedule() { } final ServiceTask task = request.task; - // Check if all dependencies are met - // otherwise continue + + // Remove task from scheduling if it has failed too many times + if (task.getNumFailures() > maxNumFailures) { + LOGGER.warn("Failed to spawn {}: Task has failed more than the allowed {} times", task, maxNumFailures); + + taskQueue.poll(); + tasksPending--; + task.setState(TaskState.TERMINATED); + continue; + } final ServiceFlavor flavor = task.getFlavor(); final HostView hv = scheduler.select(request.task); @@ -425,6 +437,7 @@ public static class Builder { private final Dispatcher dispatcher; private final ComputeScheduler computeScheduler; private Duration quantum = Duration.ofMinutes(5); + private int maxNumFailures = 10; Builder(Dispatcher dispatcher, ComputeScheduler computeScheduler) { this.dispatcher = dispatcher; @@ -439,11 +452,16 @@ public Builder withQuantum(Duration quantum) { return this; } + public Builder withMaxNumFailures(int maxNumFailures) { + this.maxNumFailures = maxNumFailures; + return this; + } + /** * Build a {@link ComputeService}. */ public ComputeService build() { - return new ComputeService(dispatcher, computeScheduler, quantum); + return new ComputeService(dispatcher, computeScheduler, quantum, maxNumFailures); } } diff --git a/opendc-compute/opendc-compute-service/src/main/java/org/opendc/compute/service/ServiceTask.java b/opendc-compute/opendc-compute-service/src/main/java/org/opendc/compute/service/ServiceTask.java index e981921a4..f0e2a82eb 100644 --- a/opendc-compute/opendc-compute-service/src/main/java/org/opendc/compute/service/ServiceTask.java +++ b/opendc-compute/opendc-compute-service/src/main/java/org/opendc/compute/service/ServiceTask.java @@ -60,6 +60,8 @@ public final class ServiceTask implements Task { Host host = null; private ComputeService.SchedulingRequest request = null; + private int numFailures = 0; + ServiceTask( ComputeService service, UUID uid, @@ -232,14 +234,19 @@ public String toString() { return "Task[uid=" + uid + ",name=" + name + ",state=" + state + "]"; } - void setState(TaskState state) { - if (this.state != state) { - for (TaskWatcher watcher : watchers) { - watcher.onStateChanged(this, state); - } + void setState(TaskState newState) { + if (this.state == newState) { + return; + } + + for (TaskWatcher watcher : watchers) { + watcher.onStateChanged(this, newState); + } + if (newState == TaskState.ERROR) { + this.numFailures++; } - this.state = state; + this.state = newState; } /** @@ -252,4 +259,9 @@ private void cancelProvisioningRequest() { request.isCancelled = true; } } + + @Override + public int getNumFailures() { + return this.numFailures; + } } diff --git a/opendc-compute/opendc-compute-service/src/test/kotlin/org/opendc/compute/service/ComputeServiceTest.kt b/opendc-compute/opendc-compute-service/src/test/kotlin/org/opendc/compute/service/ComputeServiceTest.kt index e48244f07..fa5b775e0 100644 --- a/opendc-compute/opendc-compute-service/src/test/kotlin/org/opendc/compute/service/ComputeServiceTest.kt +++ b/opendc-compute/opendc-compute-service/src/test/kotlin/org/opendc/compute/service/ComputeServiceTest.kt @@ -68,7 +68,7 @@ internal class ComputeServiceTest { filters = listOf(ComputeFilter(), VCpuFilter(allocationRatio = 1.0), RamFilter(allocationRatio = 1.0)), weighers = listOf(RamWeigher()), ) - service = ComputeService(scope.dispatcher, computeScheduler, Duration.ofMinutes(5)) + service = ComputeService(scope.dispatcher, computeScheduler, Duration.ofMinutes(5), 10) } @Test diff --git a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/provisioner/ComputeServiceProvisioningStep.kt b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/provisioner/ComputeServiceProvisioningStep.kt index 484ae7ca0..645c9d462 100644 --- a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/provisioner/ComputeServiceProvisioningStep.kt +++ b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/provisioner/ComputeServiceProvisioningStep.kt @@ -37,11 +37,13 @@ public class ComputeServiceProvisioningStep internal constructor( private val serviceDomain: String, private val scheduler: (ProvisioningContext) -> ComputeScheduler, private val schedulingQuantum: Duration, + private val maxNumFailures: Int, ) : ProvisioningStep { override fun apply(ctx: ProvisioningContext): AutoCloseable { val service = ComputeService.builder(ctx.dispatcher, scheduler(ctx)) .withQuantum(schedulingQuantum) + .withMaxNumFailures(maxNumFailures) .build() ctx.registry.register(serviceDomain, ComputeService::class.java, service) diff --git a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/provisioner/ComputeSteps.kt b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/provisioner/ComputeSteps.kt index 7a6b69278..afde82190 100644 --- a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/provisioner/ComputeSteps.kt +++ b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/provisioner/ComputeSteps.kt @@ -42,8 +42,9 @@ public fun setupComputeService( serviceDomain: String, scheduler: (ProvisioningContext) -> ComputeScheduler, schedulingQuantum: Duration = Duration.ofMinutes(5), + maxNumFailures: Int = 10, ): ProvisioningStep { - return ComputeServiceProvisioningStep(serviceDomain, scheduler, schedulingQuantum) + return ComputeServiceProvisioningStep(serviceDomain, scheduler, schedulingQuantum, maxNumFailures) } /** diff --git a/opendc-compute/opendc-compute-simulator/src/test/kotlin/org/opendc/compute/simulator/SimHostTest.kt b/opendc-compute/opendc-compute-simulator/src/test/kotlin/org/opendc/compute/simulator/SimHostTest.kt index 9c54f3129..77cd22911 100644 --- a/opendc-compute/opendc-compute-simulator/src/test/kotlin/org/opendc/compute/simulator/SimHostTest.kt +++ b/opendc-compute/opendc-compute-simulator/src/test/kotlin/org/opendc/compute/simulator/SimHostTest.kt @@ -365,6 +365,7 @@ internal class SimHostTest { override val name: String, override val flavor: Flavor, override val image: Image, + override val numFailures: Int = 10, ) : Task { override val labels: Map = emptyMap() diff --git a/opendc-compute/opendc-compute-telemetry/src/main/kotlin/org/opendc/compute/telemetry/ComputeMetricReader.kt b/opendc-compute/opendc-compute-telemetry/src/main/kotlin/org/opendc/compute/telemetry/ComputeMetricReader.kt index 5bd237fdd..a98a1a6dd 100644 --- a/opendc-compute/opendc-compute-telemetry/src/main/kotlin/org/opendc/compute/telemetry/ComputeMetricReader.kt +++ b/opendc-compute/opendc-compute-telemetry/src/main/kotlin/org/opendc/compute/telemetry/ComputeMetricReader.kt @@ -30,6 +30,7 @@ import mu.KotlinLogging import org.opendc.common.Dispatcher import org.opendc.common.asCoroutineDispatcher import org.opendc.compute.api.Task +import org.opendc.compute.api.TaskState import org.opendc.compute.carbon.CarbonTrace import org.opendc.compute.service.ComputeService import org.opendc.compute.service.driver.Host @@ -422,11 +423,11 @@ public class ComputeMetricReader( */ private class TaskTableReaderImpl( private val service: ComputeService, - task: Task, + private val task: Task, private val startTime: Duration = Duration.ofMillis(0), ) : TaskTableReader { override fun copy(): TaskTableReader { - val newTaskTable = TaskTableReaderImpl(service, _task) + val newTaskTable = TaskTableReaderImpl(service, task) newTaskTable.setValues(this) return newTaskTable @@ -448,14 +449,14 @@ public class ComputeMetricReader( _provisionTime = table.provisionTime _bootTime = table.bootTime _bootTimeAbsolute = table.bootTimeAbsolute - } - private val _task = task + _taskState = table.taskState + } /** * The static information about this task. */ - override val task = + override val taskInfo = TaskInfo( task.uid.toString(), task.name, @@ -527,18 +528,22 @@ public class ComputeMetricReader( get() = _bootTimeAbsolute private var _bootTimeAbsolute: Instant? = null + override val taskState: TaskState? + get() = _taskState + private var _taskState: TaskState? = null + /** * Record the next cycle. */ fun record(now: Instant) { - val newHost = service.lookupHost(_task) + val newHost = service.lookupHost(task) if (newHost != null && newHost.uid != _host?.uid) { _host = newHost host = HostInfo(newHost.uid.toString(), newHost.name, "x86", newHost.model.cpuCount, newHost.model.memoryCapacity) } - val cpuStats = _host?.getCpuStats(_task) - val sysStats = _host?.getSystemStats(_task) + val cpuStats = _host?.getCpuStats(task) + val sysStats = _host?.getSystemStats(task) _timestamp = now _timestampAbsolute = now + startTime @@ -550,9 +555,11 @@ public class ComputeMetricReader( _cpuLostTime = cpuStats?.lostTime ?: 0 _uptime = sysStats?.uptime?.toMillis() ?: 0 _downtime = sysStats?.downtime?.toMillis() ?: 0 - _provisionTime = _task.launchedAt + _provisionTime = task.launchedAt _bootTime = sysStats?.bootTime + _taskState = task.state + if (sysStats != null) { _bootTimeAbsolute = sysStats.bootTime + startTime } else { diff --git a/opendc-compute/opendc-compute-telemetry/src/main/kotlin/org/opendc/compute/telemetry/export/parquet/DfltTaskExportColumns.kt b/opendc-compute/opendc-compute-telemetry/src/main/kotlin/org/opendc/compute/telemetry/export/parquet/DfltTaskExportColumns.kt index 5bb7dd1f7..9e86e1a31 100644 --- a/opendc-compute/opendc-compute-telemetry/src/main/kotlin/org/opendc/compute/telemetry/export/parquet/DfltTaskExportColumns.kt +++ b/opendc-compute/opendc-compute-telemetry/src/main/kotlin/org/opendc/compute/telemetry/export/parquet/DfltTaskExportColumns.kt @@ -64,7 +64,7 @@ public object DfltTaskExportColumns { Types.required(BINARY) .`as`(LogicalTypeAnnotation.stringType()) .named("task_id"), - ) { Binary.fromString(it.task.id) } + ) { Binary.fromString(it.taskInfo.id) } public val HOST_ID: ExportColumn = ExportColumn( @@ -80,17 +80,17 @@ public object DfltTaskExportColumns { Types.required(BINARY) .`as`(LogicalTypeAnnotation.stringType()) .named("task_name"), - ) { Binary.fromString(it.task.name) } + ) { Binary.fromString(it.taskInfo.name) } public val CPU_COUNT: ExportColumn = ExportColumn( field = Types.required(INT32).named("cpu_count"), - ) { it.task.cpuCount } + ) { it.taskInfo.cpuCount } public val MEM_CAPACITY: ExportColumn = ExportColumn( field = Types.required(INT64).named("mem_capacity"), - ) { it.task.memCapacity } + ) { it.taskInfo.memCapacity } public val CPU_LIMIT: ExportColumn = ExportColumn( @@ -142,6 +142,14 @@ public object DfltTaskExportColumns { field = Types.optional(INT64).named("boot_time_absolute"), ) { it.bootTimeAbsolute?.toEpochMilli() } + public val TASK_STATE: ExportColumn = + ExportColumn( + field = + Types.optional(BINARY) + .`as`(LogicalTypeAnnotation.stringType()) + .named("task_state"), + ) { Binary.fromString(it.taskState?.name) } + /** * The columns that are always included in the output file. */ diff --git a/opendc-compute/opendc-compute-telemetry/src/main/kotlin/org/opendc/compute/telemetry/table/TaskTableReader.kt b/opendc-compute/opendc-compute-telemetry/src/main/kotlin/org/opendc/compute/telemetry/table/TaskTableReader.kt index 1e38d5eba..ae7f7a490 100644 --- a/opendc-compute/opendc-compute-telemetry/src/main/kotlin/org/opendc/compute/telemetry/table/TaskTableReader.kt +++ b/opendc-compute/opendc-compute-telemetry/src/main/kotlin/org/opendc/compute/telemetry/table/TaskTableReader.kt @@ -22,6 +22,7 @@ package org.opendc.compute.telemetry.table +import org.opendc.compute.api.TaskState import org.opendc.compute.telemetry.export.parquet.DfltTaskExportColumns import org.opendc.trace.util.parquet.exporter.Exportable import java.time.Instant @@ -47,7 +48,7 @@ public interface TaskTableReader : Exportable { /** * The [TaskInfo] of the task to which the row belongs to. */ - public val task: TaskInfo + public val taskInfo: TaskInfo /** * The [HostInfo] of the host on which the task is hosted or `null` if it has no host. @@ -103,6 +104,11 @@ public interface TaskTableReader : Exportable { * The duration (in seconds) of CPU time that was lost due to interference. */ public val cpuLostTime: Long + + /** + * The state of the task + */ + public val taskState: TaskState? } // Loads the default export fields for deserialization whenever this file is loaded. diff --git a/opendc-experiments/opendc-experiments-base/src/main/kotlin/org/opendc/experiments/base/runner/ScenarioReplayer.kt b/opendc-experiments/opendc-experiments-base/src/main/kotlin/org/opendc/experiments/base/runner/ScenarioReplayer.kt index f5829f6d0..49fa409e5 100644 --- a/opendc-experiments/opendc-experiments-base/src/main/kotlin/org/opendc/experiments/base/runner/ScenarioReplayer.kt +++ b/opendc-experiments/opendc-experiments-base/src/main/kotlin/org/opendc/experiments/base/runner/ScenarioReplayer.kt @@ -125,13 +125,6 @@ public suspend fun ComputeService.replay( val checkpointDuration = checkpointModelSpec?.checkpointDuration ?: 0L val checkpointIntervalScaling = checkpointModelSpec?.checkpointIntervalScaling ?: 1.0 -// val workload = SimRuntimeWorkload( -// entry.duration, -// 1.0, -// checkpointTime, -// checkpointWait -// ) - val workload = entry.trace.createWorkload(start, checkpointInterval, checkpointDuration, checkpointIntervalScaling) val meta = mutableMapOf("workload" to workload) diff --git a/opendc-experiments/opendc-experiments-base/src/main/kotlin/org/opendc/experiments/base/runner/ScenarioRunner.kt b/opendc-experiments/opendc-experiments-base/src/main/kotlin/org/opendc/experiments/base/runner/ScenarioRunner.kt index 074fe49cc..2bd9dfa30 100644 --- a/opendc-experiments/opendc-experiments-base/src/main/kotlin/org/opendc/experiments/base/runner/ScenarioRunner.kt +++ b/opendc-experiments/opendc-experiments-base/src/main/kotlin/org/opendc/experiments/base/runner/ScenarioRunner.kt @@ -87,6 +87,7 @@ public fun runScenario( setupComputeService( serviceDomain, { createComputeScheduler(scenario.allocationPolicySpec.policyType, Random(it.seeder.nextLong())) }, + maxNumFailures = scenario.maxNumFailures, ), setupHosts(serviceDomain, topology, optimize = true), ) diff --git a/opendc-experiments/opendc-experiments-base/src/main/kotlin/org/opendc/experiments/base/scenario/ExperimentFactories.kt b/opendc-experiments/opendc-experiments-base/src/main/kotlin/org/opendc/experiments/base/scenario/ExperimentFactories.kt index b364ffe66..ca0578a2f 100644 --- a/opendc-experiments/opendc-experiments-base/src/main/kotlin/org/opendc/experiments/base/scenario/ExperimentFactories.kt +++ b/opendc-experiments/opendc-experiments-base/src/main/kotlin/org/opendc/experiments/base/scenario/ExperimentFactories.kt @@ -71,18 +71,19 @@ public fun getExperiment(experimentSpec: ExperimentSpec): List { val scenario = Scenario( id = scenarioID, + name = scenarioID.toString(), + outputFolder = outputFolder, + runs = experimentSpec.runs, + initialSeed = experimentSpec.initialSeed, + computeExportConfig = scenarioSpec.computeExportConfig, topologySpec = scenarioSpec.topology, workloadSpec = scenarioSpec.workload, allocationPolicySpec = scenarioSpec.allocationPolicy, + exportModelSpec = scenarioSpec.exportModel, failureModelSpec = scenarioSpec.failureModel, checkpointModelSpec = scenarioSpec.checkpointModel, carbonTracePath = scenarioSpec.carbonTracePath, - exportModelSpec = scenarioSpec.exportModel, - outputFolder = outputFolder, - name = scenarioID.toString(), - runs = experimentSpec.runs, - initialSeed = experimentSpec.initialSeed, - computeExportConfig = scenarioSpec.computeExportConfig, + maxNumFailures = scenarioSpec.maxNumFailures, ) trackScenario(scenarioSpec, outputFolder) scenarios.add(scenario) diff --git a/opendc-experiments/opendc-experiments-base/src/main/kotlin/org/opendc/experiments/base/scenario/Scenario.kt b/opendc-experiments/opendc-experiments-base/src/main/kotlin/org/opendc/experiments/base/scenario/Scenario.kt index c31f03005..91cd09ba7 100644 --- a/opendc-experiments/opendc-experiments-base/src/main/kotlin/org/opendc/experiments/base/scenario/Scenario.kt +++ b/opendc-experiments/opendc-experiments-base/src/main/kotlin/org/opendc/experiments/base/scenario/Scenario.kt @@ -46,16 +46,17 @@ import org.opendc.experiments.base.scenario.specs.WorkloadSpec */ public data class Scenario( var id: Int = -1, + val name: String = "", + val outputFolder: String = "output", + val runs: Int = 1, + val initialSeed: Int = 0, + val computeExportConfig: ComputeExportConfig, val topologySpec: ScenarioTopologySpec, val workloadSpec: WorkloadSpec, val allocationPolicySpec: AllocationPolicySpec, + val exportModelSpec: ExportModelSpec = ExportModelSpec(), val failureModelSpec: FailureModelSpec?, val checkpointModelSpec: CheckpointModelSpec?, val carbonTracePath: String? = null, - val exportModelSpec: ExportModelSpec = ExportModelSpec(), - val outputFolder: String = "output", - val computeExportConfig: ComputeExportConfig, - val name: String = "", - val runs: Int = 1, - val initialSeed: Int = 0, + val maxNumFailures: Int = 10, ) diff --git a/opendc-experiments/opendc-experiments-base/src/main/kotlin/org/opendc/experiments/base/scenario/specs/ExperimentSpec.kt b/opendc-experiments/opendc-experiments-base/src/main/kotlin/org/opendc/experiments/base/scenario/specs/ExperimentSpec.kt index db26849e4..67d45e89c 100644 --- a/opendc-experiments/opendc-experiments-base/src/main/kotlin/org/opendc/experiments/base/scenario/specs/ExperimentSpec.kt +++ b/opendc-experiments/opendc-experiments-base/src/main/kotlin/org/opendc/experiments/base/scenario/specs/ExperimentSpec.kt @@ -57,6 +57,7 @@ public data class ExperimentSpec( val checkpointModels: Set = setOf(null), val carbonTracePaths: Set = setOf(null), val computeExportConfig: ComputeExportConfig = ComputeExportConfig.ALL_COLUMNS, + val maxNumFailures: Set = setOf(10), ) { init { require(runs > 0) { "The number of runs should always be positive" } @@ -73,7 +74,8 @@ public data class ExperimentSpec( public fun getCartesian(): Sequence { return sequence { - val checkpointDiv = carbonTracePaths.size + val carbonTracePathDiv = maxNumFailures.size + val checkpointDiv = carbonTracePathDiv * carbonTracePaths.size val failureDiv = checkpointDiv * checkpointModels.size val exportDiv = failureDiv * failureModels.size val allocationDiv = exportDiv * exportModels.size @@ -88,6 +90,7 @@ public data class ExperimentSpec( val failureModelList = failureModels.toList() val checkpointModelList = checkpointModels.toList() val carbonTracePathList = carbonTracePaths.toList() + val maxNumFailuresList = maxNumFailures.toList() for (i in 0 until numScenarios) { yield( @@ -102,7 +105,8 @@ public data class ExperimentSpec( exportModelList[(i / exportDiv) % exportModelList.size], failureModelList[(i / failureDiv) % failureModelList.size], checkpointModelList[(i / checkpointDiv) % checkpointModelList.size], - carbonTracePathList[i % carbonTracePathList.size], + carbonTracePathList[(i / carbonTracePathDiv) % carbonTracePathList.size], + maxNumFailuresList[i % maxNumFailuresList.size], ), ) } diff --git a/opendc-experiments/opendc-experiments-base/src/main/kotlin/org/opendc/experiments/base/scenario/specs/ScenarioSpec.kt b/opendc-experiments/opendc-experiments-base/src/main/kotlin/org/opendc/experiments/base/scenario/specs/ScenarioSpec.kt index 41e9a885d..d7fdb8f48 100644 --- a/opendc-experiments/opendc-experiments-base/src/main/kotlin/org/opendc/experiments/base/scenario/specs/ScenarioSpec.kt +++ b/opendc-experiments/opendc-experiments-base/src/main/kotlin/org/opendc/experiments/base/scenario/specs/ScenarioSpec.kt @@ -38,4 +38,5 @@ public data class ScenarioSpec( val failureModel: FailureModelSpec? = null, val checkpointModel: CheckpointModelSpec? = null, val carbonTracePath: String? = null, + val maxNumFailures: Int = 10, )