2 changes: 2 additions & 0 deletions .gitignore
@@ -60,3 +60,5 @@ flake.lock
/known-docker-images.txt
/test/sqllogictest/sqlite
my-local-mz/
/test/orchestratord/cluster.yaml
uv.lock
40 changes: 16 additions & 24 deletions ci/nightly/pipeline.template.yml
@@ -2367,6 +2367,7 @@ steps:
steps:
- id: orchestratord-defaults
label: "Orchestratord test (defaults from documentation)"
artifact_paths: ["mz_debug_*.zip"]
depends_on: devel-docker-tags
timeout_in_minutes: 120
plugins:
@@ -2379,6 +2380,7 @@

- id: orchestratord-default-properties
label: "Orchestratord test (defaults for properties)"
artifact_paths: ["mz_debug_*.zip"]
depends_on: devel-docker-tags
timeout_in_minutes: 120
plugins:
@@ -2391,6 +2393,7 @@

- id: orchestratord-individual
label: "Orchestratord test (individual properties)"
artifact_paths: ["mz_debug_*.zip"]
depends_on: devel-docker-tags
timeout_in_minutes: 120
plugins:
@@ -2403,76 +2406,65 @@

- id: orchestratord-combine
label: "Orchestratord test (combine properties)"
artifact_paths: ["mz_debug_*.zip"]
depends_on: build-aarch64
timeout_in_minutes: 120
plugins:
- ./ci/plugins/mzcompose:
composition: orchestratord
args: [--action=noop, --properties=combine, --runtime=3600, --recreate-cluster]
args: [--action=noop, --properties=combine, --runtime=1800, --recreate-cluster]
ci-builder: stable
agents:
queue: hetzner-aarch64-16cpu-32gb

- id: orchestratord-upgrade-individual
label: "Orchestratord test (upgrade, individual props)"
artifact_paths: ["mz_debug_*.zip"]
depends_on: devel-docker-tags
timeout_in_minutes: 120
plugins:
- ./ci/plugins/mzcompose:
composition: orchestratord
args: [--action=upgrade, --properties=individual, --runtime=3600, --recreate-cluster]
args: [--action=upgrade, --properties=individual, --runtime=1800, --recreate-cluster]
ci-builder: stable
env:
# Old versions are not on GHCR yet
MZ_GHCR: 0
agents:
queue: hetzner-aarch64-8cpu-16gb
skip: "https://github.com/MaterializeInc/materialize/pull/34214"
queue: hetzner-aarch64-16cpu-32gb

- id: orchestratord-upgrade-combine
label: "Orchestratord test (upgrade, combine props)"
artifact_paths: ["mz_debug_*.zip"]
depends_on: devel-docker-tags
timeout_in_minutes: 120
plugins:
- ./ci/plugins/mzcompose:
composition: orchestratord
args: [--action=upgrade, --properties=combine, --runtime=3600, --recreate-cluster]
args: [--action=upgrade, --properties=combine, --runtime=1800, --recreate-cluster]
ci-builder: stable
env:
# Old versions are not on GHCR yet
MZ_GHCR: 0
agents:
queue: hetzner-aarch64-8cpu-16gb
skip: "https://github.com/MaterializeInc/materialize/pull/34214"
queue: hetzner-aarch64-16cpu-32gb

- id: orchestratord-upgrade-chain-individual
label: "Orchestratord test (upgrade chain, individual props)"
artifact_paths: ["mz_debug_*.zip"]
depends_on: devel-docker-tags
timeout_in_minutes: 120
plugins:
- ./ci/plugins/mzcompose:
composition: orchestratord
args: [--action=upgrade-chain, --properties=individual, --runtime=3600, --recreate-cluster]
args: [--action=upgrade-chain, --properties=individual, --runtime=1800, --recreate-cluster]
ci-builder: stable
env:
# Old versions are not on GHCR yet
MZ_GHCR: 0
agents:
queue: hetzner-aarch64-8cpu-16gb
skip: "https://github.com/MaterializeInc/materialize/pull/34214"
queue: hetzner-aarch64-16cpu-32gb

- id: orchestratord-upgrade-chain-combine
label: "Orchestratord test (upgrade chain, combine props)"
artifact_paths: ["mz_debug_*.zip"]
depends_on: devel-docker-tags
timeout_in_minutes: 120
plugins:
- ./ci/plugins/mzcompose:
composition: orchestratord
args: [--action=upgrade-chain, --properties=combine, --runtime=3600, --recreate-cluster]
args: [--action=upgrade-chain, --properties=combine, --runtime=1800, --recreate-cluster]
ci-builder: stable
env:
# Old versions are not on GHCR yet
MZ_GHCR: 0
agents:
queue: hetzner-aarch64-16cpu-32gb
skip: "https://github.com/MaterializeInc/materialize/pull/34214"
31 changes: 31 additions & 0 deletions src/cloud-resources/src/crd/materialize.rs
@@ -66,6 +66,23 @@ pub mod v1alpha1 {
#[default]
WaitUntilReady,

/// Create a new generation of pods, leaving the old generation as the serving generation
/// until the user manually promotes the new generation.
///
/// Users can promote the new generation at any time, even if the new generation pods are
/// not fully caught up, by setting `forcePromote` to the same value as `requestRollout` in
/// the Materialize spec.
///
/// {{<warning>}}
/// Do not leave new generations unpromoted indefinitely.
///
/// The new generation keeps read holds open, which prevents compaction. Once the new
/// generation is promoted or cancelled, those read holds are released. If it is left
/// unpromoted for an extended time, uncompacted data can accumulate and cause extreme
/// deletion load on the metadata backend database when the generation is finally
/// promoted or cancelled.
/// {{</warning>}}
ManuallyPromote,

/// {{<warning>}}
/// THIS WILL CAUSE YOUR MATERIALIZE INSTANCE TO BE UNAVAILABLE FOR SOME TIME!!!
///
@@ -429,6 +446,20 @@ pub mod v1alpha1 {
false
}

pub fn is_ready_to_promote(&self, resources_hash: &str) -> bool {
let Some(status) = self.status.as_ref() else {
return false;
};
if status.conditions.is_empty() {
return false;
}
status
.conditions
.iter()
.any(|condition| condition.reason == "ReadyToPromote")
&& &status.resources_hash == resources_hash
}

pub fn is_promoting(&self) -> bool {
let Some(status) = self.status.as_ref() else {
return false;
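For orientation, here is a hedged sketch of how an operator might use the new strategy end to end. Only `requestRollout` and `forcePromote` are named in the doc comment above; the `rolloutStrategy` field name, the API group/version, and every value below are assumptions or placeholders rather than something this PR defines:

# Sketch only: apart from requestRollout/forcePromote, field names and values are assumed.
apiVersion: materialize.cloud/v1alpha1        # assumed API group/version
kind: Materialize
metadata:
  name: example                               # hypothetical instance name
spec:
  rolloutStrategy: ManuallyPromote            # assumed serialized name for the new variant
  requestRollout: 22222222-2222-2222-2222-222222222222    # placeholder rollout id
  # To promote the waiting generation at any time, even before it has fully caught up,
  # set forcePromote to the same value as requestRollout:
  forcePromote: 22222222-2222-2222-2222-222222222222

Until that is done, the new generation runs alongside the serving generation and keeps the read holds described in the warning above.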
91 changes: 63 additions & 28 deletions src/orchestratord/src/controller/materialize.rs
@@ -604,34 +604,38 @@ impl k8s_controller::Context for Context {
// replace_status, but this is fine because we already
// extracted all of the information we want from the spec
// earlier.
let mz = self
.update_status(
&mz_api,
mz,
MaterializeStatus {
active_generation,
// don't update the reconciliation id yet,
// because the rollout hasn't yet completed. if
// we fail later on, we want to ensure that the
// rollout gets retried.
last_completed_rollout_request: status.last_completed_rollout_request,
resource_id: status.resource_id,
resources_hash: String::new(),
conditions: vec![Condition {
type_: "UpToDate".into(),
status: "Unknown".into(),
last_transition_time: Time(chrono::offset::Utc::now()),
message: format!(
"Applying changes for generation {desired_generation}"
),
observed_generation: mz.meta().generation,
reason: "Applying".into(),
}],
},
active_generation != desired_generation,
)
.await?;
let mz = &mz;
let mz = if mz.is_ready_to_promote(&resources_hash) {
mz
} else {
&self
.update_status(
&mz_api,
mz,
MaterializeStatus {
active_generation,
// don't update the reconciliation id yet,
// because the rollout hasn't yet completed. if
// we fail later on, we want to ensure that the
// rollout gets retried.
last_completed_rollout_request: status
.last_completed_rollout_request,
resource_id: status.resource_id,
resources_hash: String::new(),
conditions: vec![Condition {
type_: "UpToDate".into(),
status: "Unknown".into(),
last_transition_time: Time(chrono::offset::Utc::now()),
message: format!(
"Applying changes for generation {desired_generation}"
),
observed_generation: mz.meta().generation,
reason: "Applying".into(),
}],
},
active_generation != desired_generation,
)
.await?
};
let status = mz.status();

if mz.spec.rollout_strategy
Expand All @@ -655,6 +659,37 @@ impl k8s_controller::Context for Context {
Ok(Some(action))
}
Ok(None) => {
if mz.spec.rollout_strategy == MaterializeRolloutStrategy::ManuallyPromote
&& !mz.should_force_promote()
{
trace!(
"Ready to promote, but not promoting because the instance is configured with ManuallyPromote rollout strategy."
);
self.update_status(
&mz_api,
mz,
MaterializeStatus {
active_generation,
last_completed_rollout_request: status
.last_completed_rollout_request,
resource_id: status.resource_id,
resources_hash,
conditions: vec![Condition {
type_: "UpToDate".into(),
status: "Unknown".into(),
last_transition_time: Time(chrono::offset::Utc::now()),
message: format!(
"Ready to promote generation {desired_generation}"
),
observed_generation: mz.meta().generation,
reason: "ReadyToPromote".into(),
}],
},
active_generation != desired_generation,
)
.await?;
return Ok(None);
}
// do this last, so that we keep traffic pointing at
// the previous environmentd until the new one is
// fully ready
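Given the condition constructed above, a Materialize resource that is ready and waiting for manual promotion should surface a status roughly like the following. This is a sketch: only the condition type, status, reason, and message text come from the code; the camelCase serialized field names and all concrete values are assumptions or placeholders:

status:
  resourcesHash: "<hash of the desired resources>"   # recorded so is_ready_to_promote can compare it
  conditions:
  - type: UpToDate
    status: "Unknown"
    reason: ReadyToPromote
    message: "Ready to promote generation 4"          # placeholder generation number
    observedGeneration: 4                             # placeholder
    lastTransitionTime: "2025-01-01T00:00:00Z"        # placeholder

Note that `is_ready_to_promote` requires both a condition with reason `ReadyToPromote` and a stored resources hash that matches the currently desired resources, so a spec change made while waiting sends the rollout back through the normal "Applying" path before it can be promoted.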
@@ -11,6 +11,14 @@

kind: Cluster
apiVersion: kind.x-k8s.io/v1alpha4
# Allow access to the registry from both inside and outside kubernetes
containerdConfigPatches:
- |-
[plugins."io.containerd.grpc.v1.cri".registry.mirrors."docker.io"]
endpoint = ["http://proxy-docker-hub:5000"]
- |-
[plugins."io.containerd.grpc.v1.cri".registry.mirrors."ghcr.io"]
endpoint = ["http://proxy-ghcr:5000"]
# Constrain the node port range to something relatively small, then forward all
# those ports from the host. This makes services running in Kubernetes
# accessible at localhost:$NODEPORT without requiring manual port forwarding.
@@ -23,6 +31,9 @@ kubeadmConfigPatches:
nodes:
- role: control-plane
image: kindest/node:v1.32.5
extraMounts:
- containerPath: /var/lib/kubelet/config.json
hostPath: "$DOCKER_CONFIG/config.json"
extraPortMappings:
- containerPort: 32000
hostPort: 32000
@@ -160,10 +171,17 @@ nodes:
materialize.cloud/availability-zone: "1"
topology.kubernetes.io/zone: "1"
workload: "materialize-instance"
extraMounts:
- containerPath: /var/lib/kubelet/config.json
hostPath: "$DOCKER_CONFIG/config.json"
- role: worker
image: kindest/node:v1.32.5
labels:
materialize.cloud/scratch-fs: "true"
materialize.cloud/disk: "true"
materialize.cloud/availability-zone: "2"
topology.kubernetes.io/zone: "2"
workload: "materialize-instance"
extraMounts:
- containerPath: /var/lib/kubelet/config.json
hostPath: "$DOCKER_CONFIG/config.json"