From 3bdb20ce95326aa211d29f2d1adf89af288f4c5c Mon Sep 17 00:00:00 2001 From: Zed Spencer-Milnes Date: Mon, 16 Dec 2024 22:29:44 +0000 Subject: [PATCH] OKD 4.16 and 4.17 release announcement and supporting documentation (#46) * adding inital 4.15 to 4.16 migration docs * Change target release and add kubectl patch instructions * code block * Wrong value * WIP prepare documentation for 4.16/4.17 releases * Updating instructions to latest tested 4-16 target release * Copyedit upgrade procedure for 4.15 clusters * add source reference * Continue work on SCOS migration notes * Improve drafting of FCOS->SCOS migration notes * Add page noting known issues in OKD 4.16 and encourage users to upgrade striaght to OKD 4.17 once the 4.16 upgrade has completed * Fix contianer typo * Update blog post date to 2024-12-16 * Add link to release in okd project --------- Co-authored-by: Tyrone-W --- blog/2024/06-01-okd-future-statement.md | 1 + blog/2024/07-30-okd-pre-release-testing.md | 1 + blog/2024/12-16-okd-4-16-and-4-17-release.md | 49 +++++++ blog/authors.yml | 17 +++ .../1-force-upgrade-to-stable-4-16.md | 136 ++++++++++++++++++ .../from-4-15/2-fcos-to-scos-migration.md | 44 ++++++ .../from-4-15/3-known-issues-4-16.md | 14 ++ docs/project/upgrade-notes/from-4-15/index.md | 24 ++++ docs/project/upgrade-notes/index.md | 12 ++ docusaurus.config.ts | 3 +- 10 files changed, 300 insertions(+), 1 deletion(-) create mode 100644 blog/2024/12-16-okd-4-16-and-4-17-release.md create mode 100644 docs/project/upgrade-notes/from-4-15/1-force-upgrade-to-stable-4-16.md create mode 100644 docs/project/upgrade-notes/from-4-15/2-fcos-to-scos-migration.md create mode 100644 docs/project/upgrade-notes/from-4-15/3-known-issues-4-16.md create mode 100644 docs/project/upgrade-notes/from-4-15/index.md create mode 100644 docs/project/upgrade-notes/index.md diff --git a/blog/2024/06-01-okd-future-statement.md b/blog/2024/06-01-okd-future-statement.md index f2231be..3060885 100644 --- 
a/blog/2024/06-01-okd-future-statement.md +++ b/blog/2024/06-01-okd-future-statement.md @@ -1,6 +1,7 @@ --- draft: false date: 2024-06-01 +authors: ["jaimemagiera"] --- # OKD Working Group Statement diff --git a/blog/2024/07-30-okd-pre-release-testing.md b/blog/2024/07-30-okd-pre-release-testing.md index 6b09379..db4c747 100644 --- a/blog/2024/07-30-okd-pre-release-testing.md +++ b/blog/2024/07-30-okd-pre-release-testing.md @@ -1,6 +1,7 @@ --- draft: false date: 2024-07-30 +authors: ["jaimemagiera"] --- # OKD Pre-Release Testing July 2024 diff --git a/blog/2024/12-16-okd-4-16-and-4-17-release.md b/blog/2024/12-16-okd-4-16-and-4-17-release.md new file mode 100644 index 0000000..b956178 --- /dev/null +++ b/blog/2024/12-16-okd-4-16-and-4-17-release.md @@ -0,0 +1,49 @@ +--- +title: OKD 4.17 and 4.16 releases +authors: ["zedsm"] +date: 2024-12-16 +--- + +We are pleased to announce the release of OKD 4.17, alongside OKD 4.16 to allow upgrades for existing 4.15 clusters. + +:::warning +4.16 is intended only as a pass-through for existing 4.15 clusters. Upgrading an existing 4.15 cluster will require manual interventions and special care due to major changes in how OKD is built and assembled which have introduced various side effects. +::: + +## You're late, why? + +Yes, we are. OKD builds became polluted with RHEL content that was included in "payload components" (e.g. cluster-infrastructure operators, images, etc. that made up OKD). This was highlighted in Summer 2023 and heading into 2024 all OKD releases were stopped until this issue was addressed. + +After significant work from a few engineers at Red Hat, all components that make up OKD should now be free from RHEL artifacts. This required significant work to build infrastructure and processes, and to chase issues related to discrepancies between CentOS and RHEL. Most OKD components are now based off CentOS Stream as the base image layer (the license-free upstream to RHEL).
+ +## I want to install a new cluster + +New cluster installations can follow the normal process. Downloads of client tools with the latest versions of OKD 4.17 embedded can be found [here](https://github.com/okd-project/okd/releases/tag/4.17.0-okd-scos.0). + +## I want to upgrade an existing cluster + +We recommend attempting upgrades from the latest released version of OKD FCOS 4.15 (`4.15.0-0.okd-2024-03-10-010116`). + +Upgrading an existing 4.15 cluster will require manual interventions and special care due to major changes in how OKD is built and assembled which have introduced various side effects. + +There is a new area for upgrade notes covering the upgrade path from 4.15 through 4.17. + +:::info +[OKD Upgrade Notes: From 4.15](/docs/project/upgrade-notes/from-4-15/) +::: + +## Node operating systems are now based off CentOS Stream CoreOS (SCOS) + +As part of this work we have also changed the node operating system to be based off CentOS Stream CoreOS (SCOS) rather than Fedora CoreOS (FCOS). It's worth noting that this work was not part of the OKD Streams (where we produced concurrent releases for FCOS and SCOS) project which for now has been suspended. + +The build process for SCOS and its assembly into OKD in versions greater than 4.16 is vastly different to how it happened as part of OKD Streams in version 4.15 and below. + +:::warning +There are known issues and regressions related to the move from FCOS to SCOS that may affect new and existing clusters.
Please refer to [OKD Upgrade Notes: From 4.15](/docs/project/upgrade-notes/from-4-15/) +::: + +## Special thanks + +The OKD Working Group would like to thank [Prashanth Sundararaman](https://github.com/Prashanth684) of Red Hat for their work. + + diff --git a/blog/authors.yml b/blog/authors.yml index f41826d..63c10f4 100644 --- a/blog/authors.yml +++ b/blog/authors.yml @@ -8,3 +8,20 @@ dmueller: title: Red Hat url: https://github.com/dmueller2001 image_url: https://github.com/dmueller2001.png +zedsm: + name: Zed Spencer-Milnes + title: Co-chair, OKD Working Group + url: https://github.com/GingerGeek + image_url: https://github.com/GingerGeek.png + socials: + x: GingerGeek + github: GingerGeek + linkedin: zedspencermilnes +jaimemagiera: + name: Jaime Magiera + title: Co-chair, OKD Working Group + url: https://github.com/JaimeMagiera + image_url: https://github.com/JaimeMagiera.png + socials: + github: JaimeMagiera + linkedin: jaime-magiera diff --git a/docs/project/upgrade-notes/from-4-15/1-force-upgrade-to-stable-4-16.md b/docs/project/upgrade-notes/from-4-15/1-force-upgrade-to-stable-4-16.md new file mode 100644 index 0000000..e0923de --- /dev/null +++ b/docs/project/upgrade-notes/from-4-15/1-force-upgrade-to-stable-4-16.md @@ -0,0 +1,136 @@ +--- +title: Upgrade 4.15 cluster to 4.16 +sidebar_label: 4.15 -> 4.16 Upgrade +description: Take a cluster from 4.15 to 4.16 before upgrading to 4.17 +--- + +There is a known issue taking a cluster from 4.15 to 4.16 due to validation changes that were introduced in Cluster Version Operator which couldn't be backported to 4.15. + +In order to allow the upgrade to proceed, we will start an upgrade to 4.16 and then when the error state is achieved, we will intervene and patch component versions to allow the upgrade to finish. + +These docs are based off an [original set of instructions provided by aleskandro](https://github.com/okd-project/okd/discussions/1971#discussioncomment-10119718).
+ +### Cluster Versions + +- Starting cluster version: `4.15.0-0.okd-2024-03-10-010116` +- Target cluster version: `4.16.0-okd-scos.1` + +## Manual Upgrade + +### 1) Collect OKD payload component references + +Use https://github.com/okd-project/okd-scos/releases to confirm release manifest for target cluster version + +For `4.16.0-okd-scos.1` the release manifest is found at `quay.io/okd/scos-release@sha256:0de353901f9ab5ecb14c2583d16d24561df23d1bf46fe03f218f2ffb8f134096` + +#### Collect `hyperkube` reference +```bash +oc adm release info --image-for=hyperkube quay.io/okd/scos-release@sha256:0de353901f9ab5ecb14c2583d16d24561df23d1bf46fe03f218f2ffb8f134096 +``` +- `hyperkube`: `quay.io/okd/scos-content@sha256:5c9128668752a9b891a24a9ec36e0724d975d6d49e6e4e2d516b5ba80ae2fb23` + +#### Collect `cluster-kube-apiserver-operator` reference +```bash +oc adm release info --image-for=cluster-kube-apiserver-operator quay.io/okd/scos-release@sha256:0de353901f9ab5ecb14c2583d16d24561df23d1bf46fe03f218f2ffb8f134096 +``` +- `cluster-kube-apiserver-operator`: `quay.io/okd/scos-content@sha256:37d6b6c13d864deb7ea925acf2b2cb34305333f92ce64e7906d3f973a8071642` + +### 2) Start the OKD 4.15 to 4.16 upgrade +Run the following command to start the cluster upgrade process to this version +```bash +oc adm upgrade --allow-explicit-upgrade --force --to-image quay.io/okd/scos-release@sha256:0de353901f9ab5ecb14c2583d16d24561df23d1bf46fe03f218f2ffb8f134096 +``` + +### 3) Wait for upgrade error state to occur +The clusterversion will show failed status shortly after the process starts, cluster-version-operator pod logs or clusterversion operator events will show an error similar to +``` +message: 'Could not update customresourcedefinition "infrastructures.config.openshift.io" (47 of 903): the object is invalid, possibly due to local cluster configuration' +``` + +### 4) Scale down Cluster Version Operator for manual intervention +To continue the cluster upgrade process the following steps are 
required: +Scale down the cluster-version-operator to pause the update process +`oc scale --replicas=0 deployments/cluster-version-operator -n openshift-cluster-version` + +### 5) Manually intervene and force Kubernetes API Server upgrade +Modify the openshift-kube-apiserver-operator deployment with +```bash +oc edit -n openshift-kube-apiserver-operator deployments/kube-apiserver-operator +``` + +1. Update the `kube-apiserver-operator` container template with the image reference for the 4.16 `cluster-kube-apiserver-operator` +```yaml +image: 'quay.io/okd/scos-content@sha256:37d6b6c13d864deb7ea925acf2b2cb34305333f92ce64e7906d3f973a8071642' +``` + +2. Update the `IMAGE` environment variable within the container template with the image reference for the 4.16 `hyperkube` +```yaml +- name: IMAGE + value: 'quay.io/okd/scos-content@sha256:5c9128668752a9b891a24a9ec36e0724d975d6d49e6e4e2d516b5ba80ae2fb23' +``` + +3. Update the `OPERATOR_IMAGE` environment variable within the container template with the image reference for the 4.16 `cluster-kube-apiserver-operator` +```yaml +- name: OPERATOR_IMAGE + value: 'quay.io/okd/scos-content@sha256:37d6b6c13d864deb7ea925acf2b2cb34305333f92ce64e7906d3f973a8071642' +``` + +4.
Update the `OPERAND_IMAGE_VERSION` environment variable within the container template with the value `1.29.6` +```yaml +- name: OPERAND_IMAGE_VERSION + value: 1.29.6 +``` + +### 6) Wait for rollout of `kube-apiserver` +Wait for the rollout of the new kube-apiserver pods to complete. The rollout is complete once an installer pod has been created for each node running kube-apiserver after the above changes, the status of these installer pods is 'Completed' and the kube-apiserver cluster operator progressing status is false (this process takes around 3 minutes to start and around 15 minutes to complete on a three node control-plane). + + +### 7) Scale up Cluster Version Operator to allow upgrade to continue +Scale up the cluster-version-operator to continue the update process +`oc scale --replicas=1 deployments/cluster-version-operator -n openshift-cluster-version` + +The upgrade should then continue without error + +## Automatic Upgrade + +:::danger +Please take care to review that your cluster is in the expected state before running these scripts. +If you only have a few clusters to upgrade consider following the manual process detailed above +::: + +The above process and patch can be used to upgrade the cluster if the openshift-kube-apiserver-operator deployment has one container and its environment variables are in the following order: + +```yaml +env: +- name: IMAGE +- name: OPERATOR_IMAGE +- name: OPERAND_IMAGE_VERSION +- name: OPERATOR_IMAGE_VERSION +- name: POD_NAME +``` + +:::danger +The script *relies on the above ordering of the environment variables*. If your environment variables within the container template are different then this _will not be validated by the script_ and it could break your cluster.
+::: + +### `upgrade-4-15-to-4-16-unsafe.sh` +```bash +oc adm upgrade --allow-explicit-upgrade --to-image quay.io/okd/scos-release@sha256:0de353901f9ab5ecb14c2583d16d24561df23d1bf46fe03f218f2ffb8f134096 + +sleep 60 + +oc scale --replicas=0 deployments/cluster-version-operator -n openshift-cluster-version + +oc -n openshift-kube-apiserver-operator patch deployment kube-apiserver-operator --type='json' -p='[ + {"op": "replace", "path": "/spec/template/spec/containers/0/image", "value": "quay.io/okd/scos-content@sha256:37d6b6c13d864deb7ea925acf2b2cb34305333f92ce64e7906d3f973a8071642"}, + {"op": "replace", "path": "/spec/template/spec/containers/0/env/0/value", "value": "quay.io/okd/scos-content@sha256:5c9128668752a9b891a24a9ec36e0724d975d6d49e6e4e2d516b5ba80ae2fb23"}, + {"op": "replace", "path": "/spec/template/spec/containers/0/env/1/value", "value": "quay.io/okd/scos-content@sha256:37d6b6c13d864deb7ea925acf2b2cb34305333f92ce64e7906d3f973a8071642"}, + {"op": "replace", "path": "/spec/template/spec/containers/0/env/2/value", "value": "1.29.6"} +]' + +sleep 180 + +oc wait clusteroperators kube-apiserver --for=condition=Progressing=false --timeout=600s + +oc scale --replicas=1 deployments/cluster-version-operator -n openshift-cluster-version +``` diff --git a/docs/project/upgrade-notes/from-4-15/2-fcos-to-scos-migration.md b/docs/project/upgrade-notes/from-4-15/2-fcos-to-scos-migration.md new file mode 100644 index 0000000..3ce975a --- /dev/null +++ b/docs/project/upgrade-notes/from-4-15/2-fcos-to-scos-migration.md @@ -0,0 +1,44 @@ +--- +title: Node Operating System changes to SCOS +sidebar_label: FCOS -> SCOS Migration +description: OKD Node OS has changed from a Fedora to a CentOS Stream base. +--- + +### Background + +Nodes within OKD run an operating system that ships as a component of the cluster. In OKD \<4.15 you could run an OKD package that incorporated Fedora or CentOS Stream. 
These builds came from entirely separate build pipelines, with a long term intention to converge them over time. + +As well as the node operating system, there are many cluster components that share a common base image. In versions prior to 4.15 the base image used contained RHEL-polluted content. This was the cause of the pause of releases during the majority of 2024. + +In producing OKD 4.16 the decision was taken for both the Node Operating System and Base Image to be based off CentOS Stream. This was to reduce the complexity of rebuilding all the cluster component containers (as CentOS Stream is closer to RHEL) and also to concentrate efforts on the single overall assembly pipeline as builds resumed. + +### Moving from FCOS to SCOS for existing clusters + +When you perform an upgrade from 4.15 to 4.16 your node operating system will transition from FCOS to SCOS automatically. + +### When might I see issues moving from node OS FCOS to SCOS? + +In the vast majority of cases, you should be able to move between FCOS and SCOS without issue (as this kind of checkout/rebase is a use case of the underlying `ostree` system). + +Where you may encounter issues is if you have (probably inadvertently) made use of features from system components that rely on a newer package or kernel version that is present in Fedora but isn't present in CentOS Stream. + +:::note +If you experience issues transitioning between FCOS and SCOS please report or start a discussion on our [GitHub project](https://github.com/okd-project/okd). +::: + +We have seen reports related to ext4 features that were present on FCOS installs but not available on SCOS. + +### Will you reintroduce a method to support Fedora based node operating systems or base images? + +There are lab, research and experimental reasons which may mean you want to be running newer packages or kernels than what's available in CentOS Stream.
Should you have the resource to do so, we encourage contributions in this area to look at how we can provide both alternative Node Operating Systems (i.e. Fedora) and even base container images for certain cluster components. Please get in touch! + +### Where can I get boot artifacts for SCOS? + +The FCOS team provide a variety of bootable media for FCOS. The equivalent is not yet available for SCOS but is in progress at the time of writing. + +### FCOS -> SCOS for new clusters + +Until bootable media is available, you can use FCOS as a live boot image to then "pivot" into a SCOS installation based off the cluster ignition manifests and SCOS version. Follow the normal installation procedure for your platform. + +As mentioned above we have seen issues related to ext4 features on FCOS systems that prevent the pivot from FCOS to SCOS in certain setups. +Please see [this issue (#2041)](https://github.com/okd-project/okd/issues/2041) for more information and workarounds. diff --git a/docs/project/upgrade-notes/from-4-15/3-known-issues-4-16.md b/docs/project/upgrade-notes/from-4-15/3-known-issues-4-16.md new file mode 100644 index 0000000..ea42d69 --- /dev/null +++ b/docs/project/upgrade-notes/from-4-15/3-known-issues-4-16.md @@ -0,0 +1,14 @@ +--- +title: OKD 4.16 has Known Issues +sidebar_label: OKD 4.16 Known Issues +description: OKD 4.16 should only be used as a step-thru to 4.17 +--- + +OKD 4.16 was released simultaneously with OKD 4.17. + +The purpose of the OKD 4.16 release was to provide an upgrade path for existing OKD 4.15 clusters. + +It is not intended that clusters should remain on OKD 4.16. After completing [the manual steps](1-force-upgrade-to-stable-4-16.md) to take your cluster to 4.16, you should proceed immediately to the 4.17 version which is available.
+ +## Known Issues - 4.16 +- metal3 pod crash on baremetal ([#2030](https://github.com/okd-project/okd/issues/2030)) diff --git a/docs/project/upgrade-notes/from-4-15/index.md b/docs/project/upgrade-notes/from-4-15/index.md new file mode 100644 index 0000000..40ce842 --- /dev/null +++ b/docs/project/upgrade-notes/from-4-15/index.md @@ -0,0 +1,24 @@ +--- +title: Upgrading from OKD 4.15 to 4.17 +sidebar_label: From 4.15 to 4.17 +description: Upgrading to OKD 4.17 for existing 4.15 clusters requires special attention and passing through 4.16. +--- + +import DocCardList from '@theme/DocCardList'; + +Please also see the [4.16 and 4.17 release blog post](/blog/2024/12/16/okd-4-16-and-4-17-release). + +There was a large gap of releases after 4.15. 4.16 and 4.17 were released concurrently based off a significantly different build process. OKD 4.16 is only intended to be used as a pass-through for clusters going from 4.15 through to 4.17. + +Extra care should be taken for upgrading an existing OKD 4.15 cluster to 4.17. + +Your cluster will need to pass from OKD 4.15, to the latest version of 4.16 before proceeding to 4.17. + +Upgrades should take place from the March 2024 FCOS stream of OKD (`4.15.0-0.okd-2024-03-10-010116`), upgrades from other versions may require additional work. + +:::tip +Read these notes in conjunction with the usual [Product Documentation for Upgrading](https://docs.okd.io/4.16/updating/index.html) +::: + + + diff --git a/docs/project/upgrade-notes/index.md b/docs/project/upgrade-notes/index.md new file mode 100644 index 0000000..bd47fbf --- /dev/null +++ b/docs/project/upgrade-notes/index.md @@ -0,0 +1,12 @@ +--- +title: Upgrade Notes +--- + +import DocCardList from '@theme/DocCardList'; + +Most upgrades can be completed by following automated cluster upgrade process. Some versions require special attention and this section of the documentation covers these upgrades. 
+ +In addition please refer to the product docs for the version you are upgrading to as they will have additional upgrade notes that apply to both OKD and OpenShift. + + + diff --git a/docusaurus.config.ts b/docusaurus.config.ts index 0e44217..e7e3b94 100644 --- a/docusaurus.config.ts +++ b/docusaurus.config.ts @@ -82,7 +82,7 @@ const config: Config = { // Replace with your project's social card announcementBar: { id: 'scos-notice', - content: 'Update on OKD 4.16, 4.17 and why we\'re switching to SCOS from FCOS: read here.', + content: 'OKD 4.17 and 4.16 now available: read here.', backgroundColor: '#666', textColor: '#fff', isCloseable: false, @@ -190,6 +190,7 @@ const config: Config = { prism: { theme: prismThemes.github, darkTheme: prismThemes.dracula, + additionalLanguages: ['bash'], }, } satisfies BaseThemeConfig & ClassicThemeConfig,