Skip to content

Commit

Permalink
Merge branch 'Azure:master' into master
Browse files Browse the repository at this point in the history
  • Loading branch information
gouthamMN authored Jul 19, 2023
2 parents 5bb8c5b + 2710ec3 commit 6bed9c1
Show file tree
Hide file tree
Showing 40 changed files with 3,146 additions and 157 deletions.
9 changes: 8 additions & 1 deletion .pipelines/e2e.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ jobs:
- script: |
set -xe
sudo rpm -ivh https://dl.fedoraproject.org/pub/epel/epel-release-latest-8.noarch.rpm
sudo dnf install -y openvpn make podman
sudo dnf install -y openvpn make podman jq
displayName: Setup (Container)
target: container
Expand Down Expand Up @@ -84,13 +84,20 @@ jobs:
. ./hack/e2e/run-rp-and-e2e.sh
hack/get-admin-kubeconfig.sh /subscriptions/$AZURE_SUBSCRIPTION_ID/resourceGroups/$CLUSTER/providers/Microsoft.RedHatOpenShift/openShiftClusters/$CLUSTER >admin.kubeconfig
displayName: Get admin kubeconfig for must-gather
condition: failed()
# must-gather collection must be run inside the container so it can access the VPN
- script: |
export CI=true
. ./hack/e2e/run-rp-and-e2e.sh
export KUBECONFIG=admin.kubeconfig
wget -nv https://mirror.openshift.com/pub/openshift-v4/x86_64/clients/ocp/$(OpenShiftVersion)/openshift-client-linux-$(OpenShiftVersion).tar.gz
tar xf openshift-client-linux-$(OpenShiftVersion).tar.gz
./oc adm must-gather
tar cf must-gather.tar.gz must-gather.local.*
displayName: Collect must-gather
target: container
condition: failed()
- publish: must-gather.tar.gz
artifact: must-gather
Expand Down
40 changes: 26 additions & 14 deletions cmd/aro/update_ocp_versions.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,29 @@ import (
"github.com/Azure/ARO-RP/pkg/util/version"
)

// getInstallerImageDigests reads the environment variable named by envKey and
// parses its value as a JSON object mapping string keys to string values.
//
// Outside of local development mode the value is expected to be Base64-encoded
// (standard encoding) and is decoded before it is parsed as JSON.
//
// It returns the parsed map, or a nil map and an error prefixed with envKey
// when Base64 decoding or JSON parsing fails. The underlying error is wrapped,
// so callers can inspect it with errors.Is / errors.As.
func getInstallerImageDigests(envKey string) (map[string]string, error) {
	var installerImageDigests map[string]string
	var err error

	jsonData := []byte(os.Getenv(envKey))

	// For Azure DevOps pipelines, the JSON data is Base64-encoded
	// since it's embedded in JSON-formatted build artifacts. But
	// let's not force that on local development mode.
	if !env.IsLocalDevelopmentMode() {
		jsonData, err = base64.StdEncoding.DecodeString(string(jsonData))
		if err != nil {
			// %w (rather than %v) keeps the decode error in the chain
			// while producing the exact same message text.
			return nil, fmt.Errorf("%s: Failed to decode base64: %w", envKey, err)
		}
	}

	if err = json.Unmarshal(jsonData, &installerImageDigests); err != nil {
		return nil, fmt.Errorf("%s: Failed to parse JSON: %w", envKey, err)
	}

	return installerImageDigests, nil
}

func getLatestOCPVersions(ctx context.Context, log *logrus.Entry) ([]api.OpenShiftVersion, error) {
env, err := env.NewCoreForCI(ctx, log)
if err != nil {
Expand All @@ -36,20 +59,9 @@ func getLatestOCPVersions(ctx context.Context, log *logrus.Entry) ([]api.OpenShi
// the aro-installer wrapper digest. This allows us to utilize
// Azure Safe Deployment Practices (SDP) instead of pushing the
// version tag and deploying to all regions at once.
var installerImageDigests map[string]string
jsonData := []byte(os.Getenv("INSTALLER_IMAGE_DIGESTS"))

// For Azure DevOps pipelines, the JSON data is Base64-encoded
// since it's embedded in JSON-formatted build artifacts. But
// let's not force that on local development mode.
if !env.IsLocalDevelopmentMode() {
jsonData, err = base64.StdEncoding.DecodeString(string(jsonData))
if err != nil {
return nil, fmt.Errorf("INSTALLER_IMAGE_DIGESTS: Failed to decode base64: %v", err)
}
}
if err = json.Unmarshal(jsonData, &installerImageDigests); err != nil {
return nil, fmt.Errorf("INSTALLER_IMAGE_DIGESTS: Failed to parse JSON: %v", err)
installerImageDigests, err := getInstallerImageDigests("INSTALLER_IMAGE_DIGESTS")
if err != nil {
return nil, err
}

for _, vers := range version.HiveInstallStreams {
Expand Down
2 changes: 2 additions & 0 deletions pkg/cluster/generate.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,6 @@ package cluster
// Licensed under the Apache License 2.0.

//go:generate go run ../../vendor/github.com/golang/mock/mockgen -destination=../util/mocks/$GOPACKAGE/$GOPACKAGE.go github.com/Azure/ARO-RP/pkg/$GOPACKAGE Interface
//go:generate go run ../../vendor/github.com/golang/mock/mockgen -destination=../util/mocks/samplesclient/versioned.go github.com/openshift/client-go/samples/clientset/versioned Interface
//go:generate go run ../../vendor/github.com/golang/mock/mockgen -destination=../util/mocks/samples/samples.go github.com/openshift/client-go/samples/clientset/versioned/typed/samples/v1 SamplesV1Interface,ConfigInterface
//go:generate go run ../../vendor/golang.org/x/tools/cmd/goimports -local=github.com/Azure/ARO-RP -e -w ../util/mocks/$GOPACKAGE/$GOPACKAGE.go
24 changes: 15 additions & 9 deletions pkg/cluster/samples.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (

configv1 "github.com/openshift/api/config/v1"
operatorv1 "github.com/openshift/api/operator/v1"
"k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/client-go/util/retry"
)
Expand All @@ -19,17 +20,22 @@ func (m *manager) disableSamples(ctx context.Context) error {
return nil
}

return retry.RetryOnConflict(retry.DefaultRetry, func() error {
c, err := m.samplescli.SamplesV1().Configs().Get(ctx, "cluster", metav1.GetOptions{})
if err != nil {
return err
}
return retry.OnError(
retry.DefaultRetry,
func(err error) bool {
return errors.IsConflict(err) || errors.IsNotFound(err)
},
func() error {
c, err := m.samplescli.SamplesV1().Configs().Get(ctx, "cluster", metav1.GetOptions{})
if err != nil {
return err
}

c.Spec.ManagementState = operatorv1.Removed
c.Spec.ManagementState = operatorv1.Removed

_, err = m.samplescli.SamplesV1().Configs().Update(ctx, c, metav1.UpdateOptions{})
return err
})
_, err = m.samplescli.SamplesV1().Configs().Update(ctx, c, metav1.UpdateOptions{})
return err
})
}

// disableOperatorHubSources disables operator hub sources if there's no
Expand Down
105 changes: 105 additions & 0 deletions pkg/cluster/samples_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
package cluster

// Copyright (c) Microsoft Corporation.
// Licensed under the Apache License 2.0.

import (
"context"
"errors"
"testing"

"github.com/golang/mock/gomock"
operatorv1 "github.com/openshift/api/operator/v1"
samplesv1 "github.com/openshift/api/samples/v1"
kerrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime/schema"

"github.com/Azure/ARO-RP/pkg/api"
mock_env "github.com/Azure/ARO-RP/pkg/util/mocks/env"
mock_samples "github.com/Azure/ARO-RP/pkg/util/mocks/samples"
mock_samplesclient "github.com/Azure/ARO-RP/pkg/util/mocks/samplesclient"
utilerror "github.com/Azure/ARO-RP/test/util/error"
)

// Test_manager_disableSamples exercises manager.disableSamples against mocked
// samples clients. It covers the successful update, a Get that keeps returning
// NotFound, and an Update that keeps returning Conflict; the latter two are
// expected to be retried (more than one Get call) before the error surfaces.
func Test_manager_disableSamples(t *testing.T) {
	ctx := context.Background()

	// newSamplesConfig returns a fresh, zero-valued samples Config. Every use
	// gets its own instance so that a mutation made while running one case
	// (disableSamples modifies the object returned by Get) cannot leak into
	// another case or into the expected Update argument.
	newSamplesConfig := func() *samplesv1.Config {
		return &samplesv1.Config{
			TypeMeta:   metav1.TypeMeta{},
			ObjectMeta: metav1.ObjectMeta{},
			Spec:       samplesv1.ConfigSpec{},
			Status:     samplesv1.ConfigStatus{},
		}
	}

	tests := []struct {
		name                        string
		samplesConfig               *samplesv1.Config
		samplesCRGetError           error
		samplesCRUpdateError        error
		expectedMinNumberOfGetCalls int
		expectedMaxNumberOfGetCalls int
		wantErr                     string
	}{
		{
			name:                        "samples cr is found and updated",
			samplesConfig:               newSamplesConfig(),
			expectedMinNumberOfGetCalls: 1,
			expectedMaxNumberOfGetCalls: 1,
			wantErr:                     "",
		},
		{
			name:                        "samples cr is not found and retried",
			samplesCRGetError:           kerrors.NewNotFound(schema.GroupResource{}, "samples"),
			expectedMinNumberOfGetCalls: 2,
			expectedMaxNumberOfGetCalls: 15,
			wantErr:                     " \"samples\" not found",
		},
		{
			name:                        "samples cr update is conflicting and retried",
			samplesConfig:               newSamplesConfig(),
			expectedMinNumberOfGetCalls: 2,
			expectedMaxNumberOfGetCalls: 15,
			samplesCRUpdateError:        kerrors.NewConflict(schema.GroupResource{}, "samples", errors.New("conflict")),
			wantErr:                     "Operation cannot be fulfilled on \"samples\": conflict",
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			controller := gomock.NewController(t)
			defer controller.Finish()
			env := mock_env.NewMockInterface(controller)
			samplescli := mock_samplesclient.NewMockInterface(controller)
			samplesInterface := mock_samples.NewMockSamplesV1Interface(controller)
			configInterface := mock_samples.NewMockConfigInterface(controller)

			env.EXPECT().IsLocalDevelopmentMode().Return(false)
			samplescli.EXPECT().SamplesV1().AnyTimes().Return(samplesInterface)
			samplesInterface.EXPECT().Configs().AnyTimes().Return(configInterface)
			configInterface.EXPECT().Get(gomock.Any(), "cluster", metav1.GetOptions{}).
				MinTimes(tt.expectedMinNumberOfGetCalls).
				MaxTimes(tt.expectedMaxNumberOfGetCalls).
				Return(tt.samplesConfig, tt.samplesCRGetError)

			if tt.samplesConfig != nil {
				// The expected Update argument is built independently of the
				// object handed out by Get, so this expectation only matches
				// when disableSamples itself sets ManagementState to Removed.
				wantConfig := newSamplesConfig()
				wantConfig.Spec.ManagementState = operatorv1.Removed
				configInterface.EXPECT().Update(gomock.Any(), wantConfig, metav1.UpdateOptions{}).AnyTimes().Return(wantConfig, tt.samplesCRUpdateError)
			}

			m := &manager{
				env: env,
				doc: &api.OpenShiftClusterDocument{
					OpenShiftCluster: &api.OpenShiftCluster{
						Properties: api.OpenShiftClusterProperties{
							ClusterProfile: api.ClusterProfile{
								ResourceGroupID: "/subscriptions/00000000-0000-0000-0000-000000000000/resourceGroups/clusterRGName",
							},
						},
					},
				},
				samplescli: samplescli,
			}

			err := m.disableSamples(ctx)
			utilerror.AssertErrorMessage(t, err, tt.wantErr)
		})
	}
}
2 changes: 1 addition & 1 deletion pkg/deploy/assets/gateway-production.json

Large diffs are not rendered by default.

29 changes: 24 additions & 5 deletions pkg/deploy/generator/scripts/gatewayVMSS.sh
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@ yum -y -x WALinuxAgent -x WALinuxAgent-udev update --allowerasing
echo "extending partition table"
# Linux block devices are inconsistently named
# it's difficult to tie the lvm pv to the physical disk using /dev/disk files, which is why lvs is used here
physicalDisk="$(lvs -o devices -a | head -n2 | tail -n1 | cut -d ' ' -f 3 | cut -d \( -f 1 | tr -d '[:digit:]')"
growpart "$physicalDisk" 2
physical_disk="$(lvs -o devices -a | head -n2 | tail -n1 | cut -d ' ' -f 3 | cut -d \( -f 1 | tr -d '[:digit:]')"
growpart "$physical_disk" 2

echo "extending filesystems"
lvextend -l +20%FREE /dev/rootvg/rootlv
Expand All @@ -33,7 +33,12 @@ for attempt in {1..5}; do
done

echo "configuring logrotate"
cat >/etc/logrotate.conf <<'EOF'

# gateway_logdir is a readonly variable that specifies the host path mount point for the gateway container log file
# for the purpose of rotating the gateway logs
declare -r gateway_logdir='/var/log/aro-gateway'

cat >/etc/logrotate.conf <<EOF
# see "man logrotate" for details
# rotate log files weekly
weekly
Expand Down Expand Up @@ -67,6 +72,18 @@ include /etc/logrotate.d
create 0600 root utmp
rotate 1
}
# Maximum log directory size is 100G with this configuration
# Setting limit to 100G to allow space for other logging services
# copytruncate is a critical option used to prevent logs from being shipped twice
${gateway_logdir} {
size 20G
rotate 5
create 0600 root root
copytruncate
noolddir
compress
}
EOF

echo "configuring yum repository and running yum update"
Expand Down Expand Up @@ -250,14 +267,15 @@ GATEWAY_FEATURES='$GATEWAYFEATURES'
RPIMAGE='$RPIMAGE'
EOF

cat >/etc/systemd/system/aro-gateway.service <<'EOF'
cat >/etc/systemd/system/aro-gateway.service <<EOF
[Unit]
After=network-online.target
Wants=network-online.target
[Service]
EnvironmentFile=/etc/sysconfig/aro-gateway
ExecStartPre=-/usr/bin/docker rm -f %N
ExecStartPre=/usr/bin/mkdir -p ${gateway_logdir}
ExecStart=/usr/bin/docker run \
--hostname %H \
--name %N \
Expand All @@ -277,7 +295,8 @@ ExecStart=/usr/bin/docker run \
-p 443:8443 \
-v /run/systemd/journal:/run/systemd/journal \
-v /var/etw:/var/etw:z \
$RPIMAGE \
-v /ctr.log:${gateway_logdir}:z \
\$RPIMAGE \
gateway
ExecStop=/usr/bin/docker stop -t 3600 %N
TimeoutStopSec=3600
Expand Down
74 changes: 74 additions & 0 deletions pkg/frontend/admin_openshiftcluster_etcdrecovery.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
package frontend

// Copyright (c) Microsoft Corporation.
// Licensed under the Apache License 2.0.

import (
"context"
"net/http"
"path/filepath"
"strings"

"github.com/go-chi/chi/v5"
operatorclient "github.com/openshift/client-go/operator/clientset/versioned"
"github.com/sirupsen/logrus"

"github.com/Azure/ARO-RP/pkg/api"
"github.com/Azure/ARO-RP/pkg/database/cosmosdb"
"github.com/Azure/ARO-RP/pkg/frontend/middleware"
"github.com/Azure/ARO-RP/pkg/util/restconfig"
)

// postAdminOpenShiftClusterEtcdRecovery is the HTTP handler for the admin etcd
// recovery action. It pulls the request-scoped logger out of the context,
// removes the last element of the request URL path, delegates the work to
// _postAdminOpenShiftClusterEtcdRecovery and hands the outcome to adminReply.
//
// The Content-Type header is set to text/plain only when the helper succeeds.
func (f *frontend) postAdminOpenShiftClusterEtcdRecovery(w http.ResponseWriter, r *http.Request) {
	reqCtx := r.Context()
	entry := reqCtx.Value(middleware.ContextKeyLog).(*logrus.Entry)

	// Strip the trailing path element; the helper derives the cluster
	// resource ID from what is left of the path.
	// NOTE(review): presumably the stripped element is the action suffix of
	// the route; confirm against the router registration.
	r.URL.Path = filepath.Dir(r.URL.Path)

	payload, recoveryErr := f._postAdminOpenShiftClusterEtcdRecovery(reqCtx, r, entry)
	if recoveryErr == nil {
		w.Header().Set("Content-Type", "text/plain")
	}

	adminReply(entry, w, nil, payload, recoveryErr)
}

// _postAdminOpenShiftClusterEtcdRecovery performs the work behind the admin
// etcd recovery endpoint.
//
// It treats the request URL path, minus its "/admin" prefix, as the cluster
// resource ID and loads the matching cluster document. It then builds the kube
// actions for that cluster, resolves the "Etcd" GVR, validates the request with
// validateAdminKubernetesObjects, creates an operator client from the cluster's
// REST config and finally calls f.fixEtcd.
//
// It returns whatever f.fixEtcd returns on success. On failure it returns an
// empty byte slice together with:
//   - a 404 CloudError when the cluster document does not exist,
//   - a 500 CloudError when kube actions, GVR resolution, REST config or
//     operator client creation fail,
//   - the unmodified error for other database or validation failures.
//
// TODO write integration test that skips f.fixEtcd
func (f *frontend) _postAdminOpenShiftClusterEtcdRecovery(ctx context.Context, r *http.Request, log *logrus.Entry) ([]byte, error) {
	resType, resName, resGroupName := chi.URLParam(r, "resourceType"), chi.URLParam(r, "resourceName"), chi.URLParam(r, "resourceGroupName")
	resourceID := strings.TrimPrefix(r.URL.Path, "/admin")

	// internalServerError converts err into a 500 CloudError. The message is
	// passed through a constant "%s" format because NewCloudError treats its
	// message argument as a printf-style format string; handing it
	// err.Error() directly would misrender any '%' contained in the error.
	internalServerError := func(err error) error {
		return api.NewCloudError(http.StatusInternalServerError, api.CloudErrorCodeInternalServerError, "", "%s", err.Error())
	}

	doc, err := f.dbOpenShiftClusters.Get(ctx, resourceID)
	switch {
	case cosmosdb.IsErrorStatusCode(err, http.StatusNotFound):
		return []byte{}, api.NewCloudError(http.StatusNotFound, api.CloudErrorCodeResourceNotFound, "", "The Resource '%s/%s' under resource group '%s' was not found.", resType, resName, resGroupName)
	case err != nil:
		return []byte{}, err
	}

	kubeActions, err := f.kubeActionsFactory(log, f.env, doc.OpenShiftCluster)
	if err != nil {
		return []byte{}, internalServerError(err)
	}

	gvr, err := kubeActions.ResolveGVR("Etcd")
	if err != nil {
		return []byte{}, internalServerError(err)
	}

	// Validation errors are returned as-is rather than being converted to a
	// 500 CloudError.
	err = validateAdminKubernetesObjects(r.Method, gvr, namespaceEtcds, "cluster")
	if err != nil {
		return []byte{}, err
	}

	restConfig, err := restconfig.RestConfig(f.env, doc.OpenShiftCluster)
	if err != nil {
		return []byte{}, internalServerError(err)
	}

	operatorcli, err := operatorclient.NewForConfig(restConfig)
	if err != nil {
		return []byte{}, internalServerError(err)
	}

	return f.fixEtcd(ctx, log, f.env, doc, kubeActions, operatorcli.OperatorV1().Etcds())
}
Loading

0 comments on commit 6bed9c1

Please sign in to comment.