🐛 upgrade-e2e failure RC #1623

Closed
3 changes: 0 additions & 3 deletions .github/workflows/e2e.yaml
@@ -56,6 +56,3 @@ jobs:
       - uses: actions/setup-go@v5
         with:
           go-version-file: go.mod
-
-      - name: Run the upgrade e2e test
-        run: make test-upgrade-e2e
28 changes: 28 additions & 0 deletions .github/workflows/upgrade-e2e.yaml
@@ -0,0 +1,28 @@
+name: upgrade-e2e
+
+on:
+  workflow_dispatch:
+  pull_request:
+  merge_group:
+  push:
+    branches:
+      - main
+
+jobs:
+  upgrade-e2e:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: actions/setup-go@v5
+        with:
+          go-version-file: go.mod
+
+      - name: Run the upgrade e2e test
+        run: ARTIFACT_PATH=/tmp/artifacts make test-upgrade-e2e
+
+      - uses: cytopia/upload-artifact-retry-action@v0.1.7
+        if: failure()
+        with:
+          name: upgrade-e2e-artifacts
+          path: /tmp/artifacts/
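
The new workflow threads ARTIFACT_PATH into the make target, and the upload step is gated on `if: failure()`, so passing runs upload nothing. A minimal sketch, not part of the PR, of how that variable reaches the test process via the k8s.io/utils/env helper that the test file below actually imports (the empty default is what lets local runs skip collection):

package main

import (
    "fmt"

    "k8s.io/utils/env"
)

func main() {
    // In CI this yields "/tmp/artifacts"; in a plain local
    // `make test-upgrade-e2e` run it yields "" and artifact
    // collection is skipped entirely.
    artifactPath := env.GetString("ARTIFACT_PATH", "")
    fmt.Printf("ARTIFACT_PATH=%q\n", artifactPath)
}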
262 changes: 207 additions & 55 deletions test/upgrade-e2e/post_upgrade_test.go
@@ -1,77 +1,75 @@
 package upgradee2e
 
 import (
-    "bufio"
     "context"
     "fmt"
+    "io"
+    "os"
+    "path/filepath"
     "strings"
     "testing"
     "time"
 
     "github.com/stretchr/testify/assert"
     "github.com/stretchr/testify/require"
+    "gopkg.in/yaml.v2"
     appsv1 "k8s.io/api/apps/v1"
     corev1 "k8s.io/api/core/v1"
     apimeta "k8s.io/apimachinery/pkg/api/meta"
     metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
     "k8s.io/apimachinery/pkg/labels"
     "k8s.io/apimachinery/pkg/types"
+    kubeclient "k8s.io/client-go/kubernetes"
+    "k8s.io/utils/env"
     "sigs.k8s.io/controller-runtime/pkg/client"
 
+    ocv1 "github.com/operator-framework/operator-controller/api/v1"
     catalogd "github.com/operator-framework/operator-controller/catalogd/api/v1"
 )
 
+const (
+    artifactName = "operator-controller-upgrade-e2e"
+)
+
 func TestClusterExtensionAfterOLMUpgrade(t *testing.T) {
     t.Log("Starting checks after OLM upgrade")
     ctx := context.Background()
+    defer getArtifactsOutput(t)
 
-    managerLabelSelector := labels.Set{"control-plane": "operator-controller-controller-manager"}
+    now := time.Now()
 
-    t.Log("Checking that the controller-manager deployment is updated")
-    require.EventuallyWithT(t, func(ct *assert.CollectT) {
-        var managerDeployments appsv1.DeploymentList
-        assert.NoError(ct, c.List(ctx, &managerDeployments, client.MatchingLabelsSelector{Selector: managerLabelSelector.AsSelector()}))
-        assert.Len(ct, managerDeployments.Items, 1)
-        managerDeployment := managerDeployments.Items[0]
+    // wait for catalogd deployment to finish
+    t.Log("Wait for catalogd deployment to be ready")
+    waitForDeployment(t, ctx, "catalogd-controller-manager")
 
-        assert.True(ct,
-            managerDeployment.Status.UpdatedReplicas == *managerDeployment.Spec.Replicas &&
-                managerDeployment.Status.Replicas == *managerDeployment.Spec.Replicas &&
-                managerDeployment.Status.AvailableReplicas == *managerDeployment.Spec.Replicas &&
-                managerDeployment.Status.ReadyReplicas == *managerDeployment.Spec.Replicas,
-        )
-    }, time.Minute, time.Second)
+    // wait for operator-controller deployment to finish
+    t.Log("Wait for operator-controller deployment to be ready")
+    waitForDeployment(t, ctx, "operator-controller-controller-manager")
 
-    var managerPods corev1.PodList
-    t.Log("Waiting for only one controller-manager Pod to remain")
-    require.EventuallyWithT(t, func(ct *assert.CollectT) {
-        assert.NoError(ct, c.List(ctx, &managerPods, client.MatchingLabelsSelector{Selector: managerLabelSelector.AsSelector()}))
-        assert.Len(ct, managerPods.Items, 1)
-    }, time.Minute, time.Second)
-
-    t.Log("Reading logs to make sure that ClusterExtension was reconciled by operator-controller before we update it")
-    // Make sure that after we upgrade OLM itself we can still reconcile old objects without any changes
-    logCtx, cancel := context.WithTimeout(ctx, time.Minute)
-    defer cancel()
-    substrings := []string{
-        "reconcile ending",
-        fmt.Sprintf(`ClusterExtension=%q`, testClusterExtensionName),
-    }
-    found, err := watchPodLogsForSubstring(logCtx, &managerPods.Items[0], "manager", substrings...)
-    require.NoError(t, err)
-    require.True(t, found)
-
-    t.Log("Checking that the ClusterCatalog is serving")
+    t.Log("Checking that the ClusterCatalog is unpacked")
     require.EventuallyWithT(t, func(ct *assert.CollectT) {
         var clusterCatalog catalogd.ClusterCatalog
         assert.NoError(ct, c.Get(ctx, types.NamespacedName{Name: testClusterCatalogName}, &clusterCatalog))
 
+        // check serving condition
         cond := apimeta.FindStatusCondition(clusterCatalog.Status.Conditions, catalogd.TypeServing)
         if !assert.NotNil(ct, cond) {
             return
         }
         assert.Equal(ct, metav1.ConditionTrue, cond.Status)
         assert.Equal(ct, catalogd.ReasonAvailable, cond.Reason)
+
+        // check progressing condition
+        cond = apimeta.FindStatusCondition(clusterCatalog.Status.Conditions, catalogd.TypeProgressing)
+        if !assert.NotNil(ct, cond) {
+            return
+        }
+        assert.Equal(ct, metav1.ConditionTrue, cond.Status)
+        assert.Equal(ct, catalogd.ReasonSucceeded, cond.Reason)
+
+        // check that the catalog was recently unpacked (after progressing is over)
+        t.Logf("last unpacked: %s - progressing last transitioned: %s", clusterCatalog.Status.LastUnpacked.String(), cond.LastTransitionTime.String())
+        assert.True(ct, clusterCatalog.Status.LastUnpacked.After(now))
     }, time.Minute, time.Second)
 
     t.Log("Checking that the ClusterExtension is installed")
@@ -111,33 +109,187 @@ func TestClusterExtensionAfterOLMUpgrade(t *testing.T) {
     }, time.Minute, time.Second)
 }
 
-func watchPodLogsForSubstring(ctx context.Context, pod *corev1.Pod, container string, substrings ...string) (bool, error) {
-    podLogOpts := corev1.PodLogOptions{
-        Follow:    true,
-        Container: container,
-    }
-
-    req := kclientset.CoreV1().Pods(pod.Namespace).GetLogs(pod.Name, &podLogOpts)
-    podLogs, err := req.Stream(ctx)
-    if err != nil {
-        return false, err
-    }
-    defer podLogs.Close()
-
-    scanner := bufio.NewScanner(podLogs)
-    for scanner.Scan() {
-        line := scanner.Text()
-
-        foundCount := 0
-        for _, substring := range substrings {
-            if strings.Contains(line, substring) {
-                foundCount++
-            }
-        }
-        if foundCount == len(substrings) {
-            return true, nil
-        }
-    }
-
-    return false, scanner.Err()
-}
+func waitForDeployment(t *testing.T, ctx context.Context, controlPlaneLabel string) {
+    deploymentLabelSelector := labels.Set{"control-plane": controlPlaneLabel}.AsSelector()
+
+    t.Log("Checking that the deployment is updated")
+    require.EventuallyWithT(t, func(ct *assert.CollectT) {
+        var managerDeployments appsv1.DeploymentList
+        assert.NoError(ct, c.List(ctx, &managerDeployments, client.MatchingLabelsSelector{Selector: deploymentLabelSelector}))
+        assert.Len(ct, managerDeployments.Items, 1)
+        managerDeployment := managerDeployments.Items[0]
+
+        assert.True(ct,
+            managerDeployment.Status.UpdatedReplicas == *managerDeployment.Spec.Replicas &&
+                managerDeployment.Status.Replicas == *managerDeployment.Spec.Replicas &&
+                managerDeployment.Status.AvailableReplicas == *managerDeployment.Spec.Replicas &&
+                managerDeployment.Status.ReadyReplicas == *managerDeployment.Spec.Replicas,
+        )
+    }, time.Minute, time.Second)
+
+    var managerPods corev1.PodList
+    t.Log("Waiting for only one Pod to remain")
+    require.EventuallyWithT(t, func(ct *assert.CollectT) {
+        assert.NoError(ct, c.List(ctx, &managerPods, client.MatchingLabelsSelector{Selector: deploymentLabelSelector}))
+        assert.Len(ct, managerPods.Items, 1)
+    }, time.Minute, time.Second)
+}
+
+//func watchPodLogsForSubstring(ctx context.Context, pod *corev1.Pod, container string, substrings ...string) (bool, error) {
+//    podLogOpts := corev1.PodLogOptions{
+//        Follow:    true,
+//        Container: container,
+//    }
+//
+//    req := kclientset.CoreV1().Pods(pod.Namespace).GetLogs(pod.Name, &podLogOpts)
+//    podLogs, err := req.Stream(ctx)
+//    if err != nil {
+//        return false, err
+//    }
+//    defer podLogs.Close()
+//
+//    scanner := bufio.NewScanner(podLogs)
+//    for scanner.Scan() {
+//        line := scanner.Text()
+//
+//        foundCount := 0
+//        for _, substring := range substrings {
+//            if strings.Contains(line, substring) {
+//                foundCount++
+//            }
+//        }
+//        if foundCount == len(substrings) {
+//            return true, nil
+//        }
+//    }
+//
+//    return false, scanner.Err()
+//}
+
+// getArtifactsOutput gets all the artifacts from the test run and saves them to the artifact path.
+// Currently it saves:
+// - clusterextensions
+// - pod logs
+// - deployments
+// - catalogsources
+func getArtifactsOutput(t *testing.T) {
+    basePath := env.GetString("ARTIFACT_PATH", "")
+    if basePath == "" {
+        return
+    }
+
+    kubeClient, err := kubeclient.NewForConfig(cfg)
+    require.NoError(t, err)
+
+    // sanitize the artifact name for use as a directory name
+    testName := strings.ReplaceAll(strings.ToLower(t.Name()), " ", "-")
+    // Get the test description and sanitize it for use as a directory name
+    artifactPath := filepath.Join(basePath, artifactName, fmt.Sprint(time.Now().UnixNano()), testName)
+
+    // Create the full artifact path
+    err = os.MkdirAll(artifactPath, 0755)
+    require.NoError(t, err)
+
+    // Get all namespaces
+    namespaces := corev1.NamespaceList{}
+    if err := c.List(context.Background(), &namespaces); err != nil {
+        fmt.Printf("Failed to list namespaces: %v", err)
+    }
+
+    // get all cluster extensions and save them to the artifact path.
+    clusterExtensions := ocv1.ClusterExtensionList{}
+    if err := c.List(context.Background(), &clusterExtensions, client.InNamespace("")); err != nil {
+        fmt.Printf("Failed to list cluster extensions: %v", err)
+    }
+    for _, clusterExtension := range clusterExtensions.Items {
+        // Save cluster extension to artifact path
+        clusterExtensionYaml, err := yaml.Marshal(clusterExtension)
+        if err != nil {
+            fmt.Printf("Failed to marshal cluster extension: %v", err)
+            continue
+        }
+        if err := os.WriteFile(filepath.Join(artifactPath, clusterExtension.Name+"-clusterextension.yaml"), clusterExtensionYaml, 0600); err != nil {
+            fmt.Printf("Failed to write cluster extension to file: %v", err)
+        }
+    }
+
+    // get all catalogsources and save them to the artifact path.
+    catalogsources := catalogd.ClusterCatalogList{}
+    if err := c.List(context.Background(), &catalogsources, client.InNamespace("")); err != nil {
+        fmt.Printf("Failed to list catalogsources: %v", err)
+    }
+    for _, catalogsource := range catalogsources.Items {
+        // Save catalogsource to artifact path
+        catalogsourceYaml, err := yaml.Marshal(catalogsource)
+        if err != nil {
+            fmt.Printf("Failed to marshal catalogsource: %v", err)
+            continue
+        }
+        if err := os.WriteFile(filepath.Join(artifactPath, catalogsource.Name+"-catalogsource.yaml"), catalogsourceYaml, 0600); err != nil {
+            fmt.Printf("Failed to write catalogsource to file: %v", err)
+        }
+    }
+
+    for _, namespace := range namespaces.Items {
+        // let's ignore kube-* namespaces.
+        if strings.Contains(namespace.Name, "kube-") {
+            continue
+        }
+
+        namespacedArtifactPath := filepath.Join(artifactPath, namespace.Name)
+        if err := os.Mkdir(namespacedArtifactPath, 0755); err != nil {
+            fmt.Printf("Failed to create namespaced artifact path: %v", err)
+            continue
+        }
+
+        // get all deployments in the namespace and save them to the artifact path.
+        deployments := appsv1.DeploymentList{}
+        if err := c.List(context.Background(), &deployments, client.InNamespace(namespace.Name)); err != nil {
+            fmt.Printf("Failed to list deployments in namespace %q: %v", namespace.Name, err)
+            continue
+        }
+
+        for _, deployment := range deployments.Items {
+            // Save deployment to artifact path
+            deploymentYaml, err := yaml.Marshal(deployment)
+            if err != nil {
+                fmt.Printf("Failed to marshal deployment: %v", err)
+                continue
+            }
+            if err := os.WriteFile(filepath.Join(namespacedArtifactPath, deployment.Name+"-deployment.yaml"), deploymentYaml, 0600); err != nil {
+                fmt.Printf("Failed to write deployment to file: %v", err)
+            }
+        }
+
+        // Get logs from all pods in all namespaces
+        pods := corev1.PodList{}
+        if err := c.List(context.Background(), &pods, client.InNamespace(namespace.Name)); err != nil {
+            fmt.Printf("Failed to list pods in namespace %q: %v", namespace.Name, err)
+        }
+        for _, pod := range pods.Items {
+            if pod.Status.Phase != corev1.PodRunning && pod.Status.Phase != corev1.PodSucceeded && pod.Status.Phase != corev1.PodFailed {
+                continue
+            }
+            for _, container := range pod.Spec.Containers {
+                logs, err := kubeClient.CoreV1().Pods(namespace.Name).GetLogs(pod.Name, &corev1.PodLogOptions{Container: container.Name}).Stream(context.Background())
+                if err != nil {
+                    fmt.Printf("Failed to get logs for pod %q in namespace %q: %v", pod.Name, namespace.Name, err)
+                    continue
+                }
+                defer logs.Close()
+
+                outFile, err := os.Create(filepath.Join(namespacedArtifactPath, pod.Name+"-"+container.Name+"-logs.txt"))
+                if err != nil {
+                    fmt.Printf("Failed to create file for pod %q in namespace %q: %v", pod.Name, namespace.Name, err)
+                    continue
+                }
+                defer outFile.Close()
+
+                if _, err := io.Copy(outFile, logs); err != nil {
+                    fmt.Printf("Failed to copy logs for pod %q in namespace %q: %v", pod.Name, namespace.Name, err)
+                    continue
+                }
+            }
+        }
+    }
+}
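
Because getArtifactsOutput is registered with defer at the top of the test, it also runs when a require.* assertion aborts the test: FailNow stops the test goroutine via runtime.Goexit, which still executes deferred calls. A minimal standard-library sketch of that pattern, with a hypothetical dumpArtifacts standing in for the real dump:

package sketch

import (
    "os"
    "testing"
)

// dumpArtifacts stands in for getArtifactsOutput: a no-op unless the
// workflow has exported ARTIFACT_PATH.
func dumpArtifacts(t *testing.T) {
    if os.Getenv("ARTIFACT_PATH") == "" {
        return
    }
    t.Log("collecting artifacts...") // the real function writes YAML manifests and pod logs here
}

func TestExample(t *testing.T) {
    defer dumpArtifacts(t) // still runs after require.* failures, since t.FailNow uses runtime.Goexit
    // ... assertions ...
}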
3 changes: 3 additions & 0 deletions test/upgrade-e2e/upgrade_e2e_suite_test.go
@@ -6,6 +6,7 @@ import (
     "testing"
 
     "k8s.io/client-go/kubernetes"
+    "k8s.io/client-go/rest"
     ctrl "sigs.k8s.io/controller-runtime"
     "sigs.k8s.io/controller-runtime/pkg/client"
 
@@ -21,12 +22,14 @@ var (
     c                        client.Client
     kclientset               kubernetes.Interface
 
+    cfg                      *rest.Config
     testClusterCatalogName   string
     testClusterExtensionName string
 )
 
 func TestMain(m *testing.M) {
     var ok bool
+    cfg = ctrl.GetConfigOrDie()
     testClusterCatalogName, ok = os.LookupEnv(testClusterCatalogNameEnv)
     if !ok {
         fmt.Printf("%q is not set", testClusterCatalogNameEnv)
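
The suite now keeps the *rest.Config because the controller-runtime client (c) has no API for streaming pod logs; getArtifactsOutput builds a typed clientset from the same config for exactly that. A minimal sketch of the pattern, assuming a reachable cluster and placeholder pod/namespace names:

package main

import (
    "context"
    "io"
    "os"

    corev1 "k8s.io/api/core/v1"
    kubeclient "k8s.io/client-go/kubernetes"
    ctrl "sigs.k8s.io/controller-runtime"
)

func main() {
    cfg := ctrl.GetConfigOrDie()                   // the same call TestMain now makes
    clientset, err := kubeclient.NewForConfig(cfg) // typed client that exposes GetLogs
    if err != nil {
        panic(err)
    }
    // "default" and "example-pod" are placeholders, not names from the PR.
    req := clientset.CoreV1().Pods("default").GetLogs("example-pod", &corev1.PodLogOptions{})
    logs, err := req.Stream(context.Background())
    if err != nil {
        panic(err)
    }
    defer logs.Close()
    _, _ = io.Copy(os.Stdout, logs)
}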