diff --git a/.github/workflows/check-file-updates.yaml b/.github/workflows/check-file-updates.yaml
index b6b7f0f1fee..8c27a6e6c8d 100644
--- a/.github/workflows/check-file-updates.yaml
+++ b/.github/workflows/check-file-updates.yaml
@@ -3,42 +3,34 @@ on:
   pull_request:
 jobs:
   file-updates:
-    permissions:
-      pull-requests: write
     name: Ensure generated files are included
     runs-on: ubuntu-latest
     steps:
-    - uses: actions/checkout@v4
-    - name: Generate files
-      id: generate-files
+    - uses: actions/checkout@v4.2.2
+      with:
+        ref: ${{github.event.pull_request.head.ref}}
+        repository: ${{github.event.pull_request.head.repo.full_name}}
+    - name: Save the PR number for artifact upload
       run: |
-        CMD="make generate manifests api-docs"
-        $CMD
-        echo "CMD=$CMD" >> $GITHUB_OUTPUT
+        echo ${{ github.event.number }} > pr_number.txt
+    - name: Upload the PR number as artifact
+      id: artifact-upload
+      uses: actions/upload-artifact@v4
+      with:
+        name: pr_number
+        path: ./pr_number.txt
+        retention-days: 1 # Keep the artifact for one day only.
+    - name: Generate files
+      run: make generate manifests api-docs
     - name: Ensure generated files are up-to-date
       id: check_generated_files
       run : |
+        rm ./pr_number.txt # remove pr_number.txt before checking "git status", so the assessment of changed files stays accurate.
         if [[ -n $(git status -s) ]]
         then
           echo "Generated files have been missed in the PR"
           git diff
-          echo "missing_generated_files=true" >> $GITHUB_OUTPUT
+          exit 1
         else
           echo "No new files to commit"
-          echo "missing_generated_files=false" >> $GITHUB_OUTPUT
-        fi
-    - name: Report issue in PR
-      if: ${{ steps.check_generated_files.outputs.missing_generated_files == 'true' }}
-      uses: thollander/actions-comment-pull-request@v2
-      with:
-        message: |
-          ## This PR can't be merged just yet 😢
-
-          Please run `${{ steps.generate-files.outputs.CMD }}` and commit the changes.
- - For more info: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} - - name: Print git status and fail pr - if: ${{ steps.check_generated_files.outputs.missing_generated_files == 'true' }} - run: | - git status - exit 1 + fi \ No newline at end of file diff --git a/.github/workflows/comment-on-pr.yaml b/.github/workflows/comment-on-pr.yaml new file mode 100644 index 00000000000..77272613b05 --- /dev/null +++ b/.github/workflows/comment-on-pr.yaml @@ -0,0 +1,59 @@ +name: Comment on pr +on: + workflow_run: + workflows: ["Check config and readme updates"] + types: + - completed +jobs: + download-artifact-data: + if: ${{ github.event.workflow_run.conclusion == 'failure' }} + runs-on: ubuntu-latest + outputs: + pr_number: ${{ steps.artifact-data.outputs.pr_number }} + steps: + - name: Download artifact + id: artifact-download + uses: actions/github-script@v7 + with: + script: | + let allArtifacts = await github.rest.actions.listWorkflowRunArtifacts({ + owner: context.repo.owner, + repo: context.repo.repo, + run_id: context.payload.workflow_run.id, + }); + + let matchArtifact = allArtifacts.data.artifacts.filter((artifact) => { + return artifact.name == "pr_number" + })[0]; + + let download = await github.rest.actions.downloadArtifact({ + owner: context.repo.owner, + repo: context.repo.repo, + artifact_id: matchArtifact.id, + archive_format: 'zip', + }); + let fs = require('fs'); + fs.writeFileSync(`${process.env.GITHUB_WORKSPACE}/pr_number.zip`, Buffer.from(download.data)); + - name: Unzip artifact + run: unzip pr_number.zip + - name: Extract data + id: artifact-data + run: | + echo "pr_number=$(head -n 1 pr_number.txt)" >> $GITHUB_OUTPUT + comment-on-pr: + needs: + - download-artifact-data + runs-on: ubuntu-latest + permissions: + pull-requests: write + steps: + - name: Report issue in PR + uses: thollander/actions-comment-pull-request@v3.0.1 + with: + message: | + ## This PR can't be merged just yet 😢 + + Please run `make generate manifests api-docs` and commit the changes. + + For more info: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.event.workflow_run.id }} + pr-number: ${{ needs.download-artifact-data.outputs.pr_number }} \ No newline at end of file diff --git a/.github/workflows/linter.yaml b/.github/workflows/linter.yaml index 4df641cb6b1..ebbc76c2529 100644 --- a/.github/workflows/linter.yaml +++ b/.github/workflows/linter.yaml @@ -22,5 +22,5 @@ jobs: - name: golangci-lint uses: golangci/golangci-lint-action@v6 with: - version: v1.60.2 + version: v1.61.0 args: --timeout 5m0s diff --git a/.github/workflows/unit-tests.yaml b/.github/workflows/unit-tests.yaml index b7f470a58d6..15d58f02b64 100644 --- a/.github/workflows/unit-tests.yaml +++ b/.github/workflows/unit-tests.yaml @@ -23,6 +23,6 @@ jobs: run: make unit-test - name: Upload results to Codecov - uses: codecov/codecov-action@v4 + uses: codecov/codecov-action@v4.6.0 with: token: ${{ secrets.CODECOV_TOKEN }} diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index d6a99206393..6fd268aa3db 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -20,7 +20,7 @@ Issues are tracked using [Jira](https://issues.redhat.com/secure/RapidBoard.jspa 1. **Fork the Repository:** Create your own fork of the repository to work on your changes. 2. **Create a Branch:** Create your own branch to include changes for the feature or a bug fix off of `incubation` branch. 3. 
**Work on Your Changes:** Commit often, and ensure your code adheres to these [Code Style Guidelines](#code-style-guidelines) and passes all the [quality gates](#quality-gates) for the operator.
-4. **Testing:** Make sure your code passes all the tests, including any new tests you've added.
+4. **Testing:** Make sure your code passes all the tests, including any new tests you've added, and that your changes do not decrease the test coverage shown in the report. Every new feature should come with unit tests that cover that new part of the code.
 
 ### Open a Pull Request:
diff --git a/Dockerfiles/Dockerfile b/Dockerfiles/Dockerfile
index 2c6743185ff..a74e73a1135 100644
--- a/Dockerfiles/Dockerfile
+++ b/Dockerfiles/Dockerfile
@@ -1,27 +1,22 @@
 # Build the manager binary
 ARG GOLANG_VERSION=1.21
-ARG USE_LOCAL=false
-ARG OVERWRITE_MANIFESTS=""
 
 ################################################################################
-FROM registry.access.redhat.com/ubi8/go-toolset:$GOLANG_VERSION as builder_local_false
-ARG OVERWRITE_MANIFESTS
-# Get all manifests from remote git repo to builder_local_false by script
+FROM registry.access.redhat.com/ubi8/toolbox as manifests
+ARG USE_LOCAL=false
+ARG OVERWRITE_MANIFESTS=""
 USER root
 WORKDIR /
-COPY get_all_manifests.sh get_all_manifests.sh
-RUN ./get_all_manifests.sh ${OVERWRITE_MANIFESTS}
-
-################################################################################
-FROM registry.access.redhat.com/ubi8/go-toolset:$GOLANG_VERSION as builder_local_true
-# Get all manifests from local to builder_local_true
-USER root
-WORKDIR /opt
-# copy local manifests to build
 COPY opt/manifests/ /opt/manifests/
+COPY get_all_manifests.sh get_all_manifests.sh
+RUN if [ "${USE_LOCAL}" != "true" ]; then \
+        rm -rf /opt/manifests/*; \
+        ./get_all_manifests.sh ${OVERWRITE_MANIFESTS}; \
+    fi
 
 ################################################################################
-FROM builder_local_${USE_LOCAL} as builder
+FROM registry.access.redhat.com/ubi8/go-toolset:$GOLANG_VERSION as builder
+ARG CGO_ENABLED=1
 USER root
 WORKDIR /workspace
 # Copy the Go Modules manifests
@@ -39,13 +34,13 @@ COPY main.go main.go
 COPY pkg/ pkg/
 
 # Build
-RUN CGO_ENABLED=1 GOOS=linux GOARCH=amd64 go build -a -o manager main.go
+RUN CGO_ENABLED=${CGO_ENABLED} GOOS=linux GOARCH=amd64 go build -a -o manager main.go
 
 ################################################################################
 FROM registry.access.redhat.com/ubi8/ubi-minimal:latest
 WORKDIR /
 COPY --from=builder /workspace/manager .
-COPY --chown=1001:0 --from=builder /opt/manifests /opt/manifests
+COPY --chown=1001:0 --from=manifests /opt/manifests /opt/manifests
 # Recursive change all files
 RUN chown -R 1001:0 /opt/manifests &&\
     chmod -R g=u /opt/manifests
diff --git a/Makefile b/Makefile
index ef081108979..9fe129993f9 100644
--- a/Makefile
+++ b/Makefile
@@ -3,7 +3,7 @@
 # To re-generate a bundle for another specific version without changing the standard setup, you can:
 # - use the VERSION as arg of the bundle target (e.g make bundle VERSION=0.0.2)
 # - use environment variables to overwrite this value (e.g export VERSION=0.0.2)
-VERSION ?= 2.19.0
+VERSION ?= 2.21.0
 
 # IMAGE_TAG_BASE defines the opendatahub.io namespace and part of the image name for remote images.
 # This variable is used to construct full image tags for bundle and catalog images.
# @@ -22,7 +22,8 @@ BUNDLE_IMG ?= $(IMAGE_TAG_BASE)-bundle:v$(VERSION) IMAGE_BUILDER ?= podman OPERATOR_NAMESPACE ?= opendatahub-operator-system DEFAULT_MANIFESTS_PATH ?= opt/manifests - +CGO_ENABLED ?= 1 +USE_LOCAL = false CHANNELS="fast" # CHANNELS define the bundle channels used in the bundle. @@ -68,7 +69,7 @@ YQ ?= $(LOCALBIN)/yq KUSTOMIZE_VERSION ?= v5.0.2 CONTROLLER_GEN_VERSION ?= v0.16.1 OPERATOR_SDK_VERSION ?= v1.31.0 -GOLANGCI_LINT_VERSION ?= v1.60.2 +GOLANGCI_LINT_VERSION ?= v1.61.0 YQ_VERSION ?= v4.12.2 # ENVTEST_K8S_VERSION refers to the version of kubebuilder assets to be downloaded by envtest binary. ENVTEST_K8S_VERSION = 1.31.0 @@ -94,7 +95,8 @@ E2E_TEST_FLAGS = -timeout 25m # Default image-build is to not use local odh-manifests folder # set to "true" to use local instead # see target "image-build" -IMAGE_BUILD_FLAGS ?= --build-arg USE_LOCAL=false +IMAGE_BUILD_FLAGS ?= --build-arg USE_LOCAL=$(USE_LOCAL) +IMAGE_BUILD_FLAGS += --build-arg CGO_ENABLED=$(CGO_ENABLED) # Read any custom variables overrides from a local.mk file. This will only be read if it exists in the # same directory as this Makefile. Variables can be specified in the standard format supported by @@ -189,9 +191,11 @@ run: manifests generate fmt vet ## Run a controller from your host. .PHONY: run-nowebhook run-nowebhook: GO_RUN_ARGS += -tags nowebhook + run-nowebhook: manifests generate fmt vet ## Run a controller from your host without webhook enabled $(GO_RUN_MAIN) + .PHONY: image-build image-build: # unit-test ## Build image with the manager. $(IMAGE_BUILDER) build --no-cache -f Dockerfiles/Dockerfile ${IMAGE_BUILD_FLAGS} -t $(IMG) . @@ -381,6 +385,7 @@ CLEANFILES += cover.out e2e-test: ## Run e2e tests for the controller go test ./tests/e2e/ -run ^TestOdhOperator -v --operator-namespace=${OPERATOR_NAMESPACE} ${E2E_TEST_FLAGS} +.PHONY: clean clean: $(GOLANGCI_LINT) $(GOLANGCI_LINT) cache clean chmod u+w -R $(LOCALBIN) # envtest makes its dir RO diff --git a/OWNERS_ALIASES b/OWNERS_ALIASES index f7f3153ed45..5f5f754ded9 100644 --- a/OWNERS_ALIASES +++ b/OWNERS_ALIASES @@ -1,8 +1,6 @@ aliases: platform: - - adelton - AjayJagan - - ajaypratap003 - asanzgom - biswassri - CFSNM @@ -15,8 +13,10 @@ aliases: - lphiri - MarianMacik - mattmahoneyrh + - robotmaxtron - Sara4994 - StevenTobin + - ugiordan - VaishnaviHire - ykaliuta - zdtsw \ No newline at end of file diff --git a/README.md b/README.md index 9a0618e2dfc..c7f16214b7d 100644 --- a/README.md +++ b/README.md @@ -21,6 +21,7 @@ and configure these applications. - [Deployment](#deployment) - [Test with customized manifests](#test-with-customized-manifests) - [Update API docs](#update-api-docs) + - [Enabled logging](#enabled-logging) - [Example DSCInitialization](#example-dscinitialization) - [Example DataScienceCluster](#example-datasciencecluster) - [Run functional Tests](#run-functional-tests) @@ -134,21 +135,21 @@ make image-build By default, building an image without any local changes(as a clean build) This is what the production build system is doing. -In order to build an image with local `opt/manifests` folder, to set `IMAGE_BUILD_FLAGS ="--build-arg USE_LOCAL=true"` in make. 
-e.g `make image-build -e IMAGE_BUILD_FLAGS="--build-arg USE_LOCAL=true"`
+In order to build an image with the local `opt/manifests` folder, set the `USE_LOCAL` make variable to `true`,
+e.g. `make image-build USE_LOCAL=true`
 
 #### Build Image
 
 - Custom operator image can be built using your local repository
 
   ```commandline
-  make image -e IMG=quay.io/<username>/opendatahub-operator:<tag>
+  make image IMG=quay.io/<username>/opendatahub-operator:<tag>
   ```
 
   or (for example to user <username>)
 
   ```commandline
-  make image -e IMAGE_OWNER=<username>
+  make image IMAGE_OWNER=<username>
   ```
 
   The default image used is `quay.io/opendatahub/opendatahub-operator:dev-0.0.1` when not supply argument for `make image`
@@ -173,7 +174,7 @@ e.g `make image-build -e IMAGE_BUILD_FLAGS="--build-arg USE_LOCAL=true"`
 - Deploy the created image in your cluster using following command:
 
   ```commandline
-  make deploy -e IMG=quay.io/<username>/opendatahub-operator:<tag> -e OPERATOR_NAMESPACE=<operator-namespace>
+  make deploy IMG=quay.io/<username>/opendatahub-operator:<tag> OPERATOR_NAMESPACE=<operator-namespace>
   ```
 
 - To remove resources created during installation use:
@@ -222,26 +223,18 @@ This will ensure that the doc for the apis are updated accordingly.
 
 ### Enabled logging
 
-#### Controller level
+Global logger configuration can be changed with the environment variable `ZAP_LOG_LEVEL`
+or the command line switch `--log-mode <mode>`, set for example from the CSV.
+The command line switch has higher priority.
+Valid values for `<mode>`: "" (as default) || prod || production || devel || development.
 
-Logger on all controllers can only be changed from CSV with parameters: --log-mode devel
-valid value: "" (as default) || prod || production || devel || development
+The verbosity level is INFO.
+To fine-tune the zap backend, the [standard operator sdk zap switches](https://sdk.operatorframework.io/docs/building-operators/golang/references/logging/)
+can be used.
 
-This mainly impacts logging for operator pod startup, generating common resource, monitoring deployment.
-
-| --log-mode value | mapping Log level | Comments |
-| ---------------- | ------------------- | -------------- |
-| devel | debug / 0 | lowest level |
-| "" | info / 1 | default option |
-| default | info / 1 | default option |
-| prod | error / 2 | highest level |
-
-#### Component level
-
-Logger on components can be changed by DSCI devFlags during runtime.
-By default, if not set .spec.devFlags.logmode, it uses INFO level
-Modification applies to all components, not only these "Managed" ones.
-Update DSCI CR with .spec.devFlags.logmode, see example :
+The log level can be changed at runtime by DSCI devFlags, by setting
+.spec.devFlags.logLevel. It accepts the same values as the `--zap-log-level`
+command line switch. See example:
 
 ```console
 apiVersion: dscinitialization.opendatahub.io/v1
 kind: DSCInitialization
 metadata:
   name: default-dsci
 spec:
   devFlags:
-    logmode: development
+    logLevel: debug
   ...
 ```
 
-Avaiable value for logmode is "devel", "development", "prod", "production".
-The first two work the same set to DEBUG level; the later two work the same as using ERROR level.
-
-| .spec.devFlags.logmode | stacktrace level | verbosity | Output | Comments |
-| ---------------------- | ---------------- | --------- | -------- | -------------- |
-| devel | WARN | INFO | Console | lowest level, using epoch time |
-| development | WARN | INFO | Console | same as devel |
-| "" | ERROR | INFO | JSON | default option |
-| prod | ERROR | INFO | JSON | highest level, using human readable timestamp |
-| production | ERROR | INFO | JSON | same as prod |
+| logmode     | stacktrace level | verbosity | Output  | Comments                                      |
+|-------------|------------------|-----------|---------|-----------------------------------------------|
+| devel       | WARN             | INFO      | Console | lowest level, using epoch time                |
+| development | WARN             | INFO      | Console | same as devel                                 |
+| ""          | ERROR            | INFO      | JSON    | default option                                |
+| prod        | ERROR            | INFO      | JSON    | highest level, using human readable timestamp |
+| production  | ERROR            | INFO      | JSON    | same as prod                                  |
 
 ### Example DSCInitialization
 
@@ -405,7 +395,7 @@ variable. Following table lists all the available flags to run the tests:
 Example command to run full test suite skipping the test for DataScienceCluster deletion.
 
 ```shell
-make e2e-test -e OPERATOR_NAMESPACE=<operator-namespace> -e E2E_TEST_FLAGS="--skip-deletion=true"
+make e2e-test OPERATOR_NAMESPACE=<operator-namespace> E2E_TEST_FLAGS="--skip-deletion=true"
 ```
 
 Example commands to run test suite for the dashboard `component` only, with the operator running out of the cluster.
diff --git a/apis/dscinitialization/v1/dscinitialization_types.go b/apis/dscinitialization/v1/dscinitialization_types.go
index 4aa600255b8..c283286021d 100644
--- a/apis/dscinitialization/v1/dscinitialization_types.go
+++ b/apis/dscinitialization/v1/dscinitialization_types.go
@@ -69,9 +69,13 @@ type DevFlags struct {
 	// Custom manifests uri for odh-manifests
 	// +optional
 	ManifestsUri string `json:"manifestsUri,omitempty"`
+	// ## DEPRECATED ##: Ignored, use LogLevel instead
 	// +kubebuilder:validation:Enum=devel;development;prod;production;default
 	// +kubebuilder:default="production"
 	LogMode string `json:"logmode,omitempty"`
+	// Override Zap log level. Can be "debug", "info", "error" or a number (more verbose).
+	// +optional
+	LogLevel string `json:"logLevel,omitempty"`
 }
 
 type TrustedCABundleSpec struct {
diff --git a/bundle/manifests/dscinitialization.opendatahub.io_dscinitializations.yaml b/bundle/manifests/dscinitialization.opendatahub.io_dscinitializations.yaml
index 175ec126c22..a4138f67837 100644
--- a/bundle/manifests/dscinitialization.opendatahub.io_dscinitializations.yaml
+++ b/bundle/manifests/dscinitialization.opendatahub.io_dscinitializations.yaml
@@ -67,8 +67,13 @@ spec:
                 Internal development useful field to test customizations. This is not recommended
                 to be used in production environment.
               properties:
+                logLevel:
+                  description: Override Zap log level. Can be "debug", "info", "error"
+                    or a number (more verbose).
+ type: string logmode: default: production + description: '## DEPRECATED ##: Ignored, use LogLevel instead' enum: - devel - development diff --git a/bundle/manifests/opendatahub-operator.clusterserviceversion.yaml b/bundle/manifests/opendatahub-operator.clusterserviceversion.yaml index d79bdba158d..a1f903ae63d 100644 --- a/bundle/manifests/opendatahub-operator.clusterserviceversion.yaml +++ b/bundle/manifests/opendatahub-operator.clusterserviceversion.yaml @@ -105,9 +105,9 @@ metadata: capabilities: Full Lifecycle categories: AI/Machine Learning, Big Data certified: "False" - containerImage: quay.io/opendatahub/opendatahub-operator:v2.19.0 - createdAt: "2024-12-04T17:35:30Z" - olm.skipRange: '>=1.0.0 <2.19.0' + containerImage: quay.io/opendatahub/opendatahub-operator:v2.21.0 + createdAt: "2024-11-22T19:16:14Z" + olm.skipRange: '>=1.0.0 <2.21.0' operators.operatorframework.io/builder: operator-sdk-v1.31.0 operators.operatorframework.io/internal-objects: '["featuretrackers.features.opendatahub.io", "codeflares.components.platform.opendatahub.io", "dashboards.components.platform.opendatahub.io", @@ -118,7 +118,7 @@ metadata: "monitorings.services.platform.opendatahub.io","modelcontrollers.components.platform.opendatahub.io"]' operators.operatorframework.io/project_layout: go.kubebuilder.io/v3 repository: https://github.com/opendatahub-io/opendatahub-operator - name: opendatahub-operator.v2.19.0 + name: opendatahub-operator.v2.21.0 namespace: placeholder spec: apiservicedefinitions: {} @@ -249,11 +249,11 @@ spec: built on top of Kubeflow Notebook Controller with support for OAuth\n* Jupyter Notebooks - JupyterLab notebook that provide Python support for GPU workloads\n* Data Science Pipelines - Pipeline solution for end to end MLOps workflows that - support the Kubeflow Pipelines SDK and Tekton\n* Model Mesh - ModelMesh Serving - is the Controller for managing ModelMesh, a general-purpose model serving management/routing - layer\n* Distributed Workloads(Incubation) - Stack built to make managing distributed - compute infrastructure in the cloud easy and intuitive for Data Scientists. This - stack consists of three components \n Codeflare + support the Kubeflow Pipelines SDK and Argo Workflows\n* Model Mesh - ModelMesh + Serving is the Controller for managing ModelMesh, a general-purpose model serving + management/routing layer\n* Distributed Workloads(Incubation) - Stack built to + make managing distributed compute infrastructure in the cloud easy and intuitive + for Data Scientists. 
This stack consists of three components \n Codeflare , KubeRay and Kueue.\n* Kserve - Kserve is the Controller for for serving machine learning (ML) models on arbitrary frameworks" displayName: Open Data Hub Operator @@ -501,6 +501,7 @@ spec: - apiGroups: - config.openshift.io resources: + - authentications - clusterversions verbs: - get @@ -1353,7 +1354,7 @@ spec: selector: matchLabels: component: opendatahub-operator - version: 2.19.0 + version: 2.21.0 webhookdefinitions: - admissionReviewVersions: - v1 diff --git a/components/Component Reconcile Workflow.png b/components/Component Reconcile Workflow.png deleted file mode 100644 index e3b37957fc0..00000000000 Binary files a/components/Component Reconcile Workflow.png and /dev/null differ diff --git a/components/README.md b/components/README.md deleted file mode 100644 index 3e1b5c86ba4..00000000000 --- a/components/README.md +++ /dev/null @@ -1,68 +0,0 @@ -# Component Integration - -The `components` dir of the codebase is hosts all the component specific logic of the operator. Since, ODH operator is an -integration point to deploy ODH component manifests it is essential to have common processes to integrate new components. - -## Integrating a new component - -To ensure a component is integrated seamlessly in the operator, follow the steps below: - -### Add Component to DataScienceCluster API spec - -DataScienceCluster CRD is responsible for defining the component fields and exposing them to end users. -Add your component to it's [api spec](../docs/api-overview.md#datascienceclusterspec): - -```go -type Components struct { - NewComponent newcomponent.newComponentName `json:"newcomponent,omitempty"` -} -``` - -### Add Component module - -- Add a new module, ``, under `components/` directory to define code specific to the new component. Example -can be found [here](https://github.com/opendatahub-io/opendatahub-operator/tree/main/components/datasciencepipelines) -- Define `Path` and `ComponentName` [variables](https://github.com/opendatahub-io/opendatahub-operator/blob/main/components/datasciencepipelines/datasciencepipelines.go#L11) for the new component. - -### Implement common Interface - -- Define struct that includes a shared struct `Component` with common fields. -- Implement [interface](https://github.com/opendatahub-io/opendatahub-operator/blob/main/components/component.go#L15) methods according to your component - - ```go - type ComponentInterface interface { - ReconcileComponent(ctx context.Context, cli client.Client, logger logr.Logger, owner metav1.Object, DSCISpec *dsciv1.DSCInitializationSpec, currentComponentStatus bool) error - Cleanup(cli client.Client, DSCISpec *dsciv1.DSCInitializationSpec) error - GetComponentName() string - GetManagementState() operatorv1.ManagementState - OverrideManifests(platform cluster.Platform) error - UpdatePrometheusConfig(cli client.Client, enable bool, component string) error - } - ``` - -### Add reconcile and Events - -- Once you set up the new component module, add the component to [Reconcile](https://github.com/opendatahub-io/opendatahub-operator/blob/acaaf31f43e371456363f3fd272aec91ba413482/controllers/datasciencecluster/datasciencecluster_controller.go#L135) - function in order to deploy manifests. -- This will also enable/add status updates of the component in the operator. 
- -### Reconcile Workflow -![Component Reconcile Workflow.png](Component%20Reconcile%20Workflow.png) - -### Add Unit and e2e tests - -- Components should add `unit` tests for any component specific functions added to the codebase -- Components should update [e2e tests](https://github.com/opendatahub-io/opendatahub-operator/tree/main/tests/e2e) to - capture deployments introduced by the new component -## Integrated Components - -- [Dashboard](https://github.com/opendatahub-io/opendatahub-operator/tree/main/components/dashboard) -- [Codeflare](https://github.com/opendatahub-io/opendatahub-operator/tree/main/components/codeflare) -- [Ray](https://github.com/opendatahub-io/opendatahub-operator/tree/main/components/ray) -- [Data Science Pipelines](https://github.com/opendatahub-io/opendatahub-operator/tree/main/components/datasciencepipelines) -- [KServe](https://github.com/opendatahub-io/opendatahub-operator/tree/main/components/kserve) -- [ModelMesh Serving](https://github.com/opendatahub-io/opendatahub-operator/tree/main/components/modelmeshserving) -- [Workbenches](https://github.com/opendatahub-io/opendatahub-operator/tree/main/components/workbenches) -- [TrustyAI](https://github.com/opendatahub-io/opendatahub-operator/tree/main/components/trustyai) -- [ModelRegistry](https://github.com/opendatahub-io/opendatahub-operator/tree/main/components/modelregistry) -- [Kueue](https://github.com/opendatahub-io/kueue) diff --git a/config/crd/bases/dscinitialization.opendatahub.io_dscinitializations.yaml b/config/crd/bases/dscinitialization.opendatahub.io_dscinitializations.yaml index d1feaa25578..d1750a44756 100644 --- a/config/crd/bases/dscinitialization.opendatahub.io_dscinitializations.yaml +++ b/config/crd/bases/dscinitialization.opendatahub.io_dscinitializations.yaml @@ -67,8 +67,13 @@ spec: Internal development useful field to test customizations. This is not recommended to be used in production environment. properties: + logLevel: + description: Override Zap log level. Can be "debug", "info", "error" + or a number (more verbose). + type: string logmode: default: production + description: '## DEPRECATED ##: Ignored, use LogLevel instead' enum: - devel - development diff --git a/config/crd/external/config.openshift.io_authentications.yaml b/config/crd/external/config.openshift.io_authentications.yaml new file mode 100644 index 00000000000..86ad306c7fd --- /dev/null +++ b/config/crd/external/config.openshift.io_authentications.yaml @@ -0,0 +1,175 @@ +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.16.1 + name: authentications.config.openshift.io +spec: + group: config.openshift.io + names: + kind: Authentication + listKind: AuthenticationList + plural: authentications + singular: authentication + scope: Cluster + versions: + - name: v1 + schema: + openAPIV3Schema: + description: |- + Authentication specifies cluster-wide settings for authentication (like OAuth and + webhook token authenticators). The canonical name of an instance is `cluster`. + + Compatibility level 1: Stable within a major release for a minimum of 12 months or 3 minor releases (whichever is longer). + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. 
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: spec holds user settable values for configuration + properties: + oauthMetadata: + description: |- + oauthMetadata contains the discovery endpoint data for OAuth 2.0 + Authorization Server Metadata for an external OAuth server. + This discovery document can be viewed from its served location: + oc get --raw '/.well-known/oauth-authorization-server' + For further details, see the IETF Draft: + https://tools.ietf.org/html/draft-ietf-oauth-discovery-04#section-2 + If oauthMetadata.name is non-empty, this value has precedence + over any metadata reference stored in status. + The key "oauthMetadata" is used to locate the data. + If specified and the config map or expected key is not found, no metadata is served. + If the specified metadata is not valid, no metadata is served. + The namespace for this config map is openshift-config. + properties: + name: + description: name is the metadata.name of the referenced config + map + type: string + required: + - name + type: object + serviceAccountIssuer: + description: |- + serviceAccountIssuer is the identifier of the bound service account token + issuer. + The default is https://kubernetes.default.svc + WARNING: Updating this field will not result in immediate invalidation of all bound tokens with the + previous issuer value. Instead, the tokens issued by previous service account issuer will continue to + be trusted for a time period chosen by the platform (currently set to 24h). + This time period is subject to change over time. + This allows internal components to transition to use new service account issuer without service distruption. + type: string + type: + description: |- + type identifies the cluster managed, user facing authentication mode in use. + Specifically, it manages the component that responds to login attempts. + The default is IntegratedOAuth. + type: string + webhookTokenAuthenticator: + description: |- + webhookTokenAuthenticator configures a remote token reviewer. + These remote authentication webhooks can be used to verify bearer tokens + via the tokenreviews.authentication.k8s.io REST API. This is required to + honor bearer tokens that are provisioned by an external authentication service. + properties: + kubeConfig: + description: |- + kubeConfig references a secret that contains kube config file data which + describes how to access the remote webhook service. + The namespace for the referenced secret is openshift-config. + + For further details, see: + + https://kubernetes.io/docs/reference/access-authn-authz/authentication/#webhook-token-authentication + + The key "kubeConfig" is used to locate the data. + If the secret or expected key is not found, the webhook is not honored. + If the specified kube config data is not valid, the webhook is not honored. 
+ properties: + name: + description: name is the metadata.name of the referenced secret + type: string + required: + - name + type: object + required: + - kubeConfig + type: object + webhookTokenAuthenticators: + description: webhookTokenAuthenticators is DEPRECATED, setting it + has no effect. + items: + description: |- + deprecatedWebhookTokenAuthenticator holds the necessary configuration options for a remote token authenticator. + It's the same as WebhookTokenAuthenticator but it's missing the 'required' validation on KubeConfig field. + properties: + kubeConfig: + description: |- + kubeConfig contains kube config file data which describes how to access the remote webhook service. + For further details, see: + https://kubernetes.io/docs/reference/access-authn-authz/authentication/#webhook-token-authentication + The key "kubeConfig" is used to locate the data. + If the secret or expected key is not found, the webhook is not honored. + If the specified kube config data is not valid, the webhook is not honored. + The namespace for this secret is determined by the point of use. + properties: + name: + description: name is the metadata.name of the referenced + secret + type: string + required: + - name + type: object + type: object + type: array + type: object + status: + description: status holds observed values from the cluster. They may not + be overridden. + properties: + integratedOAuthMetadata: + description: |- + integratedOAuthMetadata contains the discovery endpoint data for OAuth 2.0 + Authorization Server Metadata for the in-cluster integrated OAuth server. + This discovery document can be viewed from its served location: + oc get --raw '/.well-known/oauth-authorization-server' + For further details, see the IETF Draft: + https://tools.ietf.org/html/draft-ietf-oauth-discovery-04#section-2 + This contains the observed value based on cluster state. + An explicitly set value in spec.oauthMetadata has precedence over this field. + This field has no meaning if authentication spec.type is not set to IntegratedOAuth. + The key "oauthMetadata" is used to locate the data. + If the config map or expected key is not found, no metadata is served. + If the specified metadata is not valid, no metadata is served. + The namespace for this config map is openshift-config-managed. 
+ properties: + name: + description: name is the metadata.name of the referenced config + map + type: string + required: + - name + type: object + type: object + required: + - spec + type: object + served: true + storage: true diff --git a/config/manifests/bases/opendatahub-operator.clusterserviceversion.yaml b/config/manifests/bases/opendatahub-operator.clusterserviceversion.yaml index 6a80bc23642..698ca519c01 100644 --- a/config/manifests/bases/opendatahub-operator.clusterserviceversion.yaml +++ b/config/manifests/bases/opendatahub-operator.clusterserviceversion.yaml @@ -6,9 +6,9 @@ metadata: capabilities: Full Lifecycle categories: AI/Machine Learning, Big Data certified: "False" - containerImage: quay.io/opendatahub/opendatahub-operator:v2.19.0 - createdAt: "2024-10-09T00:00:00Z" - olm.skipRange: '>=1.0.0 <2.19.0' + containerImage: quay.io/opendatahub/opendatahub-operator:v2.21.0 + createdAt: "2024-11-18T00:00:00Z" + olm.skipRange: '>=1.0.0 <2.21.0' operators.operatorframework.io/internal-objects: '["featuretrackers.features.opendatahub.io", "codeflares.components.platform.opendatahub.io", "dashboards.components.platform.opendatahub.io", "datasciencepipelines.components.platform.opendatahub.io", "kserves.components.platform.opendatahub.io", @@ -17,7 +17,7 @@ metadata: "trainingoperators.components.platform.opendatahub.io", "trustyais.components.platform.opendatahub.io", "workbenches.components.platform.opendatahub.io", "monitorings.services.platform.opendatahub.io","modelcontrollers.components.platform.opendatahub.io"]' repository: https://github.com/opendatahub-io/opendatahub-operator - name: opendatahub-operator.v2.19.0 + name: opendatahub-operator.v2.21.0 namespace: placeholder spec: apiservicedefinitions: {} @@ -174,4 +174,4 @@ spec: selector: matchLabels: component: opendatahub-operator - version: 2.19.0 + version: 2.21.0 diff --git a/config/manifests/description-patch.yml b/config/manifests/description-patch.yml index 74d47766222..23357a48002 100644 --- a/config/manifests/description-patch.yml +++ b/config/manifests/description-patch.yml @@ -13,7 +13,7 @@ spec: * Open Data Hub Dashboard - A web dashboard that displays installed Open Data Hub components with easy access to component UIs and documentation * ODH Notebook Controller - Secure management of Jupyter Notebook in Kubernetes environments built on top of Kubeflow Notebook Controller with support for OAuth * Jupyter Notebooks - JupyterLab notebook that provide Python support for GPU workloads - * Data Science Pipelines - Pipeline solution for end to end MLOps workflows that support the Kubeflow Pipelines SDK and Tekton + * Data Science Pipelines - Pipeline solution for end to end MLOps workflows that support the Kubeflow Pipelines SDK and Argo Workflows * Model Mesh - ModelMesh Serving is the Controller for managing ModelMesh, a general-purpose model serving management/routing layer * Distributed Workloads(Incubation) - Stack built to make managing distributed compute infrastructure in the cloud easy and intuitive for Data Scientists. This stack consists of three components Codeflare , KubeRay and Kueue. 
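Several changes in this diff hang together around one new capability: the vendored Authentication CRD above, the `authentications` RBAC rule added to `config/rbac/role.yaml` just below, and the `cluster.IsDefaultAuthMethod(ctx, r.Client)` call in the DSCI reconciler further down, which decides whether the admin user groups get created. The helper itself is not part of this excerpt; the following is a minimal sketch of what it presumably does (package layout and the exact criterion are assumptions, not taken from this PR):

```go
package cluster

import (
	"context"

	configv1 "github.com/openshift/api/config/v1"
	"k8s.io/apimachinery/pkg/types"
	"sigs.k8s.io/controller-runtime/pkg/client"
)

// IsDefaultAuthMethod reports whether the cluster authenticates through the
// built-in OAuth stack; only then does creating the rhods-admins/odh-admins
// user groups make sense.
func IsDefaultAuthMethod(ctx context.Context, cli client.Client) (bool, error) {
	auth := &configv1.Authentication{}
	// The canonical Authentication CR is cluster-scoped and named "cluster".
	if err := cli.Get(ctx, types.NamespacedName{Name: "cluster"}, auth); err != nil {
		return false, err
	}
	// An empty type and "IntegratedOAuth" both mean the integrated OAuth server.
	return auth.Spec.Type == configv1.AuthenticationTypeIntegratedOAuth || auth.Spec.Type == "", nil
}
```

This lines up with the reconciler hunk later in the diff, which tolerates a NotFound error (missing CRD or CR) and simply skips user group creation instead of requeueing.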
diff --git a/config/rbac/role.yaml b/config/rbac/role.yaml index ccfa7439677..9d91b3e60d0 100644 --- a/config/rbac/role.yaml +++ b/config/rbac/role.yaml @@ -241,6 +241,7 @@ rules: - apiGroups: - config.openshift.io resources: + - authentications - clusterversions verbs: - get diff --git a/controllers/certconfigmapgenerator/certconfigmapgenerator_controller.go b/controllers/certconfigmapgenerator/certconfigmapgenerator_controller.go index a9565eb6d19..283bd24db90 100644 --- a/controllers/certconfigmapgenerator/certconfigmapgenerator_controller.go +++ b/controllers/certconfigmapgenerator/certconfigmapgenerator_controller.go @@ -5,7 +5,6 @@ import ( "context" "reflect" - "github.com/go-logr/logr" operatorv1 "github.com/openshift/api/operator/v1" "github.com/pkg/errors" corev1 "k8s.io/api/core/v1" @@ -16,6 +15,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/event" "sigs.k8s.io/controller-runtime/pkg/handler" + logf "sigs.k8s.io/controller-runtime/pkg/log" "sigs.k8s.io/controller-runtime/pkg/predicate" "sigs.k8s.io/controller-runtime/pkg/reconcile" @@ -29,13 +29,11 @@ import ( type CertConfigmapGeneratorReconciler struct { *odhClient.Client Scheme *runtime.Scheme - Log logr.Logger } // SetupWithManager sets up the controller with the Manager. -func (r *CertConfigmapGeneratorReconciler) SetupWithManager(mgr ctrl.Manager) error { - log := r.Log - log.Info("Adding controller for Configmap Generation.") +func (r *CertConfigmapGeneratorReconciler) SetupWithManager(ctx context.Context, mgr ctrl.Manager) error { + logf.FromContext(ctx).Info("Adding controller for Configmap Generation.") return ctrl.NewControllerManagedBy(mgr). Named("cert-configmap-generator-controller"). Watches(&corev1.ConfigMap{}, handler.EnqueueRequestsFromMapFunc(r.watchTrustedCABundleConfigMapResource), builder.WithPredicates(ConfigMapChangedPredicate)). @@ -46,7 +44,7 @@ func (r *CertConfigmapGeneratorReconciler) SetupWithManager(mgr ctrl.Manager) er // Reconcile will generate new configmap, odh-trusted-ca-bundle, that includes cluster-wide trusted-ca bundle and custom // ca bundle in every new namespace created. func (r *CertConfigmapGeneratorReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { - log := r.Log + log := logf.FromContext(ctx).WithName("CertConfigmapGenerator") // Request includes namespace that is newly created or where odh-trusted-ca-bundle configmap is updated. 
log.Info("Reconciling CertConfigMapGenerator.", " Request.Namespace", req.NamespacedName) // Get namespace instance @@ -110,8 +108,8 @@ func (r *CertConfigmapGeneratorReconciler) watchNamespaceResource(_ context.Cont return nil } -func (r *CertConfigmapGeneratorReconciler) watchTrustedCABundleConfigMapResource(_ context.Context, a client.Object) []reconcile.Request { - log := r.Log +func (r *CertConfigmapGeneratorReconciler) watchTrustedCABundleConfigMapResource(ctx context.Context, a client.Object) []reconcile.Request { + log := logf.FromContext(ctx) if a.GetName() == trustedcabundle.CAConfigMapName { log.Info("Cert configmap has been updated, start reconcile") return []reconcile.Request{{NamespacedName: types.NamespacedName{Name: a.GetName(), Namespace: a.GetNamespace()}}} diff --git a/controllers/components/dashboard/dashboard_support.go b/controllers/components/dashboard/dashboard_support.go index d3461c48d65..bbd18abc1ba 100644 --- a/controllers/components/dashboard/dashboard_support.go +++ b/controllers/components/dashboard/dashboard_support.go @@ -25,36 +25,36 @@ var ( PathManagedDownstream = PathDownstream + "/addon" adminGroups = map[cluster.Platform]string{ - cluster.SelfManagedRhods: "rhods-admins", - cluster.ManagedRhods: "dedicated-admins", + cluster.SelfManagedRhoai: "rhods-admins", + cluster.ManagedRhoai: "dedicated-admins", cluster.OpenDataHub: "odh-admins", cluster.Unknown: "odh-admins", } sectionTitle = map[cluster.Platform]string{ - cluster.SelfManagedRhods: "OpenShift Self Managed Services", - cluster.ManagedRhods: "OpenShift Managed Services", + cluster.SelfManagedRhoai: "OpenShift Self Managed Services", + cluster.ManagedRhoai: "OpenShift Managed Services", cluster.OpenDataHub: "OpenShift Open Data Hub", cluster.Unknown: "OpenShift Open Data Hub", } baseConsoleURL = map[cluster.Platform]string{ - cluster.SelfManagedRhods: "https://rhods-dashboard-", - cluster.ManagedRhods: "https://rhods-dashboard-", + cluster.SelfManagedRhoai: "https://rhods-dashboard-", + cluster.ManagedRhoai: "https://rhods-dashboard-", cluster.OpenDataHub: "https://odh-dashboard-", cluster.Unknown: "https://odh-dashboard-", } manifestPaths = map[cluster.Platform]string{ - cluster.SelfManagedRhods: PathSelfDownstream, - cluster.ManagedRhods: PathManagedDownstream, + cluster.SelfManagedRhoai: PathSelfDownstream, + cluster.ManagedRhoai: PathManagedDownstream, cluster.OpenDataHub: PathUpstream, cluster.Unknown: PathUpstream, } serviceAccounts = map[cluster.Platform][]string{ - cluster.SelfManagedRhods: {"rhods-dashboard"}, - cluster.ManagedRhods: {"rhods-dashboard"}, + cluster.SelfManagedRhoai: {"rhods-dashboard"}, + cluster.ManagedRhoai: {"rhods-dashboard"}, cluster.OpenDataHub: {"odh-dashboard"}, cluster.Unknown: {"odh-dashboard"}, } @@ -89,7 +89,7 @@ func computeComponentName() string { release := cluster.GetRelease() name := ComponentNameUpstream - if release.Name == cluster.SelfManagedRhods || release.Name == cluster.ManagedRhods { + if release.Name == cluster.SelfManagedRhoai || release.Name == cluster.ManagedRhoai { name = ComponentNameDownstream } diff --git a/controllers/components/kserve/kserve_support.go b/controllers/components/kserve/kserve_support.go index ffff51c3984..43a175ffa05 100644 --- a/controllers/components/kserve/kserve_support.go +++ b/controllers/components/kserve/kserve_support.go @@ -34,8 +34,8 @@ import ( var serviceAccounts = map[cluster.Platform][]string{ cluster.Unknown: {odhModelControllerComponentName}, cluster.OpenDataHub: {odhModelControllerComponentName}, - 
cluster.ManagedRhods: {odhModelControllerComponentName}, - cluster.SelfManagedRhods: {odhModelControllerComponentName}, + cluster.ManagedRhoai: {odhModelControllerComponentName}, + cluster.SelfManagedRhoai: {odhModelControllerComponentName}, } func kserveManifestInfo(sourcePath string) odhtypes.ManifestInfo { diff --git a/controllers/components/modelcontroller/modelcontroller_controller.go b/controllers/components/modelcontroller/modelcontroller_controller.go index e85ee21837a..4c58b3b191a 100644 --- a/controllers/components/modelcontroller/modelcontroller_controller.go +++ b/controllers/components/modelcontroller/modelcontroller_controller.go @@ -42,8 +42,8 @@ import ( ) var serviceAccounts = map[cluster.Platform][]string{ - cluster.SelfManagedRhods: {componentApi.ModelControllerComponentName}, - cluster.ManagedRhods: {componentApi.ModelControllerComponentName}, + cluster.SelfManagedRhoai: {componentApi.ModelControllerComponentName}, + cluster.ManagedRhoai: {componentApi.ModelControllerComponentName}, cluster.OpenDataHub: {componentApi.ModelControllerComponentName}, cluster.Unknown: {componentApi.ModelControllerComponentName}, } diff --git a/controllers/components/modelmeshserving/modelmeshserving_controller.go b/controllers/components/modelmeshserving/modelmeshserving_controller.go index 2780cc5fc3f..75e61523f2e 100644 --- a/controllers/components/modelmeshserving/modelmeshserving_controller.go +++ b/controllers/components/modelmeshserving/modelmeshserving_controller.go @@ -41,8 +41,8 @@ import ( ) var serviceAccounts = map[cluster.Platform][]string{ - cluster.SelfManagedRhods: {"modelmesh", "modelmesh-controller"}, - cluster.ManagedRhods: {"modelmesh", "modelmesh-controller"}, + cluster.SelfManagedRhoai: {"modelmesh", "modelmesh-controller"}, + cluster.ManagedRhoai: {"modelmesh", "modelmesh-controller"}, cluster.OpenDataHub: {"modelmesh", "modelmesh-controller"}, cluster.Unknown: {"modelmesh", "modelmesh-controller"}, } diff --git a/controllers/components/trustyai/trustyai.go b/controllers/components/trustyai/trustyai.go index 0c39a262986..40ad9243d80 100644 --- a/controllers/components/trustyai/trustyai.go +++ b/controllers/components/trustyai/trustyai.go @@ -22,8 +22,8 @@ const ( var ( SourcePath = map[cluster.Platform]string{ - cluster.SelfManagedRhods: "/overlays/rhoai", - cluster.ManagedRhods: "/overlays/rhoai", + cluster.SelfManagedRhoai: "/overlays/rhoai", + cluster.ManagedRhoai: "/overlays/rhoai", cluster.OpenDataHub: "/overlays/odh", cluster.Unknown: "/overlays/odh", } diff --git a/controllers/components/workbenches/workbenches_controller_actions.go b/controllers/components/workbenches/workbenches_controller_actions.go index 01dbebaf1b7..32b2b198d32 100644 --- a/controllers/components/workbenches/workbenches_controller_actions.go +++ b/controllers/components/workbenches/workbenches_controller_actions.go @@ -93,7 +93,7 @@ func configureDependencies(ctx context.Context, rr *odhtypes.ReconciliationReque } platform := rr.Release.Name - if platform == cluster.SelfManagedRhods || platform == cluster.ManagedRhods { + if platform == cluster.SelfManagedRhoai || platform == cluster.ManagedRhoai { // Intentionally leaving the ownership unset for this namespace. // Specifying this label triggers its deletion when the operator is uninstalled. 
if err := rr.AddResources(&corev1.Namespace{ diff --git a/controllers/components/workbenches/workbenches_support.go b/controllers/components/workbenches/workbenches_support.go index 1ad9c39f98b..67561702c6e 100644 --- a/controllers/components/workbenches/workbenches_support.go +++ b/controllers/components/workbenches/workbenches_support.go @@ -23,8 +23,8 @@ const ( ) var serviceAccounts = map[cluster.Platform][]string{ - cluster.SelfManagedRhods: {nbcServiceAccountName}, - cluster.ManagedRhods: {nbcServiceAccountName}, + cluster.SelfManagedRhoai: {nbcServiceAccountName}, + cluster.ManagedRhoai: {nbcServiceAccountName}, cluster.OpenDataHub: {nbcServiceAccountName}, cluster.Unknown: {nbcServiceAccountName}, } diff --git a/controllers/datasciencecluster/datasciencecluster_controller.go b/controllers/datasciencecluster/datasciencecluster_controller.go index 85ac0f8a557..016905f989e 100644 --- a/controllers/datasciencecluster/datasciencecluster_controller.go +++ b/controllers/datasciencecluster/datasciencecluster_controller.go @@ -23,7 +23,6 @@ import ( "fmt" "time" - "github.com/go-logr/logr" operatorv1 "github.com/openshift/api/operator/v1" admissionregistrationv1 "k8s.io/api/admissionregistration/v1" appsv1 "k8s.io/api/apps/v1" @@ -39,6 +38,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" "sigs.k8s.io/controller-runtime/pkg/event" "sigs.k8s.io/controller-runtime/pkg/handler" + logf "sigs.k8s.io/controller-runtime/pkg/log" "sigs.k8s.io/controller-runtime/pkg/predicate" "sigs.k8s.io/controller-runtime/pkg/reconcile" @@ -59,7 +59,6 @@ import ( type DataScienceClusterReconciler struct { *odhClient.Client Scheme *runtime.Scheme - Log logr.Logger // Recorder to generate events Recorder record.EventRecorder DataScienceCluster *DataScienceClusterConfig @@ -77,7 +76,7 @@ const ( // Reconcile is part of the main kubernetes reconciliation loop which aims to // move the current state of the cluster closer to the desired state. 
 func (r *DataScienceClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
-	log := r.Log
+	log := logf.FromContext(ctx).WithName("DataScienceCluster")
 	log.Info("Reconciling DataScienceCluster resources", "Request.Name", req.Name)
 
 	// Get information on version and platform
@@ -161,7 +160,7 @@
 			saved.Status.Phase = status.PhaseError
 		})
 		if err != nil {
-			r.reportError(err, instance, "failed to update DataScienceCluster condition")
+			r.reportError(ctx, err, instance, "failed to update DataScienceCluster condition")
 
 			return ctrl.Result{}, err
 		}
@@ -210,7 +209,7 @@
 		saved.Status.Release = currentOperatorRelease
 	})
 	if err != nil {
-		_ = r.reportError(err, instance, fmt.Sprintf("failed to add conditions to status of DataScienceCluster resource name %s", req.Name))
+		_ = r.reportError(ctx, err, instance, fmt.Sprintf("failed to add conditions to status of DataScienceCluster resource name %s", req.Name))
 
 		return ctrl.Result{}, err
 	}
@@ -268,17 +267,18 @@
 	instance *dscv1.DataScienceCluster,
 	component cr.ComponentHandler,
 ) (*dscv1.DataScienceCluster, error) {
+	log := logf.FromContext(ctx)
 	componentName := component.GetName()
 
-	r.Log.Info("Starting reconciliation of component: " + componentName)
+	log.Info("Starting reconciliation of component: " + componentName)
 
 	enabled := component.GetManagementState(instance) == operatorv1.Managed
 	componentCR := component.NewCRObject(instance)
 
 	err := r.apply(ctx, instance, componentCR)
 	if err != nil {
-		r.Log.Error(err, "Failed to reconciled component CR: "+componentName)
-		instance = r.reportError(err, instance, fmt.Sprintf("failed to reconciled %s by DSC", componentName))
+		log.Error(err, "Failed to reconcile component CR: "+componentName)
+		instance = r.reportError(ctx, err, instance, fmt.Sprintf("failed to reconcile %s by DSC", componentName))
 		instance, _ = status.UpdateWithRetry(ctx, r.Client, instance, func(saved *dscv1.DataScienceCluster) {
 			status.SetComponentCondition(&saved.Status.Conditions, componentName, status.ReconcileFailed, fmt.Sprintf("Component reconciliation failed: %v", err), corev1.ConditionFalse)
 		})
@@ -286,7 +286,7 @@
 	}
 
 	// TODO: check component status before update DSC status to successful .GetStatus().Phase == "Ready"
-	r.Log.Info("component reconciled successfully: " + componentName)
+	log.Info("component reconciled successfully: " + componentName)
 	instance, err = status.UpdateWithRetry(ctx, r.Client, instance, func(saved *dscv1.DataScienceCluster) {
 		if saved.Status.InstalledComponents == nil {
 			saved.Status.InstalledComponents = make(map[string]bool)
@@ -307,8 +307,8 @@
 
 	// TODO: report failed component status .GetStatus().Phase == "NotReady" not only creation
 	if err != nil {
-		r.Log.Error(err, "Failed to reconcile component: "+componentName)
-		instance = r.reportError(err, instance, fmt.Sprintf("failed to reconcile %s", componentName))
+		log.Error(err, "Failed to reconcile component: "+componentName)
+		instance = r.reportError(ctx, err, instance, fmt.Sprintf("failed to reconcile %s", componentName))
 		instance, _ = status.UpdateWithRetry(ctx, r.Client, instance, func(saved *dscv1.DataScienceCluster) {
 			status.SetComponentCondition(&saved.Status.Conditions, componentName,
status.ReconcileFailed, fmt.Sprintf("Component reconciliation failed: %v", err), corev1.ConditionFalse) }) @@ -317,9 +317,8 @@ func (r *DataScienceClusterReconciler) ReconcileComponent( return instance, nil } -func (r *DataScienceClusterReconciler) reportError(err error, instance *dscv1.DataScienceCluster, message string) *dscv1.DataScienceCluster { - log := r.Log - log.Error(err, message, "instance.Name", instance.Name) +func (r *DataScienceClusterReconciler) reportError(ctx context.Context, err error, instance *dscv1.DataScienceCluster, message string) *dscv1.DataScienceCluster { + logf.FromContext(ctx).Error(err, message, "instance.Name", instance.Name) r.Recorder.Eventf(instance, corev1.EventTypeWarning, "DataScienceClusterReconcileError", "%s for instance %s", message, instance.Name) return instance diff --git a/controllers/dscinitialization/dscinitialization_controller.go b/controllers/dscinitialization/dscinitialization_controller.go index 77ae0b1bdf9..4aa7cd8ecc3 100644 --- a/controllers/dscinitialization/dscinitialization_controller.go +++ b/controllers/dscinitialization/dscinitialization_controller.go @@ -22,13 +22,13 @@ import ( "path/filepath" "reflect" - "github.com/go-logr/logr" operatorv1 "github.com/openshift/api/operator/v1" routev1 "github.com/openshift/api/route/v1" appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" networkingv1 "k8s.io/api/networking/v1" rbacv1 "k8s.io/api/rbac/v1" + k8serr "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" "k8s.io/client-go/tools/record" @@ -39,6 +39,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" "sigs.k8s.io/controller-runtime/pkg/event" "sigs.k8s.io/controller-runtime/pkg/handler" + logf "sigs.k8s.io/controller-runtime/pkg/log" "sigs.k8s.io/controller-runtime/pkg/predicate" "sigs.k8s.io/controller-runtime/pkg/reconcile" @@ -48,6 +49,7 @@ import ( "github.com/opendatahub-io/opendatahub-operator/v2/pkg/cluster" odhClient "github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/client" "github.com/opendatahub-io/opendatahub-operator/v2/pkg/deploy" + "github.com/opendatahub-io/opendatahub-operator/v2/pkg/logger" "github.com/opendatahub-io/opendatahub-operator/v2/pkg/trustedcabundle" "github.com/opendatahub-io/opendatahub-operator/v2/pkg/upgrade" ) @@ -64,14 +66,13 @@ var managementStateChangeTrustedCA = false type DSCInitializationReconciler struct { *odhClient.Client Scheme *runtime.Scheme - Log logr.Logger Recorder record.EventRecorder ApplicationsNamespace string } // Reconcile contains controller logic specific to DSCInitialization instance updates. 
func (r *DSCInitializationReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { //nolint:funlen,gocyclo,maintidx - log := r.Log + log := logf.FromContext(ctx).WithName("DSCInitialization") log.Info("Reconciling DSCInitialization.", "DSCInitialization Request.Name", req.Name) currentOperatorRelease := cluster.GetRelease() @@ -94,6 +95,15 @@ func (r *DSCInitializationReconciler) Reconcile(ctx context.Context, req ctrl.Re instance = &instances.Items[0] } + if instance.Spec.DevFlags != nil { + level := instance.Spec.DevFlags.LogLevel + log.V(1).Info("Setting log level", "level", level) + err := logger.SetLevel(level) + if err != nil { + log.Error(err, "Failed to set log level", "level", level) + } + } + if instance.ObjectMeta.DeletionTimestamp.IsZero() { if !controllerutil.ContainsFinalizer(instance, finalizerName) { log.Info("Adding finalizer for DSCInitialization", "name", instance.Name, "finalizer", finalizerName) @@ -177,7 +187,7 @@ func (r *DSCInitializationReconciler) Reconcile(ctx context.Context, req ctrl.Re switch req.Name { case "prometheus": // prometheus configmap - if instance.Spec.Monitoring.ManagementState == operatorv1.Managed && platform == cluster.ManagedRhods { + if instance.Spec.Monitoring.ManagementState == operatorv1.Managed && platform == cluster.ManagedRhoai { log.Info("Monitoring enabled to restart deployment", "cluster", "Managed Service Mode") err := r.configureManagedMonitoring(ctx, instance, "updates") if err != nil { @@ -187,7 +197,7 @@ func (r *DSCInitializationReconciler) Reconcile(ctx context.Context, req ctrl.Re return ctrl.Result{}, nil case "addon-managed-odh-parameters": - if instance.Spec.Monitoring.ManagementState == operatorv1.Managed && platform == cluster.ManagedRhods { + if instance.Spec.Monitoring.ManagementState == operatorv1.Managed && platform == cluster.ManagedRhoai { log.Info("Monitoring enabled when notification updated", "cluster", "Managed Service Mode") err := r.configureManagedMonitoring(ctx, instance, "updates") if err != nil { @@ -197,7 +207,7 @@ func (r *DSCInitializationReconciler) Reconcile(ctx context.Context, req ctrl.Re return ctrl.Result{}, nil case "backup": // revert back to the original prometheus.yml - if instance.Spec.Monitoring.ManagementState == operatorv1.Managed && platform == cluster.ManagedRhods { + if instance.Spec.Monitoring.ManagementState == operatorv1.Managed && platform == cluster.ManagedRhoai { log.Info("Monitoring enabled to restore back", "cluster", "Managed Service Mode") err := r.configureManagedMonitoring(ctx, instance, "revertbackup") if err != nil { @@ -207,11 +217,21 @@ func (r *DSCInitializationReconciler) Reconcile(ctx context.Context, req ctrl.Re return ctrl.Result{}, nil default: + createUsergroup, err := cluster.IsDefaultAuthMethod(ctx, r.Client) + if err != nil && !k8serr.IsNotFound(err) { // only keep reconcile if real error but not missing CRD or missing CR + return ctrl.Result{}, err + } + switch platform { - case cluster.SelfManagedRhods: - err := r.createUserGroup(ctx, instance, "rhods-admins") - if err != nil { - return reconcile.Result{}, err + case cluster.SelfManagedRhoai: + // Check if user opted for disabling creating user groups + if !createUsergroup { + log.Info("DSCI disabled usergroup creation") + } else { + err := r.createUserGroup(ctx, instance, "rhods-admins") + if err != nil { + return reconcile.Result{}, err + } } if instance.Spec.Monitoring.ManagementState == operatorv1.Managed { log.Info("Monitoring enabled, won't apply changes", "cluster", 
"Self-Managed RHODS Mode") @@ -220,7 +240,7 @@ func (r *DSCInitializationReconciler) Reconcile(ctx context.Context, req ctrl.Re return reconcile.Result{}, err } } - case cluster.ManagedRhods: + case cluster.ManagedRhoai: osdConfigsPath := filepath.Join(deploy.DefaultManifestPath, "osd-configs") err = deploy.DeployManifestsFromPath(ctx, r.Client, instance, osdConfigsPath, r.ApplicationsNamespace, "osd", true) if err != nil { @@ -241,9 +261,14 @@ func (r *DSCInitializationReconciler) Reconcile(ctx context.Context, req ctrl.Re } } default: - err := r.createUserGroup(ctx, instance, "odh-admins") - if err != nil { - return reconcile.Result{}, err + // Check if user opted for disabling creating user groups + if !createUsergroup { + log.Info("DSCI disabled usergroup creation") + } else { + err := r.createUserGroup(ctx, instance, "odh-admins") + if err != nil { + return reconcile.Result{}, err + } } if instance.Spec.Monitoring.ManagementState == operatorv1.Managed { log.Info("Monitoring enabled, won't apply changes", "cluster", "ODH Mode") @@ -370,8 +395,8 @@ var dsciPredicateStateChangeTrustedCA = predicate.Funcs{ }, } -func (r *DSCInitializationReconciler) watchMonitoringConfigMapResource(_ context.Context, a client.Object) []reconcile.Request { - log := r.Log +func (r *DSCInitializationReconciler) watchMonitoringConfigMapResource(ctx context.Context, a client.Object) []reconcile.Request { + log := logf.FromContext(ctx) if a.GetName() == "prometheus" && a.GetNamespace() == "redhat-ods-monitoring" { log.Info("Found monitoring configmap has updated, start reconcile") @@ -380,8 +405,8 @@ func (r *DSCInitializationReconciler) watchMonitoringConfigMapResource(_ context return nil } -func (r *DSCInitializationReconciler) watchMonitoringSecretResource(_ context.Context, a client.Object) []reconcile.Request { - log := r.Log +func (r *DSCInitializationReconciler) watchMonitoringSecretResource(ctx context.Context, a client.Object) []reconcile.Request { + log := logf.FromContext(ctx) operatorNs, err := cluster.GetOperatorNamespace() if err != nil { return nil @@ -396,7 +421,7 @@ func (r *DSCInitializationReconciler) watchMonitoringSecretResource(_ context.Co } func (r *DSCInitializationReconciler) watchDSCResource(ctx context.Context) []reconcile.Request { - log := r.Log + log := logf.FromContext(ctx) instanceList := &dscv1.DataScienceClusterList{} if err := r.Client.List(ctx, instanceList); err != nil { // do not handle if cannot get list diff --git a/controllers/dscinitialization/dscinitialization_test.go b/controllers/dscinitialization/dscinitialization_test.go index 7acfc39aedb..47f9fc60ec6 100644 --- a/controllers/dscinitialization/dscinitialization_test.go +++ b/controllers/dscinitialization/dscinitialization_test.go @@ -4,6 +4,7 @@ import ( "context" operatorv1 "github.com/openshift/api/operator/v1" + userv1 "github.com/openshift/api/user/v1" corev1 "k8s.io/api/core/v1" networkingv1 "k8s.io/api/networking/v1" rbacv1 "k8s.io/api/rbac/v1" @@ -23,6 +24,7 @@ const ( workingNamespace = "test-operator-ns" applicationName = "default-dsci" applicationNamespace = "test-application-ns" + usergroupName = "odh-admins" configmapName = "odh-common-config" monitoringNamespace = "test-monitoring-ns" readyPhase = "Ready" @@ -111,6 +113,14 @@ var _ = Describe("DataScienceCluster initialization", func() { Expect(foundConfigMap.Data).To(Equal(expectedConfigmapData)) }) + It("Should not create user group when we do not have authentications CR in the cluster", func(ctx context.Context) { + userGroup := &userv1.Group{} + 
Eventually(objectExists(usergroupName, "", userGroup)). + WithContext(ctx). + WithTimeout(timeout). + WithPolling(interval). + Should(BeFalse()) + }) }) Context("Monitoring Resource", func() { @@ -343,9 +353,9 @@ func namespaceExists(ns string, obj client.Object) func(ctx context.Context) boo } } -func objectExists(ns string, name string, obj client.Object) func(ctx context.Context) bool { //nolint:unparam +func objectExists(name string, namespace string, obj client.Object) func(ctx context.Context) bool { return func(ctx context.Context) bool { - err := k8sClient.Get(ctx, client.ObjectKey{Name: ns, Namespace: name}, obj) + err := k8sClient.Get(ctx, client.ObjectKey{Name: name, Namespace: namespace}, obj) return err == nil } diff --git a/controllers/dscinitialization/kubebuilder_rbac.go b/controllers/dscinitialization/kubebuilder_rbac.go index 7ad6efacd59..866f0d65822 100644 --- a/controllers/dscinitialization/kubebuilder_rbac.go +++ b/controllers/dscinitialization/kubebuilder_rbac.go @@ -6,6 +6,9 @@ package dscinitialization // +kubebuilder:rbac:groups="features.opendatahub.io",resources=featuretrackers,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups="features.opendatahub.io",resources=featuretrackers/status,verbs=get;update;patch;delete +/* Auth */ +// +kubebuilder:rbac:groups="config.openshift.io",resources=authentications,verbs=get;watch;list + /* Service Mesh Integration */ // +kubebuilder:rbac:groups="maistra.io",resources=servicemeshcontrolplanes,verbs=create;get;list;patch;update;use;watch // +kubebuilder:rbac:groups="maistra.io",resources=servicemeshmemberrolls,verbs=create;get;list;patch;update;use;watch diff --git a/controllers/dscinitialization/monitoring.go b/controllers/dscinitialization/monitoring.go index d07ef553fdc..2d34ed0df31 100644 --- a/controllers/dscinitialization/monitoring.go +++ b/controllers/dscinitialization/monitoring.go @@ -15,6 +15,7 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" + logf "sigs.k8s.io/controller-runtime/pkg/log" dsciv1 "github.com/opendatahub-io/opendatahub-operator/v2/apis/dscinitialization/v1" "github.com/opendatahub-io/opendatahub-operator/v2/pkg/cluster" @@ -35,7 +36,7 @@ var ( // only when reconcile on DSCI CR, initial set to true // if reconcile from monitoring, initial set to false, skip blackbox and rolebinding. 
func (r *DSCInitializationReconciler) configureManagedMonitoring(ctx context.Context, dscInit *dsciv1.DSCInitialization, initial string) error { - log := r.Log + log := logf.FromContext(ctx) if initial == "init" { // configure Blackbox exporter if err := configureBlackboxExporter(ctx, dscInit, r); err != nil { @@ -85,7 +86,7 @@ func (r *DSCInitializationReconciler) configureManagedMonitoring(ctx context.Con } func configureAlertManager(ctx context.Context, dsciInit *dsciv1.DSCInitialization, r *DSCInitializationReconciler) error { - log := r.Log + log := logf.FromContext(ctx) // Get Deadmansnitch secret deadmansnitchSecret, err := r.waitForManagedSecret(ctx, "redhat-rhods-deadmanssnitch", dsciInit.Spec.Monitoring.Namespace) if err != nil { @@ -196,7 +197,7 @@ func configureAlertManager(ctx context.Context, dsciInit *dsciv1.DSCInitializati } func configurePrometheus(ctx context.Context, dsciInit *dsciv1.DSCInitialization, r *DSCInitializationReconciler) error { - log := r.Log + log := logf.FromContext(ctx) // Update rolebinding-viewer err := common.ReplaceStringsInFile(filepath.Join(prometheusManifestsPath, "prometheus-rolebinding-viewer.yaml"), map[string]string{ @@ -344,7 +345,7 @@ func configurePrometheus(ctx context.Context, dsciInit *dsciv1.DSCInitialization } func configureBlackboxExporter(ctx context.Context, dsciInit *dsciv1.DSCInitialization, r *DSCInitializationReconciler) error { - log := r.Log + log := logf.FromContext(ctx) consoleRoute := &routev1.Route{} err := r.Client.Get(ctx, client.ObjectKey{Name: "console", Namespace: "openshift-console"}, consoleRoute) if err != nil { @@ -414,7 +415,7 @@ func createMonitoringProxySecret(ctx context.Context, cli client.Client, name st } foundProxySecret := &corev1.Secret{} - err = cli.Get(ctx, client.ObjectKey{Name: name, Namespace: dsciInit.Spec.Monitoring.Namespace}, foundProxySecret) + err = cli.Get(ctx, client.ObjectKeyFromObject(desiredProxySecret), foundProxySecret) if err != nil { if k8serr.IsNotFound(err) { // Set Controller reference @@ -434,7 +435,7 @@ func createMonitoringProxySecret(ctx context.Context, cli client.Client, name st } func (r *DSCInitializationReconciler) configureSegmentIO(ctx context.Context, dsciInit *dsciv1.DSCInitialization) error { - log := r.Log + log := logf.FromContext(ctx) // create segment.io only when configmap does not exist in the cluster segmentioConfigMap := &corev1.ConfigMap{} if err := r.Client.Get(ctx, client.ObjectKey{ @@ -463,7 +464,7 @@ func (r *DSCInitializationReconciler) configureSegmentIO(ctx context.Context, ds } func (r *DSCInitializationReconciler) configureCommonMonitoring(ctx context.Context, dsciInit *dsciv1.DSCInitialization) error { - log := r.Log + log := logf.FromContext(ctx) if err := r.configureSegmentIO(ctx, dsciInit); err != nil { return err } diff --git a/controllers/dscinitialization/servicemesh_setup.go b/controllers/dscinitialization/servicemesh_setup.go index aef6daba07b..ed3e5a9424b 100644 --- a/controllers/dscinitialization/servicemesh_setup.go +++ b/controllers/dscinitialization/servicemesh_setup.go @@ -9,6 +9,7 @@ import ( conditionsv1 "github.com/openshift/custom-resource-status/conditions/v1" corev1 "k8s.io/api/core/v1" "sigs.k8s.io/controller-runtime/pkg/client" + logf "sigs.k8s.io/controller-runtime/pkg/log" dsciv1 "github.com/opendatahub-io/opendatahub-operator/v2/apis/dscinitialization/v1" "github.com/opendatahub-io/opendatahub-operator/v2/controllers/status" @@ -19,7 +20,7 @@ import ( ) func (r *DSCInitializationReconciler) configureServiceMesh(ctx 
context.Context, instance *dsciv1.DSCInitialization) error { - log := r.Log + log := logf.FromContext(ctx) serviceMeshManagementState := operatorv1.Removed if instance.Spec.ServiceMesh != nil { serviceMeshManagementState = instance.Spec.ServiceMesh.ManagementState @@ -62,7 +63,7 @@ func (r *DSCInitializationReconciler) configureServiceMesh(ctx context.Context, } func (r *DSCInitializationReconciler) removeServiceMesh(ctx context.Context, instance *dsciv1.DSCInitialization) error { - log := r.Log + log := logf.FromContext(ctx) // on condition of Managed, do not handle Removed; when set to Removed it triggers DSCI reconcile to clean up if instance.Spec.ServiceMesh == nil { return nil diff --git a/controllers/dscinitialization/suite_test.go b/controllers/dscinitialization/suite_test.go index aa7177f68a8..10ffa98e404 100644 --- a/controllers/dscinitialization/suite_test.go +++ b/controllers/dscinitialization/suite_test.go @@ -22,6 +22,7 @@ import ( "testing" "time" + configv1 "github.com/openshift/api/config/v1" routev1 "github.com/openshift/api/route/v1" templatev1 "github.com/openshift/api/template/v1" userv1 "github.com/openshift/api/user/v1" @@ -78,6 +79,7 @@ func TestDataScienceClusterInitialization(t *testing.T) { var testScheme = runtime.NewScheme() +//nolint:fatcontext var _ = BeforeSuite(func() { // can't use suite's context as the manager should survive the function gCtx, gCancel = context.WithCancel(context.Background()) @@ -119,6 +121,7 @@ var _ = BeforeSuite(func() { utilruntime.Must(userv1.Install(testScheme)) utilruntime.Must(monitoringv1.AddToScheme(testScheme)) utilruntime.Must(templatev1.Install(testScheme)) + utilruntime.Must(configv1.Install(testScheme)) // +kubebuilder:scaffold:scheme k8sClient, err = client.New(cfg, client.Options{Scheme: testScheme}) @@ -144,7 +147,6 @@ var _ = BeforeSuite(func() { err = (&dscictrl.DSCInitializationReconciler{ Client: odhClient, Scheme: testScheme, - Log: ctrl.Log.WithName("controllers").WithName("DSCInitialization"), Recorder: mgr.GetEventRecorderFor("dscinitialization-controller"), }).SetupWithManager(gCtx, mgr) diff --git a/controllers/dscinitialization/utils.go b/controllers/dscinitialization/utils.go index b2990c68ef3..b434278a1ad 100644 --- a/controllers/dscinitialization/utils.go +++ b/controllers/dscinitialization/utils.go @@ -18,6 +18,7 @@ import ( "k8s.io/client-go/util/retry" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" + logf "sigs.k8s.io/controller-runtime/pkg/log" dsciv1 "github.com/opendatahub-io/opendatahub-operator/v2/apis/dscinitialization/v1" "github.com/opendatahub-io/opendatahub-operator/v2/pkg/cluster" @@ -37,7 +38,7 @@ var ( // - Network Policies 'opendatahub' that allow traffic between the ODH namespaces // - RoleBinding 'opendatahub'.
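For reference: a recurring refactor in the hunks below swaps hand-built client.ObjectKey{Name: ..., Namespace: ...} lookups for client.ObjectKeyFromObject(desired), so the lookup key is always derived from the desired object's own metadata. A minimal sketch of the resulting get-or-create idiom (illustrative only, not code from this PR; the ConfigMap type and function name are placeholders):

package example

import (
	"context"

	corev1 "k8s.io/api/core/v1"
	k8serr "k8s.io/apimachinery/pkg/api/errors"
	"sigs.k8s.io/controller-runtime/pkg/client"
)

// getOrCreate looks the object up by a key derived from the desired object
// itself, so name/namespace can never drift from the object's metadata.
func getOrCreate(ctx context.Context, cli client.Client, desired *corev1.ConfigMap) error {
	found := &corev1.ConfigMap{}
	err := cli.Get(ctx, client.ObjectKeyFromObject(desired), found)
	if k8serr.IsNotFound(err) {
		return cli.Create(ctx, desired)
	}
	return err // nil when the object already exists
}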
func (r *DSCInitializationReconciler) createOdhNamespace(ctx context.Context, dscInit *dsciv1.DSCInitialization, name string, platform cluster.Platform) error { - log := r.Log + log := logf.FromContext(ctx) // Expected application namespace for the given name desiredNamespace := &corev1.Namespace{ ObjectMeta: metav1.ObjectMeta{ @@ -51,7 +52,7 @@ func (r *DSCInitializationReconciler) createOdhNamespace(ctx context.Context, ds // Create Application Namespace if it doesn't exist foundNamespace := &corev1.Namespace{} - err := r.Get(ctx, client.ObjectKey{Name: name}, foundNamespace) + err := r.Get(ctx, client.ObjectKeyFromObject(desiredNamespace), foundNamespace) if err != nil { if k8serr.IsNotFound(err) { log.Info("Creating namespace", "name", name) @@ -142,7 +143,7 @@ func (r *DSCInitializationReconciler) createOdhNamespace(ctx context.Context, ds } func (r *DSCInitializationReconciler) createDefaultRoleBinding(ctx context.Context, name string, dscInit *dsciv1.DSCInitialization) error { - log := r.Log + log := logf.FromContext(ctx) // Expected namespace for the given name desiredRoleBinding := &rbacv1.RoleBinding{ TypeMeta: metav1.TypeMeta{ @@ -169,10 +170,7 @@ func (r *DSCInitializationReconciler) createDefaultRoleBinding(ctx context.Conte // Create RoleBinding if doesn't exists foundRoleBinding := &rbacv1.RoleBinding{} - err := r.Client.Get(ctx, client.ObjectKey{ - Name: name, - Namespace: name, - }, foundRoleBinding) + err := r.Client.Get(ctx, client.ObjectKeyFromObject(desiredRoleBinding), foundRoleBinding) if err != nil { if k8serr.IsNotFound(err) { // Set Controller reference @@ -193,10 +191,16 @@ func (r *DSCInitializationReconciler) createDefaultRoleBinding(ctx context.Conte } func (r *DSCInitializationReconciler) reconcileDefaultNetworkPolicy(ctx context.Context, name string, dscInit *dsciv1.DSCInitialization, platform cluster.Platform) error { - log := r.Log - if platform == cluster.ManagedRhods || platform == cluster.SelfManagedRhods { + log := logf.FromContext(ctx) + if platform == cluster.ManagedRhoai || platform == cluster.SelfManagedRhoai { + // Get operator namespace + operatorNs, err := cluster.GetOperatorNamespace() + if err != nil { + log.Error(err, "error getting operator namespace for networkpolicy creation") + return err + } // Deploy networkpolicy for operator namespace - err := deploy.DeployManifestsFromPath(ctx, r.Client, dscInit, networkpolicyPath+"/operator", "redhat-ods-operator", "networkpolicy", true) + err = deploy.DeployManifestsFromPath(ctx, r.Client, dscInit, networkpolicyPath+"/operator", operatorNs, "networkpolicy", true) if err != nil { log.Error(err, "error to set networkpolicy in operator namespace", "path", networkpolicyPath) return err @@ -286,10 +290,7 @@ func (r *DSCInitializationReconciler) reconcileDefaultNetworkPolicy(ctx context.
// Create NetworkPolicy if it doesn't exist foundNetworkPolicy := &networkingv1.NetworkPolicy{} justCreated := false - err := r.Client.Get(ctx, client.ObjectKey{ - Name: name, - Namespace: name, - }, foundNetworkPolicy) + err := r.Client.Get(ctx, client.ObjectKeyFromObject(desiredNetworkPolicy), foundNetworkPolicy) if err != nil { if k8serr.IsNotFound(err) { // Set Controller reference @@ -375,7 +376,7 @@ func GenerateRandomHex(length int) ([]byte, error) { } func (r *DSCInitializationReconciler) createOdhCommonConfigMap(ctx context.Context, name string, dscInit *dsciv1.DSCInitialization) error { - log := r.Log + log := logf.FromContext(ctx) // Expected configmap for the given namespace desiredConfigMap := &corev1.ConfigMap{ TypeMeta: metav1.TypeMeta{ @@ -391,10 +392,7 @@ func (r *DSCInitializationReconciler) createOdhCommonConfigMap(ctx context.Conte // Create Configmap if doesn't exists foundConfigMap := &corev1.ConfigMap{} - err := r.Client.Get(ctx, client.ObjectKey{ - Name: name, - Namespace: name, - }, foundConfigMap) + err := r.Client.Get(ctx, client.ObjectKeyFromObject(desiredConfigMap), foundConfigMap) if err != nil { if k8serr.IsNotFound(err) { // Set Controller reference @@ -424,10 +422,7 @@ func (r *DSCInitializationReconciler) createUserGroup(ctx context.Context, dscIn // Otherwise is errors with "error": "Group.user.openshift.io \"odh-admins\" is invalid: users: Invalid value: \"null\": users in body must be of type array: \"null\""} Users: []string{}, } - err := r.Client.Get(ctx, client.ObjectKey{ - Name: userGroup.Name, - Namespace: dscInit.Spec.ApplicationsNamespace, - }, userGroup) + err := r.Client.Get(ctx, client.ObjectKeyFromObject(userGroup), userGroup) if err != nil { if k8serr.IsNotFound(err) { err = r.Client.Create(ctx, userGroup) diff --git a/controllers/secretgenerator/secretgenerator_controller.go b/controllers/secretgenerator/secretgenerator_controller.go index ac57125050c..a9fb0235513 100644 --- a/controllers/secretgenerator/secretgenerator_controller.go +++ b/controllers/secretgenerator/secretgenerator_controller.go @@ -23,7 +23,6 @@ import ( "fmt" "time" - "github.com/go-logr/logr" oauthv1 "github.com/openshift/api/oauth/v1" routev1 "github.com/openshift/api/route/v1" corev1 "k8s.io/api/core/v1" @@ -37,6 +36,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/event" "sigs.k8s.io/controller-runtime/pkg/handler" + logf "sigs.k8s.io/controller-runtime/pkg/log" "sigs.k8s.io/controller-runtime/pkg/predicate" "sigs.k8s.io/controller-runtime/pkg/reconcile" @@ -53,13 +53,11 @@ const ( type SecretGeneratorReconciler struct { *odhClient.Client Scheme *runtime.Scheme - Log logr.Logger } // SetupWithManager sets up the controller with the Manager. -func (r *SecretGeneratorReconciler) SetupWithManager(mgr ctrl.Manager) error { - log := r.Log - log.Info("Adding controller for Secret Generation.") +func (r *SecretGeneratorReconciler) SetupWithManager(ctx context.Context, mgr ctrl.Manager) error { + logf.FromContext(ctx).Info("Adding controller for Secret Generation.") // Watch only new secrets with the corresponding annotation predicates := predicate.Funcs{ @@ -107,82 +105,98 @@ func (r *SecretGeneratorReconciler) SetupWithManager(mgr ctrl.Manager) error { // based on the specified type and complexity. This will avoid possible race // conditions when a deployment mounts the secret before it is reconciled. 
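The comment above describes deriving the generated value from annotations on the source Secret. A rough sketch of what producing such a random value can look like (an assumption for illustration; the operator's actual NewSecretFrom supports several types and complexities):

package example

import (
	"crypto/rand"
	"encoding/hex"
	"fmt"
)

// randomHex returns a hex-encoded string built from n cryptographically
// random bytes, comparable to a generated secret value.
func randomHex(n int) (string, error) {
	b := make([]byte, n)
	if _, err := rand.Read(b); err != nil {
		return "", fmt.Errorf("reading random bytes: %w", err)
	}
	return hex.EncodeToString(b), nil
}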
func (r *SecretGeneratorReconciler) Reconcile(ctx context.Context, request ctrl.Request) (ctrl.Result, error) { - log := r.Log foundSecret := &corev1.Secret{} err := r.Client.Get(ctx, request.NamespacedName, foundSecret) if err != nil { - if k8serr.IsNotFound(err) { - // If Secret is deleted, delete OAuthClient if exists - err = r.deleteOAuthClient(ctx, request.Name) + if !k8serr.IsNotFound(err) { + return ctrl.Result{}, err } - return ctrl.Result{}, err - } + // If Secret is deleted, delete OAuthClient if exists + err = r.deleteOAuthClient(ctx, request.NamespacedName) + if err != nil { + return ctrl.Result{}, err + } - owner := []metav1.OwnerReference{ - *metav1.NewControllerRef(foundSecret, foundSecret.GroupVersionKind()), + return ctrl.Result{}, nil } + // Generate the secret if it does not previously exist generatedSecret := &corev1.Secret{ ObjectMeta: metav1.ObjectMeta{ - Name: foundSecret.Name + "-generated", - Namespace: foundSecret.Namespace, - Labels: foundSecret.Labels, - OwnerReferences: owner, + Name: foundSecret.Name + "-generated", + Namespace: foundSecret.Namespace, }, } - generatedSecretKey := types.NamespacedName{ - Name: generatedSecret.Name, Namespace: generatedSecret.Namespace, + err = r.Client.Get(ctx, client.ObjectKeyFromObject(generatedSecret), generatedSecret) + if err == nil || !k8serr.IsNotFound(err) { + return ctrl.Result{}, err } - err = r.Client.Get(ctx, generatedSecretKey, generatedSecret) + + err = r.generateSecret(ctx, foundSecret, generatedSecret) if err != nil { - if k8serr.IsNotFound(err) { - // Generate secret random value - log.Info("Generating a random value for a secret in a namespace", - "secret", generatedSecret.Name, "namespace", generatedSecret.Namespace) + return ctrl.Result{}, err + } - secret, err := NewSecretFrom(foundSecret.GetAnnotations()) - if err != nil { - log.Error(err, "error creating secret %s in %s", generatedSecret.Name, generatedSecret.Namespace) - return ctrl.Result{}, err - } + // Don't requeue if secret is created successfully + return ctrl.Result{}, nil +} - generatedSecret.StringData = map[string]string{ - secret.Name: secret.Value, - } +func (r *SecretGeneratorReconciler) generateSecret(ctx context.Context, foundSecret *corev1.Secret, generatedSecret *corev1.Secret) error { + log := logf.FromContext(ctx).WithName("SecretGenerator") - err = r.Client.Create(ctx, generatedSecret) - if err != nil { - return ctrl.Result{}, err - } - log.Info("Done generating secret in namespace", - "secret", generatedSecret.Name, "namespace", generatedSecret.Namespace) - // check if annotation oauth-client-route exists - if secret.OAuthClientRoute != "" { - // Get OauthClient Route - oauthClientRoute, err := r.getRoute(ctx, secret.OAuthClientRoute, request.Namespace) - if err != nil { - log.Error(err, "Unable to retrieve route from OAuthClient", "route-name", secret.OAuthClientRoute) - return ctrl.Result{}, err - } - // Generate OAuthClient for the generated secret - log.Info("Generating an OAuthClient CR for route", "route-name", oauthClientRoute.Name) - err = r.createOAuthClient(ctx, foundSecret.Name, secret.Value, oauthClientRoute.Spec.Host) - if err != nil { - log.Error(err, "error creating oauth client resource. 
Recreate the Secret", "secret-name", - foundSecret.Name) - - return ctrl.Result{}, err - } - } - } else { - return ctrl.Result{}, err - } + // Generate secret random value + log.Info("Generating a random value for a secret in a namespace", + "secret", generatedSecret.Name, "namespace", generatedSecret.Namespace) + + generatedSecret.Labels = foundSecret.Labels + + generatedSecret.OwnerReferences = []metav1.OwnerReference{ + *metav1.NewControllerRef(foundSecret, foundSecret.GroupVersionKind()), } - // Don't requeue if secret is created successfully - return ctrl.Result{}, err + secret, err := NewSecretFrom(foundSecret.GetAnnotations()) + if err != nil { + log.Error(err, "error creating secret %s in %s", generatedSecret.Name, generatedSecret.Namespace) + return err + } + + generatedSecret.StringData = map[string]string{ + secret.Name: secret.Value, + } + + err = r.Client.Create(ctx, generatedSecret) + if err != nil { + return err + } + + log.Info("Done generating secret in namespace", + "secret", generatedSecret.Name, "namespace", generatedSecret.Namespace) + + // check if annotation oauth-client-route exists + if secret.OAuthClientRoute == "" { + return nil + } + + // Get OauthClient Route + oauthClientRoute, err := r.getRoute(ctx, secret.OAuthClientRoute, foundSecret.Namespace) + if err != nil { + log.Error(err, "Unable to retrieve route from OAuthClient", "route-name", secret.OAuthClientRoute) + return err + } + + // Generate OAuthClient for the generated secret + log.Info("Generating an OAuthClient CR for route", "route-name", oauthClientRoute.Name) + err = r.createOAuthClient(ctx, foundSecret.Name, secret.Value, oauthClientRoute.Spec.Host) + if err != nil { + log.Error(err, "error creating oauth client resource. Recreate the Secret", "secret-name", + foundSecret.Name) + + return err + } + + return nil } // getRoute returns an OpenShift route object. It waits until the .spec.host value exists to avoid possible race conditions, fails otherwise. 
@@ -210,7 +224,7 @@ func (r *SecretGeneratorReconciler) getRoute(ctx context.Context, name string, n } func (r *SecretGeneratorReconciler) createOAuthClient(ctx context.Context, name string, secretName string, uri string) error { - log := r.Log + log := logf.FromContext(ctx) // Create OAuthClient resource oauthClient := &oauthv1.OAuthClient{ TypeMeta: metav1.TypeMeta{ @@ -244,17 +258,16 @@ func (r *SecretGeneratorReconciler) createOAuthClient(ctx context.Context, name return err } -func (r *SecretGeneratorReconciler) deleteOAuthClient(ctx context.Context, secretName string) error { - oauthClient := &oauthv1.OAuthClient{} - - err := r.Client.Get(ctx, client.ObjectKey{ - Name: secretName, - }, oauthClient) - if err != nil { - return client.IgnoreNotFound(err) +func (r *SecretGeneratorReconciler) deleteOAuthClient(ctx context.Context, secretNamespacedName types.NamespacedName) error { + oauthClient := &oauthv1.OAuthClient{ + ObjectMeta: metav1.ObjectMeta{ + Name: secretNamespacedName.Name, + Namespace: secretNamespacedName.Namespace, + }, } - if err = r.Client.Delete(ctx, oauthClient); err != nil { + err := r.Client.Delete(ctx, oauthClient) + if err != nil && !k8serr.IsNotFound(err) { return fmt.Errorf("error deleting OAuthClient %s: %w", oauthClient.Name, err) } diff --git a/controllers/webhook/webhook.go b/controllers/webhook/webhook.go index 3b339d535fe..73ccd9060c5 100644 --- a/controllers/webhook/webhook.go +++ b/controllers/webhook/webhook.go @@ -23,6 +23,7 @@ import ( "fmt" "net/http" + "github.com/go-logr/logr" operatorv1 "github.com/openshift/api/operator/v1" admissionv1 "k8s.io/api/admission/v1" "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" @@ -30,6 +31,7 @@ import ( "k8s.io/apimachinery/pkg/runtime/schema" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" + logf "sigs.k8s.io/controller-runtime/pkg/log" "sigs.k8s.io/controller-runtime/pkg/webhook" "sigs.k8s.io/controller-runtime/pkg/webhook/admission" @@ -38,14 +40,25 @@ import ( "github.com/opendatahub-io/opendatahub-operator/v2/pkg/cluster/gvk" ) -var log = ctrl.Log.WithName("odh-controller-webhook") - //+kubebuilder:webhook:path=/validate-opendatahub-io-v1,mutating=false,failurePolicy=fail,sideEffects=None,groups=datasciencecluster.opendatahub.io;dscinitialization.opendatahub.io,resources=datascienceclusters;dscinitializations,verbs=create;delete,versions=v1,name=operator.opendatahub.io,admissionReviewVersions=v1 //nolint:lll +// TODO: Get rid of platform in name, rename to ValidatingWebhook. type OpenDataHubValidatingWebhook struct { Client client.Client Decoder *admission.Decoder + Name string +} + +// newLogConstructor creates a new logger constructor for a webhook. +// It is based on the root controller-runtime logger which is set in main.go. +// The purpose of it is to remove "admission" from the log name.
+func newLogConstructor(name string) func(logr.Logger, *admission.Request) logr.Logger { + return func(_ logr.Logger, req *admission.Request) logr.Logger { + base := ctrl.Log + l := admission.DefaultLogConstructor(base, req) + return l.WithValues("webhook", name) + } } func Init(mgr ctrl.Manager) { @@ -60,7 +73,8 @@ func Init(mgr ctrl.Manager) { func (w *OpenDataHubValidatingWebhook) SetupWithManager(mgr ctrl.Manager) { hookServer := mgr.GetWebhookServer() odhWebhook := &webhook.Admission{ - Handler: w, + Handler: w, + LogConstructor: newLogConstructor(w.Name), } hookServer.Register("/validate-opendatahub-io-v1", odhWebhook) } @@ -90,6 +104,8 @@ func denyCountGtZero(ctx context.Context, cli client.Client, gvk schema.GroupVer } func (w *OpenDataHubValidatingWebhook) checkDupCreation(ctx context.Context, req admission.Request) admission.Response { + log := logf.FromContext(ctx) + switch req.Kind.Kind { case "DataScienceCluster", "DSCInitialization": default: @@ -119,6 +135,9 @@ func (w *OpenDataHubValidatingWebhook) checkDeletion(ctx context.Context, req ad } func (w *OpenDataHubValidatingWebhook) Handle(ctx context.Context, req admission.Request) admission.Response { + log := logf.FromContext(ctx).WithName(w.Name).WithValues("operation", req.Operation) + ctx = logf.IntoContext(ctx, log) + var resp admission.Response resp.Allowed = true // initialize Allowed to be true in case Operation falls into "default" case @@ -141,19 +160,23 @@ func (w *OpenDataHubValidatingWebhook) Handle(ctx context.Context, req admission //+kubebuilder:webhook:path=/mutate-opendatahub-io-v1,mutating=true,failurePolicy=fail,sideEffects=None,groups=datasciencecluster.opendatahub.io,resources=datascienceclusters,verbs=create;update,versions=v1,name=mutate.operator.opendatahub.io,admissionReviewVersions=v1 //nolint:lll -type DSCDefaulter struct{} +type DSCDefaulter struct { + Name string +} // just assert that DSCDefaulter implements webhook.CustomDefaulter. var _ webhook.CustomDefaulter = &DSCDefaulter{} func (m *DSCDefaulter) SetupWithManager(mgr ctrl.Manager) { mutateWebhook := admission.WithCustomDefaulter(mgr.GetScheme(), &dscv1.DataScienceCluster{}, m) + mutateWebhook.LogConstructor = newLogConstructor(m.Name) mgr.GetWebhookServer().Register("/mutate-opendatahub-io-v1", mutateWebhook) } // Implement admission.CustomDefaulter interface. // It currently only sets defaults for modelregistry in datascienceclusters.
func (m *DSCDefaulter) Default(_ context.Context, obj runtime.Object) error { + // TODO: add debug logging, log := logf.FromContext(ctx).WithName(m.Name) dsc, isDSC := obj.(*dscv1.DataScienceCluster) if !isDSC { return fmt.Errorf("expected DataScienceCluster but got a different type: %T", obj) diff --git a/controllers/webhook/webhook_suite_test.go b/controllers/webhook/webhook_suite_test.go index 84b0256658c..b936ca30188 100644 --- a/controllers/webhook/webhook_suite_test.go +++ b/controllers/webhook/webhook_suite_test.go @@ -73,6 +73,7 @@ func TestAPIs(t *testing.T) { RunSpecs(t, "Webhook Suite") } +//nolint:fatcontext var _ = BeforeSuite(func() { // can't use suite's context as the manager should survive the function gCtx, gCancel = context.WithCancel(context.Background()) diff --git a/docs/api-overview.md b/docs/api-overview.md index 65224e5a991..ff78777b9a2 100644 --- a/docs/api-overview.md +++ b/docs/api-overview.md @@ -1792,7 +1792,8 @@ _Appears in:_ | Field | Description | Default | Validation | | --- | --- | --- | --- | | `manifestsUri` _string_ | Custom manifests uri for odh-manifests | | | -| `logmode` _string_ | | production | Enum: [devel development prod production default]
| +| `logmode` _string_ | ## DEPRECATED ##: Ignored, use LogLevel instead | production | Enum: [devel development prod production default]
| +| `logLevel` _string_ | Override Zap log level. Can be "debug", "info", "error" or a number (more verbose). | | | #### TrustedCABundleSpec diff --git a/main.go b/main.go index 8e159a9437a..4ac39092986 100644 --- a/main.go +++ b/main.go @@ -24,6 +24,7 @@ import ( addonv1alpha1 "github.com/openshift/addon-operator/apis/addons/v1alpha1" ocappsv1 "github.com/openshift/api/apps/v1" //nolint:importas //reason: conflicts with appsv1 "k8s.io/api/apps/v1" buildv1 "github.com/openshift/api/build/v1" + configv1 "github.com/openshift/api/config/v1" consolev1 "github.com/openshift/api/console/v1" imagev1 "github.com/openshift/api/image/v1" oauthv1 "github.com/openshift/api/oauth/v1" @@ -162,7 +163,7 @@ func main() { //nolint:funlen,maintidx flag.Parse() - ctrl.SetLogger(logger.NewLoggerWithOptions(logmode, &opts)) + ctrl.SetLogger(logger.NewLogger(logmode, &opts)) // root context ctx := ctrl.SetupSignalHandler() @@ -223,6 +224,10 @@ func main() { //nolint:funlen,maintidx &operatorv1.IngressController{}: { Field: fields.Set{"metadata.name": "default"}.AsSelector(), }, + // For authentication CR "cluster" + &configv1.Authentication{}: { + Field: fields.Set{"metadata.name": cluster.ClusterAuthenticationObj}.AsSelector(), + }, // for prometheus and black-box deployment and ones we owns &appsv1.Deployment{}: {Namespaces: deploymentCache}, // kueue need prometheusrules @@ -280,7 +285,6 @@ func main() { //nolint:funlen,maintidx if err = (&dscictrl.DSCInitializationReconciler{ Client: oc, Scheme: mgr.GetScheme(), - Log: logger.LogWithLevel(ctrl.Log.WithName(operatorName).WithName("controllers").WithName("DSCInitialization"), logmode), Recorder: mgr.GetEventRecorderFor("dscinitialization-controller"), ApplicationsNamespace: dscApplicationsNamespace, }).SetupWithManager(ctx, mgr); err != nil { @@ -291,7 +295,6 @@ func main() { //nolint:funlen,maintidx if err = (&dscctrl.DataScienceClusterReconciler{ Client: oc, Scheme: mgr.GetScheme(), - Log: logger.LogWithLevel(ctrl.Log.WithName(operatorName).WithName("controllers").WithName("DataScienceCluster"), logmode), DataScienceCluster: &dscctrl.DataScienceClusterConfig{ DSCISpec: &dsciv1.DSCInitializationSpec{ ApplicationsNamespace: dscApplicationsNamespace, @@ -306,8 +309,7 @@ func main() { //nolint:funlen,maintidx if err = (&secretgenerator.SecretGeneratorReconciler{ Client: oc, Scheme: mgr.GetScheme(), - Log: logger.LogWithLevel(ctrl.Log.WithName(operatorName).WithName("controllers").WithName("SecretGenerator"), logmode), - }).SetupWithManager(mgr); err != nil { + }).SetupWithManager(ctx, mgr); err != nil { setupLog.Error(err, "unable to create controller", "controller", "SecretGenerator") os.Exit(1) } @@ -315,8 +317,7 @@ func main() { //nolint:funlen,maintidx if err = (&certconfigmapgenerator.CertConfigmapGeneratorReconciler{ Client: oc, Scheme: mgr.GetScheme(), - Log: logger.LogWithLevel(ctrl.Log.WithName(operatorName).WithName("controllers").WithName("CertConfigmapGenerator"), logmode), - }).SetupWithManager(mgr); err != nil { + }).SetupWithManager(ctx, mgr); err != nil { setupLog.Error(err, "unable to create controller", "controller", "CertConfigmapGenerator") os.Exit(1) } @@ -366,8 +367,8 @@ func main() { //nolint:funlen,maintidx } } - // Create default DSC CR for managed RHODS - if platform == cluster.ManagedRhods { + // Create default DSC CR for managed RHOAI + if platform == cluster.ManagedRhoai { var createDefaultDSCFunc manager.RunnableFunc = func(ctx context.Context) error { err := upgrade.CreateDefaultDSC(ctx, setupClient) if err != nil { @@ -402,6 
+403,10 @@ func main() { //nolint:funlen,maintidx setupLog.Error(err, "unable to set up ready check") os.Exit(1) } + if err := initComponents(ctx, platform); err != nil { + setupLog.Error(err, "unable to init components") + os.Exit(1) + } setupLog.Info("starting manager") if err := mgr.Start(ctx); err != nil { @@ -416,11 +421,15 @@ func createSecretCacheConfig(platform cluster.Platform) map[string]cache.Config "openshift-ingress": {}, } switch platform { - case cluster.ManagedRhods: + case cluster.ManagedRhoai: namespaceConfigs["redhat-ods-monitoring"] = cache.Config{} namespaceConfigs["redhat-ods-applications"] = cache.Config{} - namespaceConfigs["redhat-ods-operator"] = cache.Config{} - case cluster.SelfManagedRhods: + operatorNs, err := cluster.GetOperatorNamespace() + if err != nil { + operatorNs = "redhat-ods-operator" // fall back case + } + namespaceConfigs[operatorNs] = cache.Config{} + case cluster.SelfManagedRhoai: namespaceConfigs["redhat-ods-applications"] = cache.Config{} default: namespaceConfigs["opendatahub"] = cache.Config{} @@ -431,10 +440,10 @@ func createSecretCacheConfig(platform cluster.Platform) map[string]cache.Config func createDeploymentCacheConfig(platform cluster.Platform) map[string]cache.Config { namespaceConfigs := map[string]cache.Config{} switch platform { - case cluster.ManagedRhods: // no need workbench NS, only SFS no Deployment + case cluster.ManagedRhoai: // no need workbench NS, only SFS no Deployment namespaceConfigs["redhat-ods-monitoring"] = cache.Config{} namespaceConfigs["redhat-ods-applications"] = cache.Config{} - case cluster.SelfManagedRhods: + case cluster.SelfManagedRhoai: namespaceConfigs["redhat-ods-applications"] = cache.Config{} default: namespaceConfigs["opendatahub"] = cache.Config{} diff --git a/pkg/cluster/cluster_config.go b/pkg/cluster/cluster_config.go index f855008cafe..08a1c445bfe 100644 --- a/pkg/cluster/cluster_config.go +++ b/pkg/cluster/cluster_config.go @@ -9,10 +9,12 @@ import ( "github.com/blang/semver/v4" "github.com/go-logr/logr" + configv1 "github.com/openshift/api/config/v1" "github.com/operator-framework/api/pkg/lib/version" ofapiv1alpha1 "github.com/operator-framework/api/pkg/operators/v1alpha1" corev1 "k8s.io/api/core/v1" k8serr "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/api/meta" "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" "k8s.io/apimachinery/pkg/runtime/schema" "sigs.k8s.io/controller-runtime/pkg/client" @@ -135,11 +137,11 @@ func GetClusterServiceVersion(ctx context.Context, c client.Client, namespace st gvk.ClusterServiceVersion.Kind) } -// detectSelfManaged detects if it is Self Managed Rhods or OpenDataHub. +// detectSelfManaged detects if it is Self Managed Rhoai or OpenDataHub. func detectSelfManaged(ctx context.Context, cli client.Client) (Platform, error) { variants := map[string]Platform{ "opendatahub-operator": OpenDataHub, - "rhods-operator": SelfManagedRhods, + "rhods-operator": SelfManagedRhoai, } for k, v := range variants { @@ -155,29 +157,36 @@ func detectSelfManaged(ctx context.Context, cli client.Client) (Platform, error) return Unknown, nil } -// detectManagedRHODS checks if catsrc CR add-on exists ManagedRhods. -func detectManagedRHODS(ctx context.Context, cli client.Client) (Platform, error) { +// detectManagedRhoai checks if the add-on CatalogSource CR exists, which indicates ManagedRhoai.
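The detection order getPlatform implements below: trust ODH_PLATFORM_TYPE when it names a known platform, otherwise probe the cluster for the managed add-on, then fall back to CSV-based self-managed detection. A condensed sketch of that fallback chain (probes are stubbed parameters here; not the PR's exact code):

package example

import (
	"context"
	"os"
)

type Platform string

const (
	OpenDataHub      Platform = "Open Data Hub"
	ManagedRhoai     Platform = "OpenShift AI Cloud Service"
	SelfManagedRhoai Platform = "OpenShift AI Self-Managed"
)

// getPlatformSketch mirrors the fallback order: env var first, then the
// managed add-on probe, then self-managed detection.
func getPlatformSketch(ctx context.Context, probeManaged, probeSelfManaged func(context.Context) (Platform, error)) (Platform, error) {
	switch os.Getenv("ODH_PLATFORM_TYPE") {
	case "OpenDataHub":
		return OpenDataHub, nil
	case "ManagedRHOAI":
		return ManagedRhoai, nil
	case "SelfManagedRHOAI":
		return SelfManagedRhoai, nil
	default: // unset or unrecognized: ask the cluster
		platform, err := probeManaged(ctx)
		if err != nil || platform == ManagedRhoai {
			return platform, err
		}
		return probeSelfManaged(ctx)
	}
}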
+func detectManagedRhoai(ctx context.Context, cli client.Client) (Platform, error) { catalogSource := &ofapiv1alpha1.CatalogSource{} - err := cli.Get(ctx, client.ObjectKey{Name: "addon-managed-odh-catalog", Namespace: "redhat-ods-operator"}, catalogSource) + operatorNs, err := GetOperatorNamespace() + if err != nil { + operatorNs = "redhat-ods-operator" + } + err = cli.Get(ctx, client.ObjectKey{Name: "addon-managed-odh-catalog", Namespace: operatorNs}, catalogSource) if err != nil { return Unknown, client.IgnoreNotFound(err) } - return ManagedRhods, nil + return ManagedRhoai, nil } func getPlatform(ctx context.Context, cli client.Client) (Platform, error) { switch os.Getenv("ODH_PLATFORM_TYPE") { - case "OpenDataHub", "": + case "OpenDataHub": return OpenDataHub, nil case "ManagedRHOAI": - return ManagedRhods, nil + return ManagedRhoai, nil case "SelfManagedRHOAI": - return SelfManagedRhods, nil - default: // fall back to detect platform if ODH_PLATFORM_TYPE env is not provided - if platform, err := detectManagedRHODS(ctx, cli); err != nil { + return SelfManagedRhoai, nil + default: + // fall back to detect platform if ODH_PLATFORM_TYPE env is not provided in CSV or set to "" + platform, err := detectManagedRhoai(ctx, cli) + if err != nil { return Unknown, err - } else if platform == ManagedRhods { - return ManagedRhods, nil + } + if platform == ManagedRhoai { + return ManagedRhoai, nil + } return detectSelfManaged(ctx, cli) } @@ -220,3 +229,20 @@ func getRelease(ctx context.Context, cli client.Client) (Release, error) { initRelease.Version = csv.Spec.Version return initRelease, nil } + +// IsDefaultAuthMethod returns true if the default authentication method is IntegratedOAuth or empty. +// This indicates whether the Operator should create userGroups in the cluster. +func IsDefaultAuthMethod(ctx context.Context, cli client.Client) (bool, error) { + authenticationobj := &configv1.Authentication{} + if err := cli.Get(ctx, client.ObjectKey{Name: ClusterAuthenticationObj, Namespace: ""}, authenticationobj); err != nil { + if errors.Is(err, &meta.NoKindMatchError{}) { // when CRD is missing, convert error type + return false, k8serr.NewNotFound(configv1.Resource("authentications"), ClusterAuthenticationObj) + } + return false, err + } + + // for now, HPC supports "" "None" "IntegratedOAuth"(default) "OIDC" + // other offerings support "" "None" "IntegratedOAuth"(default) + // we only create userGroups for "IntegratedOAuth" or "" and leave any other (or future) supported types alone + return authenticationobj.Spec.Type == configv1.AuthenticationTypeIntegratedOAuth || authenticationobj.Spec.Type == "", nil +} diff --git a/pkg/cluster/const.go b/pkg/cluster/const.go index 5e218430db7..6a6562bff07 100644 --- a/pkg/cluster/const.go +++ b/pkg/cluster/const.go @@ -1,10 +1,10 @@ package cluster const ( - // ManagedRhods defines expected addon catalogsource. - ManagedRhods Platform = "OpenShift AI Cloud Service" - // SelfManagedRhods defines display name in csv. - SelfManagedRhods Platform = "OpenShift AI Self-Managed" + // ManagedRhoai defines expected addon catalogsource. + ManagedRhoai Platform = "OpenShift AI Cloud Service" + // SelfManagedRhoai defines display name in csv. + SelfManagedRhoai Platform = "OpenShift AI Self-Managed" // OpenDataHub defines display name in csv. OpenDataHub Platform = "Open Data Hub" // Unknown indicates that operator is not deployed using OLM. @@ -12,4 +12,7 @@ const ( // DefaultNotebooksNamespace defines default namespace for notebooks.
DefaultNotebooksNamespace = "rhods-notebooks" + + // Default cluster-scope Authentication CR name. + ClusterAuthenticationObj = "cluster" ) diff --git a/pkg/cluster/operator.go b/pkg/cluster/operator.go index f7c35cd1256..5e2b0e7fe36 100644 --- a/pkg/cluster/operator.go +++ b/pkg/cluster/operator.go @@ -4,9 +4,14 @@ import ( "context" "fmt" "strings" + "time" "github.com/operator-framework/api/pkg/operators/v1alpha1" ofapiv2 "github.com/operator-framework/api/pkg/operators/v2" + apiextv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1" + "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/runtime/schema" + "k8s.io/apimachinery/pkg/util/wait" "sigs.k8s.io/controller-runtime/pkg/client" ) @@ -67,3 +72,31 @@ func OperatorExists(ctx context.Context, cli client.Client, operatorPrefix strin return false, nil } + +// CustomResourceDefinitionExists checks if a CustomResourceDefinition with the given GVK exists. +func CustomResourceDefinitionExists(ctx context.Context, cli client.Client, crdGK schema.GroupKind) error { + crd := &apiextv1.CustomResourceDefinition{} + resourceInterval, resourceTimeout := 2*time.Second, 5*time.Second + name := strings.ToLower(fmt.Sprintf("%ss.%s", crdGK.Kind, crdGK.Group)) // we need plural form of the kind + + err := wait.PollUntilContextTimeout(ctx, resourceInterval, resourceTimeout, false, func(ctx context.Context) (bool, error) { + err := cli.Get(ctx, client.ObjectKey{Name: name}, crd) + if err != nil { + if errors.IsNotFound(err) { + return false, nil + } + return false, err + } + + for _, condition := range crd.Status.Conditions { + if condition.Type == apiextv1.Established { + if condition.Status == apiextv1.ConditionTrue { + return true, nil + } + } + } + return false, nil + }) + + return err +} diff --git a/pkg/cluster/resources.go b/pkg/cluster/resources.go index 71177c0661d..9435e7b4df0 100644 --- a/pkg/cluster/resources.go +++ b/pkg/cluster/resources.go @@ -13,8 +13,8 @@ import ( k8serr "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/wait" - ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" + logf "sigs.k8s.io/controller-runtime/pkg/log" "github.com/opendatahub-io/opendatahub-operator/v2/pkg/metadata/labels" ) @@ -71,7 +71,7 @@ func CreateSecret(ctx context.Context, cli client.Client, name, namespace string } foundSecret := &corev1.Secret{} - err := cli.Get(ctx, client.ObjectKey{Name: name, Namespace: namespace}, foundSecret) + err := cli.Get(ctx, client.ObjectKeyFromObject(desiredSecret), foundSecret) if err != nil { if k8serr.IsNotFound(err) { err = cli.Create(ctx, desiredSecret) @@ -99,11 +99,7 @@ func CreateOrUpdateConfigMap(ctx context.Context, c client.Client, desiredCfgMap } existingCfgMap := &corev1.ConfigMap{} - err := c.Get(ctx, client.ObjectKey{ - Name: desiredCfgMap.Name, - Namespace: desiredCfgMap.Namespace, - }, existingCfgMap) - + err := c.Get(ctx, client.ObjectKeyFromObject(desiredCfgMap), existingCfgMap) if k8serr.IsNotFound(err) { return c.Create(ctx, desiredCfgMap) } else if err != nil { @@ -143,7 +139,7 @@ func CreateNamespace(ctx context.Context, cli client.Client, namespace string, m } foundNamespace := &corev1.Namespace{} - if getErr := cli.Get(ctx, client.ObjectKey{Name: namespace}, foundNamespace); client.IgnoreNotFound(getErr) != nil { + if getErr := cli.Get(ctx, client.ObjectKeyFromObject(desiredNamespace), foundNamespace); client.IgnoreNotFound(getErr) != nil { return nil, getErr } @@ -181,6 +177,7 @@ func 
ExecuteOnAllNamespaces(ctx context.Context, cli client.Client, processFunc // WaitForDeploymentAvailable to check if component deployment from 'namespace' is ready within 'timeout' before apply prometheus rules for the component. func WaitForDeploymentAvailable(ctx context.Context, c client.Client, componentName string, namespace string, interval int, timeout int) error { + log := logf.FromContext(ctx) resourceInterval := time.Duration(interval) * time.Second resourceTimeout := time.Duration(timeout) * time.Minute @@ -191,7 +188,7 @@ func WaitForDeploymentAvailable(ctx context.Context, c client.Client, componentN return false, fmt.Errorf("error fetching list of deployments: %w", err) } - ctrl.Log.Info("waiting for " + strconv.Itoa(len(componentDeploymentList.Items)) + " deployment to be ready for " + componentName) + log.Info("waiting for " + strconv.Itoa(len(componentDeploymentList.Items)) + " deployment to be ready for " + componentName) for _, deployment := range componentDeploymentList.Items { if deployment.Status.ReadyReplicas != deployment.Status.Replicas { return false, nil @@ -203,6 +200,7 @@ func WaitForDeploymentAvailable(ctx context.Context, c client.Client, componentN } func CreateWithRetry(ctx context.Context, cli client.Client, obj client.Object, timeoutMin int) error { + log := logf.FromContext(ctx) interval := time.Second * 5 // arbitrary value timeout := time.Duration(timeoutMin) * time.Minute @@ -230,7 +228,7 @@ func CreateWithRetry(ctx context.Context, cli client.Client, obj client.Object, // retry if 500, assume webhook is not available if k8serr.IsInternalError(errCreate) { - ctrl.Log.Info("Error creating object, retrying...", "reason", errCreate) + log.Info("Error creating object, retrying...", "reason", errCreate) return false, nil } diff --git a/pkg/cluster/roles.go b/pkg/cluster/roles.go index c989915aefe..96ccbae0eb4 100644 --- a/pkg/cluster/roles.go +++ b/pkg/cluster/roles.go @@ -23,7 +23,7 @@ func CreateOrUpdateClusterRole(ctx context.Context, cli client.Client, name stri } foundClusterRole := &rbacv1.ClusterRole{} - err := cli.Get(ctx, client.ObjectKey{Name: desiredClusterRole.GetName()}, foundClusterRole) + err := cli.Get(ctx, client.ObjectKeyFromObject(desiredClusterRole), foundClusterRole) if k8serr.IsNotFound(err) { return desiredClusterRole, cli.Create(ctx, desiredClusterRole) } @@ -63,7 +63,7 @@ func CreateOrUpdateClusterRoleBinding(ctx context.Context, cli client.Client, na } foundClusterRoleBinding := &rbacv1.ClusterRoleBinding{} - err := cli.Get(ctx, client.ObjectKey{Name: desiredClusterRoleBinding.GetName()}, foundClusterRoleBinding) + err := cli.Get(ctx, client.ObjectKeyFromObject(desiredClusterRoleBinding), foundClusterRoleBinding) if k8serr.IsNotFound(err) { return desiredClusterRoleBinding, cli.Create(ctx, desiredClusterRoleBinding) } diff --git a/pkg/controller/actions/security/actions_test.go b/pkg/controller/actions/security/actions_test.go index b03a0ff1418..fdc1aca627d 100644 --- a/pkg/controller/actions/security/actions_test.go +++ b/pkg/controller/actions/security/actions_test.go @@ -27,8 +27,8 @@ func TestUpdatePodSecurityRoleBindingAction(t *testing.T) { m := map[cluster.Platform][]string{ cluster.OpenDataHub: {"odh-dashboard"}, - cluster.SelfManagedRhods: {"rhods-dashboard"}, - cluster.ManagedRhods: {"rhods-dashboard", "fake-account"}, + cluster.SelfManagedRhoai: {"rhods-dashboard"}, + cluster.ManagedRhoai: {"rhods-dashboard", "fake-account"}, } action := security.NewUpdatePodSecurityRoleBindingAction(m) diff --git 
a/pkg/feature/conditions.go b/pkg/feature/conditions.go index 9a9373ba130..7b27196dd1a 100644 --- a/pkg/feature/conditions.go +++ b/pkg/feature/conditions.go @@ -64,6 +64,7 @@ func WaitForPodsToBeReady(namespace string) Action { return false, err } + podList.Items = filterEvictedPods(podList.Items) readyPods := 0 totalPods := len(podList.Items) @@ -102,6 +103,18 @@ func WaitForPodsToBeReady(namespace string) Action { } } +func filterEvictedPods(pods []corev1.Pod) []corev1.Pod { + var filteredPods []corev1.Pod + + for _, pod := range pods { + if pod.Status.Phase != corev1.PodFailed || pod.Status.Reason != "Evicted" { + filteredPods = append(filteredPods, pod) + } + } + + return filteredPods +} + func WaitForResourceToBeCreated(namespace string, gvk schema.GroupVersionKind) Action { return func(ctx context.Context, cli client.Client, f *Feature) error { f.Log.Info("waiting for resource to be created", "namespace", namespace, "resource", gvk) diff --git a/pkg/feature/servicemesh/conditions.go b/pkg/feature/servicemesh/conditions.go index 64bd360b49e..e845868aa3d 100644 --- a/pkg/feature/servicemesh/conditions.go +++ b/pkg/feature/servicemesh/conditions.go @@ -7,6 +7,8 @@ import ( "github.com/hashicorp/go-multierror" "github.com/pkg/errors" + corev1 "k8s.io/api/core/v1" + k8serr "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" "k8s.io/apimachinery/pkg/util/wait" "sigs.k8s.io/controller-runtime/pkg/client" @@ -37,6 +39,21 @@ func EnsureServiceMeshOperatorInstalled(ctx context.Context, cli client.Client, if err := feature.EnsureOperatorIsInstalled("servicemeshoperator")(ctx, cli, f); err != nil { return fmt.Errorf("failed to find the pre-requisite Service Mesh Operator subscription, please ensure Service Mesh Operator is installed. %w", err) } + // Extra check SMCP CRD is installed and is active. + if err := cluster.CustomResourceDefinitionExists(ctx, cli, gvk.ServiceMeshControlPlane.GroupKind()); err != nil { + return fmt.Errorf("failed to find the Service Mesh Control Plane CRD, please ensure Service Mesh Operator is installed. %w", err) + } + // Extra check smcp validation service is running. + validationService := &corev1.Service{} + if err := cli.Get(ctx, client.ObjectKey{ + Name: "istio-operator-service", + Namespace: "openshift-operators", + }, validationService); err != nil { + if k8serr.IsNotFound(err) { + return fmt.Errorf("failed to find the Service Mesh VWC service, please ensure Service Mesh Operator is running. %w", err) + } + return fmt.Errorf("failed to find the Service Mesh VWC service. %w", err) + } return nil } diff --git a/pkg/logger/logger.go b/pkg/logger/logger.go index dc8a4cf97f6..8844e6e3925 100644 --- a/pkg/logger/logger.go +++ b/pkg/logger/logger.go @@ -1,74 +1,108 @@ package logger import ( + "errors" "flag" + "fmt" "os" + "strconv" "strings" + "sync/atomic" "github.com/go-logr/logr" + "go.uber.org/zap" "go.uber.org/zap/zapcore" - "sigs.k8s.io/controller-runtime/pkg/log/zap" + ctrlzap "sigs.k8s.io/controller-runtime/pkg/log/zap" ) -var logLevelMapping = map[string]int{ - "devel": 0, - "default": 1, // default one when not set log-mode - "prod": 2, +const envVarName = "ZAP_LOG_LEVEL" + +var defaultLogLevel = zap.InfoLevel + +var logLevel atomic.Value + +// copy from controller-runtime/pkg/log/zap/flag.go. +var levelStrings = map[string]zapcore.Level{ + "debug": zap.DebugLevel, + "info": zap.InfoLevel, + "error": zap.ErrorLevel, } -// NewNamedLogger creates a new logger for a component. 
-// If the mode is set (so can be different from the default one), -// it will create a new logger with the specified mode's options. -func NewNamedLogger(log logr.Logger, name string, mode string) logr.Logger { - if mode != "" { - log = NewLogger(mode) +// adjusted copy from controller-runtime/pkg/log/zap/flag.go, keep the same argument name. +func stringToLevel(flagValue string) (zapcore.Level, error) { + level, validLevel := levelStrings[strings.ToLower(flagValue)] + if validLevel { + return level, nil + } + logLevel, err := strconv.ParseInt(flagValue, 10, 8) + if err != nil { + return 0, fmt.Errorf("invalid log level \"%s\"", flagValue) } - return log.WithName(name) + if logLevel > 0 { + intLevel := -1 * int8(logLevel) + return zapcore.Level(intLevel), nil + } + + return 0, fmt.Errorf("invalid log level \"%s\"", flagValue) } -// in each controller, to use different log level. -func LogWithLevel(logger logr.Logger, level string) logr.Logger { - level = strings.TrimSpace(level) - verbosityLevel, ok := logLevelMapping[level] +func SetLevel(levelStr string) error { + if levelStr == "" { + return nil + } + levelNum, err := stringToLevel(levelStr) + if err != nil { + return err + } + + // ctrlzap.addDefaults() uses a pointer to the AtomicLevel, + // but ctrlzap.(*levelFlag).Set() uses the structure itself. + // So store the structure and always set the value in newOptions() before the addDefaults() call. + level, ok := logLevel.Load().(zap.AtomicLevel) if !ok { - verbosityLevel = 1 // fallback to info level + return errors.New("stored loglevel is not of type *zap.AtomicLevel") } - return logger.V(verbosityLevel) -} -func NewLoggerWithOptions(mode string, override *zap.Options) logr.Logger { - opts := newOptions(mode) - overrideOptions(opts, override) - return newLogger(opts) + level.SetLevel(levelNum) + return nil } -// in DSC component, to use different mode for logging, e.g. development, production -// when not set mode it falls to "default" which is used by startup main.go.
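Why the numeric branch in stringToLevel above negates the value: zap levels grow more verbose as they go negative (DebugLevel is -1), so ZAP_LOG_LEVEL=2 lands below debug. A tiny standalone illustration of the effect (assumes only the mapping shown above):

package main

import (
	"fmt"

	"go.uber.org/zap"
	"go.uber.org/zap/zapcore"
)

func main() {
	// "2" maps to zapcore.Level(-2): lower (more verbose) than DebugLevel (-1),
	// so both debug and info records pass the shared AtomicLevel check.
	level := zap.NewAtomicLevelAt(zapcore.Level(-2))
	fmt.Println(level.Enabled(zapcore.DebugLevel)) // true
	fmt.Println(level.Enabled(zapcore.InfoLevel))  // true
}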
-func NewLogger(mode string) logr.Logger { - return newLogger(newOptions(mode)) +func levelFromEnvOrDefault() zapcore.Level { + levelStr := os.Getenv(envVarName) + if levelStr == "" { + return defaultLogLevel + } + level, err := stringToLevel(levelStr) + if err != nil { + return defaultLogLevel + } + return level } -func newLogger(opts *zap.Options) logr.Logger { - return zap.New(zap.UseFlagOptions(opts)) +func NewLogger(mode string, override *ctrlzap.Options) logr.Logger { + opts := newOptions(mode, levelFromEnvOrDefault()) + overrideOptions(opts, override) + logLevel.Store(opts.Level) + return ctrlzap.New(ctrlzap.UseFlagOptions(opts)) } -func newOptions(mode string) *zap.Options { - var opts zap.Options +func newOptions(mode string, defaultLevel zapcore.Level) *ctrlzap.Options { + var opts ctrlzap.Options + level := zap.NewAtomicLevelAt(defaultLevel) + switch mode { case "devel", "development": // the most logging verbosity - opts = zap.Options{ + opts = ctrlzap.Options{ Development: true, StacktraceLevel: zapcore.WarnLevel, - Level: zapcore.InfoLevel, DestWriter: os.Stdout, } case "prod", "production": // the least logging verbosity - opts = zap.Options{ + opts = ctrlzap.Options{ Development: false, StacktraceLevel: zapcore.ErrorLevel, - Level: zapcore.InfoLevel, DestWriter: os.Stdout, - EncoderConfigOptions: []zap.EncoderConfigOption{func(config *zapcore.EncoderConfig) { + EncoderConfigOptions: []ctrlzap.EncoderConfigOption{func(config *zapcore.EncoderConfig) { config.EncodeTime = zapcore.ISO8601TimeEncoder // human readable not epoch config.EncodeDuration = zapcore.SecondsDurationEncoder config.LevelKey = "LogLevel" @@ -80,17 +114,18 @@ func newOptions(mode string) *zap.Options { }}, } default: - opts = zap.Options{ + opts = ctrlzap.Options{ Development: false, StacktraceLevel: zapcore.ErrorLevel, - Level: zapcore.InfoLevel, DestWriter: os.Stdout, } } + + opts.Level = level return &opts } -func overrideOptions(orig, override *zap.Options) { +func overrideOptions(orig, override *ctrlzap.Options) { // Development is boolean, cannot check for nil, so check if it was set isDevelopmentSet := false flag.Visit(func(f *flag.Flag) { diff --git a/pkg/trustedcabundle/trustedcabundle.go b/pkg/trustedcabundle/trustedcabundle.go index 7e05256f34b..41a9ab2ef8b 100644 --- a/pkg/trustedcabundle/trustedcabundle.go +++ b/pkg/trustedcabundle/trustedcabundle.go @@ -5,6 +5,7 @@ import ( "context" "fmt" "strconv" + "strings" "time" "github.com/go-logr/logr" @@ -47,6 +48,9 @@ func HasCABundleAnnotationDisabled(ns client.Object) bool { // or update existing odh-trusted-ca-bundle configmap if already exists with new content of .data.odh-ca-bundle.crt // this is certificates for the cluster trusted CA Cert Bundle. 
func CreateOdhTrustedCABundleConfigMap(ctx context.Context, cli client.Client, namespace string, customCAData string) error { + // Append a trailing newline if the user input does not already have one + customCAData = strings.TrimSpace(customCAData) + "\n" + + // Expected configmap for the given namespace desiredConfigMap := &corev1.ConfigMap{ TypeMeta: metav1.TypeMeta{ @@ -71,10 +75,7 @@ func CreateOdhTrustedCABundleConfigMap(ctx context.Context, cli client.Client, n // Create Configmap if doesn't exist foundConfigMap := &corev1.ConfigMap{} - if err := cli.Get(ctx, client.ObjectKey{ - Name: CAConfigMapName, - Namespace: namespace, - }, foundConfigMap); err != nil { + if err := cli.Get(ctx, client.ObjectKeyFromObject(desiredConfigMap), foundConfigMap); err != nil { if k8serr.IsNotFound(err) { err = cli.Create(ctx, desiredConfigMap) if err != nil && !k8serr.IsAlreadyExists(err) { @@ -105,12 +106,12 @@ func DeleteOdhTrustedCABundleConfigMap(ctx context.Context, cli client.Client, n return cli.Delete(ctx, foundConfigMap) } -// IsTrustedCABundleUpdated check if data in CM "odh-trusted-ca-bundle" from applciation namespace matches DSCI's TrustedCABundle.CustomCABundle +// IsTrustedCABundleUpdated checks if data in CM "odh-trusted-ca-bundle" from application namespace matches DSCI's TrustedCABundle.CustomCABundle // return false when these two are matching => skip update // return true when not match => need update. func IsTrustedCABundleUpdated(ctx context.Context, cli client.Client, dscInit *dsciv1.DSCInitialization) (bool, error) { - userNamespace := &corev1.Namespace{} - if err := cli.Get(ctx, client.ObjectKey{Name: dscInit.Spec.ApplicationsNamespace}, userNamespace); err != nil { + appNamespace := &corev1.Namespace{} + if err := cli.Get(ctx, client.ObjectKey{Name: dscInit.Spec.ApplicationsNamespace}, appNamespace); err != nil { if k8serr.IsNotFound(err) { // if namespace is not found, return true. This is to ensure we reconcile, and check for other namespaces. return true, nil @@ -118,7 +119,7 @@ func IsTrustedCABundleUpdated(ctx context.Context, cli client.Client, dscInit *d return false, err } - if !ShouldInjectTrustedBundle(userNamespace) { + if !ShouldInjectTrustedBundle(appNamespace) { return false, nil } diff --git a/pkg/upgrade/uninstallation.go b/pkg/upgrade/uninstallation.go index 315b23c1244..0618fd7be45 100644 --- a/pkg/upgrade/uninstallation.go +++ b/pkg/upgrade/uninstallation.go @@ -10,6 +10,7 @@ import ( k8serr "k8s.io/apimachinery/pkg/api/errors" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" + logf "sigs.k8s.io/controller-runtime/pkg/log" dsciv1 "github.com/opendatahub-io/opendatahub-operator/v2/apis/dscinitialization/v1" "github.com/opendatahub-io/opendatahub-operator/v2/pkg/cluster" @@ -25,6 +26,7 @@ const ( // OperatorUninstall deletes all the externally generated resources. // This includes DSCI, namespace created by operator (but not workbench or MR's), subscription and CSV. func OperatorUninstall(ctx context.Context, cli client.Client, platform cluster.Platform) error { + log := logf.FromContext(ctx) if err := removeDSCInitialization(ctx, cli); err != nil { return err } @@ -51,7 +53,7 @@ func OperatorUninstall(ctx context.Context, cli client.Client, platform cluster.
if err := cli.Delete(ctx, &namespace); err != nil { return fmt.Errorf("error deleting namespace %v: %w", namespace.Name, err) } - ctrl.Log.Info("Namespace " + namespace.Name + " deleted as a part of uninstallation.") + log.Info("Namespace " + namespace.Name + " deleted as a part of uninstallation.") } } @@ -66,21 +68,21 @@ func OperatorUninstall(ctx context.Context, cli client.Client, platform cluster. return err } - ctrl.Log.Info("Removing operator subscription which in turn will remove installplan") + log.Info("Removing operator subscription which in turn will remove installplan") subsName := "opendatahub-operator" - if platform == cluster.SelfManagedRhods { + if platform == cluster.SelfManagedRhoai { subsName = "rhods-operator" } - if platform != cluster.ManagedRhods { + if platform != cluster.ManagedRhoai { if err := cluster.DeleteExistingSubscription(ctx, cli, operatorNs, subsName); err != nil { return err } } - ctrl.Log.Info("Removing the operator CSV in turn remove operator deployment") + log.Info("Removing the operator CSV in turn remove operator deployment") err = removeCSV(ctx, cli) - ctrl.Log.Info("All resources deleted as part of uninstall.") + log.Info("All resources deleted as part of uninstall.") return err } @@ -126,6 +128,7 @@ func HasDeleteConfigMap(ctx context.Context, c client.Client) bool { } func removeCSV(ctx context.Context, c client.Client) error { + log := logf.FromContext(ctx) // Get watchNamespace operatorNamespace, err := cluster.GetOperatorNamespace() if err != nil { @@ -142,7 +145,7 @@ func removeCSV(ctx context.Context, c client.Client) error { return err } - ctrl.Log.Info("Deleting CSV " + operatorCsv.Name) + log.Info("Deleting CSV " + operatorCsv.Name) err = c.Delete(ctx, operatorCsv) if err != nil { if k8serr.IsNotFound(err) { @@ -151,7 +154,7 @@ func removeCSV(ctx context.Context, c client.Client) error { return fmt.Errorf("error deleting clusterserviceversion: %w", err) } - ctrl.Log.Info("Clusterserviceversion " + operatorCsv.Name + " deleted as a part of uninstall") + log.Info("Clusterserviceversion " + operatorCsv.Name + " deleted as a part of uninstall") return nil } diff --git a/pkg/upgrade/upgrade.go b/pkg/upgrade/upgrade.go index 1e3aff31728..4572830d972 100644 --- a/pkg/upgrade/upgrade.go +++ b/pkg/upgrade/upgrade.go @@ -13,6 +13,7 @@ import ( routev1 "github.com/openshift/api/route/v1" monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" appsv1 "k8s.io/api/apps/v1" + batchv1 "k8s.io/api/batch/v1" corev1 "k8s.io/api/core/v1" rbacv1 "k8s.io/api/rbac/v1" apiextv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1" @@ -22,8 +23,8 @@ import ( "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" "k8s.io/apimachinery/pkg/runtime/schema" "k8s.io/apimachinery/pkg/types" - ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" + logf "sigs.k8s.io/controller-runtime/pkg/log" "github.com/opendatahub-io/opendatahub-operator/v2/apis/common" componentApi "github.com/opendatahub-io/opendatahub-operator/v2/apis/components/v1alpha1" @@ -34,6 +35,7 @@ import ( serviceApi "github.com/opendatahub-io/opendatahub-operator/v2/apis/services/v1alpha1" "github.com/opendatahub-io/opendatahub-operator/v2/pkg/cluster" "github.com/opendatahub-io/opendatahub-operator/v2/pkg/cluster/gvk" + "github.com/opendatahub-io/opendatahub-operator/v2/pkg/metadata/labels" ) type ResourceSpec struct { @@ -106,6 +108,7 @@ func CreateDefaultDSC(ctx context.Context, cli client.Client) error { // If there exists default-dsci 
diff --git a/pkg/upgrade/upgrade.go b/pkg/upgrade/upgrade.go
index 1e3aff31728..4572830d972 100644
--- a/pkg/upgrade/upgrade.go
+++ b/pkg/upgrade/upgrade.go
@@ -13,6 +13,7 @@ import (
 	routev1 "github.com/openshift/api/route/v1"
 	monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
 	appsv1 "k8s.io/api/apps/v1"
+	batchv1 "k8s.io/api/batch/v1"
 	corev1 "k8s.io/api/core/v1"
 	rbacv1 "k8s.io/api/rbac/v1"
 	apiextv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1"
@@ -22,8 +23,8 @@ import (
 	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
 	"k8s.io/apimachinery/pkg/runtime/schema"
 	"k8s.io/apimachinery/pkg/types"
-	ctrl "sigs.k8s.io/controller-runtime"
 	"sigs.k8s.io/controller-runtime/pkg/client"
+	logf "sigs.k8s.io/controller-runtime/pkg/log"
 
 	"github.com/opendatahub-io/opendatahub-operator/v2/apis/common"
 	componentApi "github.com/opendatahub-io/opendatahub-operator/v2/apis/components/v1alpha1"
@@ -34,6 +35,7 @@ import (
 	serviceApi "github.com/opendatahub-io/opendatahub-operator/v2/apis/services/v1alpha1"
 	"github.com/opendatahub-io/opendatahub-operator/v2/pkg/cluster"
 	"github.com/opendatahub-io/opendatahub-operator/v2/pkg/cluster/gvk"
+	"github.com/opendatahub-io/opendatahub-operator/v2/pkg/metadata/labels"
 )
 
 type ResourceSpec struct {
@@ -106,6 +108,7 @@ func CreateDefaultDSC(ctx context.Context, cli client.Client) error {
 // If there exists default-dsci instance already, it will not update DSCISpec on it.
 // Note: DSCI CR modifcations are not supported, as it is the initial prereq setting for the components.
 func CreateDefaultDSCI(ctx context.Context, cli client.Client, _ cluster.Platform, appNamespace, monNamespace string) error {
+	log := logf.FromContext(ctx)
 	defaultDsciSpec := &dsciv1.DSCInitializationSpec{
 		ApplicationsNamespace: appNamespace,
 		Monitoring: serviceApi.DSCMonitoring{
@@ -145,14 +148,14 @@ func CreateDefaultDSCI(ctx context.Context, cli client.Client, _ cluster.Platfor
 
 	switch {
 	case len(instances.Items) > 1:
-		ctrl.Log.Info("only one instance of DSCInitialization object is allowed. Please delete other instances.")
+		log.Info("only one instance of DSCInitialization object is allowed. Please delete other instances.")
 		return nil
 	case len(instances.Items) == 1:
 		// Do not patch/update if DSCI already exists.
-		ctrl.Log.Info("DSCInitialization resource already exists. It will not be updated with default DSCI.")
+		log.Info("DSCInitialization resource already exists. It will not be updated with default DSCI.")
 		return nil
 	case len(instances.Items) == 0:
-		ctrl.Log.Info("create default DSCI CR.")
+		log.Info("create default DSCI CR.")
 		err := cluster.CreateWithRetry(ctx, cli, defaultDsci, 1) // 1 min timeout
 		if err != nil {
 			return err
@@ -209,7 +212,7 @@ func CleanupExistingResource(ctx context.Context,
 ) error {
 	var multiErr *multierror.Error
 	// Special Handling of cleanup of deprecated model monitoring stack
-	if platform == cluster.ManagedRhods {
+	if platform == cluster.ManagedRhoai {
 		deprecatedDeployments := []string{"rhods-prometheus-operator"}
 		multiErr = multierror.Append(multiErr, deleteDeprecatedResources(ctx, cli, dscMonitoringNamespace, deprecatedDeployments, &appsv1.DeploymentList{}))
 
@@ -262,16 +265,25 @@ func CleanupExistingResource(ctx context.Context,
 	})
 	multiErr = multierror.Append(multiErr, deleteResources(ctx, cli, &odhDocJPH))
 	// only apply on RHOAI since ODH has a different way to create this CR by dashboard
-	if platform == cluster.SelfManagedRhods || platform == cluster.ManagedRhods {
+	if platform == cluster.SelfManagedRhoai || platform == cluster.ManagedRhoai {
 		if err := upgradeODCCR(ctx, cli, "odh-dashboard-config", dscApplicationsNamespace, oldReleaseVersion); err != nil {
 			return err
 		}
 	}
+	// remove modelreg proxy container from deployment in ODH
+	if platform == cluster.OpenDataHub {
+		if err := removeRBACProxyModelRegistry(ctx, cli, "model-registry-operator", "kube-rbac-proxy", dscApplicationsNamespace); err != nil {
+			return err
+		}
+	}
 
 	// to take a reference
 	toDelete := getDashboardWatsonResources(dscApplicationsNamespace)
 	multiErr = multierror.Append(multiErr, deleteResources(ctx, cli, &toDelete))
 
+	// cleanup NVIDIA NIM integration leftovers from tech preview
+	multiErr = multierror.Append(multiErr, cleanupNimIntegrationTechPreview(ctx, cli, oldReleaseVersion, dscApplicationsNamespace))
+
 	return multiErr.ErrorOrNil()
 }
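CleanupExistingResource and the deleteOneResource helper below drive deletions through GroupVersionKinds and unstructured lists, so deprecated kinds can be removed even after their Go types have left the codebase. A compilable sketch of that pattern, with illustrative names (not the operator's actual helper):

package sketch

import (
	"context"
	"fmt"

	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
	"k8s.io/apimachinery/pkg/runtime/schema"
	"sigs.k8s.io/controller-runtime/pkg/client"
)

// Illustrative sketch, not the operator's helper.
// deleteByGVK lists a kind by GroupVersionKind alone and deletes each item, e.g.
// deleteByGVK(ctx, c, schema.GroupVersionKind{Group: "apps", Version: "v1", Kind: "Deployment"}, "some-ns").
func deleteByGVK(ctx context.Context, c client.Client, gvk schema.GroupVersionKind, ns string) error {
	list := &unstructured.UnstructuredList{}
	list.SetGroupVersionKind(gvk)
	if err := c.List(ctx, list, client.InNamespace(ns)); err != nil {
		return fmt.Errorf("failed to list %s: %w", gvk.Kind, err)
	}
	for i := range list.Items {
		if err := c.Delete(ctx, &list.Items[i]); err != nil {
			return fmt.Errorf("failed to delete %s/%s: %w", ns, list.Items[i].GetName(), err)
		}
	}
	return nil
}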
%s: %w", res.Gvk.Kind, err) @@ -316,7 +329,7 @@ func deleteOneResource(ctx context.Context, c client.Client, res ResourceSpec) e if err != nil { return fmt.Errorf("failed to delete %s %s/%s: %w", res.Gvk.Kind, res.Namespace, item.GetName(), err) } - ctrl.Log.Info("Deleted object " + item.GetName() + " " + res.Gvk.String() + "in namespace" + res.Namespace) + log.Info("Deleted object " + item.GetName() + " " + res.Gvk.String() + "in namespace" + res.Namespace) } } } @@ -325,6 +338,7 @@ func deleteOneResource(ctx context.Context, c client.Client, res ResourceSpec) e } func deleteDeprecatedResources(ctx context.Context, cli client.Client, namespace string, resourceList []string, resourceType client.ObjectList) error { + log := logf.FromContext(ctx) var multiErr *multierror.Error listOpts := &client.ListOptions{Namespace: namespace} if err := cli.List(ctx, resourceType, listOpts); err != nil { @@ -335,16 +349,16 @@ func deleteDeprecatedResources(ctx context.Context, cli client.Client, namespace item := items.Index(i).Addr().Interface().(client.Object) //nolint:errcheck,forcetypeassert for _, name := range resourceList { if name == item.GetName() { - ctrl.Log.Info("Attempting to delete " + item.GetName() + " in namespace " + namespace) + log.Info("Attempting to delete " + item.GetName() + " in namespace " + namespace) err := cli.Delete(ctx, item) if err != nil { if k8serr.IsNotFound(err) { - ctrl.Log.Info("Could not find " + item.GetName() + " in namespace " + namespace) + log.Info("Could not find " + item.GetName() + " in namespace " + namespace) } else { multiErr = multierror.Append(multiErr, err) } } - ctrl.Log.Info("Successfully deleted " + item.GetName()) + log.Info("Successfully deleted " + item.GetName()) } } } @@ -353,6 +367,7 @@ func deleteDeprecatedResources(ctx context.Context, cli client.Client, namespace // Need to handle ServiceMonitor deletion separately as the generic function does not work for ServiceMonitors because of how the package is built. 
@@ -353,6 +367,7 @@ func deleteDeprecatedResources(ctx context.Context, cli client.Client, namespace
 
 // Need to handle ServiceMonitor deletion separately as the generic function does not work for ServiceMonitors because of how the package is built.
 func deleteDeprecatedServiceMonitors(ctx context.Context, cli client.Client, namespace string, resourceList []string) error {
+	log := logf.FromContext(ctx)
 	var multiErr *multierror.Error
 	listOpts := &client.ListOptions{Namespace: namespace}
 	servicemonitors := &monitoringv1.ServiceMonitorList{}
@@ -364,16 +379,16 @@ func deleteDeprecatedServiceMonitors(ctx context.Context, cli client.Client, nam
 		servicemonitor := servicemonitor
 		for _, name := range resourceList {
 			if name == servicemonitor.Name {
-				ctrl.Log.Info("Attempting to delete " + servicemonitor.Name + " in namespace " + namespace)
+				log.Info("Attempting to delete " + servicemonitor.Name + " in namespace " + namespace)
 				err := cli.Delete(ctx, servicemonitor)
 				if err != nil {
 					if k8serr.IsNotFound(err) {
-						ctrl.Log.Info("Could not find " + servicemonitor.Name + " in namespace " + namespace)
+						log.Info("Could not find " + servicemonitor.Name + " in namespace " + namespace)
 					} else {
 						multiErr = multierror.Append(multiErr, err)
 					}
 				}
-				ctrl.Log.Info("Successfully deleted " + servicemonitor.Name)
+				log.Info("Successfully deleted " + servicemonitor.Name)
 			}
 		}
 	}
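The updateODC* helpers below flip a single nested field with a JSON merge patch instead of a read-modify-write Update, so only the supplied fragment is sent and the API server merges everything else. A standalone sketch of the call shape (function and flag names are illustrative):

package sketch

import (
	"context"

	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
	"k8s.io/apimachinery/pkg/types"
	"sigs.k8s.io/controller-runtime/pkg/client"
)

// Illustrative sketch, not the operator's helper.
// forceDashboardFlag sends only the fragment below; the server merges it into
// the existing object, leaving all other fields untouched.
func forceDashboardFlag(ctx context.Context, c client.Client, obj *unstructured.Unstructured, flag string) error {
	patch := []byte(`{"spec": {"dashboardConfig": {"` + flag + `": false}}}`)
	return c.Patch(ctx, obj, client.RawPatch(types.MergePatchType, patch))
}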
2.14.0") + log.Info("Upgrade does not force ModelRegistry to false due to from release >= 2.14.0") + return nil +} + +// workaround for RHOAIENG-15328 +// TODO: this can be removed from ODH 2.22. +func removeRBACProxyModelRegistry(ctx context.Context, cli client.Client, componentName string, containerName string, applicationNS string) error { + log := logf.FromContext(ctx) + deploymentList := &appsv1.DeploymentList{} + if err := cli.List(ctx, deploymentList, client.InNamespace(applicationNS), client.HasLabels{labels.ODH.Component(componentName)}); err != nil { + return fmt.Errorf("error fetching list of deployments: %w", err) + } + + if len(deploymentList.Items) != 1 { // ModelRegistry operator is not deployed + return nil + } + mrDeployment := deploymentList.Items[0] + mrContainerList := mrDeployment.Spec.Template.Spec.Containers + // if only one container in deployment, we are already on newer deployment, no need more action + if len(mrContainerList) == 1 { + return nil + } + + log.Info("Upgrade force ModelRegistry to remove container from deployment") + for i, container := range mrContainerList { + if container.Name == containerName { + removeUnusedKubeRbacProxy := []byte(fmt.Sprintf("[{\"op\": \"remove\", \"path\": \"/spec/template/spec/containers/%d\"}]", i)) + if err := cli.Patch(ctx, &mrDeployment, client.RawPatch(types.JSONPatchType, removeUnusedKubeRbacProxy)); err != nil { + return fmt.Errorf("error removing ModelRegistry %s container from deployment: %w", containerName, err) + } + break + } + } return nil } @@ -490,6 +539,7 @@ func RemoveLabel(ctx context.Context, cli client.Client, objectName string, labe } func deleteDeprecatedNamespace(ctx context.Context, cli client.Client, namespace string) error { + log := logf.FromContext(ctx) foundNamespace := &corev1.Namespace{} if err := cli.Get(ctx, client.ObjectKey{Name: namespace}, foundNamespace); err != nil { if k8serr.IsNotFound(err) { @@ -518,7 +568,7 @@ func deleteDeprecatedNamespace(ctx context.Context, cli client.Client, namespace return fmt.Errorf("error getting pods from namespace %s: %w", namespace, err) } if len(podList.Items) != 0 { - ctrl.Log.Info("Skip deletion of namespace " + namespace + " due to running Pods in it") + log.Info("Skip deletion of namespace " + namespace + " due to running Pods in it") return nil } @@ -550,3 +600,52 @@ func GetDeployedRelease(ctx context.Context, cli client.Client) (cluster.Release // could be a clean installation or both CRs are deleted already return cluster.Release{}, nil } + +func cleanupNimIntegrationTechPreview(ctx context.Context, cli client.Client, oldRelease cluster.Release, applicationNS string) error { + var errs *multierror.Error + + if oldRelease.Version.Minor >= 14 && oldRelease.Version.Minor <= 15 { + log := logf.FromContext(ctx) + nimCronjob := "nvidia-nim-periodic-validator" + nimConfigMap := "nvidia-nim-validation-result" + nimAPISec := "nvidia-nim-access" + + deleteObjs := []struct { + obj client.Object + name, desc string + }{ + { + obj: &batchv1.CronJob{}, + name: nimCronjob, + desc: "validator CronJob", + }, + { + obj: &corev1.ConfigMap{}, + name: nimConfigMap, + desc: "data ConfigMap", + }, + { + obj: &corev1.Secret{}, + name: nimAPISec, + desc: "API key Secret", + }, + } + for _, delObj := range deleteObjs { + if gErr := cli.Get(ctx, types.NamespacedName{Name: delObj.name, Namespace: applicationNS}, delObj.obj); gErr != nil { + if !k8serr.IsNotFound(gErr) { + log.V(1).Error(gErr, fmt.Sprintf("failed to get NIM %s %s", delObj.desc, delObj.name)) + errs = 
@@ -490,6 +539,7 @@ func RemoveLabel(ctx context.Context, cli client.Client, objectName string, labe
 }
 
 func deleteDeprecatedNamespace(ctx context.Context, cli client.Client, namespace string) error {
+	log := logf.FromContext(ctx)
 	foundNamespace := &corev1.Namespace{}
 	if err := cli.Get(ctx, client.ObjectKey{Name: namespace}, foundNamespace); err != nil {
 		if k8serr.IsNotFound(err) {
@@ -518,7 +568,7 @@ func deleteDeprecatedNamespace(ctx context.Context, cli client.Client, namespace
 		return fmt.Errorf("error getting pods from namespace %s: %w", namespace, err)
 	}
 	if len(podList.Items) != 0 {
-		ctrl.Log.Info("Skip deletion of namespace " + namespace + " due to running Pods in it")
+		log.Info("Skip deletion of namespace " + namespace + " due to running Pods in it")
 		return nil
 	}
@@ -550,3 +600,52 @@ func GetDeployedRelease(ctx context.Context, cli client.Client) (cluster.Release
 	// could be a clean installation or both CRs are deleted already
 	return cluster.Release{}, nil
 }
+
+func cleanupNimIntegrationTechPreview(ctx context.Context, cli client.Client, oldRelease cluster.Release, applicationNS string) error {
+	var errs *multierror.Error
+
+	if oldRelease.Version.Minor >= 14 && oldRelease.Version.Minor <= 15 {
+		log := logf.FromContext(ctx)
+		nimCronjob := "nvidia-nim-periodic-validator"
+		nimConfigMap := "nvidia-nim-validation-result"
+		nimAPISec := "nvidia-nim-access"
+
+		deleteObjs := []struct {
+			obj        client.Object
+			name, desc string
+		}{
+			{
+				obj:  &batchv1.CronJob{},
+				name: nimCronjob,
+				desc: "validator CronJob",
+			},
+			{
+				obj:  &corev1.ConfigMap{},
+				name: nimConfigMap,
+				desc: "data ConfigMap",
+			},
+			{
+				obj:  &corev1.Secret{},
+				name: nimAPISec,
+				desc: "API key Secret",
+			},
+		}
+
+		for _, delObj := range deleteObjs {
+			if gErr := cli.Get(ctx, types.NamespacedName{Name: delObj.name, Namespace: applicationNS}, delObj.obj); gErr != nil {
+				if !k8serr.IsNotFound(gErr) {
+					log.V(1).Error(gErr, fmt.Sprintf("failed to get NIM %s %s", delObj.desc, delObj.name))
+					errs = multierror.Append(errs, gErr)
+				}
+			} else {
+				if dErr := cli.Delete(ctx, delObj.obj); dErr != nil {
+					log.Error(dErr, fmt.Sprintf("failed to remove NIM %s %s", delObj.desc, delObj.name))
+					errs = multierror.Append(errs, dErr)
+				} else {
+					log.Info(fmt.Sprintf("removed NIM %s successfully", delObj.desc))
+				}
+			}
+		}
+	}
+
+	return errs.ErrorOrNil()
+}
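cleanupNimIntegrationTechPreview collects per-object failures with hashicorp/go-multierror rather than aborting on the first error, and ErrorOrNil() collapses the no-failure case back to nil. A small runnable sketch of that accumulation pattern:

package main

import (
	"errors"
	"fmt"

	"github.com/hashicorp/go-multierror"
)

// Illustrative sketch, not the operator's code.
func cleanupAll(steps []func() error) error {
	var errs *multierror.Error
	for _, step := range steps {
		// Append tolerates a nil *multierror.Error receiver, so nothing is
		// allocated until the first real failure.
		if err := step(); err != nil {
			errs = multierror.Append(errs, err)
		}
	}
	// ErrorOrNil returns nil when nothing was appended.
	return errs.ErrorOrNil()
}

func main() {
	err := cleanupAll([]func() error{
		func() error { return nil },
		func() error { return errors.New("secret delete failed") },
	})
	fmt.Println(err)
}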
diff --git a/tests/e2e/deletion_test.go b/tests/e2e/deletion_test.go
index 1d04b6750c6..fe6169998e7 100644
--- a/tests/e2e/deletion_test.go
+++ b/tests/e2e/deletion_test.go
@@ -5,7 +5,7 @@ import (
 	"testing"
 
 	"github.com/stretchr/testify/require"
-	"k8s.io/apimachinery/pkg/api/errors"
+	k8serr "k8s.io/apimachinery/pkg/api/errors"
 	"k8s.io/apimachinery/pkg/types"
 	"sigs.k8s.io/controller-runtime/pkg/client"
 
@@ -42,7 +42,7 @@ func (tc *testContext) testDeletionExistDSC() error {
 		if dscerr != nil {
 			return fmt.Errorf("error deleting DSC instance %s: %w", expectedDSC.Name, dscerr)
 		}
-	} else if !errors.IsNotFound(err) {
+	} else if !k8serr.IsNotFound(err) {
 		if err != nil {
 			return fmt.Errorf("could not find DSC instance to delete: %w", err)
 		}
@@ -65,7 +65,7 @@ func (tc *testContext) testDeletionExistDSCI() error {
 		if dscierr != nil {
 			return fmt.Errorf("error deleting DSCI instance %s: %w", expectedDSCI.Name, dscierr)
 		}
-	} else if !errors.IsNotFound(err) {
+	} else if !k8serr.IsNotFound(err) {
 		if err != nil {
 			return fmt.Errorf("could not find DSCI instance to delete :%w", err)
 		}
diff --git a/tests/e2e/helper_test.go b/tests/e2e/helper_test.go
index ecac1c4b6e7..e7f04e39afe 100644
--- a/tests/e2e/helper_test.go
+++ b/tests/e2e/helper_test.go
@@ -14,7 +14,7 @@ import (
 	appsv1 "k8s.io/api/apps/v1"
 	corev1 "k8s.io/api/core/v1"
 	apiextv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1"
-	"k8s.io/apimachinery/pkg/api/errors"
+	k8serr "k8s.io/apimachinery/pkg/api/errors"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/runtime/schema"
 	"k8s.io/apimachinery/pkg/types"
@@ -53,7 +53,7 @@ func (tc *testContext) waitForOperatorDeployment(name string, replicas int32) er
 	err := wait.PollUntilContextTimeout(tc.ctx, generalRetryInterval, operatorReadyTimeout, false, func(ctx context.Context) (bool, error) {
 		controllerDeployment, err := tc.kubeClient.AppsV1().Deployments(tc.operatorNamespace).Get(ctx, name, metav1.GetOptions{})
 		if err != nil {
-			if errors.IsNotFound(err) {
+			if k8serr.IsNotFound(err) {
 				return false, nil
 			}
 			log.Printf("Failed to get %s controller deployment", name)
@@ -214,7 +214,7 @@ func (tc *testContext) validateCRD(crdName string) error {
 	err := wait.PollUntilContextTimeout(tc.ctx, generalRetryInterval, crdReadyTimeout, false, func(ctx context.Context) (bool, error) {
 		err := tc.customClient.Get(ctx, obj, crd)
 		if err != nil {
-			if errors.IsNotFound(err) {
+			if k8serr.IsNotFound(err) {
 				return false, nil
 			}
 			log.Printf("Failed to get CRD %s", crdName)
@@ -263,7 +263,7 @@ func getCSV(ctx context.Context, cli client.Client, name string, namespace strin
 		}
 	}
 
-	return nil, errors.NewNotFound(schema.GroupResource{}, name)
+	return nil, k8serr.NewNotFound(schema.GroupResource{}, name)
 }
 
 // Use existing or create a new one.
@@ -286,7 +286,7 @@ func getSubscription(tc *testContext, name string, ns string) (*ofapi.Subscripti
 	}
 
 	err := tc.customClient.Get(tc.ctx, key, sub)
-	if errors.IsNotFound(err) {
+	if k8serr.IsNotFound(err) {
 		return createSubscription(name, ns)
 	}
 	if err != nil {
@@ -300,7 +300,7 @@ func waitCSV(tc *testContext, name string, ns string) error {
 	interval := generalRetryInterval
 	isReady := func(ctx context.Context) (bool, error) {
 		csv, err := getCSV(ctx, tc.customClient, name, ns)
-		if errors.IsNotFound(err) {
+		if k8serr.IsNotFound(err) {
 			return false, nil
 		}
 		if err != nil {
diff --git a/tests/integration/features/cleanup_int_test.go b/tests/integration/features/cleanup_int_test.go
index 8d0634cfc0c..070346ccbb3 100644
--- a/tests/integration/features/cleanup_int_test.go
+++ b/tests/integration/features/cleanup_int_test.go
@@ -4,7 +4,7 @@ import (
 	"context"
 
 	corev1 "k8s.io/api/core/v1"
-	"k8s.io/apimachinery/pkg/api/errors"
+	k8serr "k8s.io/apimachinery/pkg/api/errors"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"sigs.k8s.io/controller-runtime/pkg/client"
 
@@ -81,7 +81,7 @@ var _ = Describe("feature cleanup", func() {
 				WithContext(ctx).
 				WithTimeout(fixtures.Timeout).
 				WithPolling(fixtures.Interval).
-				Should(WithTransform(errors.IsNotFound, BeTrue()))
+				Should(WithTransform(k8serr.IsNotFound, BeTrue()))
 		})
 	})
 
@@ -154,11 +154,11 @@ var _ = Describe("feature cleanup", func() {
 				WithContext(ctx).
 				WithTimeout(fixtures.Timeout).
 				WithPolling(fixtures.Interval).
-				Should(WithTransform(errors.IsNotFound, BeTrue()))
+				Should(WithTransform(k8serr.IsNotFound, BeTrue()))
 
 			Consistently(func() error {
 				_, err := fixtures.GetFeatureTracker(ctx, envTestClient, namespace, featureName)
-				if errors.IsNotFound(err) {
+				if k8serr.IsNotFound(err) {
 					return nil
 				}
 				return err
@@ -213,7 +213,7 @@ func createdSecretHasOwnerReferenceToOwningFeature(namespace, featureName string
 
 func namespaceExists(ctx context.Context, cli client.Client, f *feature.Feature) (bool, error) {
 	namespace, err := fixtures.GetNamespace(ctx, cli, "conditional-ns")
-	if errors.IsNotFound(err) {
+	if k8serr.IsNotFound(err) {
 		return false, nil
 	}
 	// ensuring it fails if namespace is still deleting
diff --git a/tests/integration/features/fixtures/cluster_test_fixtures.go b/tests/integration/features/fixtures/cluster_test_fixtures.go
index 0837050fd5a..47b17414274 100644
--- a/tests/integration/features/fixtures/cluster_test_fixtures.go
+++ b/tests/integration/features/fixtures/cluster_test_fixtures.go
@@ -79,6 +79,28 @@ func GetService(ctx context.Context, client client.Client, namespace, name strin
 	return svc, err
 }
 
+func CreateService(ctx context.Context, client client.Client, namespace, svcName string) (*corev1.Service, error) {
+	if err := client.Create(ctx, &corev1.Service{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      svcName,
+			Namespace: namespace,
+		},
+		Spec: corev1.ServiceSpec{
+			Selector: map[string]string{
+				"name": "istio-operator",
+			},
+			Ports: []corev1.ServicePort{
+				{
+					Port: 443,
+				},
+			},
+		},
+	}); err != nil {
+		return nil, err
+	}
+
+	return GetService(ctx, client, namespace, svcName)
+}
+
 func CreateSecret(name, namespace string) feature.Action {
 	return func(ctx context.Context, cli client.Client, f *feature.Feature) error {
 		secret := &corev1.Secret{
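The rename running through these test files, aliasing k8s.io/apimachinery/pkg/api/errors as k8serr, frees the standard library's errors name so both packages can coexist in one file. A runnable sketch:

package main

import (
	"errors"
	"fmt"

	k8serr "k8s.io/apimachinery/pkg/api/errors"
	"k8s.io/apimachinery/pkg/runtime/schema"
)

// Illustrative sketch, not part of the patch above.
func main() {
	notFound := k8serr.NewNotFound(schema.GroupResource{Group: "apps", Resource: "deployments"}, "model-registry-operator")

	fmt.Println(k8serr.IsNotFound(notFound)) // true

	// The stdlib errors package remains usable alongside the alias.
	var statusErr *k8serr.StatusError
	fmt.Println(errors.As(notFound, &statusErr)) // true
}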
diff --git a/tests/integration/features/servicemesh_feature_test.go b/tests/integration/features/servicemesh_feature_test.go
index 16a7b419063..6cb5ec5cbf2 100644
--- a/tests/integration/features/servicemesh_feature_test.go
+++ b/tests/integration/features/servicemesh_feature_test.go
@@ -4,8 +4,9 @@ import (
 	"context"
 	"path"
 
+	corev1 "k8s.io/api/core/v1"
 	apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1"
-	"k8s.io/apimachinery/pkg/api/errors"
+	k8serr "k8s.io/apimachinery/pkg/api/errors"
 	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
 	"k8s.io/apimachinery/pkg/util/yaml"
 	"sigs.k8s.io/controller-runtime/pkg/client"
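The hunk below adds two more failure stages for servicemesh.EnsureServiceMeshOperatorInstalled (missing CRD, then missing webhook service) on top of the existing subscription check. A precondition in this framework is just a feature.Action, the signature visible in the fixtures' CreateSecret above; a hypothetical CRD check might look roughly like this (the import path and CRD name are assumptions, not the operator's code):

package sketch

import (
	"context"
	"fmt"

	apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1"
	"sigs.k8s.io/controller-runtime/pkg/client"

	// Assumed import path for the feature package used in these tests.
	"github.com/opendatahub-io/opendatahub-operator/v2/pkg/feature"
)

// ensureSMCPCrdExists is a hypothetical precondition with the feature.Action
// shape: fail fast when the SMCP CRD is absent, which is what the new
// "precondition CRD check" test expects to observe.
func ensureSMCPCrdExists(ctx context.Context, cli client.Client, _ *feature.Feature) error {
	crd := &apiextensionsv1.CustomResourceDefinition{}
	if err := cli.Get(ctx, client.ObjectKey{Name: "servicemeshcontrolplanes.maistra.io"}, crd); err != nil {
		return fmt.Errorf("failed to find the Service Mesh Control Plane CRD: %w", err)
	}
	return nil
}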
@@ -49,9 +50,11 @@ var _ = Describe("Service Mesh setup", func() {
 
 	Context("operator setup", func() {
 
-		When("operator is not installed", func() {
+		Context("operator is not installed", Ordered, func() {
 
-			It("should fail using precondition check", func(ctx context.Context) {
+			var smcpCrdObj *apiextensionsv1.CustomResourceDefinition
+
+			It("should fail using precondition subscription check", func(ctx context.Context) {
 				// given
 				featuresHandler := feature.ClusterFeaturesHandler(dsci, func(registry feature.FeaturesRegistry) error {
 					errFeatureAdd := registry.Add(feature.Define("no-service-mesh-operator-check").
@@ -69,19 +72,72 @@ var _ = Describe("Service Mesh setup", func() {
 				// then
 				Expect(applyErr).To(MatchError(ContainSubstring("failed to find the pre-requisite operator subscription \"servicemeshoperator\"")))
 			})
+
+			It("should fail using precondition CRD check", func(ctx context.Context) {
+				// given
+				err := fixtures.CreateSubscription(ctx, envTestClient, "openshift-operators", fixtures.OssmSubscription)
+				Expect(err).ToNot(HaveOccurred())
+
+				featuresHandler := feature.ClusterFeaturesHandler(dsci, func(registry feature.FeaturesRegistry) error {
+					errFeatureAdd := registry.Add(feature.Define("no-service-mesh-crd-check").
+						PreConditions(servicemesh.EnsureServiceMeshOperatorInstalled),
+					)
+
+					Expect(errFeatureAdd).ToNot(HaveOccurred())
+
+					return nil
+				})
+
+				// when
+				applyErr := featuresHandler.Apply(ctx, envTestClient)
+
+				// then
+				Expect(applyErr).To(MatchError(ContainSubstring("failed to find the Service Mesh Control Plane CRD")))
+			})
+
+			It("should fail using precondition service check", func(ctx context.Context) {
+				// given
+				smcpCrdObj = installServiceMeshCRD(ctx)
+
+				featuresHandler := feature.ClusterFeaturesHandler(dsci, func(registry feature.FeaturesRegistry) error {
+					errFeatureAdd := registry.Add(feature.Define("no-service-mesh-service-check").
+						PreConditions(servicemesh.EnsureServiceMeshOperatorInstalled),
+					)
+
+					Expect(errFeatureAdd).ToNot(HaveOccurred())
+
+					return nil
+				})
+
+				// when
+				applyErr := featuresHandler.Apply(ctx, envTestClient)
+
+				// then
+				Expect(applyErr).To(MatchError(ContainSubstring("failed to find the Service Mesh VWC service")))
+			})
+
+			AfterAll(func(ctx context.Context) {
+				objectCleaner.DeleteAll(ctx, smcpCrdObj)
+			})
 		})
 
 		When("operator is installed", func() {
 			var smcpCrdObj *apiextensionsv1.CustomResourceDefinition
+			var svc *corev1.Service
 
 			BeforeEach(func(ctx context.Context) {
 				err := fixtures.CreateSubscription(ctx, envTestClient, "openshift-operators", fixtures.OssmSubscription)
 				Expect(err).ToNot(HaveOccurred())
+
 				smcpCrdObj = installServiceMeshCRD(ctx)
+
+				svc, err = fixtures.CreateService(ctx, envTestClient, "openshift-operators", "istio-operator-service")
+				Expect(err).ToNot(HaveOccurred())
 			})
 
 			AfterEach(func(ctx context.Context) {
-				objectCleaner.DeleteAll(ctx, smcpCrdObj)
+				objectCleaner.DeleteAll(ctx, smcpCrdObj, svc)
 			})
 
 			It("should succeed using precondition check", func(ctx context.Context) {
@@ -251,7 +307,7 @@ var _ = Describe("Service Mesh setup", func() {
 			Expect(found).To(BeTrue())
 
 			_, err = fixtures.GetNamespace(ctx, envTestClient, serviceMeshSpec.Auth.Namespace)
-			Expect(errors.IsNotFound(err)).To(BeTrue())
+			Expect(k8serr.IsNotFound(err)).To(BeTrue())
 
 			return extensionProviders