diff --git a/apis/components/v1/codeflare_types.go b/apis/components/v1/codeflare_types.go index c403bf9341a..e29f4f6339d 100644 --- a/apis/components/v1/codeflare_types.go +++ b/apis/components/v1/codeflare_types.go @@ -21,17 +21,12 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) -// EDIT THIS FILE! THIS IS SCAFFOLDING FOR YOU TO OWN! -// NOTE: json tags are required. Any new fields you add must have json tags for the fields to be serialized. - -// CodeFlareSpec defines the desired state of CodeFlare -type CodeFlareSpec struct { - // INSERT ADDITIONAL SPEC FIELDS - desired state of cluster - // Important: Run "make" to regenerate code after modifying this file - - // Foo is an example field of CodeFlare. Edit codeflare_types.go to remove/update - Foo string `json:"foo,omitempty"` -} +const ( + CodeFlareComponentName = "codeflare" + // value should match whats set in the XValidation below + CodeFlareInstanceName = "default-codeflare" + CodeFlareKind = "CodeFlare" +) // CodeFlareStatus defines the observed state of CodeFlare type CodeFlareStatus struct { @@ -41,6 +36,9 @@ type CodeFlareStatus struct { // +kubebuilder:object:root=true // +kubebuilder:subresource:status // +kubebuilder:resource:scope=Cluster +// +kubebuilder:printcolumn:name="Ready",type=string,JSONPath=`.status.conditions[?(@.type=="Ready")].status`,description="Ready" +// +kubebuilder:printcolumn:name="Reason",type=string,JSONPath=`.status.conditions[?(@.type=="Ready")].reason`,description="Reason" +// +kubebuilder:validation:XValidation:rule="self.metadata.name == 'default-codeflare'",message="CodeFlare name must be default-codeflare" // CodeFlare is the Schema for the codeflares API type CodeFlare struct { @@ -51,14 +49,26 @@ type CodeFlare struct { Status CodeFlareStatus `json:"status,omitempty"` } +type CodeFlareSpec struct { + CodeFlareCommonSpec `json:",inline"` +} + +type CodeFlareCommonSpec struct { + components.DevFlagsSpec `json:",inline"` +} + func (c *CodeFlare) GetDevFlags() 
*components.DevFlags { - return nil + return c.Spec.DevFlags } func (c *CodeFlare) GetStatus() *components.Status { return &c.Status.Status } +func init() { + SchemeBuilder.Register(&CodeFlare{}, &CodeFlareList{}) +} + // +kubebuilder:object:root=true // CodeFlareList contains a list of CodeFlare @@ -71,3 +81,8 @@ type CodeFlareList struct { func init() { SchemeBuilder.Register(&CodeFlare{}, &CodeFlareList{}) } + +type DSCCodeFlare struct { + components.ManagementSpec `json:",inline"` + CodeFlareCommonSpec `json:",inline"` +} diff --git a/apis/components/v1/zz_generated.deepcopy.go b/apis/components/v1/zz_generated.deepcopy.go index ae2d20b9829..06648b36dd4 100644 --- a/apis/components/v1/zz_generated.deepcopy.go +++ b/apis/components/v1/zz_generated.deepcopy.go @@ -29,7 +29,7 @@ func (in *CodeFlare) DeepCopyInto(out *CodeFlare) { *out = *in out.TypeMeta = in.TypeMeta in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) - out.Spec = in.Spec + in.Spec.DeepCopyInto(&out.Spec) in.Status.DeepCopyInto(&out.Status) } @@ -51,6 +51,22 @@ func (in *CodeFlare) DeepCopyObject() runtime.Object { return nil } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *CodeFlareCommonSpec) DeepCopyInto(out *CodeFlareCommonSpec) { + *out = *in + in.DevFlagsSpec.DeepCopyInto(&out.DevFlagsSpec) +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new CodeFlareCommonSpec. +func (in *CodeFlareCommonSpec) DeepCopy() *CodeFlareCommonSpec { + if in == nil { + return nil + } + out := new(CodeFlareCommonSpec) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
func (in *CodeFlareList) DeepCopyInto(out *CodeFlareList) { *out = *in @@ -86,6 +102,7 @@ func (in *CodeFlareList) DeepCopyObject() runtime.Object { // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *CodeFlareSpec) DeepCopyInto(out *CodeFlareSpec) { *out = *in + in.CodeFlareCommonSpec.DeepCopyInto(&out.CodeFlareCommonSpec) } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new CodeFlareSpec. @@ -114,6 +131,23 @@ func (in *CodeFlareStatus) DeepCopy() *CodeFlareStatus { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *DSCCodeFlare) DeepCopyInto(out *DSCCodeFlare) { + *out = *in + out.ManagementSpec = in.ManagementSpec + in.CodeFlareCommonSpec.DeepCopyInto(&out.CodeFlareCommonSpec) +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new DSCCodeFlare. +func (in *DSCCodeFlare) DeepCopy() *DSCCodeFlare { + if in == nil { + return nil + } + out := new(DSCCodeFlare) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
func (in *DSCDashboard) DeepCopyInto(out *DSCDashboard) { *out = *in diff --git a/apis/datasciencecluster/v1/datasciencecluster_types.go b/apis/datasciencecluster/v1/datasciencecluster_types.go index f31be96c96c..2267dbe43a1 100644 --- a/apis/datasciencecluster/v1/datasciencecluster_types.go +++ b/apis/datasciencecluster/v1/datasciencecluster_types.go @@ -26,7 +26,6 @@ import ( componentsv1 "github.com/opendatahub-io/opendatahub-operator/v2/apis/components/v1" "github.com/opendatahub-io/opendatahub-operator/v2/components" - "github.com/opendatahub-io/opendatahub-operator/v2/components/codeflare" "github.com/opendatahub-io/opendatahub-operator/v2/components/kserve" "github.com/opendatahub-io/opendatahub-operator/v2/components/modelmeshserving" "github.com/opendatahub-io/opendatahub-operator/v2/components/workbenches" @@ -65,7 +64,7 @@ type Components struct { // CodeFlare component configuration. // If CodeFlare Operator has been installed in the cluster, it should be uninstalled first before enabled component. - CodeFlare codeflare.CodeFlare `json:"codeflare,omitempty"` + CodeFlare componentsv1.DSCCodeFlare `json:"codeflare,omitempty"` // Ray component configuration. 
Ray componentsv1.DSCRay `json:"ray,omitempty"` diff --git a/components/codeflare/codeflare.go b/components/codeflare/codeflare.go deleted file mode 100644 index 5e731c28ba4..00000000000 --- a/components/codeflare/codeflare.go +++ /dev/null @@ -1,140 +0,0 @@ -// Package codeflare provides utility functions to config CodeFlare as part of the stack -// which makes managing distributed compute infrastructure in the cloud easy and intuitive for Data Scientists -// +groupName=datasciencecluster.opendatahub.io -package codeflare - -import ( - "context" - "fmt" - "path/filepath" - - "github.com/go-logr/logr" - operatorv1 "github.com/openshift/api/operator/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "sigs.k8s.io/controller-runtime/pkg/client" - logf "sigs.k8s.io/controller-runtime/pkg/log" - - dsciv1 "github.com/opendatahub-io/opendatahub-operator/v2/apis/dscinitialization/v1" - "github.com/opendatahub-io/opendatahub-operator/v2/components" - "github.com/opendatahub-io/opendatahub-operator/v2/pkg/cluster" - "github.com/opendatahub-io/opendatahub-operator/v2/pkg/deploy" -) - -var ( - ComponentName = "codeflare" - CodeflarePath = deploy.DefaultManifestPath + "/" + ComponentName + "/default" - CodeflareOperator = "codeflare-operator" - ParamsPath = deploy.DefaultManifestPath + "/" + ComponentName + "/manager" -) - -// Verifies that CodeFlare implements ComponentInterface. -var _ components.ComponentInterface = (*CodeFlare)(nil) - -// CodeFlare struct holds the configuration for the CodeFlare component. 
-// +kubebuilder:object:generate=true -type CodeFlare struct { - components.Component `json:""` -} - -func (c *CodeFlare) Init(ctx context.Context, _ cluster.Platform) error { - log := logf.FromContext(ctx).WithName(ComponentName) - - var imageParamMap = map[string]string{ - "codeflare-operator-controller-image": "RELATED_IMAGE_ODH_CODEFLARE_OPERATOR_IMAGE", // no need mcad, embedded in cfo - } - - if err := deploy.ApplyParams(ParamsPath, imageParamMap); err != nil { - log.Error(err, "failed to update image", "path", CodeflarePath+"/bases") - } - - return nil -} - -func (c *CodeFlare) OverrideManifests(ctx context.Context, _ cluster.Platform) error { - // If devflags are set, update default manifests path - if len(c.DevFlags.Manifests) != 0 { - manifestConfig := c.DevFlags.Manifests[0] - if err := deploy.DownloadManifests(ctx, ComponentName, manifestConfig); err != nil { - return err - } - // If overlay is defined, update paths - defaultKustomizePath := "default" - if manifestConfig.SourcePath != "" { - defaultKustomizePath = manifestConfig.SourcePath - } - CodeflarePath = filepath.Join(deploy.DefaultManifestPath, ComponentName, defaultKustomizePath) - } - - return nil -} - -func (c *CodeFlare) GetComponentName() string { - return ComponentName -} - -func (c *CodeFlare) ReconcileComponent(ctx context.Context, - cli client.Client, - l logr.Logger, - owner metav1.Object, - dscispec *dsciv1.DSCInitializationSpec, - platform cluster.Platform, - _ bool) error { - enabled := c.GetManagementState() == operatorv1.Managed - monitoringEnabled := dscispec.Monitoring.ManagementState == operatorv1.Managed - - if enabled { - if c.DevFlags != nil { - // Download manifests and update paths - if err := c.OverrideManifests(ctx, platform); err != nil { - return err - } - } - // check if the CodeFlare operator is installed: it should not be installed - // Both ODH and RHOAI should have the same operator name - dependentOperator := CodeflareOperator - - if found, err := 
cluster.OperatorExists(ctx, cli, dependentOperator); err != nil { - return fmt.Errorf("operator exists throws error %w", err) - } else if found { - return fmt.Errorf("operator %s is found. Please uninstall the operator before enabling %s component", - dependentOperator, ComponentName) - } - - // It updates stock manifests, overridden manifests should contain proper namespace - if err := deploy.ApplyParams(ParamsPath, nil, map[string]string{"namespace": dscispec.ApplicationsNamespace}); err != nil { - return fmt.Errorf("failed update image from %s : %w", CodeflarePath+"/bases", err) - } - } - - // Deploy Codeflare - if err := deploy.DeployManifestsFromPath(ctx, cli, owner, //nolint:revive,nolintlint - CodeflarePath, - dscispec.ApplicationsNamespace, - ComponentName, enabled); err != nil { - return err - } - l.Info("apply manifests done") - - if enabled { - if err := cluster.WaitForDeploymentAvailable(ctx, cli, ComponentName, dscispec.ApplicationsNamespace, 20, 2); err != nil { - return fmt.Errorf("deployment for %s is not ready to server: %w", ComponentName, err) - } - } - - // CloudServiceMonitoring handling - if platform == cluster.ManagedRhods { - // inject prometheus codeflare*.rules in to /opt/manifests/monitoring/prometheus/prometheus-configs.yaml - if err := c.UpdatePrometheusConfig(cli, l, enabled && monitoringEnabled, ComponentName); err != nil { - return err - } - if err := deploy.DeployManifestsFromPath(ctx, cli, owner, - filepath.Join(deploy.DefaultManifestPath, "monitoring", "prometheus", "apps"), - dscispec.Monitoring.Namespace, - "prometheus", true); err != nil { - return err - } - l.Info("updating SRE monitoring done") - } - - return nil -} diff --git a/components/codeflare/zz_generated.deepcopy.go b/components/codeflare/zz_generated.deepcopy.go deleted file mode 100644 index f761b2dbbd5..00000000000 --- a/components/codeflare/zz_generated.deepcopy.go +++ /dev/null @@ -1,39 +0,0 @@ -//go:build !ignore_autogenerated - -/* -Copyright 2023. 
- -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -// Code generated by controller-gen. DO NOT EDIT. - -package codeflare - -import () - -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *CodeFlare) DeepCopyInto(out *CodeFlare) { - *out = *in - in.Component.DeepCopyInto(&out.Component) -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new CodeFlare. -func (in *CodeFlare) DeepCopy() *CodeFlare { - if in == nil { - return nil - } - out := new(CodeFlare) - in.DeepCopyInto(out) - return out -} diff --git a/config/crd/bases/components.opendatahub.io_codeflares.yaml b/config/crd/bases/components.opendatahub.io_codeflares.yaml index 5dd73ac04c8..ddfc9f4ac64 100644 --- a/config/crd/bases/components.opendatahub.io_codeflares.yaml +++ b/config/crd/bases/components.opendatahub.io_codeflares.yaml @@ -14,7 +14,16 @@ spec: singular: codeflare scope: Cluster versions: - - name: v1 + - additionalPrinterColumns: + - description: Ready + jsonPath: .status.conditions[?(@.type=="Ready")].status + name: Ready + type: string + - description: Reason + jsonPath: .status.conditions[?(@.type=="Ready")].reason + name: Reason + type: string + name: v1 schema: openAPIV3Schema: description: CodeFlare is the Schema for the codeflares API @@ -37,12 +46,33 @@ spec: metadata: type: object spec: - description: CodeFlareSpec defines the desired state of CodeFlare properties: - foo: - description: Foo is an 
example field of CodeFlare. Edit codeflare_types.go - to remove/update - type: string + devFlags: + description: Add developer fields + properties: + manifests: + description: List of custom manifests for the given component + items: + properties: + contextDir: + default: manifests + description: contextDir is the relative path to the folder + containing manifests in a repository, default value "manifests" + type: string + sourcePath: + default: "" + description: 'sourcePath is the subpath within contextDir + where kustomize builds start. Examples include any sub-folder + or path: `base`, `overlays/dev`, `default`, `odh` etc.' + type: string + uri: + default: "" + description: uri is the URI point to a git repo with tag/branch. + e.g. https://github.com/org/repo/tarball/ + type: string + type: object + type: array + type: object type: object status: description: CodeFlareStatus defines the observed state of CodeFlare @@ -110,6 +140,9 @@ spec: type: string type: object type: object + x-kubernetes-validations: + - message: CodeFlare name must be default-codeflare + rule: self.metadata.name == 'default-codeflare' served: true storage: true subresources: diff --git a/controllers/components/codeflare/codeflare.go b/controllers/components/codeflare/codeflare.go new file mode 100644 index 00000000000..9d83b4c04dc --- /dev/null +++ b/controllers/components/codeflare/codeflare.go @@ -0,0 +1,70 @@ +package codeflare + +import ( + "fmt" + + operatorv1 "github.com/openshift/api/operator/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "sigs.k8s.io/controller-runtime/pkg/client" + + componentsv1 "github.com/opendatahub-io/opendatahub-operator/v2/apis/components/v1" + dscv1 "github.com/opendatahub-io/opendatahub-operator/v2/apis/datasciencecluster/v1" + "github.com/opendatahub-io/opendatahub-operator/v2/pkg/cluster" + cr "github.com/opendatahub-io/opendatahub-operator/v2/pkg/componentsregistry" + odhdeploy "github.com/opendatahub-io/opendatahub-operator/v2/pkg/deploy" + 
"github.com/opendatahub-io/opendatahub-operator/v2/pkg/metadata/annotations" +) + +const ( + ComponentName = componentsv1.CodeFlareComponentName +) + +var ( + DefaultPath = odhdeploy.DefaultManifestPath + "/" + ComponentName + "/default" // same path for both odh and rhoai +) + +type componentHandler struct{} + +func init() { //nolint:gochecknoinits + cr.Add(&componentHandler{}) +} +func (s *componentHandler) GetName() string { + return componentsv1.CodeFlareComponentName +} +func (s *componentHandler) GetManagementState(dsc *dscv1.DataScienceCluster) operatorv1.ManagementState { + if dsc.Spec.Components.CodeFlare.ManagementState == operatorv1.Managed { + return operatorv1.Managed + } + return operatorv1.Removed +} + +func (s *componentHandler) NewCRObject(dsc *dscv1.DataScienceCluster) client.Object { + codeflareAnnotations := make(map[string]string) + codeflareAnnotations[annotations.ManagementStateAnnotation] = string(s.GetManagementState(dsc)) + + return client.Object(&componentsv1.CodeFlare{ + TypeMeta: metav1.TypeMeta{ + Kind: componentsv1.CodeFlareKind, + APIVersion: componentsv1.GroupVersion.String(), + }, + ObjectMeta: metav1.ObjectMeta{ + Name: componentsv1.CodeFlareInstanceName, + Annotations: codeflareAnnotations, + }, + Spec: componentsv1.CodeFlareSpec{ + CodeFlareCommonSpec: dsc.Spec.Components.CodeFlare.CodeFlareCommonSpec, + }, + }) +} + +func (s *componentHandler) Init(platform cluster.Platform) error { + imageParamMap := map[string]string{ + "codeflare-operator-controller-image": "RELATED_IMAGE_ODH_CODEFLARE_OPERATOR_IMAGE", + } + + if err := odhdeploy.ApplyParams(DefaultPath, imageParamMap); err != nil { + return fmt.Errorf("failed to update images on path %s: %w", DefaultPath, err) + } + + return nil +} diff --git a/controllers/components/codeflare/codeflare_controller.go b/controllers/components/codeflare/codeflare_controller.go index 301c1f01d9d..65c5e60563c 100644 --- a/controllers/components/codeflare/codeflare_controller.go +++ 
b/controllers/components/codeflare/codeflare_controller.go @@ -19,40 +19,62 @@ package codeflare import ( "context" - "k8s.io/apimachinery/pkg/runtime" + admissionregistrationv1 "k8s.io/api/admissionregistration/v1" + appsv1 "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + rbacv1 "k8s.io/api/rbac/v1" + extv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1" ctrl "sigs.k8s.io/controller-runtime" - "sigs.k8s.io/controller-runtime/pkg/client" - "sigs.k8s.io/controller-runtime/pkg/log" componentsv1 "github.com/opendatahub-io/opendatahub-operator/v2/apis/components/v1" + "github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/deploy" + "github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/gc" + "github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/render/kustomize" + "github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/updatestatus" + "github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/predicates/resources" + "github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/reconciler" + "github.com/opendatahub-io/opendatahub-operator/v2/pkg/metadata/labels" ) // CodeFlareReconciler reconciles a CodeFlare object. -type CodeFlareReconciler struct { - client.Client - Scheme *runtime.Scheme -} -// Reconcile is part of the main kubernetes reconciliation loop which aims to -// move the current state of the cluster closer to the desired state. -// TODO(user): Modify the Reconcile function to compare the state specified by -// the CodeFlare object against the actual cluster state, and then -// perform operations to make the cluster state reflect the state specified by -// the user. 
-// -// For more details, check Reconcile and its Result here: -// - https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.12.2/pkg/reconcile -func (r *CodeFlareReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { - _ = log.FromContext(ctx) - - // TODO(user): your logic here - - return ctrl.Result{}, nil -} +func (s *componentHandler) NewComponentReconciler(ctx context.Context, mgr ctrl.Manager) error { + _, err := reconciler.ComponentReconcilerFor( + mgr, + &componentsv1.CodeFlare{}, + ). + // customized Owns() for Component with new predicates + Owns(&corev1.ConfigMap{}). + Owns(&corev1.Secret{}). + Owns(&rbacv1.ClusterRoleBinding{}). + Owns(&rbacv1.ClusterRole{}). + Owns(&rbacv1.Role{}). + Owns(&rbacv1.RoleBinding{}). + Owns(&corev1.ServiceAccount{}). + Owns(&corev1.Service{}). + Owns(&admissionregistrationv1.MutatingWebhookConfiguration{}). + Owns(&admissionregistrationv1.ValidatingWebhookConfiguration{}). + Owns(&appsv1.Deployment{}, reconciler.WithPredicates(resources.NewDeploymentPredicate())). + Watches(&extv1.CustomResourceDefinition{}). + // Add CodeFlare-specific actions + WithAction(initialize). + WithAction(devFlags). + WithAction(kustomize.NewAction( + kustomize.WithCache(), + kustomize.WithLabel(labels.ODH.Component(ComponentName), "true"), + kustomize.WithLabel(labels.K8SCommon.PartOf, ComponentName), + )). + WithAction(deploy.NewAction( + deploy.WithCache(), + )). + WithAction(updatestatus.NewAction()). + // must be final action + WithAction(gc.NewAction()). + Build(ctx) + + if err != nil { + return err // no need customize error, it is done in the caller main + } -// SetupWithManager sets up the controller with the Manager. -func (r *CodeFlareReconciler) SetupWithManager(mgr ctrl.Manager) error { - return ctrl.NewControllerManagedBy(mgr). - For(&componentsv1.CodeFlare{}). 
- Complete(r) + return nil } diff --git a/controllers/components/codeflare/codeflare_controller_actions.go b/controllers/components/codeflare/codeflare_controller_actions.go new file mode 100644 index 00000000000..6531ca84442 --- /dev/null +++ b/controllers/components/codeflare/codeflare_controller_actions.go @@ -0,0 +1,48 @@ +package codeflare + +import ( + "context" + "fmt" + + componentsv1 "github.com/opendatahub-io/opendatahub-operator/v2/apis/components/v1" + odhtypes "github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/types" + odhdeploy "github.com/opendatahub-io/opendatahub-operator/v2/pkg/deploy" +) + +func initialize(ctx context.Context, rr *odhtypes.ReconciliationRequest) error { + rr.Manifests = append(rr.Manifests, odhtypes.ManifestInfo{ + Path: DefaultPath, + ContextDir: "", + SourcePath: "", + }) + if err := odhdeploy.ApplyParams(DefaultPath, nil, map[string]string{"namespace": rr.DSCI.Spec.ApplicationsNamespace}); err != nil { + return fmt.Errorf("failed to update params.env from %s : %w", rr.Manifests[0], err) + } + return nil +} + +func devFlags(ctx context.Context, rr *odhtypes.ReconciliationRequest) error { + codeflare, ok := rr.Instance.(*componentsv1.CodeFlare) + if !ok { + return fmt.Errorf("resource instance %v is not a componentsv1.CodeFlare)", rr.Instance) + } + + if codeflare.Spec.DevFlags == nil { + return nil + } + // Implement devflags support logic + // If dev flags are set, update default manifests path + if len(codeflare.Spec.DevFlags.Manifests) != 0 { + manifestConfig := codeflare.Spec.DevFlags.Manifests[0] + if err := odhdeploy.DownloadManifests(ctx, ComponentName, manifestConfig); err != nil { + return err + } + if manifestConfig.SourcePath != "" { + rr.Manifests[0].Path = odhdeploy.DefaultManifestPath + rr.Manifests[0].ContextDir = ComponentName + rr.Manifests[0].SourcePath = manifestConfig.SourcePath + } + } + + return nil +} diff --git a/controllers/datasciencecluster/datasciencecluster_controller.go 
b/controllers/datasciencecluster/datasciencecluster_controller.go index f0acd2428e6..ae1a9157f5c 100644 --- a/controllers/datasciencecluster/datasciencecluster_controller.go +++ b/controllers/datasciencecluster/datasciencecluster_controller.go @@ -491,6 +491,7 @@ func (r *DataScienceClusterReconciler) SetupWithManager(ctx context.Context, mgr Owns(&componentsv1.ModelRegistry{}). Owns(&componentsv1.TrustyAI{}). Owns(&componentsv1.Kueue{}). + Owns(&componentsv1.CodeFlare{}). Owns(&componentsv1.TrainingOperator{}). Owns(&componentsv1.DataSciencePipelines{}). Owns( diff --git a/controllers/datasciencecluster/kubebuilder_rbac.go b/controllers/datasciencecluster/kubebuilder_rbac.go index 22a309194f5..9ca599a13ca 100644 --- a/controllers/datasciencecluster/kubebuilder_rbac.go +++ b/controllers/datasciencecluster/kubebuilder_rbac.go @@ -157,7 +157,7 @@ package datasciencecluster // +kubebuilder:rbac:groups="monitoring.coreos.com",resources=prometheusrules,verbs=get;create;patch;delete;deletecollection;list;watch // +kubebuilder:rbac:groups="monitoring.coreos.com",resources=podmonitors,verbs=get;create;delete;update;watch;list;patch -// TODO: CFO +// CFO //+kubebuilder:rbac:groups=components.opendatahub.io,resources=codeflares,verbs=get;list;watch;create;update;patch;delete //+kubebuilder:rbac:groups=components.opendatahub.io,resources=codeflares/status,verbs=get;update;patch //+kubebuilder:rbac:groups=components.opendatahub.io,resources=codeflares/finalizers,verbs=update diff --git a/controllers/webhook/webhook_suite_test.go b/controllers/webhook/webhook_suite_test.go index e6a282c4fc1..9236129e98c 100644 --- a/controllers/webhook/webhook_suite_test.go +++ b/controllers/webhook/webhook_suite_test.go @@ -45,7 +45,6 @@ import ( dscv1 "github.com/opendatahub-io/opendatahub-operator/v2/apis/datasciencecluster/v1" dsciv1 "github.com/opendatahub-io/opendatahub-operator/v2/apis/dscinitialization/v1" componentsold "github.com/opendatahub-io/opendatahub-operator/v2/components" - 
"github.com/opendatahub-io/opendatahub-operator/v2/components/codeflare" "github.com/opendatahub-io/opendatahub-operator/v2/components/kserve" "github.com/opendatahub-io/opendatahub-operator/v2/components/modelmeshserving" "github.com/opendatahub-io/opendatahub-operator/v2/components/workbenches" @@ -284,8 +283,8 @@ func newDSC(name string, namespace string) *dscv1.DataScienceCluster { ManagementState: operatorv1.Removed, }, }, - CodeFlare: codeflare.CodeFlare{ - Component: componentsold.Component{ + CodeFlare: componentsv1.DSCCodeFlare{ + ManagementSpec: components.ManagementSpec{ ManagementState: operatorv1.Removed, }, }, diff --git a/docs/api-overview.md b/docs/api-overview.md index f1078eff10d..9288d153041 100644 --- a/docs/api-overview.md +++ b/docs/api-overview.md @@ -58,6 +58,23 @@ _Appears in:_ | `status` _[CodeFlareStatus](#codeflarestatus)_ | | | | +#### CodeFlareCommonSpec + + + + + + + +_Appears in:_ +- [CodeFlareSpec](#codeflarespec) +- [DSCCodeFlare](#dsccodeflare) + +| Field | Description | Default | Validation | +| --- | --- | --- | --- | +| `devFlags` _[DevFlags](#devflags)_ | Add developer fields | | | + + #### CodeFlareList @@ -82,7 +99,7 @@ CodeFlareList contains a list of CodeFlare -CodeFlareSpec defines the desired state of CodeFlare + @@ -91,7 +108,7 @@ _Appears in:_ | Field | Description | Default | Validation | | --- | --- | --- | --- | -| `foo` _string_ | Foo is an example field of CodeFlare. Edit codeflare_types.go to remove/update | | | +| `devFlags` _[DevFlags](#devflags)_ | Add developer fields | | | #### CodeFlareStatus @@ -112,6 +129,23 @@ _Appears in:_ | `observedGeneration` _integer_ | | | | +#### DSCCodeFlare + + + + + + + +_Appears in:_ +- [Components](#components) + +| Field | Description | Default | Validation | +| --- | --- | --- | --- | +| `managementState` _[ManagementState](#managementstate)_ | Set to one of the following values:

- "Managed" : the operator is actively managing the component and trying to keep it active.
It will only upgrade the component if it is safe to do so

- "Removed" : the operator is actively managing the component and will not install it,
or if it is installed, the operator will try to remove it | | Enum: [Managed Removed]
| +| `devFlags` _[DevFlags](#devflags)_ | Add developer fields | | | + + #### DSCDashboard @@ -1133,30 +1167,6 @@ _Appears in:_ -## datasciencecluster.opendatahub.io/codeflare - -Package codeflare provides utility functions to config CodeFlare as part of the stack -which makes managing distributed compute infrastructure in the cloud easy and intuitive for Data Scientists - - - -#### CodeFlare - - - -CodeFlare struct holds the configuration for the CodeFlare component. - - - -_Appears in:_ -- [Components](#components) - -| Field | Description | Default | Validation | -| --- | --- | --- | --- | -| `Component` _[Component](#component)_ | | | | - - - ## datasciencecluster.opendatahub.io/components @@ -1171,7 +1181,6 @@ Component struct defines the basis for each OpenDataHub component configuration. _Appears in:_ -- [CodeFlare](#codeflare) - [Kserve](#kserve) - [ModelMeshServing](#modelmeshserving) - [Workbenches](#workbenches) @@ -1212,7 +1221,10 @@ DevFlagsSpec struct defines the component's dev flags configuration. _Appears in:_ +- [CodeFlareCommonSpec](#codeflarecommonspec) +- [CodeFlareSpec](#codeflarespec) - [Component](#component) +- [DSCCodeFlare](#dsccodeflare) - [DSCDashboard](#dscdashboard) - [DSCDataSciencePipelines](#dscdatasciencepipelines) - [DSCKueue](#dsckueue) @@ -1250,6 +1262,7 @@ ManagementSpec struct defines the component's management configuration. _Appears in:_ - [Component](#component) +- [DSCCodeFlare](#dsccodeflare) - [DSCDashboard](#dscdashboard) - [DSCDataSciencePipelines](#dscdatasciencepipelines) - [DSCKueue](#dsckueue) @@ -1440,7 +1453,7 @@ _Appears in:_ | `datasciencepipelines` _[DSCDataSciencePipelines](#dscdatasciencepipelines)_ | DataServicePipeline component configuration.
Require OpenShift Pipelines Operator to be installed before enable component | | | | `kserve` _[Kserve](#kserve)_ | Kserve component configuration.
Require OpenShift Serverless and OpenShift Service Mesh Operators to be installed before enable component
Does not support enabled ModelMeshServing at the same time | | | | `kueue` _[DSCKueue](#dsckueue)_ | Kueue component configuration. | | | -| `codeflare` _[CodeFlare](#codeflare)_ | CodeFlare component configuration.
If CodeFlare Operator has been installed in the cluster, it should be uninstalled first before enabled component. | | | +| `codeflare` _[DSCCodeFlare](#dsccodeflare)_ | CodeFlare component configuration.
If CodeFlare Operator has been installed in the cluster, it should be uninstalled first before enabled component. | | | | `ray` _[DSCRay](#dscray)_ | Ray component configuration. | | | | `trustyai` _[DSCTrustyAI](#dsctrustyai)_ | TrustyAI component configuration. | | | | `modelregistry` _[DSCModelRegistry](#dscmodelregistry)_ | ModelRegistry component configuration. | | | diff --git a/main.go b/main.go index 3ffd829e571..ea00977df5f 100644 --- a/main.go +++ b/main.go @@ -77,6 +77,7 @@ import ( "github.com/opendatahub-io/opendatahub-operator/v2/pkg/services/gc" "github.com/opendatahub-io/opendatahub-operator/v2/pkg/upgrade" + _ "github.com/opendatahub-io/opendatahub-operator/v2/controllers/components/codeflare" _ "github.com/opendatahub-io/opendatahub-operator/v2/controllers/components/dashboard" _ "github.com/opendatahub-io/opendatahub-operator/v2/controllers/components/datasciencepipelines" _ "github.com/opendatahub-io/opendatahub-operator/v2/controllers/components/kueue" diff --git a/pkg/upgrade/upgrade.go b/pkg/upgrade/upgrade.go index 84d1e214c63..f454803b76f 100644 --- a/pkg/upgrade/upgrade.go +++ b/pkg/upgrade/upgrade.go @@ -32,7 +32,6 @@ import ( featuresv1 "github.com/opendatahub-io/opendatahub-operator/v2/apis/features/v1" infrav1 "github.com/opendatahub-io/opendatahub-operator/v2/apis/infrastructure/v1" componentsold "github.com/opendatahub-io/opendatahub-operator/v2/components" - "github.com/opendatahub-io/opendatahub-operator/v2/components/codeflare" "github.com/opendatahub-io/opendatahub-operator/v2/components/kserve" "github.com/opendatahub-io/opendatahub-operator/v2/components/modelmeshserving" "github.com/opendatahub-io/opendatahub-operator/v2/components/workbenches" @@ -78,8 +77,8 @@ func CreateDefaultDSC(ctx context.Context, cli client.Client) error { Kserve: kserve.Kserve{ Component: componentsold.Component{ManagementState: operatorv1.Managed}, }, - CodeFlare: codeflare.CodeFlare{ - Component: componentsold.Component{ManagementState: 
operatorv1.Managed}, + CodeFlare: componentsv1.DSCCodeFlare{ + ManagementSpec: components.ManagementSpec{ManagementState: operatorv1.Managed}, }, Ray: componentsv1.DSCRay{ ManagementSpec: components.ManagementSpec{ManagementState: operatorv1.Managed}, diff --git a/tests/e2e/codeflare_test.go b/tests/e2e/codeflare_test.go new file mode 100644 index 00000000000..10dafa25611 --- /dev/null +++ b/tests/e2e/codeflare_test.go @@ -0,0 +1,281 @@ +package e2e_test + +import ( + "context" + "errors" + "fmt" + "reflect" + "strings" + "testing" + "time" + + operatorv1 "github.com/openshift/api/operator/v1" + "github.com/stretchr/testify/require" + autoscalingv1 "k8s.io/api/autoscaling/v1" + k8serr "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + "k8s.io/apimachinery/pkg/util/wait" + "k8s.io/client-go/util/retry" + "sigs.k8s.io/controller-runtime/pkg/client" + + componentsv1 "github.com/opendatahub-io/opendatahub-operator/v2/apis/components/v1" + "github.com/opendatahub-io/opendatahub-operator/v2/pkg/metadata/labels" +) + +type CodeFlareTestCtx struct { + testCtx *testContext + testCodeFlareInstance componentsv1.CodeFlare +} + +func codeflareTestSuite(t *testing.T) { + t.Helper() + + codeflareCtx := CodeFlareTestCtx{} + var err error + codeflareCtx.testCtx, err = NewTestContext() + require.NoError(t, err) + + testCtx := codeflareCtx.testCtx + + t.Run(testCtx.testDsc.Name, func(t *testing.T) { + // creation + t.Run("Creation of CodeFlare CR", func(t *testing.T) { + err = codeflareCtx.testCodeFlareCreation() + require.NoError(t, err, "error creating CodeFlare CR") + }) + + t.Run("Validate CodeFlare instance", func(t *testing.T) { + err = codeflareCtx.validateCodeFlare() + require.NoError(t, err, "error validating CodeFlare instance") + }) + + t.Run("Validate Ownerreferences exist", func(t *testing.T) { + err = codeflareCtx.testOwnerReferences() + require.NoError(t, err, "error getting all CodeFlare's 
Ownerreferences") + }) + + t.Run("Validate CodeFlare Ready", func(t *testing.T) { + err = codeflareCtx.validateCodeFlareReady() + require.NoError(t, err, "CodeFlare instance is not Ready") + }) + + // reconcile + t.Run("Validate Controller reconcile", func(t *testing.T) { + err = codeflareCtx.testUpdateOnCodeFlareResources() + require.NoError(t, err, "error testing updates for CodeFlare's managed resources") + }) + + t.Run("Validate Disabling CodeFlare Component", func(t *testing.T) { + err = codeflareCtx.testUpdateCodeFlareComponentDisabled() + require.NoError(t, err, "error testing CodeFlare component enabled field") + }) + }) +} + +func (tc *CodeFlareTestCtx) testCodeFlareCreation() error { + if tc.testCtx.testDsc.Spec.Components.CodeFlare.ManagementState != operatorv1.Managed { + return nil + } + + err := tc.testCtx.wait(func(ctx context.Context) (bool, error) { + existingCodeFlareList := &componentsv1.CodeFlareList{} + + if err := tc.testCtx.customClient.List(ctx, existingCodeFlareList); err != nil { + return false, err + } + + switch { + case len(existingCodeFlareList.Items) == 1: + tc.testCodeFlareInstance = existingCodeFlareList.Items[0] + return true, nil + case len(existingCodeFlareList.Items) > 1: + return false, fmt.Errorf( + "unexpected CodeFlare CR instances. 
Expected 1 , Found %v instance", len(existingCodeFlareList.Items)) + default: + return false, nil + } + }) + + if err != nil { + return fmt.Errorf("unable to find CodeFlare CR instance: %w", err) + } + + return nil +} + +func (tc *CodeFlareTestCtx) validateCodeFlare() error { + // CodeFlare spec should match the spec of CodeFlare component in DSC + if !reflect.DeepEqual(tc.testCtx.testDsc.Spec.Components.CodeFlare.CodeFlareCommonSpec, tc.testCodeFlareInstance.Spec.CodeFlareCommonSpec) { + err := fmt.Errorf("expected .spec for CodeFlare %v, got %v", + tc.testCtx.testDsc.Spec.Components.CodeFlare.CodeFlareCommonSpec, tc.testCodeFlareInstance.Spec.CodeFlareCommonSpec) + return err + } + return nil +} + +func (tc *CodeFlareTestCtx) testOwnerReferences() error { + if len(tc.testCodeFlareInstance.OwnerReferences) != 1 { + return errors.New("expect CR has ownerreferences set") + } + + // Test CodeFlare CR ownerref + if tc.testCodeFlareInstance.OwnerReferences[0].Kind != dscKind { + return fmt.Errorf("expected ownerreference DataScienceCluster not found. Got ownerreference: %v", + tc.testCodeFlareInstance.OwnerReferences[0].Kind) + } + + // Test CodeFlare resources + appDeployments, err := tc.testCtx.kubeClient.AppsV1().Deployments(tc.testCtx.applicationsNamespace).List(tc.testCtx.ctx, metav1.ListOptions{ + LabelSelector: labels.ODH.Component(componentsv1.CodeFlareComponentName), + }) + if err != nil { + return fmt.Errorf("error listing component deployments %w", err) + } + // test any one deployment for ownerreference; a deployment without any owner reference fails the check instead of panicking + if len(appDeployments.Items) != 0 && (len(appDeployments.Items[0].OwnerReferences) == 0 || appDeployments.Items[0].OwnerReferences[0].Kind != componentsv1.CodeFlareKind) { + return fmt.Errorf("expected ownerreference not found. Got ownerreference: %v", + appDeployments.Items[0].OwnerReferences) + } + + return nil +} + +// Verify CodeFlare instance is in Ready phase when CodeFlare deployments are up and running.
+func (tc *CodeFlareTestCtx) validateCodeFlareReady() error { + err := wait.PollUntilContextTimeout(tc.testCtx.ctx, generalRetryInterval, componentReadyTimeout, true, func(ctx context.Context) (bool, error) { + key := types.NamespacedName{Name: tc.testCodeFlareInstance.Name} + codeflare := &componentsv1.CodeFlare{} + + err := tc.testCtx.customClient.Get(ctx, key, codeflare) + if err != nil { + return false, err + } + return codeflare.Status.Phase == readyStatus, nil + }) + + if err != nil { + return fmt.Errorf("error waiting Ready state for CodeFlare %v: %w", tc.testCodeFlareInstance.Name, err) + } + + return nil +} + +func (tc *CodeFlareTestCtx) testUpdateOnCodeFlareResources() error { + // Test Updating CodeFlare Replicas + + appDeployments, err := tc.testCtx.kubeClient.AppsV1().Deployments(tc.testCtx.applicationsNamespace).List(tc.testCtx.ctx, metav1.ListOptions{ + LabelSelector: labels.ComponentPartOf + "=" + strings.ToLower(componentsv1.CodeFlareKind), + }) + if err != nil { + return fmt.Errorf("error listing component deployments: %w", err) + } + + if len(appDeployments.Items) != 1 { + return fmt.Errorf("error getting deployment for component %s", tc.testCodeFlareInstance.Name) + } + + const expectedReplica int32 = 2 // from 1 to 2 + + testDeployment := appDeployments.Items[0] + patchedReplica := &autoscalingv1.Scale{ + ObjectMeta: metav1.ObjectMeta{ + Name: testDeployment.Name, + Namespace: testDeployment.Namespace, + }, + Spec: autoscalingv1.ScaleSpec{ + Replicas: expectedReplica, + }, + Status: autoscalingv1.ScaleStatus{}, + } + updatedDep, err := tc.testCtx.kubeClient.AppsV1().Deployments(tc.testCtx.applicationsNamespace).UpdateScale(tc.testCtx.ctx, + testDeployment.Name, patchedReplica, metav1.UpdateOptions{}) + if err != nil { + return fmt.Errorf("error patching component resources : %w", err) + } + if updatedDep.Spec.Replicas != patchedReplica.Spec.Replicas { + return fmt.Errorf("failed to patch replicas : expect to be %v but got %v", patchedReplica.Spec.Replicas, updatedDep.Spec.Replicas) + } + + //
Sleep for 20 seconds to allow the operator to reconcile + // we expect it should not revert back to original value because of AllowList + time.Sleep(2 * generalRetryInterval) + reconciledDep, err := tc.testCtx.kubeClient.AppsV1().Deployments(tc.testCtx.applicationsNamespace).Get(tc.testCtx.ctx, testDeployment.Name, metav1.GetOptions{}) + if err != nil { + return fmt.Errorf("error getting component resource after reconcile: %w", err) + } + if *reconciledDep.Spec.Replicas != expectedReplica { + return fmt.Errorf("replicas were unexpectedly reverted : expect to be %v but got %v", expectedReplica, *reconciledDep.Spec.Replicas) + } + + return nil +} + +func (tc *CodeFlareTestCtx) testUpdateCodeFlareComponentDisabled() error { + // Test Updating CodeFlare to be disabled + var codeflareDeploymentName string + + if tc.testCtx.testDsc.Spec.Components.CodeFlare.ManagementState == operatorv1.Managed { + appDeployments, err := tc.testCtx.kubeClient.AppsV1().Deployments(tc.testCtx.applicationsNamespace).List(tc.testCtx.ctx, metav1.ListOptions{ + LabelSelector: labels.ODH.Component(componentsv1.CodeFlareComponentName), + }) + if err != nil { + return fmt.Errorf("error getting enabled component %v: %w", componentsv1.CodeFlareComponentName, err) + } + if len(appDeployments.Items) > 0 { + codeflareDeploymentName = appDeployments.Items[0].Name + if appDeployments.Items[0].Status.ReadyReplicas == 0 { + return fmt.Errorf("error getting enabled component: %s its deployment 'ReadyReplicas'", codeflareDeploymentName) + } + } + } else { + return errors.New("CodeFlare spec should be in 'enabled: true' state in order to perform test") + } + + // Disable component CodeFlare + err := retry.RetryOnConflict(retry.DefaultRetry, func() error { + // refresh DSC instance in case it was updated during the reconcile + err := tc.testCtx.customClient.Get(tc.testCtx.ctx, types.NamespacedName{Name: tc.testCtx.testDsc.Name}, tc.testCtx.testDsc) + if err != nil { + return fmt.Errorf("error getting resource %w", err) + }
+ // Disable the Component + tc.testCtx.testDsc.Spec.Components.CodeFlare.ManagementState = operatorv1.Removed + + // Try to update + err = tc.testCtx.customClient.Update(tc.testCtx.ctx, tc.testCtx.testDsc) + // Wrap with %w so the original error stays in the chain and + // RetryOnConflict can still identify a conflict correctly. + if err != nil { + return fmt.Errorf("error updating component from 'enabled: true' to 'enabled: false': %w", err) + } + + return nil + }) + if err != nil { + return fmt.Errorf("error after retry %w", err) + } + + if err = tc.testCtx.wait(func(ctx context.Context) (bool, error) { + // Verify CodeFlare CR is deleted + codeflare := &componentsv1.CodeFlare{} + err := tc.testCtx.customClient.Get(ctx, client.ObjectKey{Name: tc.testCodeFlareInstance.Name}, codeflare) + return k8serr.IsNotFound(err), nil + }); err != nil { + return fmt.Errorf("component CodeFlare is disabled, should not get the CodeFlare CR %v: %w", tc.testCodeFlareInstance.Name, err) + } + + // Sleep for 20 seconds to allow the operator to reconcile + time.Sleep(2 * generalRetryInterval) + _, err = tc.testCtx.kubeClient.AppsV1().Deployments(tc.testCtx.applicationsNamespace).Get(tc.testCtx.ctx, codeflareDeploymentName, metav1.GetOptions{}) + if err != nil { + if k8serr.IsNotFound(err) { + return nil // correct result: should not find deployment after we disable it already + } + return fmt.Errorf("error getting component resource after reconcile: %w", err) + } + return fmt.Errorf("component %v is disabled, should not get its deployment %v from NS %v any more", + componentsv1.CodeFlareKind, + codeflareDeploymentName, + tc.testCtx.applicationsNamespace) +} diff --git a/tests/e2e/controller_test.go b/tests/e2e/controller_test.go index 16539715d21..633ad250ff7 100644 --- a/tests/e2e/controller_test.go +++ b/tests/e2e/controller_test.go @@ -46,6 +46,7 @@ var ( "kueue": kueueTestSuite, "trainingoperator": trainingoperatorTestSuite, "datasciencepipelienes": dataSciencePipelinesTestSuite, + "codeflare":
codeflareTestSuite, } ) diff --git a/tests/e2e/helper_test.go b/tests/e2e/helper_test.go index 6d4337f7eda..454a56d9974 100644 --- a/tests/e2e/helper_test.go +++ b/tests/e2e/helper_test.go @@ -27,7 +27,6 @@ import ( dsciv1 "github.com/opendatahub-io/opendatahub-operator/v2/apis/dscinitialization/v1" infrav1 "github.com/opendatahub-io/opendatahub-operator/v2/apis/infrastructure/v1" componentsold "github.com/opendatahub-io/opendatahub-operator/v2/components" - "github.com/opendatahub-io/opendatahub-operator/v2/components/codeflare" "github.com/opendatahub-io/opendatahub-operator/v2/components/kserve" "github.com/opendatahub-io/opendatahub-operator/v2/components/modelmeshserving" "github.com/opendatahub-io/opendatahub-operator/v2/components/workbenches" @@ -143,9 +142,9 @@ func setupDSCInstance(name string) *dscv1.DataScienceCluster { ManagementState: operatorv1.Removed, }, }, - CodeFlare: codeflare.CodeFlare{ - Component: componentsold.Component{ - ManagementState: operatorv1.Removed, + CodeFlare: componentsv1.DSCCodeFlare{ + ManagementSpec: components.ManagementSpec{ + ManagementState: operatorv1.Managed, }, }, Ray: componentsv1.DSCRay{