From a1f0e624d7e1a0b7c6f98e57da9c904f4d80f4df Mon Sep 17 00:00:00 2001 From: Wen Zhou Date: Thu, 31 Oct 2024 08:22:27 +0100 Subject: [PATCH] feat: add support for Ray (#1315) * feat: add support for Ray - cleanup old component files: ray and dashboard - rename function to be more generic - status update on component for installedcomponent - add e2e test - move ireturn into golangci config Signed-off-by: Wen Zhou --------- Signed-off-by: Wen Zhou --- .golangci.yml | 1 + apis/components/v1/dashboard_types.go | 3 +- apis/components/v1/ray_types.go | 54 ++-- apis/components/v1/zz_generated.deepcopy.go | 36 ++- .../v1/datasciencecluster_types.go | 3 +- .../components.opendatahub.io_dashboards.yaml | 20 +- .../components.opendatahub.io_rays.yaml | 44 ++- ...atahub-operator.clusterserviceversion.yaml | 6 +- components/dashboard/dashboard.go | 244 --------------- components/dashboard/zz_generated.deepcopy.go | 40 --- components/ray/ray.go | 115 -------- components/ray/zz_generated.deepcopy.go | 39 --- .../bases/components.opendatahub.io_rays.yaml | 44 ++- ...atahub-operator.clusterserviceversion.yaml | 4 - controllers/components/dashboard/dashboard.go | 2 +- .../dashboard/dashboard_controller.go | 4 +- controllers/components/ray/ray.go | 52 ++++ controllers/components/ray/ray_controller.go | 91 +++--- .../components/ray/ray_controller_actions.go | 64 ++++ .../datasciencecluster_controller.go | 48 ++- .../datasciencecluster/kubebuilder_rbac.go | 14 +- controllers/webhook/webhook_suite_test.go | 5 +- docs/api-overview.md | 67 +++-- main.go | 10 +- pkg/controller/handlers/handlers.go | 1 - pkg/upgrade/upgrade.go | 5 +- tests/e2e/controller_test.go | 1 + tests/e2e/creation_test.go | 2 +- tests/e2e/dashboard_test.go | 13 +- tests/e2e/helper_test.go | 8 +- tests/e2e/odh_manager_test.go | 8 +- tests/e2e/ray_test.go | 278 ++++++++++++++++++ .../features/features_suite_int_test.go | 1 - 33 files changed, 727 insertions(+), 600 deletions(-) delete mode 100644 components/dashboard/dashboard.go delete mode 100644 components/dashboard/zz_generated.deepcopy.go delete mode 100644 components/ray/ray.go delete mode 100644 components/ray/zz_generated.deepcopy.go create mode 100644 controllers/components/ray/ray.go create mode 100644 controllers/components/ray/ray_controller_actions.go create mode 100644 tests/e2e/ray_test.go diff --git a/.golangci.yml b/.golangci.yml index af1ffddee58..aa86e9ab499 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -58,6 +58,7 @@ linters-settings: - stdlib # also allow generics - generic + - EventHandler # for ToOwner revive: rules: - name: dot-imports diff --git a/apis/components/v1/dashboard_types.go b/apis/components/v1/dashboard_types.go index db3ea9c04a9..a7ff3ef3f53 100644 --- a/apis/components/v1/dashboard_types.go +++ b/apis/components/v1/dashboard_types.go @@ -22,8 +22,9 @@ import ( ) const ( + DashboardComponentName = "dashboard" // DashboardInstanceName the name of the Dashboard instance singleton. - // It must match what is set in the XValidation below. + // value should match whats set in the XValidation below DashboardInstanceName = "default-dashboard" DashboardKind = "Dashboard" ) diff --git a/apis/components/v1/ray_types.go b/apis/components/v1/ray_types.go index 3de67e938c9..66d3e737809 100644 --- a/apis/components/v1/ray_types.go +++ b/apis/components/v1/ray_types.go @@ -21,26 +21,21 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) -// EDIT THIS FILE! THIS IS SCAFFOLDING FOR YOU TO OWN! -// NOTE: json tags are required. 
Any new fields you add must have json tags for the fields to be serialized. - -// RaySpec defines the desired state of Ray -type RaySpec struct { - // INSERT ADDITIONAL SPEC FIELDS - desired state of cluster - // Important: Run "make" to regenerate code after modifying this file - - // Foo is an example field of Ray. Edit ray_types.go to remove/update - Foo string `json:"foo,omitempty"` -} +const ( + RayComponentName = "ray" + // value should match whats set in the XValidation below + RayInstanceName = "default-ray" + RayKind = "Ray" +) -// RayStatus defines the observed state of Ray -type RayStatus struct { - components.Status `json:",inline"` -} +// NOTE: json tags are required. Any new fields you add must have json tags for the fields to be serialized. // +kubebuilder:object:root=true // +kubebuilder:subresource:status // +kubebuilder:resource:scope=Cluster +// +kubebuilder:validation:XValidation:rule="self.metadata.name == 'default-ray'",message="Ray name must be default-ray" +// +kubebuilder:printcolumn:name="Ready",type=string,JSONPath=`.status.conditions[?(@.type=="Ready")].status`,description="Ready" +// +kubebuilder:printcolumn:name="Reason",type=string,JSONPath=`.status.conditions[?(@.type=="Ready")].reason`,description="Reason" // Ray is the Schema for the rays API type Ray struct { @@ -51,16 +46,21 @@ type Ray struct { Status RayStatus `json:"status,omitempty"` } -func (c *Ray) GetDevFlags() *components.DevFlags { - return nil +// RaySpec defines the desired state of Ray +type RaySpec struct { + RayCommonSpec `json:",inline"` } -func (c *Ray) GetStatus() *components.Status { - return &c.Status.Status +type RayCommonSpec struct { + components.DevFlagsSpec `json:",inline"` } -// +kubebuilder:object:root=true +// RayStatus defines the observed state of Ray +type RayStatus struct { + components.Status `json:",inline"` +} +// +kubebuilder:object:root=true // RayList contains a list of Ray type RayList struct { metav1.TypeMeta `json:",inline"` @@ -71,3 +71,17 @@ type RayList struct { func init() { SchemeBuilder.Register(&Ray{}, &RayList{}) } + +func (c *Ray) GetDevFlags() *components.DevFlags { + return c.Spec.DevFlags +} +func (c *Ray) GetStatus() *components.Status { + return &c.Status.Status +} + +// DSCRay contains all the configuration exposed in DSC instance for Ray component +type DSCRay struct { + components.ManagementSpec `json:",inline"` + // configuration fields common across components + RayCommonSpec `json:",inline"` +} diff --git a/apis/components/v1/zz_generated.deepcopy.go b/apis/components/v1/zz_generated.deepcopy.go index 456a5d1ad1f..f9264ae5659 100644 --- a/apis/components/v1/zz_generated.deepcopy.go +++ b/apis/components/v1/zz_generated.deepcopy.go @@ -131,6 +131,23 @@ func (in *DSCDashboard) DeepCopy() *DSCDashboard { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *DSCRay) DeepCopyInto(out *DSCRay) { + *out = *in + out.ManagementSpec = in.ManagementSpec + in.RayCommonSpec.DeepCopyInto(&out.RayCommonSpec) +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new DSCRay. +func (in *DSCRay) DeepCopy() *DSCRay { + if in == nil { + return nil + } + out := new(DSCRay) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
func (in *Dashboard) DeepCopyInto(out *Dashboard) { *out = *in @@ -693,7 +710,7 @@ func (in *Ray) DeepCopyInto(out *Ray) { *out = *in out.TypeMeta = in.TypeMeta in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) - out.Spec = in.Spec + in.Spec.DeepCopyInto(&out.Spec) in.Status.DeepCopyInto(&out.Status) } @@ -715,6 +732,22 @@ func (in *Ray) DeepCopyObject() runtime.Object { return nil } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *RayCommonSpec) DeepCopyInto(out *RayCommonSpec) { + *out = *in + in.DevFlagsSpec.DeepCopyInto(&out.DevFlagsSpec) +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RayCommonSpec. +func (in *RayCommonSpec) DeepCopy() *RayCommonSpec { + if in == nil { + return nil + } + out := new(RayCommonSpec) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *RayList) DeepCopyInto(out *RayList) { *out = *in @@ -750,6 +783,7 @@ func (in *RayList) DeepCopyObject() runtime.Object { // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *RaySpec) DeepCopyInto(out *RaySpec) { *out = *in + in.RayCommonSpec.DeepCopyInto(&out.RayCommonSpec) } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RaySpec. diff --git a/apis/datasciencecluster/v1/datasciencecluster_types.go b/apis/datasciencecluster/v1/datasciencecluster_types.go index b4a799e8586..0cac0fd8a9a 100644 --- a/apis/datasciencecluster/v1/datasciencecluster_types.go +++ b/apis/datasciencecluster/v1/datasciencecluster_types.go @@ -32,7 +32,6 @@ import ( "github.com/opendatahub-io/opendatahub-operator/v2/components/kueue" "github.com/opendatahub-io/opendatahub-operator/v2/components/modelmeshserving" "github.com/opendatahub-io/opendatahub-operator/v2/components/modelregistry" - "github.com/opendatahub-io/opendatahub-operator/v2/components/ray" "github.com/opendatahub-io/opendatahub-operator/v2/components/trainingoperator" "github.com/opendatahub-io/opendatahub-operator/v2/components/trustyai" "github.com/opendatahub-io/opendatahub-operator/v2/components/workbenches" @@ -75,7 +74,7 @@ type Components struct { CodeFlare codeflare.CodeFlare `json:"codeflare,omitempty"` // Ray component configuration. - Ray ray.Ray `json:"ray,omitempty"` + Ray componentsv1.DSCRay `json:"ray,omitempty"` // TrustyAI component configuration. TrustyAI trustyai.TrustyAI `json:"trustyai,omitempty"` diff --git a/bundle/manifests/components.opendatahub.io_dashboards.yaml b/bundle/manifests/components.opendatahub.io_dashboards.yaml index 3893f3e0f53..1c47b945179 100644 --- a/bundle/manifests/components.opendatahub.io_dashboards.yaml +++ b/bundle/manifests/components.opendatahub.io_dashboards.yaml @@ -23,6 +23,10 @@ spec: jsonPath: .status.conditions[?(@.type=="Ready")].reason name: Reason type: string + - description: URL + jsonPath: .status.url + name: URL + type: string name: v1 schema: openAPIV3Schema: @@ -74,20 +78,6 @@ spec: type: object type: array type: object - managementState: - description: |- - Set to one of the following values: - - - "Managed" : the operator is actively managing the component and trying to keep it active. 
- It will only upgrade the component if it is safe to do so - - - "Removed" : the operator is actively managing the component and will not install it, - or if it is installed, the operator will try to remove it - enum: - - Managed - - Removed - pattern: ^(Managed|Unmanaged|Force|Removed)$ - type: string type: object status: description: DashboardStatus defines the observed state of Dashboard @@ -153,6 +143,8 @@ spec: type: integer phase: type: string + url: + type: string type: object type: object x-kubernetes-validations: diff --git a/bundle/manifests/components.opendatahub.io_rays.yaml b/bundle/manifests/components.opendatahub.io_rays.yaml index 1b4494ad87b..ca18afe0f09 100644 --- a/bundle/manifests/components.opendatahub.io_rays.yaml +++ b/bundle/manifests/components.opendatahub.io_rays.yaml @@ -14,7 +14,16 @@ spec: singular: ray scope: Cluster versions: - - name: v1 + - additionalPrinterColumns: + - description: Ready + jsonPath: .status.conditions[?(@.type=="Ready")].status + name: Ready + type: string + - description: Reason + jsonPath: .status.conditions[?(@.type=="Ready")].reason + name: Reason + type: string + name: v1 schema: openAPIV3Schema: description: Ray is the Schema for the rays API @@ -39,10 +48,32 @@ spec: spec: description: RaySpec defines the desired state of Ray properties: - foo: - description: Foo is an example field of Ray. Edit ray_types.go to - remove/update - type: string + devFlags: + description: Add developer fields + properties: + manifests: + description: List of custom manifests for the given component + items: + properties: + contextDir: + default: manifests + description: contextDir is the relative path to the folder + containing manifests in a repository, default value "manifests" + type: string + sourcePath: + default: "" + description: 'sourcePath is the subpath within contextDir + where kustomize builds start. Examples include any sub-folder + or path: `base`, `overlays/dev`, `default`, `odh` etc.' + type: string + uri: + default: "" + description: uri is the URI point to a git repo with tag/branch. + e.g. https://github.com/org/repo/tarball/ + type: string + type: object + type: array + type: object type: object status: description: RayStatus defines the observed state of Ray @@ -110,6 +141,9 @@ spec: type: string type: object type: object + x-kubernetes-validations: + - message: Ray name must be default-ray + rule: self.metadata.name == 'default-ray' served: true storage: true subresources: diff --git a/bundle/manifests/opendatahub-operator.clusterserviceversion.yaml b/bundle/manifests/opendatahub-operator.clusterserviceversion.yaml index b3be2072e9f..f27faace3f1 100644 --- a/bundle/manifests/opendatahub-operator.clusterserviceversion.yaml +++ b/bundle/manifests/opendatahub-operator.clusterserviceversion.yaml @@ -103,7 +103,7 @@ metadata: categories: AI/Machine Learning, Big Data certified: "False" containerImage: quay.io/opendatahub/opendatahub-operator:v2.19.0 - createdAt: "2024-10-24T09:24:00Z" + createdAt: "2024-10-30T14:18:10Z" olm.skipRange: '>=1.0.0 <2.19.0' operators.operatorframework.io/builder: operator-sdk-v1.31.0 operators.operatorframework.io/internal-objects: '["featuretrackers.features.opendatahub.io", @@ -129,10 +129,6 @@ spec: displayName: Dashboard kind: Dashboard name: dashboards.components.opendatahub.io - specDescriptors: - - description: Add developer fields - displayName: Dev Flags - path: devFlags version: v1 - description: DataScienceCluster is the Schema for the datascienceclusters API. 
displayName: Data Science Cluster diff --git a/components/dashboard/dashboard.go b/components/dashboard/dashboard.go deleted file mode 100644 index 079f63c7559..00000000000 --- a/components/dashboard/dashboard.go +++ /dev/null @@ -1,244 +0,0 @@ -// // Package dashboard provides utility functions to config Open Data Hub Dashboard: A web dashboard that displays - -package dashboard - -// -// import ( -// "context" -// "errors" -// "fmt" -// "path/filepath" -// -// "github.com/go-logr/logr" -// operatorv1 "github.com/openshift/api/operator/v1" -// corev1 "k8s.io/api/core/v1" -// k8serr "k8s.io/apimachinery/pkg/api/errors" -// metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" -// "sigs.k8s.io/controller-runtime/pkg/client" -// logf "sigs.k8s.io/controller-runtime/pkg/log" -// -// dsciv1 "github.com/opendatahub-io/opendatahub-operator/v2/apis/dscinitialization/v1" -// "github.com/opendatahub-io/opendatahub-operator/v2/components" -// "github.com/opendatahub-io/opendatahub-operator/v2/pkg/cluster" -// "github.com/opendatahub-io/opendatahub-operator/v2/pkg/deploy" -//) -// -// var ( -// ComponentNameUpstream = "dashboard" -// PathUpstream = deploy.DefaultManifestPath + "/" + ComponentNameUpstream + "/odh" -// -// ComponentNameDownstream = "rhods-dashboard" -// PathDownstream = deploy.DefaultManifestPath + "/" + ComponentNameUpstream + "/rhoai" -// PathSelfDownstream = PathDownstream + "/onprem" -// PathManagedDownstream = PathDownstream + "/addon" -// OverridePath = "" -// DefaultPath = "" -//) -// -//// Verifies that Dashboard implements ComponentInterface. -// var _ components.ComponentInterface = (*Dashboard)(nil) -// -//// Dashboard struct holds the configuration for the Dashboard component. -//// +kubebuilder:object:generate=true -// type Dashboard struct { -// components.Component `json:""` -//} -// -// func (d *Dashboard) Init(ctx context.Context, platform cluster.Platform) error { -// log := logf.FromContext(ctx).WithName(ComponentNameUpstream) -// -// imageParamMap := map[string]string{ -// "odh-dashboard-image": "RELATED_IMAGE_ODH_DASHBOARD_IMAGE", -// } -// DefaultPath = map[cluster.Platform]string{ -// cluster.SelfManagedRhods: PathDownstream + "/onprem", -// cluster.ManagedRhods: PathDownstream + "/addon", -// cluster.OpenDataHub: PathUpstream, -// cluster.Unknown: PathUpstream, -// }[platform] -// -// if err := deploy.ApplyParams(DefaultPath, imageParamMap); err != nil { -// log.Error(err, "failed to update image", "path", DefaultPath) -// } -// -// return nil -//} -// -// func (d *Dashboard) OverrideManifests(ctx context.Context, platform cluster.Platform) error { -// // If devflags are set, update default manifests path -// if len(d.DevFlags.Manifests) != 0 { -// manifestConfig := d.DevFlags.Manifests[0] -// if err := deploy.DownloadManifests(ctx, ComponentNameUpstream, manifestConfig); err != nil { -// return err -// } -// if manifestConfig.SourcePath != "" { -// OverridePath = filepath.Join(deploy.DefaultManifestPath, ComponentNameUpstream, manifestConfig.SourcePath) -// } -// } -// return nil -//} -// -// func (d *Dashboard) GetComponentName() string { -// return ComponentNameUpstream -//} -// -// func (d *Dashboard) ReconcileComponent(ctx context.Context, -// cli client.Client, -// l logr.Logger, -// owner metav1.Object, -// dscispec *dsciv1.DSCInitializationSpec, -// platform cluster.Platform, -// currentComponentExist bool, -// ) error { -// entryPath := DefaultPath -// enabled := d.GetManagementState() == operatorv1.Managed -// monitoringEnabled := 
dscispec.Monitoring.ManagementState == operatorv1.Managed -// -// if enabled { -// // 1. cleanup OAuth client related secret and CR if dashboard is in 'installed false' status -// if err := d.cleanOauthClient(ctx, cli, dscispec, currentComponentExist, l); err != nil { -// return err -// } -// if d.DevFlags != nil && len(d.DevFlags.Manifests) != 0 { -// // Download manifests and update paths -// if err := d.OverrideManifests(ctx, platform); err != nil { -// return err -// } -// if OverridePath != "" { -// entryPath = OverridePath -// } -// } -// -// // 2. platform specific RBAC -// if platform == cluster.OpenDataHub || platform == "" { -// if err := cluster.UpdatePodSecurityRolebinding(ctx, cli, dscispec.ApplicationsNamespace, "odh-dashboard"); err != nil { -// return err -// } -// } else { -// if err := cluster.UpdatePodSecurityRolebinding(ctx, cli, dscispec.ApplicationsNamespace, "rhods-dashboard"); err != nil { -// return err -// } -// } -// -// // 3. Append or Update variable for component to consume -// extraParamsMap, err := updateKustomizeVariable(ctx, cli, platform, dscispec) -// if err != nil { -// return errors.New("failed to set variable for extraParamsMap") -// } -// -// // 4. update params.env regardless devFlags is provided of not -// if err := deploy.ApplyParams(entryPath, nil, extraParamsMap); err != nil { -// return fmt.Errorf("failed to update params.env from %s : %w", entryPath, err) -// } -// } -// -// // common: Deploy odh-dashboard manifests -// // TODO: check if we can have the same component name odh-dashboard for both, or still keep rhods-dashboard for RHOAI -// switch platform { -// case cluster.SelfManagedRhods, cluster.ManagedRhods: -// // anaconda -// if err := cluster.CreateSecret(ctx, cli, "anaconda-ce-access", dscispec.ApplicationsNamespace); err != nil { -// return fmt.Errorf("failed to create access-secret for anaconda: %w", err) -// } -// // Deploy RHOAI manifests -// if err := deploy.DeployManifestsFromPath(ctx, cli, owner, entryPath, dscispec.ApplicationsNamespace, ComponentNameDownstream, enabled); err != nil { -// return fmt.Errorf("failed to apply manifests from %s: %w", PathDownstream, err) -// } -// l.Info("apply manifests done") -// -// if enabled { -// if err := cluster.WaitForDeploymentAvailable(ctx, cli, ComponentNameDownstream, dscispec.ApplicationsNamespace, 20, 3); err != nil { -// return fmt.Errorf("deployment for %s is not ready to server: %w", ComponentNameDownstream, err) -// } -// } -// -// // CloudService Monitoring handling -// if platform == cluster.ManagedRhods { -// if err := d.UpdatePrometheusConfig(cli, l, enabled && monitoringEnabled, ComponentNameDownstream); err != nil { -// return err -// } -// if err := deploy.DeployManifestsFromPath(ctx, cli, owner, -// filepath.Join(deploy.DefaultManifestPath, "monitoring", "prometheus", "apps"), -// dscispec.Monitoring.Namespace, -// "prometheus", true); err != nil { -// return err -// } -// l.Info("updating SRE monitoring done") -// } -// return nil -// -// default: -// // Deploy ODH manifests -// if err := deploy.DeployManifestsFromPath(ctx, cli, owner, entryPath, dscispec.ApplicationsNamespace, ComponentNameUpstream, enabled); err != nil { -// return err -// } -// l.Info("apply manifests done") -// if enabled { -// if err := cluster.WaitForDeploymentAvailable(ctx, cli, ComponentNameUpstream, dscispec.ApplicationsNamespace, 20, 3); err != nil { -// return fmt.Errorf("deployment for %s is not ready to server: %w", ComponentNameUpstream, err) -// } -// } -// -// return nil -// } -//} -// 
-// func updateKustomizeVariable(ctx context.Context, cli client.Client, platform cluster.Platform, dscispec *dsciv1.DSCInitializationSpec) (map[string]string, error) { -// adminGroups := map[cluster.Platform]string{ -// cluster.SelfManagedRhods: "rhods-admins", -// cluster.ManagedRhods: "dedicated-admins", -// cluster.OpenDataHub: "odh-admins", -// cluster.Unknown: "odh-admins", -// }[platform] -// -// sectionTitle := map[cluster.Platform]string{ -// cluster.SelfManagedRhods: "OpenShift Self Managed Services", -// cluster.ManagedRhods: "OpenShift Managed Services", -// cluster.OpenDataHub: "OpenShift Open Data Hub", -// cluster.Unknown: "OpenShift Open Data Hub", -// }[platform] -// -// consoleLinkDomain, err := cluster.GetDomain(ctx, cli) -// if err != nil { -// return nil, fmt.Errorf("error getting console route URL %s : %w", consoleLinkDomain, err) -// } -// consoleURL := map[cluster.Platform]string{ -// cluster.SelfManagedRhods: "https://rhods-dashboard-" + dscispec.ApplicationsNamespace + "." + consoleLinkDomain, -// cluster.ManagedRhods: "https://rhods-dashboard-" + dscispec.ApplicationsNamespace + "." + consoleLinkDomain, -// cluster.OpenDataHub: "https://odh-dashboard-" + dscispec.ApplicationsNamespace + "." + consoleLinkDomain, -// cluster.Unknown: "https://odh-dashboard-" + dscispec.ApplicationsNamespace + "." + consoleLinkDomain, -// }[platform] -// -// return map[string]string{ -// "admin_groups": adminGroups, -// "dashboard-url": consoleURL, -// "section-title": sectionTitle, -// }, nil -//} -// -// func (d *Dashboard) cleanOauthClient(ctx context.Context, cli client.Client, dscispec *dsciv1.DSCInitializationSpec, currentComponentExist bool, l logr.Logger) error { -// // Remove previous oauth-client secrets -// // Check if component is going from state of `Not Installed --> Installed` -// // Assumption: Component is currently set to enabled -// name := "dashboard-oauth-client" -// if !currentComponentExist { -// l.Info("Cleanup any left secret") -// // Delete client secrets from previous installation -// oauthClientSecret := &corev1.Secret{} -// err := cli.Get(ctx, client.ObjectKey{ -// Namespace: dscispec.ApplicationsNamespace, -// Name: name, -// }, oauthClientSecret) -// if err != nil { -// if !k8serr.IsNotFound(err) { -// return fmt.Errorf("error getting secret %s: %w", name, err) -// } -// } else { -// if err := cli.Delete(ctx, oauthClientSecret); err != nil { -// return fmt.Errorf("error deleting secret %s: %w", name, err) -// } -// l.Info("successfully deleted secret", "secret", name) -// } -// } -// return nil -//} diff --git a/components/dashboard/zz_generated.deepcopy.go b/components/dashboard/zz_generated.deepcopy.go deleted file mode 100644 index 69b406105a1..00000000000 --- a/components/dashboard/zz_generated.deepcopy.go +++ /dev/null @@ -1,40 +0,0 @@ -//go:build !ignore_autogenerated - -/* -Copyright 2023. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -// Code generated by controller-gen. DO NOT EDIT. 
- -package dashboard - -// -//import () -// -//// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -//func (in *Dashboard) DeepCopyInto(out *Dashboard) { -// *out = *in -// in.Component.DeepCopyInto(&out.Component) -//} -// -//// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Dashboard. -//func (in *Dashboard) DeepCopy() *Dashboard { -// if in == nil { -// return nil -// } -// out := new(Dashboard) -// in.DeepCopyInto(out) -// return out -//} diff --git a/components/ray/ray.go b/components/ray/ray.go deleted file mode 100644 index c8fa30edbd4..00000000000 --- a/components/ray/ray.go +++ /dev/null @@ -1,115 +0,0 @@ -// Package ray provides utility functions to config Ray as part of the stack -// which makes managing distributed compute infrastructure in the cloud easy and intuitive for Data Scientists -// +groupName=datasciencecluster.opendatahub.io -package ray - -import ( - "context" - "fmt" - "path/filepath" - - "github.com/go-logr/logr" - operatorv1 "github.com/openshift/api/operator/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "sigs.k8s.io/controller-runtime/pkg/client" - logf "sigs.k8s.io/controller-runtime/pkg/log" - - dsciv1 "github.com/opendatahub-io/opendatahub-operator/v2/apis/dscinitialization/v1" - "github.com/opendatahub-io/opendatahub-operator/v2/components" - "github.com/opendatahub-io/opendatahub-operator/v2/pkg/cluster" - "github.com/opendatahub-io/opendatahub-operator/v2/pkg/deploy" -) - -var ( - ComponentName = "ray" - RayPath = deploy.DefaultManifestPath + "/" + ComponentName + "/openshift" -) - -// Verifies that Ray implements ComponentInterface. -var _ components.ComponentInterface = (*Ray)(nil) - -// Ray struct holds the configuration for the Ray component. 
-// +kubebuilder:object:generate=true -type Ray struct { - components.Component `json:""` -} - -func (r *Ray) Init(ctx context.Context, _ cluster.Platform) error { - log := logf.FromContext(ctx).WithName(ComponentName) - - var imageParamMap = map[string]string{ - "odh-kuberay-operator-controller-image": "RELATED_IMAGE_ODH_KUBERAY_OPERATOR_CONTROLLER_IMAGE", - } - if err := deploy.ApplyParams(RayPath, imageParamMap); err != nil { - log.Error(err, "failed to update image", "path", RayPath) - } - - return nil -} - -func (r *Ray) OverrideManifests(ctx context.Context, _ cluster.Platform) error { - // If devflags are set, update default manifests path - if len(r.DevFlags.Manifests) != 0 { - manifestConfig := r.DevFlags.Manifests[0] - if err := deploy.DownloadManifests(ctx, ComponentName, manifestConfig); err != nil { - return err - } - // If overlay is defined, update paths - defaultKustomizePath := "openshift" - if manifestConfig.SourcePath != "" { - defaultKustomizePath = manifestConfig.SourcePath - } - RayPath = filepath.Join(deploy.DefaultManifestPath, ComponentName, defaultKustomizePath) - } - - return nil -} - -func (r *Ray) GetComponentName() string { - return ComponentName -} - -func (r *Ray) ReconcileComponent(ctx context.Context, cli client.Client, l logr.Logger, - owner metav1.Object, dscispec *dsciv1.DSCInitializationSpec, platform cluster.Platform, _ bool) error { - enabled := r.GetManagementState() == operatorv1.Managed - monitoringEnabled := dscispec.Monitoring.ManagementState == operatorv1.Managed - - if enabled { - if r.DevFlags != nil { - // Download manifests and update paths - if err := r.OverrideManifests(ctx, platform); err != nil { - return err - } - } - if err := deploy.ApplyParams(RayPath, nil, map[string]string{"namespace": dscispec.ApplicationsNamespace}); err != nil { - return fmt.Errorf("failed to update namespace from %s : %w", RayPath, err) - } - } - // Deploy Ray Operator - if err := deploy.DeployManifestsFromPath(ctx, cli, owner, RayPath, dscispec.ApplicationsNamespace, ComponentName, enabled); err != nil { - return fmt.Errorf("failed to apply manifets from %s : %w", RayPath, err) - } - l.Info("apply manifests done") - - if enabled { - if err := cluster.WaitForDeploymentAvailable(ctx, cli, ComponentName, dscispec.ApplicationsNamespace, 20, 2); err != nil { - return fmt.Errorf("deployment for %s is not ready to server: %w", ComponentName, err) - } - } - - // CloudService Monitoring handling - if platform == cluster.ManagedRhods { - if err := r.UpdatePrometheusConfig(cli, l, enabled && monitoringEnabled, ComponentName); err != nil { - return err - } - if err := deploy.DeployManifestsFromPath(ctx, cli, owner, - filepath.Join(deploy.DefaultManifestPath, "monitoring", "prometheus", "apps"), - dscispec.Monitoring.Namespace, - "prometheus", true); err != nil { - return err - } - l.Info("updating SRE monitoring done") - } - - return nil -} diff --git a/components/ray/zz_generated.deepcopy.go b/components/ray/zz_generated.deepcopy.go deleted file mode 100644 index f7688cd81a5..00000000000 --- a/components/ray/zz_generated.deepcopy.go +++ /dev/null @@ -1,39 +0,0 @@ -//go:build !ignore_autogenerated - -/* -Copyright 2023. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -// Code generated by controller-gen. DO NOT EDIT. - -package ray - -import () - -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *Ray) DeepCopyInto(out *Ray) { - *out = *in - in.Component.DeepCopyInto(&out.Component) -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Ray. -func (in *Ray) DeepCopy() *Ray { - if in == nil { - return nil - } - out := new(Ray) - in.DeepCopyInto(out) - return out -} diff --git a/config/crd/bases/components.opendatahub.io_rays.yaml b/config/crd/bases/components.opendatahub.io_rays.yaml index ff3ce577cc3..0928d74de8c 100644 --- a/config/crd/bases/components.opendatahub.io_rays.yaml +++ b/config/crd/bases/components.opendatahub.io_rays.yaml @@ -14,7 +14,16 @@ spec: singular: ray scope: Cluster versions: - - name: v1 + - additionalPrinterColumns: + - description: Ready + jsonPath: .status.conditions[?(@.type=="Ready")].status + name: Ready + type: string + - description: Reason + jsonPath: .status.conditions[?(@.type=="Ready")].reason + name: Reason + type: string + name: v1 schema: openAPIV3Schema: description: Ray is the Schema for the rays API @@ -39,10 +48,32 @@ spec: spec: description: RaySpec defines the desired state of Ray properties: - foo: - description: Foo is an example field of Ray. Edit ray_types.go to - remove/update - type: string + devFlags: + description: Add developer fields + properties: + manifests: + description: List of custom manifests for the given component + items: + properties: + contextDir: + default: manifests + description: contextDir is the relative path to the folder + containing manifests in a repository, default value "manifests" + type: string + sourcePath: + default: "" + description: 'sourcePath is the subpath within contextDir + where kustomize builds start. Examples include any sub-folder + or path: `base`, `overlays/dev`, `default`, `odh` etc.' + type: string + uri: + default: "" + description: uri is the URI point to a git repo with tag/branch. + e.g. https://github.com/org/repo/tarball/ + type: string + type: object + type: array + type: object type: object status: description: RayStatus defines the observed state of Ray @@ -110,6 +141,9 @@ spec: type: string type: object type: object + x-kubernetes-validations: + - message: Ray name must be default-ray + rule: self.metadata.name == 'default-ray' served: true storage: true subresources: diff --git a/config/manifests/bases/opendatahub-operator.clusterserviceversion.yaml b/config/manifests/bases/opendatahub-operator.clusterserviceversion.yaml index 194052f9736..d3e84983875 100644 --- a/config/manifests/bases/opendatahub-operator.clusterserviceversion.yaml +++ b/config/manifests/bases/opendatahub-operator.clusterserviceversion.yaml @@ -31,10 +31,6 @@ spec: displayName: Dashboard kind: Dashboard name: dashboards.components.opendatahub.io - specDescriptors: - - description: Add developer fields - displayName: Dev Flags - path: devFlags version: v1 - description: DataScienceCluster is the Schema for the datascienceclusters API. 
displayName: Data Science Cluster diff --git a/controllers/components/dashboard/dashboard.go b/controllers/components/dashboard/dashboard.go index fc1fec16d8e..6293e7d3908 100644 --- a/controllers/components/dashboard/dashboard.go +++ b/controllers/components/dashboard/dashboard.go @@ -23,7 +23,7 @@ func Init(platform cluster.Platform) error { return nil } -func GetDashboard(dsc *dscv1.DataScienceCluster) *componentsv1.Dashboard { +func GetComponentCR(dsc *dscv1.DataScienceCluster) *componentsv1.Dashboard { dashboardAnnotations := make(map[string]string) switch dsc.Spec.Components.Dashboard.ManagementState { diff --git a/controllers/components/dashboard/dashboard_controller.go b/controllers/components/dashboard/dashboard_controller.go index 1b65503dd45..c506d97fb20 100644 --- a/controllers/components/dashboard/dashboard_controller.go +++ b/controllers/components/dashboard/dashboard_controller.go @@ -38,8 +38,8 @@ import ( "github.com/opendatahub-io/opendatahub-operator/v2/pkg/metadata/labels" ) -// NewReconciler creates a ComponentReconciler for the Dashboard API. -func NewReconciler(ctx context.Context, mgr ctrl.Manager) error { +// NewComponentReconciler creates a ComponentReconciler for the Dashboard API. +func NewComponentReconciler(ctx context.Context, mgr ctrl.Manager) error { componentName := computeComponentName() _, err := reconciler.ComponentReconcilerFor[*componentsv1.Dashboard](mgr, componentsv1.DashboardInstanceName, &componentsv1.Dashboard{}). diff --git a/controllers/components/ray/ray.go b/controllers/components/ray/ray.go new file mode 100644 index 00000000000..d7b2dab9a5e --- /dev/null +++ b/controllers/components/ray/ray.go @@ -0,0 +1,52 @@ +package ray + +import ( + "fmt" + + operatorv1 "github.com/openshift/api/operator/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + componentsv1 "github.com/opendatahub-io/opendatahub-operator/v2/apis/components/v1" + dscv1 "github.com/opendatahub-io/opendatahub-operator/v2/apis/datasciencecluster/v1" + "github.com/opendatahub-io/opendatahub-operator/v2/pkg/cluster" + odhdeploy "github.com/opendatahub-io/opendatahub-operator/v2/pkg/deploy" + "github.com/opendatahub-io/opendatahub-operator/v2/pkg/metadata/annotations" +) + +// for DSC to get compoment Ray's CR. +func GetComponentCR(dsc *dscv1.DataScienceCluster) *componentsv1.Ray { + rayAnnotations := make(map[string]string) + switch dsc.Spec.Components.Ray.ManagementState { + case operatorv1.Managed, operatorv1.Removed: + rayAnnotations[annotations.ManagementStateAnnotation] = string(dsc.Spec.Components.Ray.ManagementState) + default: // Force and Unmanaged case for unknown values, we do not support these yet + rayAnnotations[annotations.ManagementStateAnnotation] = "Unknown" + } + + return &componentsv1.Ray{ + TypeMeta: metav1.TypeMeta{ + Kind: componentsv1.RayKind, + APIVersion: componentsv1.GroupVersion.String(), + }, + ObjectMeta: metav1.ObjectMeta{ + Name: componentsv1.RayInstanceName, + Annotations: rayAnnotations, + }, + Spec: componentsv1.RaySpec{ + RayCommonSpec: dsc.Spec.Components.Ray.RayCommonSpec, + }, + } +} + +// Init for set images. 
+func Init(platform cluster.Platform) error { + imageParamMap := map[string]string{ + "odh-kuberay-operator-controller-image": "RELATED_IMAGE_ODH_KUBERAY_OPERATOR_CONTROLLER_IMAGE", + } + + if err := odhdeploy.ApplyParams(DefaultPath, imageParamMap); err != nil { + return fmt.Errorf("failed to update images on path %s: %w", DefaultPath, err) + } + + return nil +} diff --git a/controllers/components/ray/ray_controller.go b/controllers/components/ray/ray_controller.go index c147afc9ae8..f9bb01c215b 100644 --- a/controllers/components/ray/ray_controller.go +++ b/controllers/components/ray/ray_controller.go @@ -19,44 +19,69 @@ package ray import ( "context" - "k8s.io/apimachinery/pkg/runtime" + securityv1 "github.com/openshift/api/security/v1" + appsv1 "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + rbacv1 "k8s.io/api/rbac/v1" + extv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1" ctrl "sigs.k8s.io/controller-runtime" - "sigs.k8s.io/controller-runtime/pkg/client" - "sigs.k8s.io/controller-runtime/pkg/log" + "sigs.k8s.io/controller-runtime/pkg/builder" componentsv1 "github.com/opendatahub-io/opendatahub-operator/v2/apis/components/v1" + "github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/deploy" + "github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/render" + "github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/updatestatus" + "github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/predicates/resources" + "github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/reconciler" + odhdeploy "github.com/opendatahub-io/opendatahub-operator/v2/pkg/deploy" + "github.com/opendatahub-io/opendatahub-operator/v2/pkg/metadata/labels" ) -// RayReconciler reconciles a Ray object. -type RayReconciler struct { - client.Client - Scheme *runtime.Scheme -} +const ( + ComponentName = componentsv1.RayComponentName +) -//+kubebuilder:rbac:groups=components.opendatahub.io,resources=rays,verbs=get;list;watch;create;update;patch;delete -//+kubebuilder:rbac:groups=components.opendatahub.io,resources=rays/status,verbs=get;update;patch -//+kubebuilder:rbac:groups=components.opendatahub.io,resources=rays/finalizers,verbs=update - -// Reconcile is part of the main kubernetes reconciliation loop which aims to -// move the current state of the cluster closer to the desired state. -// TODO(user): Modify the Reconcile function to compare the state specified by -// the Ray object against the actual cluster state, and then -// perform operations to make the cluster state reflect the state specified by -// the user. -// -// For more details, check Reconcile and its Result here: -// - https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.12.2/pkg/reconcile -func (r *RayReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { - _ = log.FromContext(ctx) - - // TODO(user): your logic here - - return ctrl.Result{}, nil -} +var ( + DefaultPath = odhdeploy.DefaultManifestPath + "/" + ComponentName + "/openshift" +) + +func NewComponentReconciler(ctx context.Context, mgr ctrl.Manager) error { + _, err := reconciler.ComponentReconcilerFor[*componentsv1.Ray]( + mgr, + componentsv1.RayInstanceName, + &componentsv1.Ray{}, + ). + // customized Owns() for Component with new predicates + Owns(&corev1.ConfigMap{}). + Owns(&corev1.Secret{}). + Owns(&rbacv1.ClusterRoleBinding{}). + Owns(&rbacv1.ClusterRole{}). + Owns(&rbacv1.Role{}). + Owns(&rbacv1.RoleBinding{}). + Owns(&corev1.ServiceAccount{}). 
+ Owns(&appsv1.Deployment{}, builder.WithPredicates(resources.NewDeploymentPredicate())). + Owns(&securityv1.SecurityContextConstraints{}). + Watches(&extv1.CustomResourceDefinition{}). // call ForLabel() + new predicates + // Add Ray-specific actions + WithAction(initialize). + WithAction(devFlags). + WithAction(render.NewAction( + render.WithCache(true, render.DefaultCachingKeyFn), + render.WithLabel(labels.ODH.Component(ComponentName), "true"), + render.WithLabel(labels.K8SCommon.PartOf, ComponentName), + )). + WithAction(deploy.NewAction( + deploy.WithFieldOwner(componentsv1.RayInstanceName), + deploy.WithLabel(labels.ComponentManagedBy, componentsv1.RayInstanceName), + )). + WithAction(updatestatus.NewAction( + updatestatus.WithSelectorLabel(labels.ComponentManagedBy, componentsv1.RayInstanceName), + )). + Build(ctx) + + if err != nil { + return err // no need customize error, it is done in the caller main + } -// SetupWithManager sets up the controller with the Manager. -func (r *RayReconciler) SetupWithManager(mgr ctrl.Manager) error { - return ctrl.NewControllerManagedBy(mgr). - For(&componentsv1.Ray{}). - Complete(r) + return nil } diff --git a/controllers/components/ray/ray_controller_actions.go b/controllers/components/ray/ray_controller_actions.go new file mode 100644 index 00000000000..01baa6e30cf --- /dev/null +++ b/controllers/components/ray/ray_controller_actions.go @@ -0,0 +1,64 @@ +/* +Copyright 2023. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package ray + +import ( + "context" + "fmt" + + componentsv1 "github.com/opendatahub-io/opendatahub-operator/v2/apis/components/v1" + odhtypes "github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/types" + odhdeploy "github.com/opendatahub-io/opendatahub-operator/v2/pkg/deploy" +) + +func initialize(ctx context.Context, rr *odhtypes.ReconciliationRequest) error { + rr.Manifests = append(rr.Manifests, odhtypes.ManifestInfo{ + Path: DefaultPath, + ContextDir: "", + SourcePath: "", + }) + if err := odhdeploy.ApplyParams(DefaultPath, nil, map[string]string{"namespace": rr.DSCI.Spec.ApplicationsNamespace}); err != nil { + return fmt.Errorf("failed to update params.env from %s : %w", rr.Manifests[0], err) + } + return nil +} + +func devFlags(ctx context.Context, rr *odhtypes.ReconciliationRequest) error { + ray, ok := rr.Instance.(*componentsv1.Ray) + if !ok { + return fmt.Errorf("resource instance %v is not a componentsv1.Ray)", rr.Instance) + } + + if ray.Spec.DevFlags == nil { + return nil + } + // Implement devflags support logic + // If dev flags are set, update default manifests path + if len(ray.Spec.DevFlags.Manifests) != 0 { + manifestConfig := ray.Spec.DevFlags.Manifests[0] + if err := odhdeploy.DownloadManifests(ctx, ComponentName, manifestConfig); err != nil { + return err + } + if manifestConfig.SourcePath != "" { + rr.Manifests[0].Path = odhdeploy.DefaultManifestPath + rr.Manifests[0].ContextDir = ComponentName + rr.Manifests[0].SourcePath = manifestConfig.SourcePath + } + } + // TODO: Implement devflags logmode logic + return nil +} diff --git a/controllers/datasciencecluster/datasciencecluster_controller.go b/controllers/datasciencecluster/datasciencecluster_controller.go index 9073d8c20ee..775ca99e84c 100644 --- a/controllers/datasciencecluster/datasciencecluster_controller.go +++ b/controllers/datasciencecluster/datasciencecluster_controller.go @@ -54,6 +54,7 @@ import ( dsciv1 "github.com/opendatahub-io/opendatahub-operator/v2/apis/dscinitialization/v1" "github.com/opendatahub-io/opendatahub-operator/v2/components/datasciencepipelines" dashboardctrl "github.com/opendatahub-io/opendatahub-operator/v2/controllers/components/dashboard" + rayctrl "github.com/opendatahub-io/opendatahub-operator/v2/controllers/components/ray" "github.com/opendatahub-io/opendatahub-operator/v2/controllers/status" "github.com/opendatahub-io/opendatahub-operator/v2/pkg/cluster" odhClient "github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/client" @@ -83,7 +84,7 @@ const ( // Reconcile is part of the main kubernetes reconciliation loop which aims to // move the current state of the cluster closer to the desired state. 
-func (r *DataScienceClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { //nolint:maintidx +func (r *DataScienceClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { //nolint:maintidx,gocyclo log := r.Log log.Info("Reconciling DataScienceCluster resources", "Request.Name", req.Name) @@ -242,7 +243,21 @@ func (r *DataScienceClusterReconciler) Reconcile(ctx context.Context, req ctrl.R var componentErrors *multierror.Error // Deploy Dashboard - if instance, err = r.reconcileDashboardComponent(ctx, instance); err != nil { + + if instance, err = r.ReconcileComponent(ctx, instance, componentsv1.DashboardComponentName, func() (error, bool) { + // Get the Dashboard instance + dashboard := dashboardctrl.GetComponentCR(instance) + // Reconcile component either create CR with setting owner or delete it + return r.apply(ctx, instance, dashboard), instance.Spec.Components.Dashboard.ManagementState == operatorv1.Managed + }); err != nil { + componentErrors = multierror.Append(componentErrors, err) + } + + // Deploy Ray + if instance, err = r.ReconcileComponent(ctx, instance, componentsv1.RayComponentName, func() (error, bool) { + ray := rayctrl.GetComponentCR(instance) + return r.apply(ctx, instance, ray), instance.Spec.Components.Ray.ManagementState == operatorv1.Managed + }); err != nil { componentErrors = multierror.Append(componentErrors, err) } @@ -286,12 +301,18 @@ func (r *DataScienceClusterReconciler) Reconcile(ctx context.Context, req ctrl.R return ctrl.Result{}, nil } -// TODO: make it generic for all components. -func (r *DataScienceClusterReconciler) reconcileDashboardComponent(ctx context.Context, instance *dscv1.DataScienceCluster) (*dscv1.DataScienceCluster, error) { - r.Log.Info("Starting reconciliation of Dashboard component") - componentName := dashboardctrl.ComponentName +type ComponentHandler func() (error, bool) - enabled := instance.Spec.Components.Dashboard.ManagementState == operatorv1.Managed +// TODO: make it generic for all components. 
+func (r *DataScienceClusterReconciler) ReconcileComponent( + ctx context.Context, + instance *dscv1.DataScienceCluster, + componentName string, + componentRec ComponentHandler, +) (*dscv1.DataScienceCluster, error) { + r.Log.Info("Starting reconciliation of component: " + componentName) + + err, enabled := componentRec() _, isExistStatus := instance.Status.InstalledComponents[componentName] if !isExistStatus { @@ -308,14 +329,8 @@ func (r *DataScienceClusterReconciler) reconcileDashboardComponent(ctx context.C } } - // Create the Dashboard instance - dashboard := dashboardctrl.GetDashboard(instance) - - // Reconcile component - err := r.apply(ctx, instance, dashboard) - if err != nil { - r.Log.Error(err, "Failed to reconcile Dashboard component") + r.Log.Error(err, "Failed to reconcile component: "+componentName) instance = r.reportError(err, instance, fmt.Sprintf("failed to reconcile %s on DataScienceCluster", componentName)) instance, _ = status.UpdateWithRetry(ctx, r.Client, instance, func(saved *dscv1.DataScienceCluster) { status.SetComponentCondition(&saved.Status.Conditions, componentName, status.ReconcileFailed, fmt.Sprintf("Component reconciliation failed: %v", err), corev1.ConditionFalse) @@ -323,7 +338,7 @@ func (r *DataScienceClusterReconciler) reconcileDashboardComponent(ctx context.C return instance, err } - r.Log.Info("Dashboard component reconciled successfully") + r.Log.Info("component reconciled successfully: " + componentName) instance, err = status.UpdateWithRetry(ctx, r.Client, instance, func(saved *dscv1.DataScienceCluster) { if saved.Status.InstalledComponents == nil { saved.Status.InstalledComponents = make(map[string]bool) @@ -365,6 +380,7 @@ var configMapPredicates = predicate.Funcs{ }, } +// apply either create component CR with owner set or delete component CR if it is marked with annotation. func (r *DataScienceClusterReconciler) apply(ctx context.Context, dsc *dscv1.DataScienceCluster, obj client.Object) error { if obj.GetObjectKind().GroupVersionKind().Empty() { return errors.New("no groupversionkind defined") @@ -507,7 +523,9 @@ func (r *DataScienceClusterReconciler) SetupWithManager(ctx context.Context, mgr &admissionregistrationv1.ValidatingWebhookConfiguration{}, builder.WithPredicates(modelMeshwebhookPredicates), ). + // components CRs Owns(&componentsv1.Dashboard{}). + Owns(&componentsv1.Ray{}). 
Owns( &corev1.ServiceAccount{}, builder.WithPredicates(saPredicates), diff --git a/controllers/datasciencecluster/kubebuilder_rbac.go b/controllers/datasciencecluster/kubebuilder_rbac.go index 6e062d1adcf..e95f9e7d4aa 100644 --- a/controllers/datasciencecluster/kubebuilder_rbac.go +++ b/controllers/datasciencecluster/kubebuilder_rbac.go @@ -84,10 +84,6 @@ package datasciencecluster // +kubebuilder:rbac:groups="rbac.authorization.k8s.io",resources=clusterrolebindings,verbs=* -// +kubebuilder:rbac:groups="ray.io",resources=rayservices,verbs=create;delete;list;watch;update;patch;get -// +kubebuilder:rbac:groups="ray.io",resources=rayjobs,verbs=create;delete;list;update;watch;patch;get -// +kubebuilder:rbac:groups="ray.io",resources=rayclusters,verbs=create;delete;list;patch;get - // +kubebuilder:rbac:groups="apiregistration.k8s.io",resources=apiservices,verbs=create;delete;list;watch;update;patch;get // +kubebuilder:rbac:groups="operator.openshift.io",resources=consoles,verbs=get;list;watch;patch;delete @@ -229,7 +225,15 @@ package datasciencecluster // +kubebuilder:rbac:groups="user.openshift.io",resources=groups,verbs=get;create;list;watch;patch;delete // +kubebuilder:rbac:groups="console.openshift.io",resources=consolelinks,verbs=create;get;patch;delete -// Dashboard component +// Ray +// +kubebuilder:rbac:groups=components.opendatahub.io,resources=rays,verbs=get;list;watch;create;update;patch;delete +// +kubebuilder:rbac:groups=components.opendatahub.io,resources=rays/status,verbs=get;update;patch +// +kubebuilder:rbac:groups=components.opendatahub.io,resources=rays/finalizers,verbs=update +// +kubebuilder:rbac:groups="ray.io",resources=rayservices,verbs=create;delete;list;watch;update;patch;get +// +kubebuilder:rbac:groups="ray.io",resources=rayjobs,verbs=create;delete;list;update;watch;patch;get +// +kubebuilder:rbac:groups="ray.io",resources=rayclusters,verbs=create;delete;list;patch;get + +// Dashboard // +kubebuilder:rbac:groups=components.opendatahub.io,resources=dashboards,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups=components.opendatahub.io,resources=dashboards/status,verbs=get;update;patch // +kubebuilder:rbac:groups=components.opendatahub.io,resources=dashboards/finalizers,verbs=create;get;list;patch;update;use;watch diff --git a/controllers/webhook/webhook_suite_test.go b/controllers/webhook/webhook_suite_test.go index a93fd1a1691..48fec37e1bd 100644 --- a/controllers/webhook/webhook_suite_test.go +++ b/controllers/webhook/webhook_suite_test.go @@ -50,7 +50,6 @@ import ( "github.com/opendatahub-io/opendatahub-operator/v2/components/kserve" "github.com/opendatahub-io/opendatahub-operator/v2/components/modelmeshserving" "github.com/opendatahub-io/opendatahub-operator/v2/components/modelregistry" - "github.com/opendatahub-io/opendatahub-operator/v2/components/ray" "github.com/opendatahub-io/opendatahub-operator/v2/components/trustyai" "github.com/opendatahub-io/opendatahub-operator/v2/components/workbenches" "github.com/opendatahub-io/opendatahub-operator/v2/controllers/webhook" @@ -292,8 +291,8 @@ func newDSC(name string, namespace string) *dscv1.DataScienceCluster { ManagementState: operatorv1.Removed, }, }, - Ray: ray.Ray{ - Component: componentsold.Component{ + Ray: componentsv1.DSCRay{ + ManagementSpec: components.ManagementSpec{ ManagementState: operatorv1.Removed, }, }, diff --git a/docs/api-overview.md b/docs/api-overview.md index fd908ee0723..c626b1fbaf1 100644 --- a/docs/api-overview.md +++ b/docs/api-overview.md @@ -120,6 +120,23 @@ 
DSCDashboard contains all the configuration exposed in DSC instance for Dashboar +_Appears in:_ +- [Components](#components) + +| Field | Description | Default | Validation | +| --- | --- | --- | --- | +| `managementState` _[ManagementState](#managementstate)_ | Set to one of the following values:

- "Managed" : the operator is actively managing the component and trying to keep it active.
It will only upgrade the component if it is safe to do so

- "Removed" : the operator is actively managing the component and will not install it,
or if it is installed, the operator will try to remove it | | Enum: [Managed Removed]
| +| `devFlags` _[DevFlags](#devflags)_ | Add developer fields | | | + + +#### DSCRay + + + +DSCRay contains all the configuration exposed in DSC instance for Ray component + + + _Appears in:_ - [Components](#components) @@ -625,6 +642,23 @@ _Appears in:_ | `status` _[RayStatus](#raystatus)_ | | | | +#### RayCommonSpec + + + + + + + +_Appears in:_ +- [DSCRay](#dscray) +- [RaySpec](#rayspec) + +| Field | Description | Default | Validation | +| --- | --- | --- | --- | +| `devFlags` _[DevFlags](#devflags)_ | Add developer fields | | | + + #### RayList @@ -658,7 +692,7 @@ _Appears in:_ | Field | Description | Default | Validation | | --- | --- | --- | --- | -| `foo` _string_ | Foo is an example field of Ray. Edit ray_types.go to remove/update | | | +| `devFlags` _[DevFlags](#devflags)_ | Add developer fields | | | #### RayStatus @@ -952,7 +986,6 @@ _Appears in:_ - [Kueue](#kueue) - [ModelMeshServing](#modelmeshserving) - [ModelRegistry](#modelregistry) -- [Ray](#ray) - [TrainingOperator](#trainingoperator) - [TrustyAI](#trustyai) - [Workbenches](#workbenches) @@ -995,8 +1028,11 @@ DevFlagsSpec struct defines the component's dev flags configuration. _Appears in:_ - [Component](#component) - [DSCDashboard](#dscdashboard) +- [DSCRay](#dscray) - [DashboardCommonSpec](#dashboardcommonspec) - [DashboardSpec](#dashboardspec) +- [RayCommonSpec](#raycommonspec) +- [RaySpec](#rayspec) | Field | Description | Default | Validation | | --- | --- | --- | --- | @@ -1014,6 +1050,7 @@ ManagementSpec struct defines the component's management configuration. _Appears in:_ - [Component](#component) - [DSCDashboard](#dscdashboard) +- [DSCRay](#dscray) | Field | Description | Default | Validation | | --- | --- | --- | --- | @@ -1187,30 +1224,6 @@ _Appears in:_ -## datasciencecluster.opendatahub.io/ray - -Package ray provides utility functions to config Ray as part of the stack -which makes managing distributed compute infrastructure in the cloud easy and intuitive for Data Scientists - - - -#### Ray - - - -Ray struct holds the configuration for the Ray component. - - - -_Appears in:_ -- [Components](#components) - -| Field | Description | Default | Validation | -| --- | --- | --- | --- | -| `Component` _[Component](#component)_ | | | | - - - ## datasciencecluster.opendatahub.io/trainingoperator Package trainingoperator provides utility functions to config trainingoperator as part of the stack @@ -1339,7 +1352,7 @@ _Appears in:_ | `kserve` _[Kserve](#kserve)_ | Kserve component configuration.
Require OpenShift Serverless and OpenShift Service Mesh Operators to be installed before enabling the component
Does not support enabling ModelMeshServing at the same time | | |
| `kueue` _[Kueue](#kueue)_ | Kueue component configuration. | | |
| `codeflare` _[CodeFlare](#codeflare)_ | CodeFlare component configuration.
If CodeFlare Operator has been installed in the cluster, it should be uninstalled first before enabled component. | | | -| `ray` _[Ray](#ray)_ | Ray component configuration. | | | +| `ray` _[DSCRay](#dscray)_ | Ray component configuration. | | | | `trustyai` _[TrustyAI](#trustyai)_ | TrustyAI component configuration. | | | | `modelregistry` _[ModelRegistry](#modelregistry)_ | ModelRegistry component configuration. | | | | `trainingoperator` _[TrainingOperator](#trainingoperator)_ | Training Operator component configuration. | | | diff --git a/main.go b/main.go index e52c162e2d1..549f3bfef79 100644 --- a/main.go +++ b/main.go @@ -29,6 +29,7 @@ import ( oauthv1 "github.com/openshift/api/oauth/v1" operatorv1 "github.com/openshift/api/operator/v1" routev1 "github.com/openshift/api/route/v1" + securityv1 "github.com/openshift/api/security/v1" userv1 "github.com/openshift/api/user/v1" ofapiv1alpha1 "github.com/operator-framework/api/pkg/operators/v1alpha1" ofapiv2 "github.com/operator-framework/api/pkg/operators/v2" @@ -63,6 +64,7 @@ import ( "github.com/opendatahub-io/opendatahub-operator/v2/components/modelregistry" "github.com/opendatahub-io/opendatahub-operator/v2/controllers/certconfigmapgenerator" dashboardctrl "github.com/opendatahub-io/opendatahub-operator/v2/controllers/components/dashboard" + rayctrl "github.com/opendatahub-io/opendatahub-operator/v2/controllers/components/ray" dscctrl "github.com/opendatahub-io/opendatahub-operator/v2/controllers/datasciencecluster" dscictrl "github.com/opendatahub-io/opendatahub-operator/v2/controllers/dscinitialization" "github.com/opendatahub-io/opendatahub-operator/v2/controllers/secretgenerator" @@ -106,6 +108,7 @@ func init() { //nolint:gochecknoinits utilruntime.Must(monitoringv1.AddToScheme(scheme)) utilruntime.Must(operatorv1.Install(scheme)) utilruntime.Must(consolev1.AddToScheme(scheme)) + utilruntime.Must(securityv1.Install(scheme)) } func initComponents(_ context.Context, p cluster.Platform) error { @@ -391,9 +394,14 @@ func createDeploymentCacheConfig(platform cluster.Platform) map[string]cache.Con func CreateComponentReconcilers(ctx context.Context, mgr manager.Manager) error { // TODO: add more here or make it go routine - if err := dashboardctrl.NewReconciler(ctx, mgr); err != nil { + if err := dashboardctrl.NewComponentReconciler(ctx, mgr); err != nil { setupLog.Error(err, "unable to create controller", "controller", "DashboardReconciler") return err } + if err := rayctrl.NewComponentReconciler(ctx, mgr); err != nil { + setupLog.Error(err, "unable to create controller", "controller", "RayReconciler") + return err + } + return nil } diff --git a/pkg/controller/handlers/handlers.go b/pkg/controller/handlers/handlers.go index b19d79a07d9..ae27a0fb55b 100644 --- a/pkg/controller/handlers/handlers.go +++ b/pkg/controller/handlers/handlers.go @@ -11,7 +11,6 @@ import ( "github.com/opendatahub-io/opendatahub-operator/v2/pkg/metadata/labels" ) -//nolint:ireturn func ToOwner() handler.EventHandler { return handler.EnqueueRequestsFromMapFunc(func(ctx context.Context, a client.Object) []reconcile.Request { objLabels := a.GetLabels() diff --git a/pkg/upgrade/upgrade.go b/pkg/upgrade/upgrade.go index 90faf59b39b..24c6025b762 100644 --- a/pkg/upgrade/upgrade.go +++ b/pkg/upgrade/upgrade.go @@ -38,7 +38,6 @@ import ( "github.com/opendatahub-io/opendatahub-operator/v2/components/kueue" "github.com/opendatahub-io/opendatahub-operator/v2/components/modelmeshserving" "github.com/opendatahub-io/opendatahub-operator/v2/components/modelregistry" - 
"github.com/opendatahub-io/opendatahub-operator/v2/components/ray" "github.com/opendatahub-io/opendatahub-operator/v2/components/trainingoperator" "github.com/opendatahub-io/opendatahub-operator/v2/components/trustyai" "github.com/opendatahub-io/opendatahub-operator/v2/components/workbenches" @@ -87,8 +86,8 @@ func CreateDefaultDSC(ctx context.Context, cli client.Client) error { CodeFlare: codeflare.CodeFlare{ Component: componentsold.Component{ManagementState: operatorv1.Managed}, }, - Ray: ray.Ray{ - Component: componentsold.Component{ManagementState: operatorv1.Managed}, + Ray: componentsv1.DSCRay{ + ManagementSpec: components.ManagementSpec{ManagementState: operatorv1.Managed}, }, Kueue: kueue.Kueue{ Component: componentsold.Component{ManagementState: operatorv1.Managed}, diff --git a/tests/e2e/controller_test.go b/tests/e2e/controller_test.go index c07d5ff2af8..98ca34746bd 100644 --- a/tests/e2e/controller_test.go +++ b/tests/e2e/controller_test.go @@ -117,6 +117,7 @@ func TestOdhOperator(t *testing.T) { t.Run("create DSCI and DSC CRs", creationTestSuite) // Validate deployment of each component in separate test suite t.Run("validate installation of Dashboard Component", dashboardTestSuite) + t.Run("validate installation of Ray Component", rayTestSuite) // Run deletion if skipDeletion is not set if !skipDeletion { diff --git a/tests/e2e/creation_test.go b/tests/e2e/creation_test.go index cdb3d846d8b..c1920815a06 100644 --- a/tests/e2e/creation_test.go +++ b/tests/e2e/creation_test.go @@ -185,7 +185,7 @@ func waitDSCReady(tc *testContext) error { if err != nil { return false, err } - return dsc.Status.Phase == "Ready", nil + return dsc.Status.Phase == readyStatus, nil }) if err != nil { diff --git a/tests/e2e/dashboard_test.go b/tests/e2e/dashboard_test.go index e6891582783..746eedbf8b6 100644 --- a/tests/e2e/dashboard_test.go +++ b/tests/e2e/dashboard_test.go @@ -36,10 +36,9 @@ func dashboardTestSuite(t *testing.T) { testCtx := dashboardCtx.testCtx t.Run(testCtx.testDsc.Name, func(t *testing.T) { - // DSCI t.Run("Creation of Dashboard CR", func(t *testing.T) { err = dashboardCtx.testDashboardCreation() - require.NoError(t, err, "error creating DSCI CR") + require.NoError(t, err, "error creating Dashboard CR") }) t.Run("Validate Dashboard instance", func(t *testing.T) { @@ -107,7 +106,7 @@ func (tc *DashboardTestCtx) testDashboardCreation() error { func (tc *DashboardTestCtx) validateDashboard() error { // Dashboard spec should match the spec of Dashboard component in DSC if !reflect.DeepEqual(tc.testCtx.testDsc.Spec.Components.Dashboard.DashboardCommonSpec, tc.testDashboardInstance.Spec.DashboardCommonSpec) { - err := fmt.Errorf("expected smanagement state for Dashboard %v, got %v", + err := fmt.Errorf("expected spec for Dashboard %v, got %v", tc.testCtx.testDsc.Spec.Components.Dashboard.DashboardCommonSpec, tc.testDashboardInstance.Spec.DashboardCommonSpec) return err } @@ -116,12 +115,12 @@ func (tc *DashboardTestCtx) validateDashboard() error { func (tc *DashboardTestCtx) testOwnerReferences() error { if len(tc.testDashboardInstance.OwnerReferences) != 1 { - return errors.New("expected ownerreferences to be non empty") + return errors.New("expect CR has ownerreferences set") } // Test Dashboard CR ownerref if tc.testDashboardInstance.OwnerReferences[0].Kind != "DataScienceCluster" { - return fmt.Errorf("expected ownerreference not found. Got ownereferrence: %v", + return fmt.Errorf("expected ownerreference DataScienceCluster not found. 
Got ownereferrence: %v", tc.testDashboardInstance.OwnerReferences[0].Kind) } @@ -134,7 +133,7 @@ func (tc *DashboardTestCtx) testOwnerReferences() error { return fmt.Errorf("error listing component deployments %w", err) } // test any one deployment for ownerreference - if len(appDeployments.Items) != 0 && appDeployments.Items[0].OwnerReferences[0].Kind != "Dashboard" { + if len(appDeployments.Items) != 0 && appDeployments.Items[0].OwnerReferences[0].Kind != componentsv1.DashboardKind { return fmt.Errorf("expected ownerreference not found. Got ownereferrence: %v", appDeployments.Items[0].OwnerReferences) } @@ -155,7 +154,7 @@ func (tc *DashboardTestCtx) validateDashboardReady() error { if err != nil { return false, err } - return dashboard.Status.Phase == "Ready", nil + return dashboard.Status.Phase == readyStatus, nil }) if err != nil { diff --git a/tests/e2e/helper_test.go b/tests/e2e/helper_test.go index 2ae51fe76f6..2479905d6fc 100644 --- a/tests/e2e/helper_test.go +++ b/tests/e2e/helper_test.go @@ -34,7 +34,6 @@ import ( "github.com/opendatahub-io/opendatahub-operator/v2/components/kueue" "github.com/opendatahub-io/opendatahub-operator/v2/components/modelmeshserving" "github.com/opendatahub-io/opendatahub-operator/v2/components/modelregistry" - "github.com/opendatahub-io/opendatahub-operator/v2/components/ray" "github.com/opendatahub-io/opendatahub-operator/v2/components/trainingoperator" "github.com/opendatahub-io/opendatahub-operator/v2/components/trustyai" "github.com/opendatahub-io/opendatahub-operator/v2/components/workbenches" @@ -55,6 +54,7 @@ const ( dscCreationTimeout = 20 * time.Second // time required to wait till DSC is created. generalRetryInterval = 10 * time.Second generalWaitTimeout = 2 * time.Minute + readyStatus = "Ready" ) func (tc *testContext) waitForOperatorDeployment(name string, replicas int32) error { @@ -152,9 +152,9 @@ func setupDSCInstance(name string) *dscv1.DataScienceCluster { ManagementState: operatorv1.Removed, }, }, - Ray: ray.Ray{ - Component: componentsold.Component{ - ManagementState: operatorv1.Removed, + Ray: componentsv1.DSCRay{ + ManagementSpec: components.ManagementSpec{ + ManagementState: operatorv1.Managed, }, }, Kueue: kueue.Kueue{ diff --git a/tests/e2e/odh_manager_test.go b/tests/e2e/odh_manager_test.go index f087dd24df2..f9cbfe4a042 100644 --- a/tests/e2e/odh_manager_test.go +++ b/tests/e2e/odh_manager_test.go @@ -42,6 +42,12 @@ func (tc *testContext) validateOwnedCRDs(t *testing.T) { t.Run("Validate Dashboard CRD", func(t *testing.T) { t.Parallel() require.NoErrorf(t, tc.validateCRD("dashboards.components.opendatahub.io"), - "error in validating CRD : featuretrackers.features.opendatahub.io") + "error in validating CRD : dashboards.components.opendatahub.io") + }) + + t.Run("Validate Ray CRD", func(t *testing.T) { + t.Parallel() + require.NoErrorf(t, tc.validateCRD("rays.components.opendatahub.io"), + "error in validating CRD : rays.components.opendatahub.io") }) } diff --git a/tests/e2e/ray_test.go b/tests/e2e/ray_test.go new file mode 100644 index 00000000000..30defef3992 --- /dev/null +++ b/tests/e2e/ray_test.go @@ -0,0 +1,278 @@ +package e2e_test + +import ( + "context" + "errors" + "fmt" + "reflect" + "testing" + "time" + + operatorv1 "github.com/openshift/api/operator/v1" + "github.com/stretchr/testify/require" + autoscalingv1 "k8s.io/api/autoscaling/v1" + k8serr "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + "k8s.io/apimachinery/pkg/util/wait" + 
"k8s.io/client-go/util/retry" + "sigs.k8s.io/controller-runtime/pkg/client" + + componentsv1 "github.com/opendatahub-io/opendatahub-operator/v2/apis/components/v1" + "github.com/opendatahub-io/opendatahub-operator/v2/pkg/metadata/labels" +) + +type RayTestCtx struct { + testCtx *testContext + testRayInstance componentsv1.Ray +} + +func rayTestSuite(t *testing.T) { + rayCtx := RayTestCtx{} + var err error + rayCtx.testCtx, err = NewTestContext() + require.NoError(t, err) + + testCtx := rayCtx.testCtx + + t.Run(testCtx.testDsc.Name, func(t *testing.T) { + // creation + t.Run("Creation of Ray CR", func(t *testing.T) { + err = rayCtx.testRayCreation() + require.NoError(t, err, "error creating Ray CR") + }) + + t.Run("Validate Ray instance", func(t *testing.T) { + err = rayCtx.validateRay() + require.NoError(t, err, "error validating Ray instance") + }) + + t.Run("Validate Ownerrefrences exist", func(t *testing.T) { + err = rayCtx.testOwnerReferences() + require.NoError(t, err, "error getting all Ray's Ownerrefrences") + }) + + t.Run("Validate Ray Ready", func(t *testing.T) { + err = rayCtx.validateRayReady() + require.NoError(t, err, "Ray instance is not Ready") + }) + + // reconcile + t.Run("Validate Controller reconcile", func(t *testing.T) { + err = rayCtx.testUpdateOnRayResources() + require.NoError(t, err, "error testing updates for Ray's managed resources") + }) + + t.Run("Validate Disabling Ray Component", func(t *testing.T) { + err = rayCtx.testUpdateRayComponentDisabled() + require.NoError(t, err, "error testing ray component enabled field") + }) + }) +} + +func (tc *RayTestCtx) testRayCreation() error { + if tc.testCtx.testDsc.Spec.Components.Ray.ManagementState != operatorv1.Managed { + return nil + } + + err := tc.testCtx.wait(func(ctx context.Context) (bool, error) { + existingRayList := &componentsv1.RayList{} + + if err := tc.testCtx.customClient.List(ctx, existingRayList); err != nil { + return false, err + } + + switch { + case len(existingRayList.Items) == 1: + tc.testRayInstance = existingRayList.Items[0] + return true, nil + case len(existingRayList.Items) > 1: + return false, fmt.Errorf( + "unexpected Ray CR instances. Expected 1 , Found %v instance", len(existingRayList.Items)) + default: + return false, nil + } + }) + + if err != nil { + return fmt.Errorf("unable to find Ray CR instance: %w", err) + } + + return nil +} + +func (tc *RayTestCtx) validateRay() error { + // Ray spec should match the spec of Ray component in DSC + if !reflect.DeepEqual(tc.testCtx.testDsc.Spec.Components.Ray.RayCommonSpec, tc.testRayInstance.Spec.RayCommonSpec) { + err := fmt.Errorf("expected .spec for Ray %v, got %v", + tc.testCtx.testDsc.Spec.Components.Ray.RayCommonSpec, tc.testRayInstance.Spec.RayCommonSpec) + return err + } + return nil +} + +func (tc *RayTestCtx) testOwnerReferences() error { + if len(tc.testRayInstance.OwnerReferences) != 1 { + return errors.New("expect CR has ownerreferences set") + } + + // Test Ray CR ownerref + if tc.testRayInstance.OwnerReferences[0].Kind != "DataScienceCluster" { + return fmt.Errorf("expected ownerreference DataScienceCluster not found. 
Got ownereferrence: %v", + tc.testRayInstance.OwnerReferences[0].Kind) + } + + // Test Ray resources + appDeployments, err := tc.testCtx.kubeClient.AppsV1().Deployments(tc.testCtx.applicationsNamespace).List(tc.testCtx.ctx, metav1.ListOptions{ + LabelSelector: labels.ODH.Component(componentsv1.RayComponentName), + }) + if err != nil { + return fmt.Errorf("error listing component deployments %w", err) + } + // test any one deployment for ownerreference + if len(appDeployments.Items) != 0 && appDeployments.Items[0].OwnerReferences[0].Kind != componentsv1.RayKind { + return fmt.Errorf("expected ownerreference not found. Got ownereferrence: %v", + appDeployments.Items[0].OwnerReferences) + } + + return nil +} + +// Verify Ray instance is in Ready phase when ray deployments are up and running. +func (tc *RayTestCtx) validateRayReady() error { + err := wait.PollUntilContextTimeout(tc.testCtx.ctx, generalRetryInterval, componentReadyTimeout, true, func(ctx context.Context) (bool, error) { + key := types.NamespacedName{Name: tc.testRayInstance.Name} + ray := &componentsv1.Ray{} + + err := tc.testCtx.customClient.Get(ctx, key, ray) + if err != nil { + return false, err + } + return ray.Status.Phase == readyStatus, nil + }) + + if err != nil { + return fmt.Errorf("error waiting Ready state for Ray %v: %w", tc.testRayInstance.Name, err) + } + + return nil +} + +func (tc *RayTestCtx) testUpdateOnRayResources() error { + // Test Updating Ray Replicas + + appDeployments, err := tc.testCtx.kubeClient.AppsV1().Deployments(tc.testCtx.applicationsNamespace).List(tc.testCtx.ctx, metav1.ListOptions{ + LabelSelector: labels.ComponentManagedBy + "=" + tc.testRayInstance.Name, + }) + if err != nil { + return err + } + + if len(appDeployments.Items) != 1 { + return fmt.Errorf("error getting deployment for component %s", tc.testRayInstance.Name) + } + + const expectedReplica int32 = 2 // from 1 to 2 + + testDeployment := appDeployments.Items[0] + patchedReplica := &autoscalingv1.Scale{ + ObjectMeta: metav1.ObjectMeta{ + Name: testDeployment.Name, + Namespace: testDeployment.Namespace, + }, + Spec: autoscalingv1.ScaleSpec{ + Replicas: expectedReplica, + }, + Status: autoscalingv1.ScaleStatus{}, + } + updatedDep, err := tc.testCtx.kubeClient.AppsV1().Deployments(tc.testCtx.applicationsNamespace).UpdateScale(tc.testCtx.ctx, + testDeployment.Name, patchedReplica, metav1.UpdateOptions{}) + if err != nil { + return fmt.Errorf("error patching component resources : %w", err) + } + if updatedDep.Spec.Replicas != patchedReplica.Spec.Replicas { + return fmt.Errorf("failed to patch replicas : expect to be %v but got %v", patchedReplica.Spec.Replicas, updatedDep.Spec.Replicas) + } + + // Sleep for 20 seconds to allow the operator to reconcile + // we expect it should not revert back to original value because of AllowList + time.Sleep(2 * generalRetryInterval) + reconciledDep, err := tc.testCtx.kubeClient.AppsV1().Deployments(tc.testCtx.applicationsNamespace).Get(tc.testCtx.ctx, testDeployment.Name, metav1.GetOptions{}) + if err != nil { + return fmt.Errorf("error getting component resource after reconcile: %w", err) + } + if *reconciledDep.Spec.Replicas != expectedReplica { + return fmt.Errorf("failed to revert back replicas : expect to be %v but got %v", expectedReplica, *reconciledDep.Spec.Replicas) + } + + return nil +} + +func (tc *RayTestCtx) testUpdateRayComponentDisabled() error { + // Test Updating Ray to be disabled + var rayDeploymentName string + + if tc.testCtx.testDsc.Spec.Components.Ray.ManagementState == 
operatorv1.Managed { + appDeployments, err := tc.testCtx.kubeClient.AppsV1().Deployments(tc.testCtx.applicationsNamespace).List(tc.testCtx.ctx, metav1.ListOptions{ + LabelSelector: labels.ODH.Component(componentsv1.RayComponentName), + }) + if err != nil { + return fmt.Errorf("error getting enabled component %v", componentsv1.RayComponentName) + } + if len(appDeployments.Items) > 0 { + rayDeploymentName = appDeployments.Items[0].Name + if appDeployments.Items[0].Status.ReadyReplicas == 0 { + return fmt.Errorf("error getting enabled component: %s its deployment 'ReadyReplicas'", rayDeploymentName) + } + } + } else { + return errors.New("ray spec should be in 'enabled: true' state in order to perform test") + } + + // Disable component Ray + err := retry.RetryOnConflict(retry.DefaultRetry, func() error { + // refresh DSC instance in case it was updated during the reconcile + err := tc.testCtx.customClient.Get(tc.testCtx.ctx, types.NamespacedName{Name: tc.testCtx.testDsc.Name}, tc.testCtx.testDsc) + if err != nil { + return fmt.Errorf("error getting resource %w", err) + } + // Disable the Component + tc.testCtx.testDsc.Spec.Components.Ray.ManagementState = operatorv1.Removed + + // Try to update + err = tc.testCtx.customClient.Update(tc.testCtx.ctx, tc.testCtx.testDsc) + // Return err itself here (not wrapped inside another error) + // so that RetryOnConflict can identify it correctly. + if err != nil { + return fmt.Errorf("error updating component from 'enabled: true' to 'enabled: false': %w", err) + } + + return nil + }) + if err != nil { + return fmt.Errorf("error after retry %w", err) + } + + if err = tc.testCtx.wait(func(ctx context.Context) (bool, error) { + // Verify ray CR is deleted + ray := &componentsv1.Ray{} + err = tc.testCtx.customClient.Get(ctx, client.ObjectKey{Name: tc.testRayInstance.Name}, ray) + return k8serr.IsNotFound(err), nil + }); err != nil { + return fmt.Errorf("component ray is disabled, should not get the Ray CR %v", tc.testRayInstance.Name) + } + + // Sleep for 20 seconds to allow the operator to reconcile + time.Sleep(2 * generalRetryInterval) + _, err = tc.testCtx.kubeClient.AppsV1().Deployments(tc.testCtx.applicationsNamespace).Get(tc.testCtx.ctx, rayDeploymentName, metav1.GetOptions{}) + if err != nil { + if k8serr.IsNotFound(err) { + return nil // correct result: should not find deployment after we disable it already + } + return fmt.Errorf("error getting component resource after reconcile: %w", err) + } + return fmt.Errorf("component %v is disabled, should not get its deployment %v from NS %v any more", + componentsv1.RayKind, + rayDeploymentName, + tc.testCtx.applicationsNamespace) +} diff --git a/tests/integration/features/features_suite_int_test.go b/tests/integration/features/features_suite_int_test.go index 1dcf8d68c5a..edf14152d76 100644 --- a/tests/integration/features/features_suite_int_test.go +++ b/tests/integration/features/features_suite_int_test.go @@ -59,7 +59,6 @@ var _ = BeforeSuite(func() { Scheme: testScheme, Paths: []string{ filepath.Join(projectDir, "config", "crd", "bases"), - filepath.Join(projectDir, "config", "crd", "dashboard-crds"), filepath.Join(projectDir, "tests", "integration", "features", "fixtures", "crd"), }, ErrorIfPathMissing: true,
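For reference, a minimal sketch of how the new Ray component is expected to be enabled from the user side once this change lands, assuming the usual single DataScienceCluster instance (here named default-dsc for illustration); the `ray` block maps to the DSCRay type introduced above, and `managementState` is the only field this patch defines for it:

    apiVersion: datasciencecluster.opendatahub.io/v1
    kind: DataScienceCluster
    metadata:
      name: default-dsc
    spec:
      components:
        ray:
          # Managed enables the component; Removed disables it,
          # which is the transition exercised by testUpdateRayComponentDisabled above.
          managementState: Managed

Flipping the state to Removed is expected to delete the Ray CR and its deployments from the applications namespace, which is what the disable test in tests/e2e/ray_test.go verifies.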