From 6f5968211f9adb51a07ecd57739d8ddcb5fba388 Mon Sep 17 00:00:00 2001 From: Wen Zhou Date: Thu, 5 Dec 2024 16:24:23 +0100 Subject: [PATCH] feat: add NIM flag in Operator (#1420) * feat: add NIM flag in Operator - API change - add function in modelcontroller - only when kserve is managed and its nim is managed, we set nim-state to managed - use newer branch of serving which has the new params.env --------- Signed-off-by: Wen Zhou --- README.md | 2 ++ apis/components/v1alpha1/kserve_types.go | 10 ++++++++ .../v1alpha1/modelcontroller_types.go | 1 + .../v1alpha1/zz_generated.deepcopy.go | 17 ++++++++++++++ ...nents.platform.opendatahub.io_kserves.yaml | 11 +++++++++ ...tform.opendatahub.io_modelcontrollers.yaml | 11 +++++++++ ...er.opendatahub.io_datascienceclusters.yaml | 11 +++++++++ ...atahub-operator.clusterserviceversion.yaml | 3 +++ ...nents.platform.opendatahub.io_kserves.yaml | 11 +++++++++ ...tform.opendatahub.io_modelcontrollers.yaml | 11 +++++++++ ...er.opendatahub.io_datascienceclusters.yaml | 11 +++++++++ ...asciencecluster_v1_datasciencecluster.yaml | 3 +++ controllers/components/kserve/kserve.go | 1 + .../modelcontroller/modelcontroller.go | 7 +++--- .../modelcontroller_actions.go | 14 ++++++++++- docs/api-overview.md | 23 +++++++++++++++++++ get_all_manifests.sh | 4 ++-- 17 files changed, 144 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index d9bb63b6f8b..9a0618e2dfc 100644 --- a/README.md +++ b/README.md @@ -315,6 +315,8 @@ spec: managementState: Managed kserve: managementState: Managed + nim: + managementState: Managed serving: ingressGateway: certificate: diff --git a/apis/components/v1alpha1/kserve_types.go b/apis/components/v1alpha1/kserve_types.go index 7d31b2e5192..7bcab993978 100644 --- a/apis/components/v1alpha1/kserve_types.go +++ b/apis/components/v1alpha1/kserve_types.go @@ -17,6 +17,7 @@ limitations under the License. package v1alpha1 import ( + operatorv1 "github.com/openshift/api/operator/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "github.com/opendatahub-io/opendatahub-operator/v2/apis/common" @@ -52,6 +53,15 @@ type KserveCommonSpec struct { // This field is optional. If no default deployment mode is specified, Kserve will use Serverless mode. // +kubebuilder:validation:Enum=Serverless;RawDeployment DefaultDeploymentMode DefaultDeploymentMode `json:"defaultDeploymentMode,omitempty"` + // Configures and enables NVIDIA NIM integration + NIM NimSpec `json:"nim,omitempty"` +} + +// nimSpec enables NVIDIA NIM integration +type NimSpec struct { + // +kubebuilder:validation:Enum=Managed;Removed + // +kubebuilder:default=Managed + ManagementState operatorv1.ManagementState `json:"managementState,omitempty"` } // NOTE: json tags are required. Any new fields you add must have json tags for the fields to be serialized. diff --git a/apis/components/v1alpha1/modelcontroller_types.go b/apis/components/v1alpha1/modelcontroller_types.go index 355d8ea4d60..43dc528ceca 100644 --- a/apis/components/v1alpha1/modelcontroller_types.go +++ b/apis/components/v1alpha1/modelcontroller_types.go @@ -58,6 +58,7 @@ type ModelControllerSpec struct { // a mini version of the DSCKserve only keep devflags and management spec type ModelControllerKerveSpec struct { ManagementState operatorv1.ManagementState `json:"managementState,omitempty"` + NIM NimSpec `json:"nim,omitempty"` common.DevFlagsSpec `json:",inline"` } diff --git a/apis/components/v1alpha1/zz_generated.deepcopy.go b/apis/components/v1alpha1/zz_generated.deepcopy.go index f7f0b3595c1..3fba7741aea 100644 --- a/apis/components/v1alpha1/zz_generated.deepcopy.go +++ b/apis/components/v1alpha1/zz_generated.deepcopy.go @@ -579,6 +579,7 @@ func (in *KserveCommonSpec) DeepCopyInto(out *KserveCommonSpec) { *out = *in in.DevFlagsSpec.DeepCopyInto(&out.DevFlagsSpec) out.Serving = in.Serving + out.NIM = in.NIM } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new KserveCommonSpec. @@ -792,6 +793,7 @@ func (in *ModelController) DeepCopyObject() runtime.Object { // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *ModelControllerKerveSpec) DeepCopyInto(out *ModelControllerKerveSpec) { *out = *in + out.NIM = in.NIM in.DevFlagsSpec.DeepCopyInto(&out.DevFlagsSpec) } @@ -1109,6 +1111,21 @@ func (in *ModelRegistryStatus) DeepCopy() *ModelRegistryStatus { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *NimSpec) DeepCopyInto(out *NimSpec) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NimSpec. +func (in *NimSpec) DeepCopy() *NimSpec { + if in == nil { + return nil + } + out := new(NimSpec) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *Ray) DeepCopyInto(out *Ray) { *out = *in diff --git a/bundle/manifests/components.platform.opendatahub.io_kserves.yaml b/bundle/manifests/components.platform.opendatahub.io_kserves.yaml index 412d9bb2aef..933b1021573 100644 --- a/bundle/manifests/components.platform.opendatahub.io_kserves.yaml +++ b/bundle/manifests/components.platform.opendatahub.io_kserves.yaml @@ -84,6 +84,17 @@ spec: type: object type: array type: object + nim: + description: Configures and enables NVIDIA NIM integration + properties: + managementState: + default: Managed + enum: + - Managed + - Removed + pattern: ^(Managed|Unmanaged|Force|Removed)$ + type: string + type: object serving: description: |- Serving configures the KNative-Serving stack used for model serving. A Service diff --git a/bundle/manifests/components.platform.opendatahub.io_modelcontrollers.yaml b/bundle/manifests/components.platform.opendatahub.io_modelcontrollers.yaml index 2ee5cb7113a..b640900f0f4 100644 --- a/bundle/manifests/components.platform.opendatahub.io_modelcontrollers.yaml +++ b/bundle/manifests/components.platform.opendatahub.io_modelcontrollers.yaml @@ -87,6 +87,17 @@ spec: managementState: pattern: ^(Managed|Unmanaged|Force|Removed)$ type: string + nim: + description: nimSpec enables NVIDIA NIM integration + properties: + managementState: + default: Managed + enum: + - Managed + - Removed + pattern: ^(Managed|Unmanaged|Force|Removed)$ + type: string + type: object type: object modelMeshServing: description: a mini version of the DSCModelMeshServing only keep devflags diff --git a/bundle/manifests/datasciencecluster.opendatahub.io_datascienceclusters.yaml b/bundle/manifests/datasciencecluster.opendatahub.io_datascienceclusters.yaml index 163acecf4bd..7a87c24d77b 100644 --- a/bundle/manifests/datasciencecluster.opendatahub.io_datascienceclusters.yaml +++ b/bundle/manifests/datasciencecluster.opendatahub.io_datascienceclusters.yaml @@ -245,6 +245,17 @@ spec: - Removed pattern: ^(Managed|Unmanaged|Force|Removed)$ type: string + nim: + description: Configures and enables NVIDIA NIM integration + properties: + managementState: + default: Managed + enum: + - Managed + - Removed + pattern: ^(Managed|Unmanaged|Force|Removed)$ + type: string + type: object serving: description: |- Serving configures the KNative-Serving stack used for model serving. A Service diff --git a/bundle/manifests/opendatahub-operator.clusterserviceversion.yaml b/bundle/manifests/opendatahub-operator.clusterserviceversion.yaml index 3e3ebc1d94a..d79bdba158d 100644 --- a/bundle/manifests/opendatahub-operator.clusterserviceversion.yaml +++ b/bundle/manifests/opendatahub-operator.clusterserviceversion.yaml @@ -30,6 +30,9 @@ metadata: }, "kserve": { "managementState": "Managed", + "nim": { + "managementState": "Managed" + }, "serving": { "ingressGateway": { "certificate": { diff --git a/config/crd/bases/components.platform.opendatahub.io_kserves.yaml b/config/crd/bases/components.platform.opendatahub.io_kserves.yaml index 448f09d0e80..36aea8ae156 100644 --- a/config/crd/bases/components.platform.opendatahub.io_kserves.yaml +++ b/config/crd/bases/components.platform.opendatahub.io_kserves.yaml @@ -84,6 +84,17 @@ spec: type: object type: array type: object + nim: + description: Configures and enables NVIDIA NIM integration + properties: + managementState: + default: Managed + enum: + - Managed + - Removed + pattern: ^(Managed|Unmanaged|Force|Removed)$ + type: string + type: object serving: description: |- Serving configures the KNative-Serving stack used for model serving. A Service diff --git a/config/crd/bases/components.platform.opendatahub.io_modelcontrollers.yaml b/config/crd/bases/components.platform.opendatahub.io_modelcontrollers.yaml index 7582a7748d9..def42d5027a 100644 --- a/config/crd/bases/components.platform.opendatahub.io_modelcontrollers.yaml +++ b/config/crd/bases/components.platform.opendatahub.io_modelcontrollers.yaml @@ -87,6 +87,17 @@ spec: managementState: pattern: ^(Managed|Unmanaged|Force|Removed)$ type: string + nim: + description: nimSpec enables NVIDIA NIM integration + properties: + managementState: + default: Managed + enum: + - Managed + - Removed + pattern: ^(Managed|Unmanaged|Force|Removed)$ + type: string + type: object type: object modelMeshServing: description: a mini version of the DSCModelMeshServing only keep devflags diff --git a/config/crd/bases/datasciencecluster.opendatahub.io_datascienceclusters.yaml b/config/crd/bases/datasciencecluster.opendatahub.io_datascienceclusters.yaml index d68ea742e3c..a59f48a50ef 100644 --- a/config/crd/bases/datasciencecluster.opendatahub.io_datascienceclusters.yaml +++ b/config/crd/bases/datasciencecluster.opendatahub.io_datascienceclusters.yaml @@ -245,6 +245,17 @@ spec: - Removed pattern: ^(Managed|Unmanaged|Force|Removed)$ type: string + nim: + description: Configures and enables NVIDIA NIM integration + properties: + managementState: + default: Managed + enum: + - Managed + - Removed + pattern: ^(Managed|Unmanaged|Force|Removed)$ + type: string + type: object serving: description: |- Serving configures the KNative-Serving stack used for model serving. A Service diff --git a/config/samples/datasciencecluster_v1_datasciencecluster.yaml b/config/samples/datasciencecluster_v1_datasciencecluster.yaml index 626fc5442d1..4dc9443c956 100644 --- a/config/samples/datasciencecluster_v1_datasciencecluster.yaml +++ b/config/samples/datasciencecluster_v1_datasciencecluster.yaml @@ -18,6 +18,9 @@ spec: managementState: "Managed" kserve: { managementState: "Managed", + nim: { + managementState: "Managed" + }, serving: { ingressGateway: { certificate: { diff --git a/controllers/components/kserve/kserve.go b/controllers/components/kserve/kserve.go index 628add1a52c..9e266df115c 100644 --- a/controllers/components/kserve/kserve.go +++ b/controllers/components/kserve/kserve.go @@ -31,6 +31,7 @@ func init() { //nolint:gochecknoinits cr.Add(&componentHandler{}) } +// Init for set images. func (s *componentHandler) Init(platform cluster.Platform) error { return nil } diff --git a/controllers/components/modelcontroller/modelcontroller.go b/controllers/components/modelcontroller/modelcontroller.go index 5fd3dbed319..58353f88510 100644 --- a/controllers/components/modelcontroller/modelcontroller.go +++ b/controllers/components/modelcontroller/modelcontroller.go @@ -19,6 +19,8 @@ const ( ComponentName = componentApi.ModelControllerComponentName ) +var DefaultPath = odhdeploy.DefaultManifestPath + "/" + ComponentName + "/base" + type componentHandler struct{} func init() { //nolint:gochecknoinits @@ -61,9 +63,6 @@ func (s *componentHandler) NewCRObject(dsc *dscv1.DataScienceCluster) client.Obj Annotations: mcAnnotations, }, Spec: componentApi.ModelControllerSpec{ - // ModelMeshServing: &componentsv1.DSCModelMeshServing { - // dsc.Spec.Components.ModelMeshServing, - // }, ModelMeshServing: &componentApi.ModelControllerMMSpec{ ManagementState: mState, DevFlagsSpec: dsc.Spec.Components.ModelMeshServing.DevFlagsSpec, @@ -71,6 +70,7 @@ func (s *componentHandler) NewCRObject(dsc *dscv1.DataScienceCluster) client.Obj Kserve: &componentApi.ModelControllerKerveSpec{ ManagementState: kState, DevFlagsSpec: dsc.Spec.Components.Kserve.DevFlagsSpec, + NIM: dsc.Spec.Components.Kserve.NIM, }, }, }) @@ -78,7 +78,6 @@ func (s *componentHandler) NewCRObject(dsc *dscv1.DataScienceCluster) client.Obj // Init for set images. func (s *componentHandler) Init(platform cluster.Platform) error { - DefaultPath := odhdeploy.DefaultManifestPath + "/" + ComponentName + "/base" var imageParamMap = map[string]string{ "odh-model-controller": "RELATED_IMAGE_ODH_MODEL_CONTROLLER_IMAGE", } diff --git a/controllers/components/modelcontroller/modelcontroller_actions.go b/controllers/components/modelcontroller/modelcontroller_actions.go index e2e81b2a130..341006acc49 100644 --- a/controllers/components/modelcontroller/modelcontroller_actions.go +++ b/controllers/components/modelcontroller/modelcontroller_actions.go @@ -32,7 +32,7 @@ import ( func initialize(ctx context.Context, rr *odhtypes.ReconciliationRequest) error { // early exist - _, ok := rr.Instance.(*componentApi.ModelController) + mc, ok := rr.Instance.(*componentApi.ModelController) if !ok { return fmt.Errorf("resource instance %v is not a componentApi.ModelController)", rr.Instance) } @@ -41,6 +41,18 @@ func initialize(ctx context.Context, rr *odhtypes.ReconciliationRequest) error { ContextDir: ComponentName, SourcePath: "base", }) + + nimState := operatorv1.Removed + if mc.Spec.Kserve.ManagementState == operatorv1.Managed { + nimState = mc.Spec.Kserve.NIM.ManagementState + } + extraParamsMap := map[string]string{ + "nim-state": strings.ToLower(string(nimState)), + } + if err := odhdeploy.ApplyParams(rr.Manifests[0].String(), nil, extraParamsMap); err != nil { + return fmt.Errorf("failed to update images on path %s: %w", rr.Manifests[0].String(), err) + } + return nil } diff --git a/docs/api-overview.md b/docs/api-overview.md index 740d8574206..65224e5a991 100644 --- a/docs/api-overview.md +++ b/docs/api-overview.md @@ -200,6 +200,7 @@ _Appears in:_ | `devFlags` _[DevFlags](#devflags)_ | Add developer fields | | | | `serving` _[ServingSpec](#servingspec)_ | Serving configures the KNative-Serving stack used for model serving. A Service
Mesh (Istio) is prerequisite, since it is used as networking layer. | | | | `defaultDeploymentMode` _[DefaultDeploymentMode](#defaultdeploymentmode)_ | Configures the default deployment mode for Kserve. This can be set to 'Serverless' or 'RawDeployment'.
The value specified in this field will be used to set the default deployment mode in the 'inferenceservice-config' configmap for Kserve.
This field is optional. If no default deployment mode is specified, Kserve will use Serverless mode. | | Enum: [Serverless RawDeployment]
Pattern: `^(Serverless\|RawDeployment)$`
| +| `nim` _[NimSpec](#nimspec)_ | Configures and enables NVIDIA NIM integration | | | #### DSCKueue @@ -585,6 +586,7 @@ _Appears in:_ | `devFlags` _[DevFlags](#devflags)_ | Add developer fields | | | | `serving` _[ServingSpec](#servingspec)_ | Serving configures the KNative-Serving stack used for model serving. A Service
Mesh (Istio) is prerequisite, since it is used as networking layer. | | | | `defaultDeploymentMode` _[DefaultDeploymentMode](#defaultdeploymentmode)_ | Configures the default deployment mode for Kserve. This can be set to 'Serverless' or 'RawDeployment'.
The value specified in this field will be used to set the default deployment mode in the 'inferenceservice-config' configmap for Kserve.
This field is optional. If no default deployment mode is specified, Kserve will use Serverless mode. | | Enum: [Serverless RawDeployment]
Pattern: `^(Serverless\|RawDeployment)$`
| +| `nim` _[NimSpec](#nimspec)_ | Configures and enables NVIDIA NIM integration | | | #### KserveList @@ -623,6 +625,7 @@ _Appears in:_ | `devFlags` _[DevFlags](#devflags)_ | Add developer fields | | | | `serving` _[ServingSpec](#servingspec)_ | Serving configures the KNative-Serving stack used for model serving. A Service
Mesh (Istio) is prerequisite, since it is used as networking layer. | | | | `defaultDeploymentMode` _[DefaultDeploymentMode](#defaultdeploymentmode)_ | Configures the default deployment mode for Kserve. This can be set to 'Serverless' or 'RawDeployment'.
The value specified in this field will be used to set the default deployment mode in the 'inferenceservice-config' configmap for Kserve.
This field is optional. If no default deployment mode is specified, Kserve will use Serverless mode. | | Enum: [Serverless RawDeployment]
Pattern: `^(Serverless\|RawDeployment)$`
| +| `nim` _[NimSpec](#nimspec)_ | Configures and enables NVIDIA NIM integration | | | #### KserveStatus @@ -772,6 +775,7 @@ _Appears in:_ | Field | Description | Default | Validation | | --- | --- | --- | --- | | `managementState` _[ManagementState](#managementstate)_ | | | | +| `nim` _[NimSpec](#nimspec)_ | | | | | `devFlags` _[DevFlags](#devflags)_ | Add developer fields | | | @@ -1037,6 +1041,25 @@ _Appears in:_ | `registriesNamespace` _string_ | | | | +#### NimSpec + + + +nimSpec enables NVIDIA NIM integration + + + +_Appears in:_ +- [DSCKserve](#dsckserve) +- [KserveCommonSpec](#kservecommonspec) +- [KserveSpec](#kservespec) +- [ModelControllerKerveSpec](#modelcontrollerkervespec) + +| Field | Description | Default | Validation | +| --- | --- | --- | --- | +| `managementState` _[ManagementState](#managementstate)_ | | Managed | Enum: [Managed Removed]
| + + #### Ray diff --git a/get_all_manifests.sh b/get_all_manifests.sh index 22ab52c5ac8..29187f07dcf 100755 --- a/get_all_manifests.sh +++ b/get_all_manifests.sh @@ -16,8 +16,8 @@ declare -A COMPONENT_MANIFESTS=( ["notebooks"]="opendatahub-io:notebooks:main:manifests:notebooks" ["trustyai"]="trustyai-explainability:trustyai-service-operator:main:config:trustyai-service-operator" ["model-mesh"]="opendatahub-io:modelmesh-serving:release-0.12.0-rc0:config:model-mesh" - ["odh-model-controller"]="opendatahub-io:odh-model-controller:release-0.12.0:config:odh-model-controller" - ["kserve"]="opendatahub-io:kserve:release-v0.12.1:config:kserve" + ["odh-model-controller"]="opendatahub-io:odh-model-controller:incubating:config:odh-model-controller" + ["kserve"]="opendatahub-io:kserve:release-v0.14:config:kserve" ["modelregistry"]="opendatahub-io:model-registry-operator:main:config:model-registry-operator" ["trainingoperator"]="opendatahub-io:training-operator:dev:manifests:trainingoperator" )