Skip to content

Commit

Permalink
Add e2e test for nodeclaim
Browse files Browse the repository at this point in the history
Signed-off-by: Heba Elayoty <hebaelayoty@gmail.com>
  • Loading branch information
helayoty committed Aug 17, 2024
1 parent daec24b commit e9e0004
Show file tree
Hide file tree
Showing 18 changed files with 509 additions and 310 deletions.
74 changes: 44 additions & 30 deletions .github/workflows/e2e-workflow.yml
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,8 @@ jobs:
environment: e2e-test
env:
GO_VERSION: "1.22"

KARPENTER_NAMESPACE: "karpenter"
GPU_PROVISIONER_NAMESPACE: "gpu-provisioner"
steps:
- name: Harden Runner
uses: step-security/harden-runner@5c7944e73c4c2a096b17a9cb74d65b6c2bbafbde # v2.9.1
Expand Down Expand Up @@ -146,20 +147,40 @@ jobs:
- name: create cluster
shell: bash
run: |
make create-aks-cluster
if [ "${{ inputs.suite }}" == "gpuprovisioner" ]; then
make create-aks-cluster
else
make create-aks-cluster-for-karpenter
fi
env:
AZURE_ACR_NAME: ${{ env.CLUSTER_NAME }}
AZURE_RESOURCE_GROUP: ${{ env.CLUSTER_NAME }}
AZURE_CLUSTER_NAME: ${{ env.CLUSTER_NAME }}
AZURE_LOCATION: ${{ inputs.region }}
AKS_K8S_VERSION: ${{ inputs.k8s_version }}

# Sign in to Azure using the E2E client, tenant, and subscription IDs taken
# from workflow secrets. The action is pinned to a full commit SHA (v2.1.1).
- name: Az login
uses: azure/login@6c251865b4e6290e7b78be643ea2d005bc51f69a # v2.1.1
with:
client-id: ${{ secrets.E2E_CLIENT_ID }}
tenant-id: ${{ secrets.E2E_TENANT_ID }}
subscription-id: ${{ secrets.E2E_SUBSCRIPTION_ID }}

# Run the `generate-identities` make target for the selected suite. The
# cluster name is passed as both the resource group and the cluster name,
# together with the suite name and subscription ID, via environment variables.
- name: Create Identities and Permissions for ${{ inputs.suite }}
shell: bash
run: |
make generate-identities
env:
AZURE_RESOURCE_GROUP: ${{ env.CLUSTER_NAME }}
AZURE_CLUSTER_NAME: ${{ env.CLUSTER_NAME }}
TEST_SUITE: ${{ inputs.suite }}
AZURE_SUBSCRIPTION_ID: ${{ secrets.E2E_SUBSCRIPTION_ID }}

- name: Install gpu-provisioner helm chart
if: ${{ inputs.suite == 'gpuprov' }}
if: ${{ inputs.suite == 'gpuprovisioner' }}
shell: bash
run: |
make gpu-provisioner-helm
kubectl wait --for=condition=available deploy "gpu-provisioner" -n gpu-provisioner --timeout=300s
env:
AZURE_RESOURCE_GROUP: ${{ env.CLUSTER_NAME }}
AZURE_CLUSTER_NAME: ${{ env.CLUSTER_NAME }}
Expand All @@ -172,45 +193,28 @@ jobs:
shell: bash
run: |
make azure-karpenter-helm
# taint nodes as karpenter-system
kubectl taint nodes CriticalAddonsOnly=true:NoSchedule --all
kubectl wait --for=condition=available deploy "karpenter" -n karpenter --timeout=300s
env:
AZURE_RESOURCE_GROUP: ${{ env.CLUSTER_NAME }}
AZURE_CLUSTER_NAME: ${{ env.CLUSTER_NAME }}
AZURE_TENANT_ID: ${{ secrets.E2E_TENANT_ID }}
AZURE_SUBSCRIPTION_ID: ${{ secrets.E2E_SUBSCRIPTION_ID }}
KARPENTER_VERSION: ${{ vars.KARPENTER_VERSION }}
KARPENTER_NAMESPACE: ${{ env.KARPENTER_NAMESPACE }}

- uses: azure/login@6c251865b4e6290e7b78be643ea2d005bc51f69a # v2.1.1
with:
client-id: ${{ secrets.E2E_CLIENT_ID }}
tenant-id: ${{ secrets.E2E_TENANT_ID }}
subscription-id: ${{ secrets.E2E_SUBSCRIPTION_ID }}

- name: Create Role Assignment
uses: azure/CLI@v2.0.0
with:
inlineScript: |
IDENTITY_PRINCIPAL_ID="$(az identity show --name ${{ inputs.suite }}Identity --resource-group ${{ env.CLUSTER_NAME }} --query 'principalId' -otsv)"
az role assignment create --assignee ${IDENTITY_PRINCIPAL_ID} --scope "/subscriptions/${{ secrets.E2E_SUBSCRIPTION_ID }}/resourceGroups/${{ env.CLUSTER_NAME }}" --role "Contributor"
if [ "${{ inputs.suite }}" == "azkarpenter" ]; then
fi
- name: Create Azure Federated Identity
uses: azure/CLI@v2.0.0
with:
inlineScript: |
AKS_OIDC_ISSUER="$(az aks show -n "${{ env.CLUSTER_NAME }}" -g "${{ env.CLUSTER_NAME }}" --query 'oidcIssuerProfile.issuerUrl' -otsv)"
if [ "${{ inputs.suite }}" == "gpuprov" ]; then
az identity federated-credential create --name ${{ inputs.suite }}-fed --identity-name ${{ inputs.suite }}Identity --resource-group "${{ env.CLUSTER_NAME }}" \
--issuer "${AKS_OIDC_ISSUER}" --subject system:serviceaccount:"gpu-provisioner:gpu-provisioner" --audience api://AzureADTokenExchange
fi
if [ "${{ inputs.suite }}" == "azkarpenter" ]; then
az identity federated-credential create --name ${{ inputs.suite }}-fed --identity-name ${{ inputs.suite }}Identity --resource-group "${{ env.CLUSTER_NAME }}" \
--issuer "${AKS_OIDC_ISSUER}" --subject system:serviceaccount:"karpenter:karpenter-sa" --audience api://AzureADTokenExchange
fi
- name: build KAITO image
if: ${{ !inputs.isRelease }}
shell: bash
run: |
make docker-build-kaito
env:
REGISTRY: ${{ env.REGISTRY }}
VERSION: ${{ env.VERSION }}

- name: Install KAITO Workspace helm chart
shell: bash
Expand All @@ -222,6 +226,7 @@ jobs:
AZURE_CLUSTER_NAME: ${{ env.CLUSTER_NAME }}
REGISTRY: ${{ env.REGISTRY }}
VERSION: ${{ env.VERSION }}
TEST_SUITE: ${{ inputs.suite }}

# Retrieve E2E ACR credentials and create Kubernetes secret
- name: Set up E2E ACR Credentials and Secret
Expand Down Expand Up @@ -251,6 +256,14 @@ jobs:
--docker-username=${{ secrets.E2E_ACR_AMRT_USERNAME }} \
--docker-password=${{ secrets.E2E_ACR_AMRT_PASSWORD }}
# Print controller logs for the provisioner under test: the gpu-provisioner
# pods when the suite is `gpuprovisioner`, the Karpenter pods for any other
# suite. Namespaces come from the job-level env variables.
# NOTE(review): `kubectl logs` with a label selector (-l) is documented to
# default to the last 10 lines per pod; add `--tail=-1` if full logs are wanted.
- name: Log ${{ inputs.suite }}
run: |
if [ "${{ inputs.suite }}" == "gpuprovisioner" ]; then
kubectl logs -n "${{ env.GPU_PROVISIONER_NAMESPACE }}" -l app.kubernetes.io/name=gpu-provisioner -c controller
else
kubectl logs -n "${{ env.KARPENTER_NAMESPACE }}" -l app.kubernetes.io/name=karpenter -c controller
fi
- name: Log kaito-workspace
run: |
kubectl get pods -n kaito-workspace -o name | grep "^pod/kaito-workspace" | sed 's/^pod\///' | xargs -I {} kubectl logs -n kaito-workspace {}
Expand All @@ -264,6 +277,7 @@ jobs:
REGISTRY: ${{ env.REGISTRY }}
AI_MODELS_REGISTRY: ${{ secrets.E2E_ACR_AMRT_USERNAME }}.azurecr.io
AI_MODELS_REGISTRY_SECRET: ${{ secrets.E2E_AMRT_SECRET_NAME }}
TEST_SUITE: ${{ inputs.suite }}
E2E_ACR_REGISTRY: ${{ env.CLUSTER_NAME }}.azurecr.io
E2E_ACR_REGISTRY_SECRET: ${{ env.CLUSTER_NAME }}-acr-secret

Expand Down
6 changes: 5 additions & 1 deletion .github/workflows/kaito-e2e.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
name: pr-e2e-test

# Group runs by workflow name plus the PR head ref; a newer run in the same
# group cancels the one still in progress. When `github.head_ref` is empty
# the group key falls back to the run id, giving that run a group of its own.
concurrency:
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
cancel-in-progress: true

on:
pull_request:
paths-ignore: ['docs/**', '**.md', '**.mdx', '**.png', '**.jpg']
Expand All @@ -16,7 +20,7 @@ jobs:
strategy:
fail-fast: false
matrix:
suite: [ gpuprov, azkarpenter ]
suite: [ gpuprovisioner, azkarpenter ]
permissions:
contents: read
id-token: write
Expand Down
108 changes: 38 additions & 70 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,12 @@ GOLANGCI_LINT_BIN := golangci-lint
GOLANGCI_LINT := $(abspath $(TOOLS_BIN_DIR)/$(GOLANGCI_LINT_BIN)-$(GOLANGCI_LINT_VER))

E2E_TEST_BIN := e2e.test
KARPENTER_E2E_TEST_BIN := karpenter-e2e.test
E2E_TEST := $(BIN_DIR)/$(E2E_TEST_BIN)
KARPENTER_E2E_TEST := $(BIN_DIR)/$(KARPENTER_E2E_TEST_BIN)

GINKGO_VER := v2.19.0
GINKGO_BIN := ginkgo
GINKGO := $(TOOLS_BIN_DIR)/$(GINKGO_BIN)-$(GINKGO_VER)
TEST_SUITE ?= gpuprovisioner

AZURE_SUBSCRIPTION_ID ?= $(AZURE_SUBSCRIPTION_ID)
AZURE_LOCATION ?= eastus
Expand All @@ -33,19 +32,18 @@ AZURE_CLUSTER_NAME ?= kaito-demo
AZURE_RESOURCE_GROUP_MC=MC_$(AZURE_RESOURCE_GROUP)_$(AZURE_CLUSTER_NAME)_$(AZURE_LOCATION)
GPU_PROVISIONER_NAMESPACE ?= gpu-provisioner
KAITO_NAMESPACE ?= kaito-workspace
GPU_PROVISIONER_MSI_NAME ?= gpuIdentity
GPU_PROVISIONER_MSI_NAME ?= gpuprovisionerIdentity

## Karpenter parameters
## Azure Karpenter parameters
KARPENTER_NAMESPACE ?= karpenter
KARPENTER_SERVICE_ACCOUNT_NAME ?= karpenter-sa
KARPENTER_VERSION ?= 0.4.0
AZURE_KARPENTER_USER_ASSIGNED_IDENTITY_NAME ?= karpenterIdentity
KARPENTER_FEDERATED_IDENTITY_CREDENTIAL_NAME ?= karpenter-fed
KARPENTER_SA_NAME ?= karpenter-sa
KARPENTER_VERSION ?= 0.5.1
AZURE_KARPENTER_MSI_NAME ?= azkarpenterIdentity

RUN_LLAMA_13B ?= false
AI_MODELS_REGISTRY ?= modelregistry.azurecr.io
AI_MODELS_REGISTRY_SECRET ?= modelregistry
SUPPORTED_MODELS_YAML_PATH ?= /home/runner/work/kaito/kaito/presets/models/supported_models.yaml
SUPPORTED_MODELS_YAML_PATH ?= $(abspath presets/models/supported_models.yaml)

# Scripts
GO_INSTALL := ./hack/go-install.sh
Expand Down Expand Up @@ -88,10 +86,6 @@ manifests: controller-gen ## Generate WebhookConfiguration, ClusterRole and Cust
generate: controller-gen ## Generate code containing DeepCopy, DeepCopyInto, and DeepCopyObject method implementations.
$(CONTROLLER_GEN) object:headerFile="hack/boilerplate.go.txt" paths="./..."

.PHONY: fmt
fmt: ## Run go fmt against code.
go fmt ./...

## --------------------------------------
## Unit Tests
## --------------------------------------
Expand Down Expand Up @@ -124,19 +118,11 @@ $(E2E_TEST):
.PHONY: kaito-workspace-e2e-test
kaito-workspace-e2e-test: $(E2E_TEST) $(GINKGO)
AI_MODELS_REGISTRY_SECRET=$(AI_MODELS_REGISTRY_SECRET) RUN_LLAMA_13B=$(RUN_LLAMA_13B) \
AI_MODELS_REGISTRY=$(AI_MODELS_REGISTRY) GPU_NAMESPACE=$(GPU_NAMESPACE) KAITO_NAMESPACE=$(KAITO_NAMESPACE) \
AI_MODELS_REGISTRY=$(AI_MODELS_REGISTRY) GPU_PROVISIONER_NAMESPACE=$(GPU_PROVISIONER_NAMESPACE) \
KARPENTER_NAMESPACE=$(KARPENTER_NAMESPACE) KAITO_NAMESPACE=$(KAITO_NAMESPACE) TEST_SUITE=$(TEST_SUITE) \
SUPPORTED_MODELS_YAML_PATH=$(SUPPORTED_MODELS_YAML_PATH) \
$(GINKGO) -v -trace $(GINKGO_ARGS) $(E2E_TEST)

$(KARPENTER_E2E_TEST):
(cd test/e2e/karpenter && go test -c . -o $(KARPENTER_E2E_TEST))

.PHONY: kaito-karpenter-e2e-test
kaito-karpenter-e2e-test: $(E2E_TEST) $(GINKGO)
AI_MODELS_REGISTRY_SECRET=$(AI_MODELS_REGISTRY_SECRET) RUN_LLAMA_13B=$(RUN_LLAMA_13B) \
AI_MODELS_REGISTRY=$(AI_MODELS_REGISTRY) KARPENTER=$(KARPENTER_NAMESPACE) KAITO_NAMESPACE=$(KAITO_NAMESPACE) \
$(GINKGO) -v -trace $(GINKGO_ARGS) $(KARPENTER_E2E_TEST)

## --------------------------------------
## Azure resources
## --------------------------------------
Expand Down Expand Up @@ -169,17 +155,15 @@ create-aks-cluster-with-kaito: ## Create test AKS cluster (with msi, oidc and ka
.PHONY: create-aks-cluster-for-karpenter
create-aks-cluster-for-karpenter: ## Create test AKS cluster (with msi, cilium, oidc, and workload identity enabled)
az aks create --name $(AZURE_CLUSTER_NAME) --resource-group $(AZURE_RESOURCE_GROUP) \
--location $(AZURE_LOCATION) --attach-acr $(AZURE_ACR_NAME) \
--kubernetes-version $(AKS_K8S_VERSION) --node-count 1 --generate-ssh-keys \
--location $(AZURE_LOCATION) --attach-acr $(AZURE_ACR_NAME) --node-vm-size "Standard_D2s_v3" \
--kubernetes-version $(AKS_K8S_VERSION) --node-count 3 --generate-ssh-keys \
--network-plugin azure --network-plugin-mode overlay --network-dataplane cilium \
--enable-managed-identity --enable-oidc-issuer --enable-workload-identity -o none
az aks get-credentials --name $(AZURE_CLUSTER_NAME) --resource-group $(AZURE_RESOURCE_GROUP) --overwrite-existing


## --------------------------------------
## Image Docker Build
## --------------------------------------

BUILDX_BUILDER_NAME ?= img-builder
OUTPUT_TYPE ?= type=registry
QEMU_VERSION ?= 7.2.0-1
Expand All @@ -202,6 +186,15 @@ docker-build-kaito: docker-buildx
--pull \
--tag $(REGISTRY)/$(IMG_NAME):$(IMG_TAG) .

# Build the image described by ./docker/adapter/Dockerfile with buildx for
# linux/$(ARCH) and tag it $(REGISTRY)/e2e-adapter:0.0.1. $(OUTPUT_TYPE) is
# passed to --output and decides where the result is written.
# NOTE(review): the tag is hard-coded instead of using a version variable —
# confirm that is intended.
.PHONY: docker-build-adapter
docker-build-adapter: docker-buildx
docker buildx build \
--file ./docker/adapter/Dockerfile \
--output=$(OUTPUT_TYPE) \
--platform="linux/$(ARCH)" \
--pull \
--tag $(REGISTRY)/e2e-adapter:0.0.1 .

.PHONY: docker-build-dataset
docker-build-dataset: docker-buildx
docker buildx build \
Expand Down Expand Up @@ -230,54 +223,40 @@ az-patch-install-helm: ## Update Azure client env vars and settings in helm valu

yq -i '(.image.repository) = "$(REGISTRY)/workspace"' ./charts/kaito/workspace/values.yaml
yq -i '(.image.tag) = "$(IMG_TAG)"' ./charts/kaito/workspace/values.yaml
if [ $(TEST_SUITE) = "azkarpenter" ]; then \
yq -i '(.featureGates.Karpenter) = "true"' ./charts/kaito/workspace/values.yaml; \
fi
yq -i '(.clusterName) = "$(AZURE_CLUSTER_NAME)"' ./charts/kaito/workspace/values.yaml

helm install kaito-workspace ./charts/kaito/workspace --namespace $(KAITO_NAMESPACE) --create-namespace

# Delegate identity creation to hack/deploy/generate-identities.sh, which
# receives four positional arguments: cluster name, resource group, test
# suite, and subscription ID.
# Declared phony (like the neighbouring targets) so a stray file named
# `generate-identities` can never make this target look up to date.
.PHONY: generate-identities
generate-identities: ## Create identities for the provisioner component.
	./hack/deploy/generate-identities.sh \
		$(AZURE_CLUSTER_NAME) $(AZURE_RESOURCE_GROUP) $(TEST_SUITE) $(AZURE_SUBSCRIPTION_ID)

## --------------------------------------
## gpu-provisioner installation
## --------------------------------------
gpu-provisioner-identity-perm: ## Create identity for gpu-provisioner
az identity create --name $(GPU_PROVISIONER_MSI_NAME) --resource-group $(AZURE_RESOURCE_GROUP)

IDENTITY_PRINCIPAL_ID=$(shell az identity show --name $(GPU_PROVISIONER_MSI_NAME) --resource-group $(AZURE_RESOURCE_GROUP) --subscription $(AZURE_SUBSCRIPTION_ID) --query 'principalId');\
az role assignment create --assignee $$IDENTITY_PRINCIPAL_ID --scope /subscriptions/$(AZURE_SUBSCRIPTION_ID)/resourceGroups/$(AZURE_RESOURCE_GROUP) --role "Contributor"

AKS_OIDC_ISSUER=$(shell az aks show -n "$(AZURE_CLUSTER_NAME)" -g "$(AZURE_RESOURCE_GROUP)" --subscription $(AZURE_SUBSCRIPTION_ID) --query "oidcIssuerProfile.issuerUrl");\
az identity federated-credential create --name gpu-federatecredential --identity-name $(GPU_PROVISIONER_MSI_NAME) --resource-group "$(AZURE_RESOURCE_GROUP)" --issuer $$AKS_OIDC_ISSUER \
--subject system:serviceaccount:"$(GPU_PROVISIONER_NAMESPACE):$(GPU_PROVISIONER_NAMESPACE)" --audience api://AzureADTokenExchange --subscription $(AZURE_SUBSCRIPTION_ID)

.PHONY: gpu-provisioner-helm
gpu-provisioner-helm: ## Update Azure client env vars and settings in helm values.yml
curl -sO https://raw.githubusercontent.com/Azure/gpu-provisioner/main/hack/deploy/configure-helm-values.sh
chmod +x ./configure-helm-values.sh && ./configure-helm-values.sh $(AZURE_CLUSTER_NAME) $(AZURE_RESOURCE_GROUP) $(GPU_PROVISIONER_MSI_NAME)
chmod +x ./configure-helm-values.sh && ./configure-helm-values.sh $(AZURE_CLUSTER_NAME) \
$(AZURE_RESOURCE_GROUP) $(GPU_PROVISIONER_MSI_NAME)

helm install $(GPU_PROVISIONER_NAMESPACE) --values gpu-provisioner-values.yaml --set settings.azure.clusterName=$(AZURE_CLUSTER_NAME) --wait \
helm install gpu-provisioner \
--values gpu-provisioner-values.yaml \
--set settings.azure.clusterName=$(AZURE_CLUSTER_NAME) \
https://github.com/Azure/gpu-provisioner/raw/gh-pages/charts/gpu-provisioner-$(GPU_PROVISIONER_VERSION).tgz

kubectl wait --for=condition=available deploy "gpu-provisioner" -n gpu-provisioner --timeout=300s
## --------------------------------------
## Azure Karpenter Installation
## --------------------------------------
karpenter-identity-perm:
az identity create --name $(AZURE_KARPENTER_USER_ASSIGNED_IDENTITY_NAME) --resource-group $(AZURE_RESOURCE_GROUP)

KARPENTER_USER_ASSIGNED_PRINCIPAL_ID=$(shell az identity show -n "$(AZURE_KARPENTER_USER_ASSIGNED_IDENTITY_NAME)" \
-g "$(AZURE_RESOURCE_GROUP)" --query 'principalId');\
az role assignment create --assignee $$KARPENTER_USER_ASSIGNED_PRINCIPAL_ID --scope "/subscriptions/$(AZURE_SUBSCRIPTION_ID)/resourceGroups/$(AZURE_RESOURCE_GROUP)" --role "Virtual Machine Contributor";\
az role assignment create --assignee $$KARPENTER_USER_ASSIGNED_PRINCIPAL_ID --scope "/subscriptions/$(AZURE_SUBSCRIPTION_ID)/resourceGroups/$(AZURE_RESOURCE_GROUP)" --role "Network Contributor";\
az role assignment create --assignee $$KARPENTER_USER_ASSIGNED_PRINCIPAL_ID --scope "/subscriptions/$(AZURE_SUBSCRIPTION_ID)/resourceGroups/$(AZURE_RESOURCE_GROUP)" --role "Managed Identity Operator"

AKS_OIDC_ISSUER=$(shell az aks show -n "$(AZURE_CLUSTER_NAME)" -g "$(AZURE_RESOURCE_GROUP)" --subscription $(AZURE_SUBSCRIPTION_ID) --query "oidcIssuerProfile.issuerUrl");\
az identity federated-credential create --name $(KARPENTER_FEDERATED_IDENTITY_CREDENTIAL_NAME) \
--identity-name $(AZURE_KARPENTER_USER_ASSIGNED_IDENTITY_NAME) \
--resource-group "$(AZURE_RESOURCE_GROUP)" --issuer $$AKS_OIDC_ISSUER \
--subject system:serviceaccount:"$(KARPENTER_NAMESPACE):$(KARPENTER_SERVICE_ACCOUNT_NAME)" \
--audience api://AzureADTokenExchange --subscription $(AZURE_SUBSCRIPTION_ID)

.PHONY: azure-karpenter-helm
azure-karpenter-helm: ## Update Azure client env vars and settings in helm values.yml
curl -sO https://raw.githubusercontent.com/Azure/karpenter-provider-azure/main/hack/deploy/configure-values.sh
chmod +x ./configure-values.sh && ./configure-values.sh $(AZURE_CLUSTER_NAME) $(AZURE_RESOURCE_GROUP) \
$(KARPENTER_SERVICE_ACCOUNT_NAME) $(AZURE_KARPENTER_USER_ASSIGNED_IDENTITY_NAME)
chmod +x ./configure-values.sh && ./configure-values.sh $(AZURE_CLUSTER_NAME) \
$(AZURE_RESOURCE_GROUP) $(KARPENTER_SA_NAME) $(AZURE_KARPENTER_MSI_NAME)

helm upgrade --install karpenter oci://mcr.microsoft.com/aks/karpenter/karpenter \
--version "$(KARPENTER_VERSION)" \
Expand All @@ -286,20 +265,9 @@ azure-karpenter-helm: ## Update Azure client env vars and settings in helm valu
--set controller.resources.requests.cpu=1 \
--set controller.resources.requests.memory=1Gi \
--set controller.resources.limits.cpu=1 \
--set controller.resources.limits.memory=1Gi \
--wait

kubectl logs -f -n "$(KARPENTER_NAMESPACE)" -l app.kubernetes.io/name=karpenter -c controller

##@ Development
.PHONY: manifests
manifests: controller-gen ## Generate WebhookConfiguration, ClusterRole and CustomResourceDefinition objects.
$(CONTROLLER_GEN) rbac:roleName=manager-role crd webhook paths="./..." output:crd:artifacts:config=config/crd/bases

.PHONY: generate
generate: controller-gen ## Generate code containing DeepCopy, DeepCopyInto, and DeepCopyObject method implementations.
$(CONTROLLER_GEN) object:headerFile="hack/boilerplate.go.txt" paths="./..."
--set controller.resources.limits.memory=1Gi

kubectl wait --for=condition=available deploy "karpenter" -n karpenter --timeout=300s

##@ Build
.PHONY: build
Expand Down
6 changes: 6 additions & 0 deletions charts/kaito/workspace/templates/clusterrole.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,12 @@ rules:
- apiGroups: ["karpenter.sh"]
resources: ["machines", "machines/status", "nodeclaims", "nodeclaims/status"]
verbs: ["get","list","watch","create", "delete", "update", "patch"]
# Full read/write access to the node class resources of the Azure
# (aksnodeclasses) and AWS (ec2nodeclasses) Karpenter API groups.
- apiGroups: ["karpenter.azure.com"]
  resources: ["aksnodeclasses"]
  verbs: ["get", "list", "watch", "create", "delete", "update", "patch"]
- apiGroups: ["karpenter.k8s.aws"]
  resources: ["ec2nodeclasses"]
  verbs: ["get", "list", "watch", "create", "delete", "update", "patch"]
- apiGroups: ["admissionregistration.k8s.io"]
resources: ["validatingwebhookconfigurations"]
verbs: ["get","list","watch"]
Expand Down
2 changes: 2 additions & 0 deletions charts/kaito/workspace/templates/deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@ spec:
value: {{ .Values.presetRegistryName }}
- name: CLOUD_PROVIDER
value: {{ .Values.cloudProviderName }}
# Expose the chart's `clusterName` value to the container. The value is
# piped through `quote` so an unset or all-digit cluster name still renders
# as a YAML string, which the env var `value` field requires.
- name: CLUSTER_NAME
  value: {{ .Values.clusterName | quote }}
ports:
- name: http-metrics
containerPort: 8080
Expand Down
4 changes: 0 additions & 4 deletions charts/kaito/workspace/templates/nvidia-device-plugin-ds.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,6 @@ spec:
operator: NotIn
values:
- virtual-kubelet
- key: karpenter.sh/provisioner-name
operator: Exists
- key: kaito.sh/machine-type
operator: Exists
tolerations:
# Allow this pod to be rescheduled while the node is in "critical add-ons only" mode.
# This, along with the annotation above marks this pod as a critical add-on.
Expand Down
Loading

0 comments on commit e9e0004

Please sign in to comment.