Skip to content

Commit

Permalink
Add e2e test for nodeclaim
Browse files Browse the repository at this point in the history
Signed-off-by: Heba Elayoty <hebaelayoty@gmail.com>
  • Loading branch information
helayoty committed May 9, 2024
1 parent db5cc07 commit 744e44f
Show file tree
Hide file tree
Showing 9 changed files with 230 additions and 109 deletions.
5 changes: 3 additions & 2 deletions .github/workflows/e2e-workflow.yml
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@ jobs:
AKS_K8S_VERSION: ${{ inputs.k8s_version }}

- name: Install gpu-provisioner helm chart
if: ${{ inputs.suite == 'gpuprov' }}
if: ${{ inputs.suite == 'gpuprovisioner' }}
shell: bash
run: |
make gpu-provisioner-helm
Expand Down Expand Up @@ -185,7 +185,7 @@ jobs:
inlineScript: |
AKS_OIDC_ISSUER="$(az aks show -n "${{ env.CLUSTER_NAME }}" -g "${{ env.CLUSTER_NAME }}" --query 'oidcIssuerProfile.issuerUrl' -otsv)"
if [ "${{ inputs.suite }}" == "gpuprov" ]; then
if [ "${{ inputs.suite }}" == "gpuprovisioner" ]; then
az identity federated-credential create --name ${{ inputs.suite }}-fed --identity-name ${{ inputs.suite }}Identity --resource-group "${{ env.CLUSTER_NAME }}" \
--issuer "${AKS_OIDC_ISSUER}" --subject system:serviceaccount:"gpu-provisioner:gpu-provisioner" --audience api://AzureADTokenExchange
fi
Expand Down Expand Up @@ -224,6 +224,7 @@ jobs:
RUN_LLAMA_13B: ${{ env.RUN_LLAMA_13B }}
AI_MODELS_REGISTRY: ${{ secrets.E2E_ACR_AMRT_USERNAME }}.azurecr.io
AI_MODELS_REGISTRY_SECRET: ${{ secrets.E2E_AMRT_SECRET_NAME }}
TEST_SUITE: ${{ inputs.suite }}

- name: Cleanup e2e resources
if: ${{ always() }}
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/kaito-e2e.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ jobs:
strategy:
fail-fast: false
matrix:
suite: [ gpuprov, azkarpenter ]
suite: [ gpuprovisioner, azkarpenter ]
permissions:
contents: read
id-token: write
Expand Down
25 changes: 6 additions & 19 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,7 @@ GOLANGCI_LINT_BIN := golangci-lint
GOLANGCI_LINT := $(abspath $(TOOLS_BIN_DIR)/$(GOLANGCI_LINT_BIN)-$(GOLANGCI_LINT_VER))

E2E_TEST_BIN := e2e.test
KARPENTER_E2E_TEST_BIN := karpenter-e2e.test
E2E_TEST := $(BIN_DIR)/$(E2E_TEST_BIN)
KARPENTER_E2E_TEST := $(BIN_DIR)/$(KARPENTER_E2E_TEST_BIN)

GINKGO_VER := v2.17.1
GINKGO_BIN := ginkgo
Expand All @@ -33,14 +31,14 @@ AZURE_CLUSTER_NAME ?= kaito-demo
AZURE_RESOURCE_GROUP_MC=MC_$(AZURE_RESOURCE_GROUP)_$(AZURE_CLUSTER_NAME)_$(AZURE_LOCATION)
GPU_PROVISIONER_NAMESPACE ?= gpu-provisioner
KAITO_NAMESPACE ?= kaito-workspace
GPU_PROVISIONER_MSI_NAME ?= gpuIdentity
GPU_PROVISIONER_MSI_NAME ?= gpuprovisionerIdentity

## Karpenter parameters
## Azure Karpenter parameters
KARPENTER_NAMESPACE ?= karpenter
KARPENTER_SERVICE_ACCOUNT_NAME ?= karpenter-sa
KARPENTER_VERSION ?= 0.4.0
AZURE_KARPENTER_USER_ASSIGNED_IDENTITY_NAME ?= karpenterIdentity
KARPENTER_FEDERATED_IDENTITY_CREDENTIAL_NAME ?= karpenter-fed
AZURE_KARPENTER_USER_ASSIGNED_IDENTITY_NAME ?= azkarpenterIdentity
KARPENTER_FEDERATED_IDENTITY_CREDENTIAL_NAME ?= azkarpenter-fed

RUN_LLAMA_13B ?= false
AI_MODELS_REGISTRY ?= modelregistry.azurecr.io
Expand Down Expand Up @@ -97,18 +95,10 @@ $(E2E_TEST):
# Run the compiled e2e test binary through ginkgo.
# The whole recipe is one shell command: the leading VAR=value words are
# passed to ginkgo as environment variables, which the Go suite reads with
# os.Getenv. TEST_SUITE selects which provisioner deployment the suite waits
# for; it has no default here and is expected from the caller's environment
# or the make command line.
.PHONY: kaito-workspace-e2e-test
kaito-workspace-e2e-test: $(E2E_TEST) $(GINKGO)
	AI_MODELS_REGISTRY_SECRET=$(AI_MODELS_REGISTRY_SECRET) RUN_LLAMA_13B=$(RUN_LLAMA_13B) \
	AI_MODELS_REGISTRY=$(AI_MODELS_REGISTRY) GPU_NAMESPACE=$(GPU_PROVISIONER_NAMESPACE) \
	KARPENTER_NAMESPACE=$(KARPENTER_NAMESPACE) KAITO_NAMESPACE=$(KAITO_NAMESPACE) TEST_SUITE=$(TEST_SUITE) \
	$(GINKGO) -v -trace $(GINKGO_ARGS) $(E2E_TEST)

$(KARPENTER_E2E_TEST):
(cd test/e2e/karpenter && go test -c . -o $(KARPENTER_E2E_TEST))

.PHONY: kaito-karpenter-e2e-test
kaito-karpenter-e2e-test: $(E2E_TEST) $(GINKGO)
AI_MODELS_REGISTRY_SECRET=$(AI_MODELS_REGISTRY_SECRET) RUN_LLAMA_13B=$(RUN_LLAMA_13B) \
AI_MODELS_REGISTRY=$(AI_MODELS_REGISTRY) KARPENTER=$(KARPENTER_NAMESPACE) KAITO_NAMESPACE=$(KAITO_NAMESPACE) \
$(GINKGO) -v -trace $(GINKGO_ARGS) $(KARPENTER_E2E_TEST)

## --------------------------------------
## Azure resources
## --------------------------------------
Expand Down Expand Up @@ -147,11 +137,9 @@ create-aks-cluster-for-karpenter: ## Create test AKS cluster (with msi, cilium,
--enable-managed-identity --enable-oidc-issuer --enable-workload-identity -o none
az aks get-credentials --name $(AZURE_CLUSTER_NAME) --resource-group $(AZURE_RESOURCE_GROUP) --overwrite-existing


## --------------------------------------
## Image Docker Build
## --------------------------------------

BUILDX_BUILDER_NAME ?= img-builder
OUTPUT_TYPE ?= type=registry
QEMU_VERSION ?= 5.2.0-2
Expand Down Expand Up @@ -263,7 +251,6 @@ manifests: controller-gen ## Generate WebhookConfiguration, ClusterRole and Cust
generate: controller-gen ## Generate code containing DeepCopy, DeepCopyInto, and DeepCopyObject method implementations.
$(CONTROLLER_GEN) object:headerFile="hack/boilerplate.go.txt" paths="./..."


##@ Build
.PHONY: build
build: manifests generate fmt vet ## Build manager binary.
Expand Down
2 changes: 1 addition & 1 deletion test/e2e/cluster.go → test/e2e/cluster/cluster.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT license.

package e2e
package cluster

import (
"github.com/aws/karpenter-core/pkg/apis/v1alpha5"
Expand Down
60 changes: 42 additions & 18 deletions test/e2e/e2e_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import (
"os"
"testing"

"github.com/azure/kaito/test/e2e/cluster"
"github.com/azure/kaito/test/e2e/utils"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
Expand All @@ -21,28 +22,51 @@ import (

var (
ctx = context.Background()
namespaceName = fmt.Sprint(E2eNamespace, rand.Intn(100))
namespaceName = fmt.Sprint(cluster.E2eNamespace, rand.Intn(100))
suiteTestName = os.Getenv("TEST_SUITE")
)

var _ = SynchronizedBeforeSuite(func() []byte {
GetClusterClient(TestingCluster)
gpuNamespace := os.Getenv("GPU_NAMESPACE")
cluster.GetClusterClient(cluster.TestingCluster)
kaitoNamespace := os.Getenv("KAITO_NAMESPACE")

//check gpu-provisioner deployment is up and running
gpuProvisionerDeployment := &v1.Deployment{
ObjectMeta: metav1.ObjectMeta{
Name: "gpu-provisioner",
Namespace: gpuNamespace,
},
// The e2e workflow matrix names the Azure Karpenter suite "azkarpenter" and
// forwards that value as TEST_SUITE, so that is the name matched here.
// "karpenter" is also accepted so a run that sets the shorter name by hand
// keeps working.
if suiteTestName == "azkarpenter" || suiteTestName == "karpenter" {
	// The namespace to look in is supplied by the KARPENTER_NAMESPACE
	// environment variable.
	karpenterNamespace := os.Getenv("KARPENTER_NAMESPACE")
	karpenterDeployment := &v1.Deployment{
		ObjectMeta: metav1.ObjectMeta{
			Name:      "karpenter",
			Namespace: karpenterNamespace,
		},
	}

	// Retry the Get until it succeeds or utils.PollTimeout elapses. This
	// confirms the Deployment object can be fetched; it does not inspect
	// replica readiness.
	Eventually(func() error {
		return cluster.TestingCluster.KubeClient.Get(ctx, client.ObjectKey{
			Namespace: karpenterDeployment.Namespace,
			Name:      karpenterDeployment.Name,
		}, karpenterDeployment, &client.GetOptions{})
	}, utils.PollTimeout, utils.PollInterval).
		Should(Succeed(), "Failed to wait for karpenter deployment")
}

Eventually(func() error {
return TestingCluster.KubeClient.Get(ctx, client.ObjectKey{
Namespace: gpuProvisionerDeployment.Namespace,
Name: gpuProvisionerDeployment.Name,
}, gpuProvisionerDeployment, &client.GetOptions{})
}, utils.PollTimeout, utils.PollInterval).Should(Succeed(), "Failed to wait for gpu-provisioner deployment")
// Only look for the gpu-provisioner Deployment when TEST_SUITE selects the
// gpuprovisioner suite.
if suiteTestName == "gpuprovisioner" {
// The namespace to look in is supplied by the GPU_NAMESPACE environment variable.
gpuNamespace := os.Getenv("GPU_NAMESPACE")
// Describe the gpu-provisioner Deployment that the Get below fetches.
gpuProvisionerDeployment := &v1.Deployment{
ObjectMeta: metav1.ObjectMeta{
Name: "gpu-provisioner",
Namespace: gpuNamespace,
},
}

// Retry the Get until it succeeds or utils.PollTimeout elapses. This confirms
// the Deployment object can be fetched; it does not inspect replica readiness.
Eventually(func() error {
return cluster.TestingCluster.KubeClient.Get(ctx, client.ObjectKey{
Namespace: gpuProvisionerDeployment.Namespace,
Name: gpuProvisionerDeployment.Name,
}, gpuProvisionerDeployment, &client.GetOptions{})
}, utils.PollTimeout, utils.PollInterval).
Should(Succeed(), "Failed to wait for gpu-provisioner deployment")
}

//check kaito-workspace deployment is up and running
kaitoWorkspaceDeployment := &v1.Deployment{
Expand All @@ -53,14 +77,14 @@ var _ = SynchronizedBeforeSuite(func() []byte {
}

Eventually(func() error {
return TestingCluster.KubeClient.Get(ctx, client.ObjectKey{
return cluster.TestingCluster.KubeClient.Get(ctx, client.ObjectKey{
Namespace: kaitoWorkspaceDeployment.Namespace,
Name: kaitoWorkspaceDeployment.Name,
}, kaitoWorkspaceDeployment, &client.GetOptions{})
}, utils.PollTimeout, utils.PollInterval).Should(Succeed(), "Failed to wait for kaito-workspace deployment")

// create testing namespace
err := TestingCluster.KubeClient.Create(context.TODO(), &corev1.Namespace{
err := cluster.TestingCluster.KubeClient.Create(context.TODO(), &corev1.Namespace{
ObjectMeta: metav1.ObjectMeta{
Name: namespaceName,
},
Expand All @@ -73,7 +97,7 @@ var _ = SynchronizedBeforeSuite(func() []byte {
var _ = SynchronizedAfterSuite(func() {
// delete testing namespace
Eventually(func() error {
return TestingCluster.KubeClient.Delete(ctx, &corev1.Namespace{
return cluster.TestingCluster.KubeClient.Delete(ctx, &corev1.Namespace{
ObjectMeta: metav1.ObjectMeta{
Name: namespaceName,
},
Expand Down
Loading

0 comments on commit 744e44f

Please sign in to comment.