Skip to content

Commit

Permalink
Add e2e test for nodeclaim
Browse files (browse the repository at this point in the history)
Signed-off-by: Heba Elayoty <hebaelayoty@gmail.com>
  • Loading branch information
helayoty committed Aug 19, 2024
1 parent daec24b commit 2387b13
Show file tree
Hide file tree
Showing 20 changed files with 404 additions and 263 deletions.
75 changes: 44 additions & 31 deletions .github/workflows/e2e-workflow.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@ on:
default: "eastus"
k8s_version:
type: string
default: "1.29.2"
secrets:
E2E_CLIENT_ID:
required: true
Expand All @@ -47,7 +46,8 @@ jobs:
environment: e2e-test
env:
GO_VERSION: "1.22"

KARPENTER_NAMESPACE: "karpenter"
GPU_PROVISIONER_NAMESPACE: "gpu-provisioner"
steps:
- name: Harden Runner
uses: step-security/harden-runner@5c7944e73c4c2a096b17a9cb74d65b6c2bbafbde # v2.9.1
Expand Down Expand Up @@ -146,20 +146,40 @@ jobs:
- name: create cluster
shell: bash
run: |
make create-aks-cluster
if [ "${{ inputs.suite }}" == "gpuprovisioner" ]; then
make create-aks-cluster
else
make create-aks-cluster-for-karpenter
fi
env:
AZURE_ACR_NAME: ${{ env.CLUSTER_NAME }}
AZURE_RESOURCE_GROUP: ${{ env.CLUSTER_NAME }}
AZURE_CLUSTER_NAME: ${{ env.CLUSTER_NAME }}
AZURE_LOCATION: ${{ inputs.region }}
AKS_K8S_VERSION: ${{ inputs.k8s_version }}

- name: Az login
uses: azure/login@6c251865b4e6290e7b78be643ea2d005bc51f69a # v2.1.1
with:
client-id: ${{ secrets.E2E_CLIENT_ID }}
tenant-id: ${{ secrets.E2E_TENANT_ID }}
subscription-id: ${{ secrets.E2E_SUBSCRIPTION_ID }}

- name: Create Identities and Permissions for ${{ inputs.suite }}
shell: bash
run: |
make generate-identities
env:
AZURE_RESOURCE_GROUP: ${{ env.CLUSTER_NAME }}
AZURE_CLUSTER_NAME: ${{ env.CLUSTER_NAME }}
TEST_SUITE: ${{ inputs.suite }}
AZURE_SUBSCRIPTION_ID: ${{ secrets.E2E_SUBSCRIPTION_ID }}

- name: Install gpu-provisioner helm chart
if: ${{ inputs.suite == 'gpuprov' }}
if: ${{ inputs.suite == 'gpuprovisioner' }}
shell: bash
run: |
make gpu-provisioner-helm
kubectl wait --for=condition=available deploy "gpu-provisioner" -n gpu-provisioner --timeout=300s
env:
AZURE_RESOURCE_GROUP: ${{ env.CLUSTER_NAME }}
AZURE_CLUSTER_NAME: ${{ env.CLUSTER_NAME }}
Expand All @@ -172,45 +192,28 @@ jobs:
shell: bash
run: |
make azure-karpenter-helm
# Taint all existing nodes with CriticalAddonsOnly=true:NoSchedule so only pods that tolerate the taint are scheduled on them
kubectl taint nodes CriticalAddonsOnly=true:NoSchedule --all
kubectl wait --for=condition=available deploy "karpenter" -n karpenter --timeout=300s
env:
AZURE_RESOURCE_GROUP: ${{ env.CLUSTER_NAME }}
AZURE_CLUSTER_NAME: ${{ env.CLUSTER_NAME }}
AZURE_TENANT_ID: ${{ secrets.E2E_TENANT_ID }}
AZURE_SUBSCRIPTION_ID: ${{ secrets.E2E_SUBSCRIPTION_ID }}
KARPENTER_VERSION: ${{ vars.KARPENTER_VERSION }}
KARPENTER_NAMESPACE: ${{ env.KARPENTER_NAMESPACE }}

- uses: azure/login@6c251865b4e6290e7b78be643ea2d005bc51f69a # v2.1.1
with:
client-id: ${{ secrets.E2E_CLIENT_ID }}
tenant-id: ${{ secrets.E2E_TENANT_ID }}
subscription-id: ${{ secrets.E2E_SUBSCRIPTION_ID }}

- name: Create Role Assignment
uses: azure/CLI@v2.0.0
with:
inlineScript: |
IDENTITY_PRINCIPAL_ID="$(az identity show --name ${{ inputs.suite }}Identity --resource-group ${{ env.CLUSTER_NAME }} --query 'principalId' -otsv)"
az role assignment create --assignee ${IDENTITY_PRINCIPAL_ID} --scope "/subscriptions/${{ secrets.E2E_SUBSCRIPTION_ID }}/resourceGroups/${{ env.CLUSTER_NAME }}" --role "Contributor"
if [ "${{ inputs.suite }}" == "azkarpenter" ]; then
fi
- name: Create Azure Federated Identity
uses: azure/CLI@v2.0.0
with:
inlineScript: |
AKS_OIDC_ISSUER="$(az aks show -n "${{ env.CLUSTER_NAME }}" -g "${{ env.CLUSTER_NAME }}" --query 'oidcIssuerProfile.issuerUrl' -otsv)"
if [ "${{ inputs.suite }}" == "gpuprov" ]; then
az identity federated-credential create --name ${{ inputs.suite }}-fed --identity-name ${{ inputs.suite }}Identity --resource-group "${{ env.CLUSTER_NAME }}" \
--issuer "${AKS_OIDC_ISSUER}" --subject system:serviceaccount:"gpu-provisioner:gpu-provisioner" --audience api://AzureADTokenExchange
fi
if [ "${{ inputs.suite }}" == "azkarpenter" ]; then
az identity federated-credential create --name ${{ inputs.suite }}-fed --identity-name ${{ inputs.suite }}Identity --resource-group "${{ env.CLUSTER_NAME }}" \
--issuer "${AKS_OIDC_ISSUER}" --subject system:serviceaccount:"karpenter:karpenter-sa" --audience api://AzureADTokenExchange
fi
- name: build KAITO image
if: ${{ !inputs.isRelease }}
shell: bash
run: |
make docker-build-kaito
env:
REGISTRY: ${{ env.REGISTRY }}
VERSION: ${{ env.VERSION }}

- name: Install KAITO Workspace helm chart
shell: bash
Expand All @@ -222,6 +225,7 @@ jobs:
AZURE_CLUSTER_NAME: ${{ env.CLUSTER_NAME }}
REGISTRY: ${{ env.REGISTRY }}
VERSION: ${{ env.VERSION }}
TEST_SUITE: ${{ inputs.suite }}

# Retrieve E2E ACR credentials and create Kubernetes secret
- name: Set up E2E ACR Credentials and Secret
Expand Down Expand Up @@ -251,6 +255,14 @@ jobs:
--docker-username=${{ secrets.E2E_ACR_AMRT_USERNAME }} \
--docker-password=${{ secrets.E2E_ACR_AMRT_PASSWORD }}
- name: Log ${{ inputs.suite }}
run: |
if [ "${{ inputs.suite }}" == "gpuprovisioner" ]; then
kubectl logs -n "${{ env.GPU_PROVISIONER_NAMESPACE }}" -l app.kubernetes.io/name=gpu-provisioner -c controller
else
kubectl logs -n "${{ env.KARPENTER_NAMESPACE }}" -l app.kubernetes.io/name=karpenter -c controller
fi
- name: Log kaito-workspace
run: |
kubectl get pods -n kaito-workspace -o name | grep "^pod/kaito-workspace" | sed 's/^pod\///' | xargs -I {} kubectl logs -n kaito-workspace {}
Expand All @@ -264,6 +276,7 @@ jobs:
REGISTRY: ${{ env.REGISTRY }}
AI_MODELS_REGISTRY: ${{ secrets.E2E_ACR_AMRT_USERNAME }}.azurecr.io
AI_MODELS_REGISTRY_SECRET: ${{ secrets.E2E_AMRT_SECRET_NAME }}
TEST_SUITE: ${{ inputs.suite }}
E2E_ACR_REGISTRY: ${{ env.CLUSTER_NAME }}.azurecr.io
E2E_ACR_REGISTRY_SECRET: ${{ env.CLUSTER_NAME }}-acr-secret

Expand Down
7 changes: 6 additions & 1 deletion .github/workflows/kaito-e2e.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
name: pr-e2e-test

concurrency:
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
cancel-in-progress: true

on:
pull_request:
paths-ignore: ['docs/**', '**.md', '**.mdx', '**.png', '**.jpg']
Expand All @@ -16,14 +20,15 @@ jobs:
strategy:
fail-fast: false
matrix:
suite: [ gpuprov, azkarpenter ]
suite: [ gpuprovisioner, azkarpenter ]
permissions:
contents: read
id-token: write
statuses: write
uses: ./.github/workflows/e2e-workflow.yml
with:
git_sha: ${{ github.event.pull_request.head.sha }}
k8s_version: ${{ vars.AKS_K8S_VERSION }}
suite: ${{ matrix.suite }}
secrets:
E2E_CLIENT_ID: ${{ secrets.AZURE_CLIENT_ID }}
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/publish-gh-image.yml
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,7 @@ jobs:
git_sha: ${{ github.sha }}
isRelease: true
registry: ${{ needs.build-scan-publish-gh-images.outputs.registry_repository }}
k8s_version: ${{ vars.AKS_K8S_VERSION }}
tag: ${{ needs.check-tag.outputs.tag }}
secrets:
E2E_CLIENT_ID: ${{ secrets.AZURE_CLIENT_ID }}
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/publish-mcr-image.yml
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ jobs:
git_sha: ${{ github.sha }}
isRelease: true
registry: "mcr.microsoft.com/aks/kaito"
k8s_version: ${{ vars.AKS_K8S_VERSION }}
tag: ${{ github.event.client_payload.tag }}
secrets:
E2E_CLIENT_ID: ${{ secrets.AZURE_CLIENT_ID }}
Expand Down
Loading

0 comments on commit 2387b13

Please sign in to comment.