-
Notifications
You must be signed in to change notification settings - Fork 59
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Signed-off-by: Bangqi Zhu <bangqizhu@microsoft.com>
- Loading branch information
Bangqi Zhu
committed
Jan 16, 2025
1 parent
3755bab
commit ae39bf6
Showing
4 changed files
with
350 additions
and
6 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,285 @@ | ||
name: ragengine-e2e-workflow | ||
|
||
on: | ||
workflow_call: | ||
inputs: | ||
git_sha: | ||
type: string | ||
required: true | ||
node_provisioner: | ||
type: string | ||
required: false | ||
default: gpuprovisioner | ||
tag: | ||
type: string | ||
isRelease: | ||
type: boolean | ||
default: false | ||
registry: | ||
type: string | ||
region: | ||
type: string | ||
description: "the azure location to run the e2e test in" | ||
default: "eastus" | ||
k8s_version: | ||
type: string | ||
default: "1.30.0" | ||
|
||
jobs: | ||
e2e-tests: | ||
runs-on: [ "self-hosted", "hostname:kaito-e2e-github-runner" ] | ||
name: e2e-tests-${{ inputs.node_provisioner }} | ||
permissions: | ||
contents: read | ||
id-token: write # This is required for requesting the JWT | ||
environment: e2e-test | ||
env: | ||
GO_VERSION: "1.22" | ||
KARPENTER_NAMESPACE: "karpenter" | ||
GPU_PROVISIONER_NAMESPACE: "gpu-provisioner" | ||
|
||
steps: | ||
- name: Harden Runner | ||
uses: step-security/harden-runner@0080882f6c36860b6ba35c610c98ce87d4e2f26f # v2.10.2 | ||
with: | ||
egress-policy: audit | ||
|
||
- name: Checkout | ||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 | ||
with: | ||
ref: ${{ inputs.git_sha }} | ||
|
||
- name: Set e2e Resource and Cluster Name | ||
run: | | ||
rand=$(git rev-parse --short ${{ inputs.git_sha }}) | ||
if [ "$rand" = "" ]; then | ||
rand=$RANDOM | ||
fi | ||
echo "VERSION=${rand}" >> $GITHUB_ENV | ||
echo "CLUSTER_NAME=${{ inputs.node_provisioner }}${rand}rag" >> $GITHUB_ENV | ||
echo "REGISTRY=${{ inputs.node_provisioner }}${rand}rag.azurecr.io" >> $GITHUB_ENV | ||
echo "RUN_LLAMA_13B=false" >> $GITHUB_ENV | ||
- name: Set Registry | ||
if: ${{ inputs.isRelease }} | ||
run: | | ||
echo "REGISTRY=${{ inputs.registry }}" >> $GITHUB_ENV | ||
echo "VERSION=$(echo ${{ inputs.tag }} | tr -d v)" >> $GITHUB_ENV | ||
- name: Remove existing Go modules directory | ||
run: sudo rm -rf ~/go/pkg/mod | ||
|
||
- name: Set up Go ${{ env.GO_VERSION }} | ||
uses: actions/setup-go@v5.2.0 | ||
with: | ||
go-version: ${{ env.GO_VERSION }} | ||
|
||
- name: Install Azure CLI latest | ||
run: | | ||
if ! which az > /dev/null; then | ||
echo "Azure CLI not found. Installing..." | ||
curl -sL https://aka.ms/InstallAzureCLIDeb | sudo bash | ||
else | ||
echo "Azure CLI already installed." | ||
fi | ||
- name: Azure CLI Login | ||
run: | | ||
az login --identity | ||
- uses: azure/setup-helm@v4 | ||
id: install | ||
|
||
- name: Create Resource Group | ||
shell: bash | ||
run: | | ||
make create-rg | ||
env: | ||
AZURE_RESOURCE_GROUP: ${{ env.CLUSTER_NAME }} | ||
|
||
- name: Create ACR | ||
shell: bash | ||
run: | | ||
make create-acr | ||
env: | ||
AZURE_RESOURCE_GROUP: ${{ env.CLUSTER_NAME }} | ||
AZURE_ACR_NAME: ${{ env.CLUSTER_NAME }} | ||
|
||
- name: Create Azure Identity | ||
uses: azure/CLI@v2.1.0 | ||
with: | ||
inlineScript: | | ||
az identity create --name ${{ inputs.node_provisioner }}Identity --resource-group ${{ env.CLUSTER_NAME }} | ||
- name: Generate APIs | ||
run: | | ||
make generate | ||
- name: build KAITO image | ||
if: ${{ !inputs.isRelease }} | ||
shell: bash | ||
run: | | ||
make docker-build-workspace | ||
env: | ||
REGISTRY: ${{ env.REGISTRY }} | ||
VERSION: ${{ env.VERSION }} | ||
|
||
- name: build kaito RAG Engine image | ||
if: ${{ !inputs.isRelease }} | ||
shell: bash | ||
run: | | ||
make docker-build-ragengine | ||
env: | ||
REGISTRY: ${{ env.REGISTRY }} | ||
VERSION: ${{ env.VERSION }} | ||
|
||
|
||
|
||
- name: create cluster | ||
shell: bash | ||
run: | | ||
if [ "${{ inputs.node_provisioner }}" == "gpuprovisioner" ]; then | ||
make create-aks-cluster | ||
else | ||
make create-aks-cluster-for-karpenter | ||
fi | ||
env: | ||
AZURE_ACR_NAME: ${{ env.CLUSTER_NAME }} | ||
AZURE_RESOURCE_GROUP: ${{ env.CLUSTER_NAME }} | ||
AZURE_CLUSTER_NAME: ${{ env.CLUSTER_NAME }} | ||
AZURE_LOCATION: ${{ inputs.region }} | ||
AKS_K8S_VERSION: ${{ inputs.k8s_version }} | ||
|
||
- name: Create Identities and Permissions for ${{ inputs.node_provisioner }} | ||
shell: bash | ||
run: | | ||
AZURE_SUBSCRIPTION_ID=$E2E_SUBSCRIPTION_ID \ | ||
make generate-identities | ||
env: | ||
AZURE_RESOURCE_GROUP: ${{ env.CLUSTER_NAME }} | ||
AZURE_CLUSTER_NAME: ${{ env.CLUSTER_NAME }} | ||
TEST_SUITE: ${{ inputs.node_provisioner }} | ||
|
||
- name: Install gpu-provisioner helm chart | ||
if: ${{ inputs.node_provisioner == 'gpuprovisioner' }} | ||
shell: bash | ||
run: | | ||
AZURE_TENANT_ID=$E2E_TENANT_ID \ | ||
AZURE_SUBSCRIPTION_ID=$E2E_SUBSCRIPTION_ID \ | ||
make gpu-provisioner-helm | ||
env: | ||
AZURE_RESOURCE_GROUP: ${{ env.CLUSTER_NAME }} | ||
AZURE_CLUSTER_NAME: ${{ env.CLUSTER_NAME }} | ||
|
||
- name: Install karpenter Azure provider helm chart | ||
if: ${{ inputs.node_provisioner == 'azkarpenter' }} | ||
shell: bash | ||
run: | | ||
AZURE_TENANT_ID=$E2E_TENANT_ID \ | ||
AZURE_SUBSCRIPTION_ID=$E2E_SUBSCRIPTION_ID \ | ||
make azure-karpenter-helm | ||
env: | ||
AZURE_RESOURCE_GROUP: ${{ env.CLUSTER_NAME }} | ||
AZURE_CLUSTER_NAME: ${{ env.CLUSTER_NAME }} | ||
KARPENTER_VERSION: ${{ vars.KARPENTER_VERSION }} | ||
KARPENTER_NAMESPACE: ${{ env.KARPENTER_NAMESPACE }} | ||
|
||
# This 600s is only for testing when done, change it back to 300 | ||
- name: Install KAITO Workspace helm chart | ||
shell: bash | ||
run: | | ||
make az-patch-install-helm | ||
kubectl wait --for=condition=available deploy "kaito-workspace" -n kaito-workspace --timeout=600s | ||
env: | ||
AZURE_RESOURCE_GROUP: ${{ env.CLUSTER_NAME }} | ||
AZURE_CLUSTER_NAME: ${{ env.CLUSTER_NAME }} | ||
REGISTRY: ${{ env.REGISTRY }} | ||
VERSION: ${{ env.VERSION }} | ||
TEST_SUITE: ${{ inputs.node_provisioner }} | ||
|
||
- name: Install KAITO RAG Engine helm chart | ||
shell: bash | ||
run: | | ||
make az-patch-install-ragengine-helm | ||
kubectl wait --for=condition=available deploy "kaito-ragengine" -n kaito-ragengine --timeout=300s | ||
env: | ||
AZURE_RESOURCE_GROUP: ${{ env.CLUSTER_NAME }} | ||
AZURE_CLUSTER_NAME: ${{ env.CLUSTER_NAME }} | ||
REGISTRY: ${{ env.REGISTRY }} | ||
VERSION: ${{ env.VERSION }} | ||
TEST_SUITE: ${{ inputs.node_provisioner }} | ||
|
||
# Retrieve E2E ACR credentials and create Kubernetes secret | ||
- name: Set up E2E ACR Credentials and Secret | ||
shell: bash | ||
run: | | ||
# Retrieve the ACR username and password | ||
ACR_USERNAME=$(az acr credential show --name ${{ env.CLUSTER_NAME }} --resource-group ${{ env.CLUSTER_NAME }} --query "username" -o tsv) | ||
ACR_PASSWORD=$(az acr credential show --name ${{ env.CLUSTER_NAME }} --resource-group ${{ env.CLUSTER_NAME }} --query "passwords[0].value" -o tsv) | ||
# Ensure credentials were retrieved successfully | ||
if [ -z "$ACR_USERNAME" ] || [ -z "$ACR_PASSWORD" ]; then | ||
echo "Failed to retrieve ACR credentials" | ||
exit 1 | ||
fi | ||
# Create the Kubernetes secret with the retrieved credentials | ||
kubectl create secret docker-registry ${{ env.CLUSTER_NAME }}-acr-secret \ | ||
--docker-server=${{ env.CLUSTER_NAME }}.azurecr.io \ | ||
--docker-username=${ACR_USERNAME} \ | ||
--docker-password=${ACR_PASSWORD} | ||
# Add Private-Hosted ACR secret for private models like llama | ||
- name: Add Private-Hosted ACR Secret Credentials | ||
run: | | ||
# Ensure E2E_AMRT_SECRET_NAME is sanitized to remove any accidental quotes | ||
E2E_AMRT_SECRET_NAME=$(echo "$E2E_AMRT_SECRET_NAME" | sed 's/[\"'\'']//g') | ||
if kubectl get secret "$E2E_AMRT_SECRET_NAME" >/dev/null 2>&1; then | ||
echo "Secret $E2E_AMRT_SECRET_NAME already exists. Skipping creation." | ||
else | ||
kubectl create secret docker-registry "$E2E_AMRT_SECRET_NAME" \ | ||
--docker-server="$E2E_ACR_AMRT_USERNAME.azurecr.io" \ | ||
--docker-username="$E2E_ACR_AMRT_USERNAME" \ | ||
--docker-password="$E2E_ACR_AMRT_PASSWORD" | ||
echo "Secret $E2E_AMRT_SECRET_NAME created successfully." | ||
fi | ||
- name: Log ${{ inputs.node_provisioner }} | ||
run: | | ||
if [ "${{ inputs.node_provisioner }}" == "gpuprovisioner" ]; then | ||
kubectl logs -n "${{ env.GPU_PROVISIONER_NAMESPACE }}" -l app.kubernetes.io/name=gpu-provisioner -c controller | ||
else | ||
kubectl logs -n "${{ env.KARPENTER_NAMESPACE }}" -l app.kubernetes.io/name=karpenter -c controller | ||
fi | ||
- name: Log kaito-workspace | ||
run: | | ||
kubectl get pods -n kaito-workspace -o name | grep "^pod/kaito-workspace" | sed 's/^pod\///' | xargs -I {} kubectl logs -n kaito-workspace {} | ||
- name: Log kaito-ragengine | ||
run: | | ||
kubectl get pods -n kaito-ragengine -o name | grep "^pod/kaito-ragengine" | sed 's/^pod\///' | xargs -I {} kubectl logs -n kaito-ragengine {} | ||
- name: Run e2e test | ||
run: | | ||
AI_MODELS_REGISTRY=$E2E_ACR_AMRT_USERNAME.azurecr.io \ | ||
AI_MODELS_REGISTRY_SECRET=$E2E_AMRT_SECRET_NAME \ | ||
make kaito-ragengine-e2e-test | ||
env: | ||
AZURE_CLUSTER_NAME: ${{ env.CLUSTER_NAME }} | ||
RUN_LLAMA_13B: ${{ env.RUN_LLAMA_13B }} | ||
REGISTRY: ${{ env.REGISTRY }} | ||
TEST_SUITE: ${{ inputs.node_provisioner }} | ||
E2E_ACR_REGISTRY: ${{ env.CLUSTER_NAME }}.azurecr.io | ||
E2E_ACR_REGISTRY_SECRET: ${{ env.CLUSTER_NAME }}-acr-secret | ||
|
||
- name: Cleanup e2e resources | ||
if: ${{ always() }} | ||
uses: azure/CLI@v2.1.0 | ||
with: | ||
inlineScript: | | ||
set +e | ||
az group delete --name "${{ env.CLUSTER_NAME }}" --yes --no-wait || true |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
name: ragengine-e2e-test | ||
|
||
concurrency: | ||
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} | ||
cancel-in-progress: true | ||
|
||
on: | ||
pull_request: | ||
paths-ignore: ['docs/**', '**.md', '**.mdx', '**.png', '**.jpg'] | ||
|
||
env: | ||
GO_VERSION: "1.22" | ||
|
||
permissions: | ||
id-token: write # This is required for requesting the JWT | ||
contents: read # This is required for actions/checkout | ||
|
||
jobs: | ||
run-e2e: | ||
strategy: | ||
fail-fast: false | ||
matrix: | ||
node-provisioner: [gpuprovisioner] # WIP: azkarpenter] | ||
permissions: | ||
contents: read | ||
id-token: write | ||
statuses: write | ||
uses: ./.github/workflows/ragengine-e2e-workflow.yml | ||
with: | ||
git_sha: ${{ github.event.pull_request.head.sha }} | ||
node_provisioner: ${{ matrix.node-provisioner }} |
Oops, something went wrong.