Merge branch 'main' into Ishaan/abstract-stores

kaito-project · Dec 11, 2024 · 1e3430a · 1e3430a
2 parents a090f87 + c3be988
commit 1e3430a
Show file tree

Hide file tree

Showing 230 changed files with 4,827 additions and 940 deletions.
diff --git a/.gitattributes b/.gitattributes
@@ -1 +0,0 @@
-presets/test/** linguist-vendored

diff --git a/.github/dependabot.yml b/.github/dependabot.yml
@@ -42,13 +42,13 @@ updates:
       interval: daily
 
   - package-ecosystem: pip
-    directory: /presets/inference/text-generation
+    directory: /presets/workspace/inference/text-generation
     schedule:
       interval: daily
     open-pull-requests-limit: 0
 
   - package-ecosystem: pip
-    directory: /presets/tuning/tfs
+    directory: /presets/workspace/tuning/tfs
     schedule:
       interval: daily
     open-pull-requests-limit: 0
diff --git a/.github/e2e-preset-configs.json b/.github/e2e-preset-configs.json
@@ -4,7 +4,7 @@
       {
         "name": "falcon-7b",
         "node-count": 1,
-        "node-vm-size": "Standard_NC12s_v3",
+        "node-vm-size": "Standard_NC6s_v3",
         "node-osdisk-size": 100,
         "OSS": true,
         "loads_adapter": false
@@ -21,39 +21,39 @@
       {
         "name": "falcon-7b-instruct",
         "node-count": 1,
-        "node-vm-size": "Standard_NC12s_v3",
+        "node-vm-size": "Standard_NC6s_v3",
         "node-osdisk-size": 100,
         "OSS": true,
         "loads_adapter": false
       },
       {
         "name": "falcon-40b",
         "node-count": 1,
-        "node-vm-size": "Standard_NC24s_v3",
+        "node-vm-size": "Standard_NC48ads_A100_v4",
         "node-osdisk-size": 400,
         "OSS": true,
         "loads_adapter": false
       },
       {
         "name": "falcon-40b-instruct",
         "node-count": 1,
-        "node-vm-size": "Standard_NC24s_v3",
+        "node-vm-size": "Standard_NC48ads_A100_v4",
         "node-osdisk-size": 400,
         "OSS": true,
         "loads_adapter": false
       },
       {
         "name": "mistral-7b",
         "node-count": 1,
-        "node-vm-size": "Standard_NC12s_v3",
+        "node-vm-size": "Standard_NC6s_v3",
         "node-osdisk-size": 100,
         "OSS": true,
         "loads_adapter": false
       },
       {
         "name": "mistral-7b-instruct",
         "node-count": 1,
-        "node-vm-size": "Standard_NC12s_v3",
+        "node-vm-size": "Standard_NC6s_v3",
         "node-osdisk-size": 100,
         "OSS": true,
         "loads_adapter": false
@@ -129,6 +129,14 @@
         "node-osdisk-size": 150,
         "OSS": false,
         "loads_adapter": false
+      },
+      {
+        "name": "tuning",
+        "node-count": 1,
+        "node-vm-size": "Standard_NC6s_v3",
+        "node-osdisk-size": 100,
+        "OSS": true,
+        "loads_adapter": false
       }
     ]
   }

diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml
@@ -32,7 +32,7 @@ jobs:
           submodules: true
           fetch-depth: 0
 
-      - uses: actions/setup-go@v5.0.2
+      - uses: actions/setup-go@v5.1.0
         with:
           go-version-file: 'go.mod'
 

diff --git a/.github/workflows/create-release.yml b/.github/workflows/create-release.yml
@@ -21,7 +21,7 @@ jobs:
           egress-policy: audit
 
       - name: Set up Go ${{ env.GO_VERSION }}
-        uses: actions/setup-go@0a12ed9d6a96ab950c8f026ed9f722fe0da7ef32 # v5.0.2
+        uses: actions/setup-go@41dfa10bad2bb2ae585af6ee5bb4d7d973ad74ed # v5.1.0
         with:
           go-version: ${{ env.GO_VERSION  }}
 

diff --git a/.github/workflows/dependency-review.yml b/.github/workflows/dependency-review.yml
@@ -24,4 +24,4 @@ jobs:
       - name: 'Checkout Repository'
         uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
       - name: 'Dependency Review'
-        uses: actions/dependency-review-action@5a2ce3f5b92ee19cbb1541a4984c76d921601d7c # v4.3.4
+        uses: actions/dependency-review-action@3b139cfc5fae8b618d3eae3675e383bb1769c019 # v4.5.0
diff --git a/.github/workflows/e2e-preset-test.yml b/.github/workflows/e2e-preset-test.yml
@@ -15,12 +15,17 @@ on:
                 type: boolean
                 default: false
                 description: "Test all Phi models for E2E"
+            test-on-vllm:
+                type: boolean
+                default: false
+                description: "Test on VLLM runtime"
 
 env:
     GO_VERSION: "1.22"
     BRANCH_NAME: ${{ github.head_ref || github.ref_name}} 
     FORCE_RUN_ALL: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.force-run-all == 'true' }}
     FORCE_RUN_ALL_PHI:  ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.force-run-all-phi-models== 'true' }}
+    RUNTIME: ${{ (github.event_name == 'workflow_dispatch' && github.event.inputs.test-on-vllm == 'true') && 'vllm' || 'hf' }}
 
 permissions:
     id-token: write
@@ -209,7 +214,7 @@ jobs:
             fi
 
       - name: Create Service
-        run: kubectl apply -f presets/test/manifests/${{ matrix.model.name }}/${{ matrix.model.name }}-service.yaml
+        run: kubectl apply -f presets/workspace/test/manifests/${{ matrix.model.name }}/${{ matrix.model.name }}-service.yaml
 
       - name: Retrieve External Service IP
         id: get_ip
@@ -229,10 +234,11 @@ jobs:
       
       - name: Replace IP and Deploy Resource to K8s
         run: |
-            sed -i "s/MASTER_ADDR_HERE/${{ steps.get_ip.outputs.SERVICE_IP }}/g" presets/test/manifests/${{ matrix.model.name }}/${{ matrix.model.name }}.yaml
-            sed -i "s/TAG_HERE/${{ matrix.model.tag }}/g" presets/test/manifests/${{ matrix.model.name }}/${{ matrix.model.name }}.yaml
-            sed -i "s/REPO_HERE/${{ secrets.ACR_AMRT_USERNAME }}/g" presets/test/manifests/${{ matrix.model.name }}/${{ matrix.model.name }}.yaml
-            kubectl apply -f presets/test/manifests/${{ matrix.model.name }}/${{ matrix.model.name }}.yaml
+            POSTFIX=$(echo "${{ matrix.model.name }}" | grep -q "llama" && echo "" || echo "_${{ env.RUNTIME }}")
+            sed -i "s/MASTER_ADDR_HERE/${{ steps.get_ip.outputs.SERVICE_IP }}/g" presets/workspace/test/manifests/${{ matrix.model.name }}/${{ matrix.model.name }}$POSTFIX.yaml
+            sed -i "s/TAG_HERE/${{ matrix.model.tag }}/g" presets/workspace/test/manifests/${{ matrix.model.name }}/${{ matrix.model.name }}$POSTFIX.yaml
+            sed -i "s/REPO_HERE/${{ secrets.ACR_AMRT_USERNAME }}/g" presets/workspace/test/manifests/${{ matrix.model.name }}/${{ matrix.model.name }}$POSTFIX.yaml
+            kubectl apply -f presets/workspace/test/manifests/${{ matrix.model.name }}/${{ matrix.model.name }}$POSTFIX.yaml
 
       - name: Wait for Resource to be ready
         run: |
@@ -243,20 +249,23 @@ jobs:
         run: |
             POD_NAME=$(kubectl get pods -l app=${{ matrix.model.name }} -o jsonpath="{.items[0].metadata.name}")
             kubectl logs $POD_NAME | grep "Adapter added:" | grep "${{ matrix.model.expected_adapter }}" || (echo "Adapter not loaded or incorrect adapter loaded" && exit 1)
-          
-      - name: Test home endpoint
+
+      - name: Install testing commands
         run: |
-            curl http://${{ steps.get_ip.outputs.SERVICE_IP }}:80/
+            kubectl exec ${{steps.resource.outputs.RESOURCE_TYPE}}/${{ matrix.model.name }} -- apt-get update
+            kubectl exec ${{steps.resource.outputs.RESOURCE_TYPE}}/${{ matrix.model.name }} -- apt-get install -y curl
 
       - name: Test healthz endpoint
         run: |
-            curl http://${{ steps.get_ip.outputs.SERVICE_IP }}:80/healthz
-    
+            kubectl exec ${{steps.resource.outputs.RESOURCE_TYPE}}/${{ matrix.model.name }} -- \
+            curl -sSf http://localhost:5000/health  # -f: exit non-zero on HTTP >= 400 so an unhealthy server fails this step; -S keeps the error message
+
       - name: Test inference endpoint
         run: |
+            echo "Testing inference for ${{ matrix.model.name }}"
             if [[ "${{ matrix.model.name }}" == *"llama"* && "${{ matrix.model.name }}" == *"-chat"* ]]; then
-                echo "Testing inference for ${{ matrix.model.name }}"
-                curl -X POST \
+                kubectl exec ${{steps.resource.outputs.RESOURCE_TYPE}}/${{ matrix.model.name }} -- \
+                curl -s -X POST \
                 -H "Content-Type: application/json" \
                 -d '{
                     "input_data": {
@@ -274,10 +283,10 @@ jobs:
                         ]
                     }
                 }' \
-                http://${{ steps.get_ip.outputs.SERVICE_IP }}:80/chat
+                http://localhost:5000/chat
             elif [[ "${{ matrix.model.name }}" == *"llama"* ]]; then
-                echo "Testing inference for ${{ matrix.model.name }}"
-                curl -X POST \
+                kubectl exec ${{steps.resource.outputs.RESOURCE_TYPE}}/${{ matrix.model.name }} -- \
+                curl -s -X POST \
                 -H "Content-Type: application/json" \
                 -d '{
                     "prompts": [
@@ -290,10 +299,29 @@ jobs:
                         "max_gen_len": 128
                     }
                 }' \
-                http://${{ steps.get_ip.outputs.SERVICE_IP }}:80/generate
+                http://localhost:5000/generate
+            elif [[ "${{ env.RUNTIME }}" == *"vllm"* ]]; then
+                kubectl exec ${{steps.resource.outputs.RESOURCE_TYPE}}/${{ matrix.model.name }} -- \
+                curl -s -X POST \
+                -H "accept: application/json" \
+                -H "Content-Type: application/json" \
+                -d '{
+                    "model": "test",
+                    "messages": [
+                    {
+                        "role": "system",
+                        "content": "You are a helpful assistant."
+                    },
+                    {
+                        "role": "user",
+                        "content": "Hello!"
+                    }
+                    ]
+                    }' \
+                http://localhost:5000/v1/chat/completions
             else
-                echo "Testing inference for ${{ matrix.model.name }}"
-                curl -X POST \
+                kubectl exec ${{steps.resource.outputs.RESOURCE_TYPE}}/${{ matrix.model.name }} -- \
+                curl -s -X POST \
                 -H "accept: application/json" \
                 -H "Content-Type: application/json" \
                 -d '{
@@ -327,7 +355,7 @@ jobs:
                             "remove_invalid_values":null
                         }
                     }' \
-                http://${{ steps.get_ip.outputs.SERVICE_IP }}:80/chat                
+                http://localhost:5000/chat
             fi
       
       - name: Cleanup
@@ -340,6 +368,7 @@ jobs:
                 
                 # Check and Delete K8s Resource (Deployment or StatefulSet)
                 if kubectl get $RESOURCE_TYPE ${{ matrix.model.name }} > /dev/null 2>&1; then
+                    kubectl logs $RESOURCE_TYPE/${{ matrix.model.name }}
                     kubectl delete $RESOURCE_TYPE ${{ matrix.model.name }}
                 fi
             fi
@@ -364,4 +393,3 @@ jobs:
                     --resource-group llm-test
                 fi
             fi
-          
diff --git a/.github/workflows/e2e-preset-tuning-test.yml b/.github/workflows/e2e-preset-tuning-test.yml
@@ -0,0 +1,136 @@
+name: E2E Preset tuning Test
+
+on:
+    workflow_run:  # fires when the workflow named below completes, whatever its conclusion
+        workflows: ["Build and Push Preset Models"]
+        types:
+            - completed
+    workflow_dispatch: {}  # manual trigger, takes no inputs
+
+env:
+    GO_VERSION: "1.22"  # NOTE(review): no step in this workflow references GO_VERSION - confirm it is needed
+
+permissions:
+    id-token: write  # presumably required for the azure/login step (client-id/tenant-id only, no secret) - confirm
+    contents: read
+
+jobs:
+  e2e-preset-tuning-tests:  # runs on manual dispatch, or after a successful run of the triggering workflow (see `if`)
+    if: github.event_name == 'workflow_dispatch' || github.event.workflow_run.conclusion == 'success'
+    runs-on: ubuntu-latest
+    environment: preset-env
+    steps:
+      - name: Checkout  # later steps read .github/e2e-preset-configs.json and presets/workspace/test/tuning/tuning-job.yaml from this checkout
+        uses: actions/checkout@v4.2.2
+        with:
+            submodules: true  # also check out git submodules
+            fetch-depth: 0  # 0 = fetch full history instead of a shallow clone
+
+      - name: 'Az CLI login'  # authenticates the az CLI used by the nodepool and cleanup steps
+        uses: azure/login@v2.2.0
+        with:
+            client-id: ${{ secrets.AZURE_CLIENT_ID }}
+            tenant-id: ${{ secrets.AZURE_TENANT_ID }}
+            allow-no-subscriptions: true  # no subscription is passed here; the following step selects one with `az account set`
+
+      - name: 'Set ACR Subscription'  # selects the subscription that subsequent az commands operate on
+        run: az account set --subscription ${{secrets.AZURE_SUBSCRIPTION_ID}}
+
+      - name: Set up kubectl context  # fetches credentials for AKS cluster GitRunner (resource group llm-test) so later kubectl calls target it
+        run: |
+          az aks get-credentials --resource-group llm-test --name GitRunner
+
+      - name: Get test meta
+        id: get_test_meta
+        run: |
+            # Publish every field of the "tuning" entry in the preset config as a step output, plus a fixed image TAG.
+            # `jq -e` exits non-zero when no entry matches, so a missing config fails here instead of yielding empty outputs.
+            CONFIG=$(jq -c -e '.matrix.image[] | select(.name == "tuning")' .github/e2e-preset-configs.json)
+            echo "TAG=0.0.7" >> "$GITHUB_OUTPUT"
+            # One key=value line per field, written straight from jq so values are never word-split by the shell.
+            jq -r 'to_entries[] | "\(.key)=\(.value|tostring)"' <<< "$CONFIG" >> "$GITHUB_OUTPUT"
+
+      - name: Create Nodepool
+        run: |
+            # Reuse the pool if it already exists and is in the Succeeded state; fail if it exists in any other state; otherwise create it.
+            existing_pool=$(az aks nodepool show \
+                --name ${{ steps.get_test_meta.outputs.name }} \
+                --cluster-name GitRunner \
+                --resource-group llm-test \
+                --query 'name' -o tsv || echo "")
+            echo "NODEPOOL_EXIST: $existing_pool"
+            if [ -n "$existing_pool" ]; then
+                pool_state=$(az aks nodepool show \
+                    --name ${{ steps.get_test_meta.outputs.name }} \
+                    --cluster-name GitRunner \
+                    --resource-group llm-test \
+                    --query 'provisioningState' -o tsv)
+                echo "NODEPOOL_STATE: $pool_state"
+                if [ "$pool_state" = "Succeeded" ]; then
+                    echo "Nodepool already exists and is in a running state."
+                    exit 0
+                fi
+                echo "Nodepool exists but is not in a Succeeded state. Please check manually."
+                exit 1
+            fi
+            az aks nodepool add \
+                --name ${{ steps.get_test_meta.outputs.name }} \
+                --cluster-name GitRunner \
+                --resource-group llm-test \
+                --node-count ${{ steps.get_test_meta.outputs.node-count }} \
+                --node-vm-size ${{ steps.get_test_meta.outputs.node-vm-size }} \
+                --node-osdisk-size ${{ steps.get_test_meta.outputs.node-osdisk-size }} \
+                --labels pool=${{ steps.get_test_meta.outputs.name }} \
+                --node-taints sku=gpu:NoSchedule \
+                --aks-custom-headers UseGPUDedicatedVHD=true
+
+      - name: Replace repo and Deploy Resource to K8s
+        run: |
+            # Substitute the registry and image-tag placeholders in the job manifest, then submit it.
+            sed -i -e "s/REPO_HERE/${{ secrets.ACR_AMRT_USERNAME }}/g" -e "s/TAG_HERE/${{ steps.get_test_meta.outputs.TAG }}/g" presets/workspace/test/tuning/tuning-job.yaml
+            kubectl apply -f presets/workspace/test/tuning/tuning-job.yaml
+
+      - name: Wait for tuning job to be ready
+        shell: bash {0}
+        run: |
+            # `bash {0}` runs the script without -e, so the non-zero `kubectl wait` probes below do not abort the step.
+            retval_complete=1
+            retval_failed=1
+            count=0
+            max_retries=60
+            # Probe both terminal job conditions every 10s, for at most max_retries rounds (about 10 minutes).
+            while [[ $retval_complete -ne 0 ]] && [[ $retval_failed -ne 0 ]] && [[ $count -lt $max_retries ]]; do
+                sleep 10
+                kubectl wait --for=condition=failed job/tuning-example --timeout=0 > /dev/null 2>&1
+                retval_failed=$?
+                kubectl wait --for=condition=complete job/tuning-example --timeout=0 > /dev/null 2>&1
+                retval_complete=$?
+                count=$((count + 1))
+            done
+            if [ $retval_failed -ne 0 ] && [ $retval_complete -eq 0 ]; then
+                echo "Job succeeded."
+                exit 0
+            fi
+            # The Cleanup step deletes the job, so print its logs now while they are still retrievable.
+            kubectl logs job/tuning-example --tail=200 || true
+            if [ $retval_failed -eq 0 ]; then echo "Job failed. Please check logs."; else echo "Job timeout."; fi
+            exit 1
+      - name: Cleanup
+        if: always()
+        run: |
+            # Best-effort: the job may never have been created, and a failure here must not stop the script (default shell is bash -e) before the nodepool is deleted.
+            kubectl delete --wait=true --ignore-not-found -f presets/workspace/test/tuning/tuning-job.yaml || echo "warning: could not delete tuning job"
+            # Check and Delete AKS Nodepool if it exists
+            NODEPOOL_EXIST=$(az aks nodepool show \
+                            --name ${{ steps.get_test_meta.outputs.name }} \
+                            --cluster-name GitRunner \
+                            --resource-group llm-test \
+                            --query 'name' -o tsv || echo "")
+
+            if [ -n "$NODEPOOL_EXIST" ]; then
+                echo "deleting nodepool"
+                az aks nodepool delete \
+                --name ${{ steps.get_test_meta.outputs.name }} \
+                --cluster-name GitRunner \
+                --resource-group llm-test
+            fi