Skip to content

Commit

Permalink
Merge branch 'main' into Ishaan/abstract-stores
Browse files Browse the repository at this point in the history
  • Loading branch information
ishaansehgal99 authored Dec 11, 2024
2 parents a090f87 + c3be988 commit 1e3430a
Show file tree
Hide file tree
Showing 230 changed files with 4,827 additions and 940 deletions.
1 change: 0 additions & 1 deletion .gitattributes
Original file line number Diff line number Diff line change
@@ -1 +0,0 @@
presets/test/** linguist-vendored
4 changes: 2 additions & 2 deletions .github/dependabot.yml
Original file line number Diff line number Diff line change
Expand Up @@ -42,13 +42,13 @@ updates:
interval: daily

- package-ecosystem: pip
directory: /presets/inference/text-generation
directory: /presets/workspace/inference/text-generation
schedule:
interval: daily
open-pull-requests-limit: 0

- package-ecosystem: pip
directory: /presets/tuning/tfs
directory: /presets/workspace/tuning/tfs
schedule:
interval: daily
open-pull-requests-limit: 0
20 changes: 14 additions & 6 deletions .github/e2e-preset-configs.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
{
"name": "falcon-7b",
"node-count": 1,
"node-vm-size": "Standard_NC12s_v3",
"node-vm-size": "Standard_NC6s_v3",
"node-osdisk-size": 100,
"OSS": true,
"loads_adapter": false
Expand All @@ -21,39 +21,39 @@
{
"name": "falcon-7b-instruct",
"node-count": 1,
"node-vm-size": "Standard_NC12s_v3",
"node-vm-size": "Standard_NC6s_v3",
"node-osdisk-size": 100,
"OSS": true,
"loads_adapter": false
},
{
"name": "falcon-40b",
"node-count": 1,
"node-vm-size": "Standard_NC24s_v3",
"node-vm-size": "Standard_NC48ads_A100_v4",
"node-osdisk-size": 400,
"OSS": true,
"loads_adapter": false
},
{
"name": "falcon-40b-instruct",
"node-count": 1,
"node-vm-size": "Standard_NC24s_v3",
"node-vm-size": "Standard_NC48ads_A100_v4",
"node-osdisk-size": 400,
"OSS": true,
"loads_adapter": false
},
{
"name": "mistral-7b",
"node-count": 1,
"node-vm-size": "Standard_NC12s_v3",
"node-vm-size": "Standard_NC6s_v3",
"node-osdisk-size": 100,
"OSS": true,
"loads_adapter": false
},
{
"name": "mistral-7b-instruct",
"node-count": 1,
"node-vm-size": "Standard_NC12s_v3",
"node-vm-size": "Standard_NC6s_v3",
"node-osdisk-size": 100,
"OSS": true,
"loads_adapter": false
Expand Down Expand Up @@ -129,6 +129,14 @@
"node-osdisk-size": 150,
"OSS": false,
"loads_adapter": false
},
{
"name": "tuning",
"node-count": 1,
"node-vm-size": "Standard_NC6s_v3",
"node-osdisk-size": 100,
"OSS": true,
"loads_adapter": false
}
]
}
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/codeql.yml
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ jobs:
submodules: true
fetch-depth: 0

- uses: actions/setup-go@v5.0.2
- uses: actions/setup-go@v5.1.0
with:
go-version-file: 'go.mod'

Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/create-release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ jobs:
egress-policy: audit

- name: Set up Go ${{ env.GO_VERSION }}
uses: actions/setup-go@0a12ed9d6a96ab950c8f026ed9f722fe0da7ef32 # v5.0.2
uses: actions/setup-go@41dfa10bad2bb2ae585af6ee5bb4d7d973ad74ed # v5.1.0
with:
go-version: ${{ env.GO_VERSION }}

Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/dependency-review.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,4 +24,4 @@ jobs:
- name: 'Checkout Repository'
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- name: 'Dependency Review'
uses: actions/dependency-review-action@5a2ce3f5b92ee19cbb1541a4984c76d921601d7c # v4.3.4
uses: actions/dependency-review-action@3b139cfc5fae8b618d3eae3675e383bb1769c019 # v4.5.0
68 changes: 48 additions & 20 deletions .github/workflows/e2e-preset-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,17 @@ on:
type: boolean
default: false
description: "Test all Phi models for E2E"
test-on-vllm:
type: boolean
default: false
description: "Test on VLLM runtime"

env:
GO_VERSION: "1.22"
BRANCH_NAME: ${{ github.head_ref || github.ref_name}}
FORCE_RUN_ALL: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.force-run-all == 'true' }}
FORCE_RUN_ALL_PHI: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.force-run-all-phi-models== 'true' }}
RUNTIME: ${{ (github.event_name == 'workflow_dispatch' && github.event.inputs.test-on-vllm == 'true') && 'vllm' || 'hf' }}

permissions:
id-token: write
Expand Down Expand Up @@ -209,7 +214,7 @@ jobs:
fi
- name: Create Service
run: kubectl apply -f presets/test/manifests/${{ matrix.model.name }}/${{ matrix.model.name }}-service.yaml
run: kubectl apply -f presets/workspace/test/manifests/${{ matrix.model.name }}/${{ matrix.model.name }}-service.yaml

- name: Retrieve External Service IP
id: get_ip
Expand All @@ -229,10 +234,11 @@ jobs:
- name: Replace IP and Deploy Resource to K8s
run: |
sed -i "s/MASTER_ADDR_HERE/${{ steps.get_ip.outputs.SERVICE_IP }}/g" presets/test/manifests/${{ matrix.model.name }}/${{ matrix.model.name }}.yaml
sed -i "s/TAG_HERE/${{ matrix.model.tag }}/g" presets/test/manifests/${{ matrix.model.name }}/${{ matrix.model.name }}.yaml
sed -i "s/REPO_HERE/${{ secrets.ACR_AMRT_USERNAME }}/g" presets/test/manifests/${{ matrix.model.name }}/${{ matrix.model.name }}.yaml
kubectl apply -f presets/test/manifests/${{ matrix.model.name }}/${{ matrix.model.name }}.yaml
POSTFIX=$(echo "${{ matrix.model.name }}" | grep -q "llama" && echo "" || echo "_${{ env.RUNTIME }}")
sed -i "s/MASTER_ADDR_HERE/${{ steps.get_ip.outputs.SERVICE_IP }}/g" presets/workspace/test/manifests/${{ matrix.model.name }}/${{ matrix.model.name }}$POSTFIX.yaml
sed -i "s/TAG_HERE/${{ matrix.model.tag }}/g" presets/workspace/test/manifests/${{ matrix.model.name }}/${{ matrix.model.name }}$POSTFIX.yaml
sed -i "s/REPO_HERE/${{ secrets.ACR_AMRT_USERNAME }}/g" presets/workspace/test/manifests/${{ matrix.model.name }}/${{ matrix.model.name }}$POSTFIX.yaml
kubectl apply -f presets/workspace/test/manifests/${{ matrix.model.name }}/${{ matrix.model.name }}$POSTFIX.yaml
- name: Wait for Resource to be ready
run: |
Expand All @@ -243,20 +249,23 @@ jobs:
run: |
POD_NAME=$(kubectl get pods -l app=${{ matrix.model.name }} -o jsonpath="{.items[0].metadata.name}")
kubectl logs $POD_NAME | grep "Adapter added:" | grep "${{ matrix.model.expected_adapter }}" || (echo "Adapter not loaded or incorrect adapter loaded" && exit 1)
- name: Test home endpoint
- name: Install testing commands
run: |
curl http://${{ steps.get_ip.outputs.SERVICE_IP }}:80/
kubectl exec ${{steps.resource.outputs.RESOURCE_TYPE}}/${{ matrix.model.name }} -- apt-get update
kubectl exec ${{steps.resource.outputs.RESOURCE_TYPE}}/${{ matrix.model.name }} -- apt-get install -y curl
- name: Test healthz endpoint
run: |
curl http://${{ steps.get_ip.outputs.SERVICE_IP }}:80/healthz
kubectl exec ${{steps.resource.outputs.RESOURCE_TYPE}}/${{ matrix.model.name }} -- \
curl -s http://localhost:5000/health
- name: Test inference endpoint
run: |
echo "Testing inference for ${{ matrix.model.name }}"
if [[ "${{ matrix.model.name }}" == *"llama"* && "${{ matrix.model.name }}" == *"-chat"* ]]; then
echo "Testing inference for ${{ matrix.model.name }}"
curl -X POST \
kubectl exec ${{steps.resource.outputs.RESOURCE_TYPE}}/${{ matrix.model.name }} -- \
curl -s -X POST \
-H "Content-Type: application/json" \
-d '{
"input_data": {
Expand All @@ -274,10 +283,10 @@ jobs:
]
}
}' \
http://${{ steps.get_ip.outputs.SERVICE_IP }}:80/chat
http://localhost:5000/chat
elif [[ "${{ matrix.model.name }}" == *"llama"* ]]; then
echo "Testing inference for ${{ matrix.model.name }}"
curl -X POST \
kubectl exec ${{steps.resource.outputs.RESOURCE_TYPE}}/${{ matrix.model.name }} -- \
curl -s -X POST \
-H "Content-Type: application/json" \
-d '{
"prompts": [
Expand All @@ -290,10 +299,29 @@ jobs:
"max_gen_len": 128
}
}' \
http://${{ steps.get_ip.outputs.SERVICE_IP }}:80/generate
http://localhost:5000/generate
elif [[ "${{ env.RUNTIME }}" == *"vllm"* ]]; then
kubectl exec ${{steps.resource.outputs.RESOURCE_TYPE}}/${{ matrix.model.name }} -- \
curl -s -X POST \
-H "accept: application/json" \
-H "Content-Type: application/json" \
-d '{
"model": "test",
"messages": [
{
"role": "system",
"content": "You are a helpful assistant."
},
{
"role": "user",
"content": "Hello!"
}
]
}' \
http://localhost:5000/v1/chat/completions
else
echo "Testing inference for ${{ matrix.model.name }}"
curl -X POST \
kubectl exec ${{steps.resource.outputs.RESOURCE_TYPE}}/${{ matrix.model.name }} -- \
curl -s -X POST \
-H "accept: application/json" \
-H "Content-Type: application/json" \
-d '{
Expand Down Expand Up @@ -327,7 +355,7 @@ jobs:
"remove_invalid_values":null
}
}' \
http://${{ steps.get_ip.outputs.SERVICE_IP }}:80/chat
http://localhost:5000/chat
fi
- name: Cleanup
Expand All @@ -340,6 +368,7 @@ jobs:
# Check and Delete K8s Resource (Deployment or StatefulSet)
if kubectl get $RESOURCE_TYPE ${{ matrix.model.name }} > /dev/null 2>&1; then
kubectl logs $RESOURCE_TYPE/${{ matrix.model.name }}
kubectl delete $RESOURCE_TYPE ${{ matrix.model.name }}
fi
fi
Expand All @@ -364,4 +393,3 @@ jobs:
--resource-group llm-test
fi
fi
136 changes: 136 additions & 0 deletions .github/workflows/e2e-preset-tuning-test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
# E2E test for the preset tuning image.
#
# Flow: log in to Azure -> point kubectl at the shared AKS cluster -> read the
# "tuning" entry from .github/e2e-preset-configs.json -> make sure a GPU
# nodepool exists -> deploy the tuning Job manifest -> poll until the Job
# completes or fails -> always clean up the Job and the nodepool.
name: E2E Preset tuning Test

on:
  # Runs after the image build workflow finishes, or on manual dispatch.
  workflow_run:
    workflows: ["Build and Push Preset Models"]
    types:
      - completed
  workflow_dispatch: {}

env:
  # NOTE(review): no step in this workflow references GO_VERSION; kept as-is.
  GO_VERSION: "1.22"

permissions:
  id-token: write
  contents: read

jobs:
  e2e-preset-tuning-tests:
    # Manual runs always proceed; chained runs only when the upstream
    # workflow concluded successfully.
    if: github.event_name == 'workflow_dispatch' || github.event.workflow_run.conclusion == 'success'
    runs-on: ubuntu-latest
    environment: preset-env
    steps:
      - name: Checkout
        uses: actions/checkout@v4.2.2
        with:
          submodules: true
          fetch-depth: 0

      - name: 'Az CLI login'
        uses: azure/login@v2.2.0
        with:
          client-id: ${{ secrets.AZURE_CLIENT_ID }}
          tenant-id: ${{ secrets.AZURE_TENANT_ID }}
          allow-no-subscriptions: true

      - name: 'Set ACR Subscription'
        run: az account set --subscription ${{secrets.AZURE_SUBSCRIPTION_ID}}

      - name: Set up kubectl context
        run: |
          az aks get-credentials --resource-group llm-test --name GitRunner

      # Exposes every key of the "tuning" config entry (name, node-count,
      # node-vm-size, node-osdisk-size, ...) plus TAG as step outputs.
      - name: Get test meta
        id: get_test_meta
        run: |
          CONFIG=$(jq -c '.matrix.image[] | select(.name == "tuning")' .github/e2e-preset-configs.json)
          # Fail fast: without this entry every later step would run with
          # empty nodepool parameters.
          if [ -z "$CONFIG" ]; then
            echo "No \"tuning\" entry found in .github/e2e-preset-configs.json"
            exit 1
          fi
          # Image tag substituted for TAG_HERE in the Job manifest below.
          echo "TAG=0.0.7" >> "$GITHUB_OUTPUT"
          # One key=value line per config field, written directly so values
          # are not subject to shell word splitting or globbing.
          echo "${CONFIG}" | jq -r 'to_entries|map("\(.key)=\(.value|tostring)")|.[]' >> "$GITHUB_OUTPUT"

      # Creates the nodepool if missing; reuses it only when its
      # provisioningState is "Succeeded".
      - name: Create Nodepool
        run: |
          NODEPOOL_EXIST=$(az aks nodepool show \
                          --name ${{ steps.get_test_meta.outputs.name }} \
                          --cluster-name GitRunner \
                          --resource-group llm-test \
                          --query 'name' -o tsv || echo "")
          echo "NODEPOOL_EXIST: $NODEPOOL_EXIST"
          if [ -z "$NODEPOOL_EXIST" ]; then
            az aks nodepool add \
                --name ${{ steps.get_test_meta.outputs.name }} \
                --cluster-name GitRunner \
                --resource-group llm-test \
                --node-count ${{ steps.get_test_meta.outputs.node-count }} \
                --node-vm-size ${{ steps.get_test_meta.outputs.node-vm-size }} \
                --node-osdisk-size ${{ steps.get_test_meta.outputs.node-osdisk-size }} \
                --labels pool=${{ steps.get_test_meta.outputs.name }} \
                --node-taints sku=gpu:NoSchedule \
                --aks-custom-headers UseGPUDedicatedVHD=true
          else
            NODEPOOL_STATE=$(az aks nodepool show \
                            --name ${{ steps.get_test_meta.outputs.name }} \
                            --cluster-name GitRunner \
                            --resource-group llm-test \
                            --query 'provisioningState' -o tsv)
            echo "NODEPOOL_STATE: $NODEPOOL_STATE"
            if [ "$NODEPOOL_STATE" != "Succeeded" ]; then
              echo "Nodepool exists but is not in a Succeeded state. Please check manually."
              exit 1
            else
              echo "Nodepool already exists and is in a running state."
            fi
          fi

      # Fills the REPO_HERE / TAG_HERE placeholders in the manifest, then applies it.
      - name: Replace repo and Deploy Resource to K8s
        run: |
          sed -i "s/REPO_HERE/${{ secrets.ACR_AMRT_USERNAME }}/g" presets/workspace/test/tuning/tuning-job.yaml
          sed -i "s/TAG_HERE/${{ steps.get_test_meta.outputs.TAG }}/g" presets/workspace/test/tuning/tuning-job.yaml
          kubectl apply -f presets/workspace/test/tuning/tuning-job.yaml

      - name: Wait for tuning job to be ready
        # Custom shell template without -e: the loop below depends on
        # `kubectl wait` returning non-zero while a condition is not yet met.
        shell: bash {0}
        run: |
          retval_complete=1
          retval_failed=1
          count=0
          max_retries=60
          # Poll every 10 s, up to 60 times (about 10 minutes), until the Job
          # reports either the "complete" or the "failed" condition.
          while [[ $retval_complete -ne 0 ]] && [[ $retval_failed -ne 0 ]] && [[ $count -lt $max_retries ]]; do
            sleep 10
            kubectl wait --for=condition=failed job/tuning-example --timeout=0 > /dev/null 2>&1
            retval_failed=$?
            kubectl wait --for=condition=complete job/tuning-example --timeout=0 > /dev/null 2>&1
            retval_complete=$?
            count=$((count + 1))
          done
          if [ $retval_failed -eq 0 ]; then
            echo "Job failed. Please check logs."
            exit 1
          elif [ $retval_complete -ne 0 ]; then
            echo "Job timeout."
            exit 1
          else
            echo "Job succeeded."
          fi

      - name: Cleanup
        if: always()
        run: |
          # Print the Job's logs before deleting it so a failed or timed-out
          # run can still be diagnosed from the workflow output.
          kubectl logs job/tuning-example --all-containers=true \
            || echo "No logs available for job/tuning-example"
          # Best-effort delete: this step runs under `bash -e`, so a failing
          # delete (for example, the Job was never applied) must not abort the
          # script before the nodepool is removed.
          kubectl delete --wait=true --ignore-not-found=true -f presets/workspace/test/tuning/tuning-job.yaml \
            || echo "Failed to delete tuning job resources; continuing with nodepool cleanup"
          # Check and Delete AKS Nodepool if it exists
          NODEPOOL_EXIST=$(az aks nodepool show \
                          --name ${{ steps.get_test_meta.outputs.name }} \
                          --cluster-name GitRunner \
                          --resource-group llm-test \
                          --query 'name' -o tsv || echo "")
          if [ -n "$NODEPOOL_EXIST" ]; then
            echo "deleting nodepool"
            az aks nodepool delete \
                --name ${{ steps.get_test_meta.outputs.name }} \
                --cluster-name GitRunner \
                --resource-group llm-test
          fi
Loading

0 comments on commit 1e3430a

Please sign in to comment.