Skip to content

Commit

Permalink
feat: added falcon 40b for e2e
Browse files Browse the repository at this point in the history
  • Loading branch information
ishaansehgal99 committed Oct 18, 2023
1 parent d79e017 commit 4b29c27
Show file tree
Hide file tree
Showing 5 changed files with 154 additions and 24 deletions.
48 changes: 24 additions & 24 deletions .github/workflows/e2e-preset-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -85,36 +85,36 @@ jobs:
fail-fast: false
matrix:
image:
- name: falcon-7b
node-count: 1
node-vm-size: Standard_NC12s_v3
node-osdisk-size: 100

- name: falcon-7b-instruct
node-count: 1
node-vm-size: Standard_NC12s_v3
node-osdisk-size: 100

# Uncomment once service/deployment made
# - name: falcon-40b
# - name: falcon-7b
# node-count: 1
# node-vm-size: Standard_NC96ads_A100_v4
# node-osdisk-size: 400
# node-vm-size: Standard_NC12s_v3
# node-osdisk-size: 100

# - name: falcon-40b-instruct
# - name: falcon-7b-instruct
# node-count: 1
# node-vm-size: Standard_NC96ads_A100_v4
# node-osdisk-size: 400
# node-vm-size: Standard_NC12s_v3
# node-osdisk-size: 100

- name: llama-2-7b
# Enabled now that the falcon-40b service/statefulset manifests exist
- name: falcon-40b
node-count: 1
node-vm-size: Standard_NC12s_v3
node-osdisk-size: 100
node-vm-size: Standard_NC96ads_A100_v4
node-osdisk-size: 400

- name: falcon-40b-instruct
node-count: 1
node-vm-size: Standard_NC96ads_A100_v4
node-osdisk-size: 400

# - name: llama-2-7b
# node-count: 1
# node-vm-size: Standard_NC12s_v3
# node-osdisk-size: 100

- name: llama-2-13b
node-count: 2
node-vm-size: Standard_NC12s_v3
node-osdisk-size: 150
# - name: llama-2-13b
# node-count: 2
# node-vm-size: Standard_NC12s_v3
# node-osdisk-size: 150

# Uncomment once service/deployment made
# - name: llama-2-70b
Expand Down
14 changes: 14 additions & 0 deletions presets/k8s/falcon-40b-instruct/falcon-40b-instruct-service.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
---
# LoadBalancer Service that exposes the falcon-40b-instruct inference API:
# external port 80 is forwarded to container port 5000.
apiVersion: v1
kind: Service
metadata:
  name: falcon-40b-instruct
spec:
  # `app: falcon` alone is a generic label, so the selector is narrowed with
  # the per-pod `statefulset.kubernetes.io/pod-name` label to the single pod
  # named falcon-40b-instruct-0.
  selector:
    app: falcon
    statefulset.kubernetes.io/pod-name: falcon-40b-instruct-0
  ports:
    - protocol: TCP
      port: 80
      targetPort: 5000
  type: LoadBalancer
  # Endpoints are published even while the pod is not Ready, so traffic can
  # reach the pod before its readiness probe passes.
  # NOTE(review): presumably intentional for long model start-up - confirm.
  publishNotReadyAddresses: true
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
---
# StatefulSet running one falcon-40b-instruct inference pod on a GPU node.
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: falcon-40b-instruct
spec:
  replicas: 1
  selector:
    matchLabels:
      app: falcon
  podManagementPolicy: Parallel
  template:
    metadata:
      labels:
        app: falcon
    spec:
      containers:
        - name: falcon-container
          # REPO_HERE / TAG_HERE look like placeholders.
          # NOTE(review): presumably substituted before `kubectl apply` - confirm.
          image: REPO_HERE.azurecr.io/falcon-40b-instruct:TAG_HERE
          command:
            - /bin/sh
            - -c
            - accelerate launch --config_file config.yaml --num_processes 1 --num_machines 1 --use_deepspeed --machine_rank 0 --gpu_ids all inference-api.py
          # Liveness checks start after 10 minutes; readiness checks start
          # after 30 seconds. Both poll GET /healthz on port 5000 every 10s.
          livenessProbe:
            httpGet:
              path: /healthz
              port: 5000
            initialDelaySeconds: 600  # 10 Min
            periodSeconds: 10
          readinessProbe:
            httpGet:
              path: /healthz
              port: 5000
            initialDelaySeconds: 30
            periodSeconds: 10
          volumeMounts:
            - name: dshm
              mountPath: /dev/shm
      # Memory-backed emptyDir mounted over /dev/shm in the container.
      # NOTE(review): no sizeLimit is set - confirm that is intended.
      volumes:
        - name: dshm
          emptyDir:
            medium: Memory
      # Allow scheduling onto nodes tainted sku=gpu:NoSchedule or carrying
      # any nvidia.com/gpu NoSchedule taint.
      tolerations:
        - effect: NoSchedule
          key: sku
          operator: Equal
          value: gpu
        - effect: NoSchedule
          key: nvidia.com/gpu
          operator: Exists
      # Pin the pod to nodes labelled pool=n40binstruct.
      nodeSelector:
        pool: n40binstruct
51 changes: 51 additions & 0 deletions presets/k8s/falcon-40b/falcon-40b-instruct-statefulset.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
---
# StatefulSet running one falcon-40b inference pod on a GPU node.
# NOTE(review): the diff header lists this file as
# presets/k8s/falcon-40b/falcon-40b-instruct-statefulset.yaml although it
# defines the falcon-40b StatefulSet - file name looks like a copy-paste
# slip; confirm.
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: falcon-40b
spec:
  replicas: 1
  selector:
    matchLabels:
      app: falcon
  podManagementPolicy: Parallel
  template:
    metadata:
      labels:
        app: falcon
    spec:
      containers:
        - name: falcon-container
          # REPO_HERE / TAG_HERE look like placeholders.
          # NOTE(review): presumably substituted before `kubectl apply` - confirm.
          image: REPO_HERE.azurecr.io/falcon-40b:TAG_HERE
          command:
            - /bin/sh
            - -c
            - accelerate launch --config_file config.yaml --num_processes 1 --num_machines 1 --use_deepspeed --machine_rank 0 --gpu_ids all inference-api.py
          # Liveness checks start after 10 minutes; readiness checks start
          # after 30 seconds. Both poll GET /healthz on port 5000 every 10s.
          livenessProbe:
            httpGet:
              path: /healthz
              port: 5000
            initialDelaySeconds: 600  # 10 Min
            periodSeconds: 10
          readinessProbe:
            httpGet:
              path: /healthz
              port: 5000
            initialDelaySeconds: 30
            periodSeconds: 10
          volumeMounts:
            - name: dshm
              mountPath: /dev/shm
      # Memory-backed emptyDir mounted over /dev/shm in the container.
      # NOTE(review): no sizeLimit is set - confirm that is intended.
      volumes:
        - name: dshm
          emptyDir:
            medium: Memory
      # Allow scheduling onto nodes tainted sku=gpu:NoSchedule or carrying
      # any nvidia.com/gpu NoSchedule taint.
      tolerations:
        - effect: NoSchedule
          key: sku
          operator: Equal
          value: gpu
        - effect: NoSchedule
          key: nvidia.com/gpu
          operator: Exists
      # Pin the pod to nodes labelled pool=falcon40b.
      nodeSelector:
        pool: falcon40b
14 changes: 14 additions & 0 deletions presets/k8s/falcon-40b/falcon-40b-service.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
---
# LoadBalancer Service that exposes the falcon-40b inference API:
# external port 80 is forwarded to container port 5000.
apiVersion: v1
kind: Service
metadata:
  name: falcon-40b
spec:
  # `app: falcon` alone is a generic label, so the selector is narrowed with
  # the per-pod `statefulset.kubernetes.io/pod-name` label to the single pod
  # named falcon-40b-0.
  selector:
    app: falcon
    statefulset.kubernetes.io/pod-name: falcon-40b-0
  ports:
    - protocol: TCP
      port: 80
      targetPort: 5000
  type: LoadBalancer
  # Endpoints are published even while the pod is not Ready, so traffic can
  # reach the pod before its readiness probe passes.
  # NOTE(review): presumably intentional for long model start-up - confirm.
  publishNotReadyAddresses: true

0 comments on commit 4b29c27

Please sign in to comment.