From 4b29c2732aef2cb36ef9f67d19e9493ae84c05be Mon Sep 17 00:00:00 2001
From: ishaansehgal99 <ishaanforthewin@gmail.com>
Date: Tue, 17 Oct 2023 23:03:24 -0700
Subject: [PATCH] feat: added falcon 40b for e2e

---
 .github/workflows/e2e-preset-test.yml         | 48 ++++++++---------
 .../falcon-40b-instruct-service.yaml          | 14 +++++
 .../falcon-40b-instruct-statefulset.yaml      | 51 +++++++++++++++++++
 .../falcon-40b-instruct-statefulset.yaml      | 51 +++++++++++++++++++
 .../k8s/falcon-40b/falcon-40b-service.yaml    | 14 +++++
 5 files changed, 154 insertions(+), 24 deletions(-)
 create mode 100644 presets/k8s/falcon-40b-instruct/falcon-40b-instruct-service.yaml
 create mode 100644 presets/k8s/falcon-40b-instruct/falcon-40b-instruct-statefulset.yaml
 create mode 100644 presets/k8s/falcon-40b/falcon-40b-instruct-statefulset.yaml
 create mode 100644 presets/k8s/falcon-40b/falcon-40b-service.yaml

diff --git a/.github/workflows/e2e-preset-test.yml b/.github/workflows/e2e-preset-test.yml
index 470013862..0fc4f551c 100644
--- a/.github/workflows/e2e-preset-test.yml
+++ b/.github/workflows/e2e-preset-test.yml
@@ -85,36 +85,36 @@ jobs:
       fail-fast: false
       matrix:
         image:
-          - name: falcon-7b
-            node-count: 1
-            node-vm-size: Standard_NC12s_v3
-            node-osdisk-size: 100
-
-          - name: falcon-7b-instruct
-            node-count: 1
-            node-vm-size: Standard_NC12s_v3
-            node-osdisk-size: 100
-
-        # Uncomment once service/deployment made
-        #   - name: falcon-40b
+        #   - name: falcon-7b
         #     node-count: 1
-        #     node-vm-size: Standard_NC96ads_A100_v4
-        #     node-osdisk-size: 400
+        #     node-vm-size: Standard_NC12s_v3
+        #     node-osdisk-size: 100
 
-        #   - name: falcon-40b-instruct
+        #   - name: falcon-7b-instruct
         #     node-count: 1
-        #     node-vm-size: Standard_NC96ads_A100_v4
-        #     node-osdisk-size: 400
+        #     node-vm-size: Standard_NC12s_v3
+        #     node-osdisk-size: 100
 
-          - name: llama-2-7b
+        # falcon-40b presets (service/statefulset manifests are under presets/k8s/)
+          - name: falcon-40b
             node-count: 1
-            node-vm-size: Standard_NC12s_v3
-            node-osdisk-size: 100
+            node-vm-size: Standard_NC96ads_A100_v4
+            node-osdisk-size: 400
+
+          - name: falcon-40b-instruct
+            node-count: 1
+            node-vm-size: Standard_NC96ads_A100_v4
+            node-osdisk-size: 400
+
+        #   - name: llama-2-7b
+        #     node-count: 1
+        #     node-vm-size: Standard_NC12s_v3
+        #     node-osdisk-size: 100
         
-          - name: llama-2-13b
-            node-count: 2
-            node-vm-size: Standard_NC12s_v3
-            node-osdisk-size: 150
+        #   - name: llama-2-13b
+        #     node-count: 2
+        #     node-vm-size: Standard_NC12s_v3
+        #     node-osdisk-size: 150
         
         # Uncomment once service/deployment made
         #   - name: llama-2-70b
diff --git a/presets/k8s/falcon-40b-instruct/falcon-40b-instruct-service.yaml b/presets/k8s/falcon-40b-instruct/falcon-40b-instruct-service.yaml
new file mode 100644
index 000000000..cc41fb6f7
--- /dev/null
+++ b/presets/k8s/falcon-40b-instruct/falcon-40b-instruct-service.yaml
@@ -0,0 +1,14 @@
+apiVersion: v1
+kind: Service  # exposes the falcon-40b-instruct inference pod through a load balancer
+metadata:
+  name: falcon-40b-instruct
+spec:
+  selector:
+    app: falcon
+    statefulset.kubernetes.io/pod-name: falcon-40b-instruct-0  # narrows the shared app label to pod 0 of the falcon-40b-instruct StatefulSet
+  ports:
+    - protocol: TCP
+      port: 80  # port clients connect to on the Service
+      targetPort: 5000  # container port; matches the port probed in the StatefulSet manifest
+  type: LoadBalancer
+  publishNotReadyAddresses: true  # endpoints are published even while the pod is not Ready
diff --git a/presets/k8s/falcon-40b-instruct/falcon-40b-instruct-statefulset.yaml b/presets/k8s/falcon-40b-instruct/falcon-40b-instruct-statefulset.yaml
new file mode 100644
index 000000000..0c39ec31e
--- /dev/null
+++ b/presets/k8s/falcon-40b-instruct/falcon-40b-instruct-statefulset.yaml
@@ -0,0 +1,51 @@
+apiVersion: apps/v1
+kind: StatefulSet  # single-replica workload running the falcon-40b-instruct inference container
+metadata:
+  name: falcon-40b-instruct
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: falcon  # NOTE(review): the falcon-40b StatefulSet uses this same label; selectors overlap if both share a namespace
+  podManagementPolicy: Parallel
+  template:
+    metadata:
+      labels:
+        app: falcon
+    spec:
+      containers:
+        - name: falcon-container
+          image: REPO_HERE.azurecr.io/falcon-40b-instruct:TAG_HERE  # REPO_HERE / TAG_HERE are placeholders to substitute before applying
+          command:
+            - /bin/sh
+            - -c
+            - accelerate launch --config_file config.yaml --num_processes 1 --num_machines 1 --use_deepspeed --machine_rank 0 --gpu_ids all inference-api.py
+          livenessProbe:
+            httpGet:
+              path: /healthz
+              port: 5000
+            initialDelaySeconds: 600  # 10 min before the first liveness check
+            periodSeconds: 10
+          readinessProbe:
+            httpGet:
+              path: /healthz
+              port: 5000
+            initialDelaySeconds: 30  # readiness is checked from 30 s on, well before liveness starts
+            periodSeconds: 10
+          volumeMounts:
+            - name: dshm
+              mountPath: /dev/shm  # mounts the memory-backed volume declared under volumes
+      volumes:
+        - name: dshm
+          emptyDir:
+            medium: Memory  # emptyDir held in node memory rather than on disk
+      tolerations:  # lets the pod schedule onto nodes tainted sku=gpu or nvidia.com/gpu
+        - effect: NoSchedule
+          key: sku
+          operator: Equal
+          value: gpu
+        - effect: NoSchedule
+          key: nvidia.com/gpu
+          operator: Exists
+      nodeSelector:
+        pool: n40binstruct  # NOTE(review): must equal a node label set where the node pool is created (not visible here) - confirm
diff --git a/presets/k8s/falcon-40b/falcon-40b-statefulset.yaml b/presets/k8s/falcon-40b/falcon-40b-statefulset.yaml
new file mode 100644
index 000000000..c213867df
--- /dev/null
+++ b/presets/k8s/falcon-40b/falcon-40b-statefulset.yaml
@@ -0,0 +1,51 @@
+apiVersion: apps/v1
+kind: StatefulSet  # single-replica workload running the falcon-40b inference container
+metadata:
+  name: falcon-40b
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: falcon  # NOTE(review): the falcon-40b-instruct StatefulSet uses this same label; selectors overlap if both share a namespace
+  podManagementPolicy: Parallel
+  template:
+    metadata:
+      labels:
+        app: falcon
+    spec:
+      containers:
+        - name: falcon-container
+          image: REPO_HERE.azurecr.io/falcon-40b:TAG_HERE  # REPO_HERE / TAG_HERE are placeholders to substitute before applying
+          command:
+            - /bin/sh
+            - -c
+            - accelerate launch --config_file config.yaml --num_processes 1 --num_machines 1 --use_deepspeed --machine_rank 0 --gpu_ids all inference-api.py
+          livenessProbe:
+            httpGet:
+              path: /healthz
+              port: 5000
+            initialDelaySeconds: 600  # 10 min before the first liveness check
+            periodSeconds: 10
+          readinessProbe:
+            httpGet:
+              path: /healthz
+              port: 5000
+            initialDelaySeconds: 30  # readiness is checked from 30 s on, well before liveness starts
+            periodSeconds: 10
+          volumeMounts:
+            - name: dshm
+              mountPath: /dev/shm  # mounts the memory-backed volume declared under volumes
+      volumes:
+        - name: dshm
+          emptyDir:
+            medium: Memory  # emptyDir held in node memory rather than on disk
+      tolerations:  # lets the pod schedule onto nodes tainted sku=gpu or nvidia.com/gpu
+        - effect: NoSchedule
+          key: sku
+          operator: Equal
+          value: gpu
+        - effect: NoSchedule
+          key: nvidia.com/gpu
+          operator: Exists
+      nodeSelector:
+        pool: falcon40b  # NOTE(review): must equal a node label set where the node pool is created (not visible here) - confirm
diff --git a/presets/k8s/falcon-40b/falcon-40b-service.yaml b/presets/k8s/falcon-40b/falcon-40b-service.yaml
new file mode 100644
index 000000000..599f70ca5
--- /dev/null
+++ b/presets/k8s/falcon-40b/falcon-40b-service.yaml
@@ -0,0 +1,14 @@
+apiVersion: v1
+kind: Service  # exposes the falcon-40b inference pod through a load balancer
+metadata:
+  name: falcon-40b
+spec:
+  selector:
+    app: falcon
+    statefulset.kubernetes.io/pod-name: falcon-40b-0  # narrows the shared app label to pod 0 of the falcon-40b StatefulSet
+  ports:
+    - protocol: TCP
+      port: 80  # port clients connect to on the Service
+      targetPort: 5000  # container port; matches the port probed in the StatefulSet manifest
+  type: LoadBalancer
+  publishNotReadyAddresses: true  # endpoints are published even while the pod is not Ready