chore: rename web_example_chat_completion.py to inference-api.py

kaito-project · Oct 20, 2023 · 3706150
1 parent beec436
commit 3706150
Show file tree

Hide file tree

Showing 7 changed files with 6 additions and 6 deletions.
diff --git a/pkg/inference/preset-inference-types.go b/pkg/inference/preset-inference-types.go
@@ -40,8 +40,8 @@ var (
 	baseCommandPresetLlama2AChat = fmt.Sprintf("cd /workspace/llama/%s && torchrun", kaitov1alpha1.PresetLlama2AChat)
 	baseCommandPresetLlama2BChat = fmt.Sprintf("cd /workspace/llama/%s && torchrun", kaitov1alpha1.PresetLlama2BChat)
 	baseCommandPresetLlama2CChat = fmt.Sprintf("cd /workspace/llama/%s && torchrun", kaitov1alpha1.PresetLlama2CChat)
-	// llamaTextInferenceFile       = "web_example_text_completion.py" TODO: To support Text Generation Llama Models
-	llamaChatInferenceFile = "web_example_chat_completion.py"
+	// llamaTextInferenceFile       = "inference-api.py" TODO: To support Text Generation Llama Models
+	llamaChatInferenceFile = "inference-api.py"
 	llamaRunParams         = map[string]string{
 		"max_seq_len":    "512",
 		"max_batch_size": "8",

diff --git a/presets/examples/llama-2-13b-chat/llama-2-13b-chat-statefulset.yaml b/presets/examples/llama-2-13b-chat/llama-2-13b-chat-statefulset.yaml
@@ -35,7 +35,7 @@ spec:
             - |
               echo "MASTER_ADDR: $MASTER_ADDR"
               NODE_RANK=$(echo $HOSTNAME | grep -o '[^-]*$')
-              cd /workspace/llama/llama-2 && torchrun --nnodes 2 --nproc_per_node 1 --node_rank $NODE_RANK --master_addr $MASTER_ADDR --master_port 29500 web_example_chat_completion.py
+              cd /workspace/llama/llama-2 && torchrun --nnodes 2 --nproc_per_node 1 --node_rank $NODE_RANK --master_addr $MASTER_ADDR --master_port 29500 inference-api.py
           resources:
             limits:
               nvidia.com/gpu: "1"

diff --git a/presets/examples/llama-2-13b/llama-2-13b-statefulset.yaml b/presets/examples/llama-2-13b/llama-2-13b-statefulset.yaml
@@ -35,7 +35,7 @@ spec:
             - |
               echo "MASTER_ADDR: $MASTER_ADDR"
               NODE_RANK=$(echo $HOSTNAME | grep -o '[^-]*$')
-              cd /workspace/llama/llama-2 && torchrun --nnodes 2 --nproc_per_node 1 --node_rank $NODE_RANK --master-addr $MASTER_ADDR --master-port 29500 web_example_text_completion.py
+              cd /workspace/llama/llama-2 && torchrun --nnodes 2 --nproc_per_node 1 --node_rank $NODE_RANK --master-addr $MASTER_ADDR --master-port 29500 inference-api.py
           resources:
             limits:
               nvidia.com/gpu: "1"

diff --git a/presets/examples/llama-2-7b-chat/llama-2-7b-chat-statefulset.yaml b/presets/examples/llama-2-7b-chat/llama-2-7b-chat-statefulset.yaml
@@ -19,7 +19,7 @@ spec:
           command:
             - /bin/sh
             - -c
-            - cd /workspace/llama/llama-2 && torchrun web_example_chat_completion.py
+            - cd /workspace/llama/llama-2 && torchrun inference-api.py
           resources:
             limits:
               nvidia.com/gpu: "1"

diff --git a/presets/examples/llama-2-7b/llama-2-7b-statefulset.yaml b/presets/examples/llama-2-7b/llama-2-7b-statefulset.yaml
@@ -19,7 +19,7 @@ spec:
           command:
             - /bin/sh
             - -c
-            - cd /workspace/llama/llama-2 && torchrun web_example_text_completion.py
+            - cd /workspace/llama/llama-2 && torchrun inference-api.py
           resources:
             limits:
               nvidia.com/gpu: "1"

diff --git a/presets/llama-2-chat/web_example_chat_completion.py b/presets/llama-2-chat/inference-api.py
diff --git a/presets/llama-2/web_example_text_completion.py b/presets/llama-2/inference-api.py