Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Add model version #341

Open
wants to merge 24 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
624f479
Initial Dockerfile and fastapi implementation
etredal Mar 12, 2024
fcb9dc6
Merge branch 'Azure:main' into etredal/AddModelVersion
etredal Mar 13, 2024
646aba4
Merge branch 'Azure:main' into etredal/AddModelVersion
etredal Mar 13, 2024
484c0ad
Rename, add constants, template update
etredal Mar 13, 2024
6941ea5
Merge branch 'etredal/AddModelVersion' of https://github.com/etredal/…
etredal Mar 13, 2024
a8dcafd
Merge branch 'main' into etredal/AddModelVersion
etredal Mar 13, 2024
031735d
Fix formatting
etredal Mar 14, 2024
bf79780
Merge branch 'etredal/AddModelVersion' of https://github.com/etredal/…
etredal Mar 14, 2024
62edd96
Merge branch 'Azure:main' into etredal/AddModelVersion
etredal Mar 14, 2024
afc7030
Version and endpoint
etredal Mar 14, 2024
199d699
Version updates
etredal Mar 14, 2024
e85ee06
Resync
etredal Mar 14, 2024
f298777
Phi2
etredal Mar 14, 2024
478f68b
Version
etredal Mar 20, 2024
60bada8
Versioning fixes
etredal Mar 27, 2024
29f14e0
Adding MODEL_VERSION into .txt file
etredal Mar 31, 2024
fcc19a1
MODEL VERSION HASH
etredal Apr 3, 2024
7582ee5
Get Hash
etredal Apr 4, 2024
3cff917
Merge branch 'etredal-etredal/AddModelVersion'
etredal Apr 4, 2024
cf897d2
Merge branch 'main' of https://github.com/Azure/kaito
etredal Apr 4, 2024
9db1a04
Version comments
etredal Apr 4, 2024
bf015c0
fix: Checkout Evans awesome fork
ishaansehgal99 Apr 5, 2024
6b88a93
fix: Checkout Evans awesome fork
ishaansehgal99 Apr 5, 2024
81ce9c9
feat: Document version endpoint
ishaansehgal99 Apr 5, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .github/workflows/e2e-preset-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ jobs:
run: |
PR_BRANCH=${{ env.BRANCH_NAME }} \
FORCE_RUN_ALL=${{ env.FORCE_RUN_ALL }} \
PR_REPO_URL=${{ github.event.pull_request.head.repo.clone_url }} \
python3 .github/workflows/kind-cluster/determine_models.py

- name: Print Determined Models
Expand Down Expand Up @@ -274,6 +275,11 @@ jobs:
if: steps.check_test_image.outputs.IMAGE_EXISTS == 'true' && (steps.check_prod_image.outputs.IMAGE_EXISTS == 'false' || env.FORCE_RUN_ALL == 'true')
run: |
curl http://${{ steps.get_ip.outputs.SERVICE_IP }}:80/healthz

- name: Test version endpoint
if: steps.check_test_image.outputs.IMAGE_EXISTS == 'true' && (steps.check_prod_image.outputs.IMAGE_EXISTS == 'false' || env.FORCE_RUN_ALL == 'true')
run: |
curl http://${{ steps.get_ip.outputs.SERVICE_IP }}:80/version

- name: Test inference endpoint
if: steps.check_test_image.outputs.IMAGE_EXISTS == 'true' && (steps.check_prod_image.outputs.IMAGE_EXISTS == 'false' || env.FORCE_RUN_ALL == 'true')
Expand Down
14 changes: 11 additions & 3 deletions .github/workflows/kind-cluster/determine_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ def models_to_build(files_changed):
seen_model_types.add(model_info["type"])
return list(models)

def check_modified_models(pr_branch):
def check_modified_models(pr_branch, pr_repo_url):
"""Check for modified models in the repository."""
repo_dir = Path.cwd() / "repo"

Expand All @@ -102,7 +102,14 @@ def check_modified_models(pr_branch):

run_command("git checkout --detach")
run_command("git fetch origin main:main")
run_command(f"git fetch origin {pr_branch}:{pr_branch}")

fetch_command = f"git fetch origin {pr_branch}:{pr_branch}"
if pr_repo_url != KAITO_REPO_URL:
# Add the PR's repo as a new remote only if it's different from the main repo
run_command("git remote add pr_repo {}".format(pr_repo_url))
fetch_command = f"git fetch pr_repo {pr_branch}"

run_command(fetch_command)
run_command(f"git checkout {pr_branch}")

files = run_command("git diff --name-only origin/main") # Returns each file on newline
Expand All @@ -118,14 +125,15 @@ def check_modified_models(pr_branch):
def main():
pr_branch = os.environ.get("PR_BRANCH", "main") # If not specified default to 'main'
force_run_all = os.environ.get("FORCE_RUN_ALL", "false") # If not specified default to False
pr_repo_url = os.environ.get("PR_REPO_URL", KAITO_REPO_URL)

affected_models = []
if force_run_all != "false":
affected_models = [model['name'] for model in YAML_PR['models']]
else:
# Logic to determine affected models
# Example: affected_models = ['model1', 'model2', 'model3']
affected_models = check_modified_models(pr_branch)
affected_models = check_modified_models(pr_branch, pr_repo_url)

# Convert the list of models into JSON matrix format
matrix = create_matrix(affected_models)
Expand Down
2 changes: 2 additions & 0 deletions .github/workflows/kind-cluster/docker-job-template.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,8 @@ spec:
--build-arg WEIGHTS_PATH=/weights \
--build-arg VERSION={{VERSION}} \
--build-arg MODEL_TYPE={{MODEL_TYPE}} \
--build-arg IMAGE_NAME={{IMAGE_NAME}} \
--build-arg MODEL_VERSION={{MODEL_VERSION}} \
-f $DOCKERFILE_PATH /
docker push $ACR_NAME.azurecr.io/{{IMAGE_NAME}}:$VERSION
env:
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/preset-image-build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ jobs:
run: |
PR_BRANCH=${{ env.BRANCH_NAME }} \
FORCE_RUN_ALL=${{ env.FORCE_RUN_ALL }} \
PR_REPO_URL=${{ github.event.pull_request.head.repo.clone_url }} \
python3 .github/workflows/kind-cluster/determine_models.py

- name: Print Determined Models
Expand Down
10 changes: 8 additions & 2 deletions docker/presets/inference/llama-2/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
# --build-arg WEIGHTS_PATH=/weights \
# --build-arg VERSION={{VERSION}} \
# --build-arg MODEL_TYPE={{MODEL_TYPE}} \
# --build-arg IMAGE_NAME={{IMAGE_NAME}} \
# --build-arg MODEL_VERSION={{MODEL_VERSION}} \

FROM python:3.8-slim
WORKDIR /workspace
Expand All @@ -26,8 +28,12 @@ RUN pip install 'uvicorn[standard]'
ARG WEIGHTS_PATH
ARG MODEL_TYPE
ARG VERSION
# Write the version to a file
RUN echo $VERSION > /workspace/llama/version.txt
ARG IMAGE_NAME
ARG MODEL_VERSION

# Write metadata to model_info.json file
RUN MODEL_VERSION_HASH="${MODEL_VERSION##*/}" && \
echo "{\"Model Type\": \"$MODEL_TYPE\", \"Version\": \"$VERSION\", \"Image Name\": \"$IMAGE_NAME\", \"Model Version URL\": \"$MODEL_VERSION\", \"REVISION_ID\": \"$MODEL_VERSION_HASH\"}" > /workspace/llama/model_info.json

ADD ${WEIGHTS_PATH} /workspace/llama/llama-2/weights
ADD kaito/presets/inference/${MODEL_TYPE} /workspace/llama/llama-2
7 changes: 5 additions & 2 deletions docker/presets/inference/tfs-onnx/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,15 @@ FROM mcr.microsoft.com/aifx/acpt/stable-ubuntu2004-cu118-py38-torch211
ARG WEIGHTS_PATH
ARG MODEL_TYPE
ARG VERSION
ARG IMAGE_NAME
ARG MODEL_VERSION

# Set the working directory
WORKDIR /workspace/tfs

# Write the version to a file
RUN echo $VERSION > /workspace/tfs/version.txt
# Write metadata to model_info.json file
RUN MODEL_VERSION_HASH="${MODEL_VERSION##*/}" && \
echo "{\"Model Type\": \"$MODEL_TYPE\", \"Version\": \"$VERSION\", \"Image Name\": \"$IMAGE_NAME\", \"Model Version URL\": \"$MODEL_VERSION\", \"REVISION_ID\": \"$MODEL_VERSION_HASH\"}" > /workspace/tfs/model_info.json

# First, copy just the requirements.txt file and install dependencies
# This is done before copying the code to utilize Docker's layer caching and
Expand Down
7 changes: 5 additions & 2 deletions docker/presets/inference/tfs/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,15 @@ FROM python:3.10-slim
ARG WEIGHTS_PATH
ARG MODEL_TYPE
ARG VERSION
ARG IMAGE_NAME
ARG MODEL_VERSION

# Set the working directory
WORKDIR /workspace/tfs

# Write the version to a file
RUN echo $VERSION > /workspace/tfs/version.txt
# Write metadata to model_info.json file
RUN MODEL_VERSION_HASH="${MODEL_VERSION##*/}" && \
echo "{\"Model Type\": \"$MODEL_TYPE\", \"Version\": \"$VERSION\", \"Image Name\": \"$IMAGE_NAME\", \"Model Version URL\": \"$MODEL_VERSION\", \"REVISION_ID\": \"$MODEL_VERSION_HASH\"}" > /workspace/tfs/model_info.json

# First, copy just the preset files and install dependencies
# This is done before copying the code to utilize Docker's layer caching and
Expand Down
11 changes: 11 additions & 0 deletions presets/inference/llama2-chat/inference_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import signal
import sys
import threading
import json
from typing import Optional

import GPUtil
Expand All @@ -18,6 +19,9 @@
from llama import Llama
from pydantic import BaseModel

# Constants
MODEL_INFO = "model_info.json"

# Setup argparse
parser = argparse.ArgumentParser(description="Llama API server.")
parser.add_argument("--ckpt_dir", default="weights/", help="Checkpoint directory.")
Expand Down Expand Up @@ -191,6 +195,13 @@ def get_metrics():
except Exception as e:
return {"error": str(e)}

@app_main.get("/version")
def get_version():
    """Return the model metadata baked into the image at build time.

    Reads /workspace/llama/model_info.json (written by the Dockerfile) and
    returns its parsed contents — model type, version, image name, model
    version URL, and revision hash — as the JSON response body.
    """
    # Explicit UTF-8: the metadata file is JSON written at image build time;
    # don't rely on the container's locale-dependent default encoding.
    with open(f"/workspace/llama/{MODEL_INFO}", "r", encoding="utf-8") as f:
        model_info = json.load(f)

    return model_info

def setup_worker_routes():
@app_worker.get("/healthz")
def health_check():
Expand Down
11 changes: 11 additions & 0 deletions presets/inference/llama2-completion/inference_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import signal
import sys
import threading
import json
from typing import Optional

import GPUtil
Expand All @@ -18,6 +19,9 @@
from llama import Llama
from pydantic import BaseModel

# Constants
MODEL_INFO = "model_info.json"

# Setup argparse
parser = argparse.ArgumentParser(description="Llama API server.")
parser.add_argument("--ckpt_dir", default="weights/", help="Checkpoint directory.")
Expand Down Expand Up @@ -180,6 +184,13 @@ def get_metrics():
except Exception as e:
return {"error": str(e)}

@app_main.get("/version")
def get_version():
    """Return the model metadata baked into the image at build time.

    Reads /workspace/tfs/model_info.json (written by the Dockerfile) and
    returns its parsed contents — model type, version, image name, model
    version URL, and revision hash — as the JSON response body.
    """
    # Explicit UTF-8: the metadata file is JSON written at image build time;
    # don't rely on the container's locale-dependent default encoding.
    with open(f"/workspace/tfs/{MODEL_INFO}", "r", encoding="utf-8") as f:
        model_info = json.load(f)

    return model_info

def setup_worker_routes():
@app_worker.get("/healthz")
def health_check():
Expand Down
Loading
Loading