diff --git a/.github/workflows/e2e-preset-test.yml b/.github/workflows/e2e-preset-test.yml
index 06dd5ac3b..f89d1660c 100644
--- a/.github/workflows/e2e-preset-test.yml
+++ b/.github/workflows/e2e-preset-test.yml
@@ -48,6 +48,7 @@ jobs:
         run: |
           PR_BRANCH=${{ env.BRANCH_NAME }} \
           FORCE_RUN_ALL=${{ env.FORCE_RUN_ALL }} \
+          PR_REPO_URL=${{ github.event.pull_request.head.repo.clone_url }} \
           python3 .github/workflows/kind-cluster/determine_models.py
 
       - name: Print Determined Models
@@ -274,6 +275,11 @@ jobs:
         if: steps.check_test_image.outputs.IMAGE_EXISTS == 'true' && (steps.check_prod_image.outputs.IMAGE_EXISTS == 'false' || env.FORCE_RUN_ALL == 'true')
         run: |
           curl http://${{ steps.get_ip.outputs.SERVICE_IP }}:80/healthz
+
+      - name: Test version endpoint
+        if: steps.check_test_image.outputs.IMAGE_EXISTS == 'true' && (steps.check_prod_image.outputs.IMAGE_EXISTS == 'false' || env.FORCE_RUN_ALL == 'true')
+        run: |
+          curl http://${{ steps.get_ip.outputs.SERVICE_IP }}:80/version
 
       - name: Test inference endpoint
         if: steps.check_test_image.outputs.IMAGE_EXISTS == 'true' && (steps.check_prod_image.outputs.IMAGE_EXISTS == 'false' || env.FORCE_RUN_ALL == 'true')
diff --git a/.github/workflows/kind-cluster/determine_models.py b/.github/workflows/kind-cluster/determine_models.py
index 5ace3ba63..18b5773e1 100644
--- a/.github/workflows/kind-cluster/determine_models.py
+++ b/.github/workflows/kind-cluster/determine_models.py
@@ -90,7 +90,7 @@ def models_to_build(files_changed):
             seen_model_types.add(model_info["type"])
     return list(models)
 
-def check_modified_models(pr_branch):
+def check_modified_models(pr_branch, pr_repo_url):
     """Check for modified models in the repository."""
     repo_dir = Path.cwd() / "repo"
 
@@ -102,7 +102,14 @@
     run_command("git checkout --detach")
     run_command("git fetch origin main:main")
-    run_command(f"git fetch origin {pr_branch}:{pr_branch}")
+
+    fetch_command = f"git fetch origin {pr_branch}:{pr_branch}"
+    if pr_repo_url != KAITO_REPO_URL:
+        # Add the PR's repo as a new remote only if it's different from the main repo
+        run_command(f"git remote add pr_repo {pr_repo_url}")
+        fetch_command = f"git fetch pr_repo {pr_branch}"
+
+    run_command(fetch_command)
     run_command(f"git checkout {pr_branch}")
 
     files = run_command("git diff --name-only origin/main") # Returns each file on newline
@@ -118,6 +125,7 @@
 def main():
     pr_branch = os.environ.get("PR_BRANCH", "main") # If not specified default to 'main'
     force_run_all = os.environ.get("FORCE_RUN_ALL", "false") # If not specified default to False
+    pr_repo_url = os.environ.get("PR_REPO_URL", KAITO_REPO_URL)
 
     affected_models = []
     if force_run_all != "false":
@@ -125,7 +133,7 @@
     else:
         # Logic to determine affected models
        # Example: affected_models = ['model1', 'model2', 'model3']
-        affected_models = check_modified_models(pr_branch)
+        affected_models = check_modified_models(pr_branch, pr_repo_url)
 
     # Convert the list of models into JSON matrix format
     matrix = create_matrix(affected_models)
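The new `PR_REPO_URL` input closes a gap for forked PRs: `github.event.pull_request.head.repo.clone_url` resolves to the contributor's fork, so the branch cannot always be fetched from `origin`. A standalone sketch of the resulting fetch strategy (the `KAITO_REPO_URL` value here is an assumption mirroring the constant already defined in the script):

```python
import os

# Assumed to mirror the constant defined in determine_models.py.
KAITO_REPO_URL = "https://github.com/Azure/kaito.git"

def plan_fetch(pr_branch: str, pr_repo_url: str) -> list:
    """Return the git commands the script would run for this PR."""
    if pr_repo_url != KAITO_REPO_URL:
        # Forked PR: the branch only exists on the contributor's repo,
        # so register the fork as a remote and fetch from it.
        return [
            f"git remote add pr_repo {pr_repo_url}",
            f"git fetch pr_repo {pr_branch}",
        ]
    # Same-repo PR: fetch the branch straight from origin.
    return [f"git fetch origin {pr_branch}:{pr_branch}"]

# PR_REPO_URL falls back to the upstream URL, so a missing variable
# behaves exactly like a same-repo PR.
pr_repo_url = os.environ.get("PR_REPO_URL", KAITO_REPO_URL)
print(plan_fetch(os.environ.get("PR_BRANCH", "main"), pr_repo_url))
```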
diff --git a/.github/workflows/kind-cluster/docker-job-template.yaml b/.github/workflows/kind-cluster/docker-job-template.yaml
index a19860f88..99954233f 100644
--- a/.github/workflows/kind-cluster/docker-job-template.yaml
+++ b/.github/workflows/kind-cluster/docker-job-template.yaml
@@ -43,6 +43,8 @@ spec:
             --build-arg WEIGHTS_PATH=/weights \
             --build-arg VERSION={{VERSION}} \
             --build-arg MODEL_TYPE={{MODEL_TYPE}} \
+            --build-arg IMAGE_NAME={{IMAGE_NAME}} \
+            --build-arg MODEL_VERSION={{MODEL_VERSION}} \
             -f $DOCKERFILE_PATH /
           docker push $ACR_NAME.azurecr.io/{{IMAGE_NAME}}:$VERSION
       env:
diff --git a/.github/workflows/preset-image-build.yml b/.github/workflows/preset-image-build.yml
index 5cdb8f98e..a5f100560 100644
--- a/.github/workflows/preset-image-build.yml
+++ b/.github/workflows/preset-image-build.yml
@@ -55,6 +55,7 @@ jobs:
         run: |
           PR_BRANCH=${{ env.BRANCH_NAME }} \
           FORCE_RUN_ALL=${{ env.FORCE_RUN_ALL }} \
+          PR_REPO_URL=${{ github.event.pull_request.head.repo.clone_url }} \
           python3 .github/workflows/kind-cluster/determine_models.py
 
       - name: Print Determined Models
diff --git a/docker/presets/inference/llama-2/Dockerfile b/docker/presets/inference/llama-2/Dockerfile
index 285cb122a..4d85753d7 100644
--- a/docker/presets/inference/llama-2/Dockerfile
+++ b/docker/presets/inference/llama-2/Dockerfile
@@ -3,6 +3,8 @@
 # --build-arg WEIGHTS_PATH=/weights \
 # --build-arg VERSION={{VERSION}} \
 # --build-arg MODEL_TYPE={{MODEL_TYPE}} \
+# --build-arg IMAGE_NAME={{IMAGE_NAME}} \
+# --build-arg MODEL_VERSION={{MODEL_VERSION}} \
 FROM python:3.8-slim
 
 WORKDIR /workspace
@@ -26,8 +28,12 @@ RUN pip install 'uvicorn[standard]'
 ARG WEIGHTS_PATH
 ARG MODEL_TYPE
 ARG VERSION
-# Write the version to a file
-RUN echo $VERSION > /workspace/llama/version.txt
+ARG IMAGE_NAME
+ARG MODEL_VERSION
+
+# Write metadata to model_info.json file
+RUN MODEL_VERSION_HASH="${MODEL_VERSION##*/}" && \
+    echo "{\"Model Type\": \"$MODEL_TYPE\", \"Version\": \"$VERSION\", \"Image Name\": \"$IMAGE_NAME\", \"Model Version URL\": \"$MODEL_VERSION\", \"REVISION_ID\": \"$MODEL_VERSION_HASH\"}" > /workspace/llama/model_info.json
 
 ADD ${WEIGHTS_PATH} /workspace/llama/llama-2/weights
 ADD kaito/presets/inference/${MODEL_TYPE} /workspace/llama/llama-2
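The metadata write above (repeated in the tfs Dockerfiles below) relies on shell suffix stripping: `${MODEL_VERSION##*/}` drops everything through the last `/`, leaving the trailing commit hash of the HuggingFace URL. A sketch of the equivalent logic with illustrative values (all build args below are hypothetical; the URL format matches the `version` fields in supported_models.yaml):

```python
import json

# Hypothetical build args, mirroring what the CI job would pass in.
MODEL_TYPE = "text-generation"
VERSION = "0.0.5"
IMAGE_NAME = "falcon-7b"
MODEL_VERSION = "https://huggingface.co/tiiuae/falcon-7b/commit/898df1396f35e447d5fe44e0a3ccaaaa69f30d36"

# Shell's ${MODEL_VERSION##*/} strips everything up to the last '/',
# leaving the commit hash; rpartition does the same in Python.
model_version_hash = MODEL_VERSION.rpartition("/")[2]

model_info = {
    "Model Type": MODEL_TYPE,
    "Version": VERSION,
    "Image Name": IMAGE_NAME,
    "Model Version URL": MODEL_VERSION,
    "REVISION_ID": model_version_hash,
}
print(json.dumps(model_info, indent=2))
```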
diff --git a/docker/presets/inference/tfs-onnx/Dockerfile b/docker/presets/inference/tfs-onnx/Dockerfile
index 12e788346..8fdfc7440 100644
--- a/docker/presets/inference/tfs-onnx/Dockerfile
+++ b/docker/presets/inference/tfs-onnx/Dockerfile
@@ -4,12 +4,15 @@ FROM mcr.microsoft.com/aifx/acpt/stable-ubuntu2004-cu118-py38-torch211
 ARG WEIGHTS_PATH
 ARG MODEL_TYPE
 ARG VERSION
+ARG IMAGE_NAME
+ARG MODEL_VERSION
 
 # Set the working directory
 WORKDIR /workspace/tfs
 
-# Write the version to a file
-RUN echo $VERSION > /workspace/tfs/version.txt
+# Write metadata to model_info.json file
+RUN MODEL_VERSION_HASH="${MODEL_VERSION##*/}" && \
+    echo "{\"Model Type\": \"$MODEL_TYPE\", \"Version\": \"$VERSION\", \"Image Name\": \"$IMAGE_NAME\", \"Model Version URL\": \"$MODEL_VERSION\", \"REVISION_ID\": \"$MODEL_VERSION_HASH\"}" > /workspace/tfs/model_info.json
 
 # First, copy just the requirements.txt file and install dependencies
 # This is done before copying the code to utilize Docker's layer caching and
diff --git a/docker/presets/inference/tfs/Dockerfile b/docker/presets/inference/tfs/Dockerfile
index 5a322b8bd..863e40728 100644
--- a/docker/presets/inference/tfs/Dockerfile
+++ b/docker/presets/inference/tfs/Dockerfile
@@ -3,12 +3,15 @@ FROM python:3.10-slim
 ARG WEIGHTS_PATH
 ARG MODEL_TYPE
 ARG VERSION
+ARG IMAGE_NAME
+ARG MODEL_VERSION
 
 # Set the working directory
 WORKDIR /workspace/tfs
 
-# Write the version to a file
-RUN echo $VERSION > /workspace/tfs/version.txt
+# Write metadata to model_info.json file
+RUN MODEL_VERSION_HASH="${MODEL_VERSION##*/}" && \
+    echo "{\"Model Type\": \"$MODEL_TYPE\", \"Version\": \"$VERSION\", \"Image Name\": \"$IMAGE_NAME\", \"Model Version URL\": \"$MODEL_VERSION\", \"REVISION_ID\": \"$MODEL_VERSION_HASH\"}" > /workspace/tfs/model_info.json
 
 # First, copy just the preset files and install dependencies
 # This is done before copying the code to utilize Docker's layer caching and
diff --git a/presets/inference/llama2-chat/inference_api.py b/presets/inference/llama2-chat/inference_api.py
index 11776bf3d..a91786e53 100644
--- a/presets/inference/llama2-chat/inference_api.py
+++ b/presets/inference/llama2-chat/inference_api.py
@@ -8,6 +8,7 @@
 import signal
 import sys
 import threading
+import json
 from typing import Optional
 
 import GPUtil
@@ -18,6 +19,9 @@
 from llama import Llama
 from pydantic import BaseModel
 
+# Constants
+MODEL_INFO = "model_info.json"
+
 # Setup argparse
 parser = argparse.ArgumentParser(description="Llama API server.")
 parser.add_argument("--ckpt_dir", default="weights/", help="Checkpoint directory.")
@@ -191,6 +195,13 @@ def get_metrics():
         except Exception as e:
             return {"error": str(e)}
 
+    @app_main.get("/version")
+    def get_version():
+        with open(f"/workspace/llama/{MODEL_INFO}", "r") as f:
+            model_info = json.load(f)
+
+        return model_info
+
     def setup_worker_routes():
         @app_worker.get("/healthz")
         def health_check():
diff --git a/presets/inference/llama2-completion/inference_api.py b/presets/inference/llama2-completion/inference_api.py
index cf500146a..f29ba91e1 100644
--- a/presets/inference/llama2-completion/inference_api.py
+++ b/presets/inference/llama2-completion/inference_api.py
@@ -8,6 +8,7 @@
 import signal
 import sys
 import threading
+import json
 from typing import Optional
 
 import GPUtil
@@ -18,6 +19,9 @@
 from llama import Llama
 from pydantic import BaseModel
 
+# Constants
+MODEL_INFO = "model_info.json"
+
 # Setup argparse
 parser = argparse.ArgumentParser(description="Llama API server.")
 parser.add_argument("--ckpt_dir", default="weights/", help="Checkpoint directory.")
@@ -180,6 +184,13 @@ def get_metrics():
         except Exception as e:
             return {"error": str(e)}
 
+    @app_main.get("/version")
+    def get_version():
+        with open(f"/workspace/llama/{MODEL_INFO}", "r") as f:
+            model_info = json.load(f)
+
+        return model_info
+
     def setup_worker_routes():
         @app_worker.get("/healthz")
         def health_check():
diff --git a/presets/inference/text-generation/api_spec.json b/presets/inference/text-generation/api_spec.json
index 480fa97e4..8cdb9c16d 100644
--- a/presets/inference/text-generation/api_spec.json
+++ b/presets/inference/text-generation/api_spec.json
@@ -1,599 +1,658 @@
 {
   "openapi": "3.1.0",
   "info": {
-        "title": "FastAPI",
-        "version": "0.1.0"
+    "title": "FastAPI",
+    "version": "0.1.0"
   },
   "paths": {
-        "/": {
-            "get": {
-                "summary": "Home Endpoint",
-                "description": "A simple endpoint that indicates the server is running.\nNo parameters are required. 
Returns a message indicating the server status.", + "operationId": "home__get", + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HomeResponse" + } } + } } - }, - "/healthz": { - "get": { - "summary": "Health Check Endpoint", - "operationId": "health_check_healthz_get", - "responses": { - "200": { - "description": "Successful Response", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/HealthStatus" - }, - "example": { - "status": "Healthy" - } - } - } - }, - "500": { - "description": "Error Response", - "content": { - "application/json": { - "examples": { - "model_uninitialized": { - "summary": "Model not initialized", - "value": { - "detail": "Model not initialized" - } - }, - "pipeline_uninitialized": { - "summary": "Pipeline not initialized", - "value": { - "detail": "Pipeline not initialized" - } - } - } - } - } + } + } + }, + "/healthz": { + "get": { + "summary": "Health Check Endpoint", + "operationId": "health_check_healthz_get", + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HealthStatus" + }, + "example": { + "status": "Healthy" + } + } + } + }, + "500": { + "description": "Error Response", + "content": { + "application/json": { + "examples": { + "model_uninitialized": { + "summary": "Model not initialized", + "value": { + "detail": "Model not initialized" + } + }, + "pipeline_uninitialized": { + "summary": "Pipeline not initialized", + "value": { + "detail": "Pipeline not initialized" + } } + } } + } } - }, - "/chat": { - "post": { - "summary": "Chat Endpoint", - "description": "Processes chat requests, generating text based on the specified pipeline (text generation or conversational).\nValidates required parameters based on the pipeline and returns the generated text.", - "operationId": "generate_text_chat_post", - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/UnifiedRequestModel" - }, - "examples": { - "text_generation_example": { - "summary": "Text Generation Example", - "description": "An example of a text generation request.", - "value": { - "prompt": "Tell me a joke", - "return_full_text": true, - "clean_up_tokenization_spaces": false, - "generate_kwargs": { - "max_length": 200, - "min_length": 0, - "do_sample": true, - "early_stopping": false, - "num_beams": 1, - "temperature": 1, - "top_k": 10, - "top_p": 1, - "typical_p": 1, - "repetition_penalty": 1, - "eos_token_id": 11 - } - } - }, - "conversation_example": { - "summary": "Conversation Example", - "description": "An example of a conversational request.", - "value": { - "messages": [ - { - "role": "user", - "content": "What is your favourite condiment?" - }, - { - "role": "assistant", - "content": "Well, im quite partial to a good squeeze of fresh lemon juice. It adds just the right amount of zesty flavour to whatever im cooking up in the kitchen!" - }, - { - "role": "user", - "content": "Do you have mayonnaise recipes?" 
- } - ], - "return_full_text": true, - "clean_up_tokenization_spaces": false, - "generate_kwargs": { - "max_length": 200, - "min_length": 0, - "do_sample": true, - "early_stopping": false, - "num_beams": 1, - "temperature": 1, - "top_k": 10, - "top_p": 1, - "typical_p": 1, - "repetition_penalty": 1, - "eos_token_id": 11 - } - } - } - } - } - }, - "required": true + } + } + }, + "/chat": { + "post": { + "summary": "Chat Endpoint", + "description": "Processes chat requests, generating text based on the specified pipeline (text generation or conversational).\nValidates required parameters based on the pipeline and returns the generated text.", + "operationId": "generate_text_chat_post", + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/UnifiedRequestModel" }, - "responses": { - "200": { - "description": "Successful Response", - "content": { - "application/json": { - "schema": {}, - "examples": { - "text_generation": { - "summary": "Text Generation Response", - "value": { - "Result": "Generated text based on the prompt." - } - }, - "conversation": { - "summary": "Conversation Response", - "value": { - "Result": "Response to the last message in the conversation." - } - } - } - } - } - }, - "400": { - "description": "Validation Error", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/ErrorResponse" - }, - "examples": { - "missing_prompt": { - "summary": "Missing Prompt", - "value": { - "detail": "Text generation parameter prompt required" - } - }, - "missing_messages": { - "summary": "Missing Messages", - "value": { - "detail": "Conversational parameter messages required" - } - } - } - } - } - }, - "422": { - "description": "Validation Error", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/HTTPValidationError" - } - } - } - }, - "500": { - "description": "Internal Server Error", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/ErrorResponse" - } - } + "examples": { + "text_generation_example": { + "summary": "Text Generation Example", + "description": "An example of a text generation request.", + "value": { + "prompt": "Tell me a joke", + "return_full_text": true, + "clean_up_tokenization_spaces": false, + "generate_kwargs": { + "max_length": 200, + "min_length": 0, + "do_sample": true, + "early_stopping": false, + "num_beams": 1, + "temperature": 1, + "top_k": 10, + "top_p": 1, + "typical_p": 1, + "repetition_penalty": 1, + "eos_token_id": 11 + } + } + }, + "conversation_example": { + "summary": "Conversation Example", + "description": "An example of a conversational request.", + "value": { + "messages": [ + { + "role": "user", + "content": "What is your favourite condiment?" + }, + { + "role": "assistant", + "content": "Well, im quite partial to a good squeeze of fresh lemon juice. It adds just the right amount of zesty flavour to whatever im cooking up in the kitchen!" + }, + { + "role": "user", + "content": "Do you have mayonnaise recipes?" 
} + ], + "return_full_text": true, + "clean_up_tokenization_spaces": false, + "generate_kwargs": { + "max_length": 200, + "min_length": 0, + "do_sample": true, + "early_stopping": false, + "num_beams": 1, + "temperature": 1, + "top_k": 10, + "top_p": 1, + "typical_p": 1, + "repetition_penalty": 1, + "eos_token_id": 11 + } + } + } + } + } + }, + "required": true + }, + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": {}, + "examples": { + "text_generation": { + "summary": "Text Generation Response", + "value": { + "Result": "Generated text based on the prompt." + } + }, + "conversation": { + "summary": "Conversation Response", + "value": { + "Result": "Response to the last message in the conversation." + } + } + } + } + } + }, + "400": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ErrorResponse" + }, + "examples": { + "missing_prompt": { + "summary": "Missing Prompt", + "value": { + "detail": "Text generation parameter prompt required" + } + }, + "missing_messages": { + "summary": "Missing Messages", + "value": { + "detail": "Conversational parameter messages required" + } } + } } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + }, + "500": { + "description": "Internal Server Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ErrorResponse" + } + } + } } - }, - "/metrics": { - "get": { - "summary": "Metrics Endpoint", - "description": "Provides system metrics, including GPU details if available, or CPU and memory usage otherwise.\nUseful for monitoring the resource utilization of the server running the ML models.", - "operationId": "get_metrics_metrics_get", - "responses": { - "200": { - "description": "Successful Response", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/MetricsResponse" - }, - "examples": { - "gpu_metrics": { - "summary": "Example when GPUs are available", - "value": { - "gpu_info": [ - { - "id": "GPU-1234", - "name": "GeForce GTX 950", - "load": "25.00%", - "temperature": "55 C", - "memory": { - "used": "1.00 GB", - "total": "2.00 GB" - } - } - ] - } - }, - "cpu_metrics": { - "summary": "Example when only CPU is available", - "value": { - "cpu_info": { - "load_percentage": 20, - "physical_cores": 4, - "total_cores": 8, - "memory": { - "used": "4.00 GB", - "total": "16.00 GB" - } - } - } - } - } - } - } - }, - "500": { - "description": "Internal Server Error", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/ErrorResponse" - } + } + } + }, + "/metrics": { + "get": { + "summary": "Metrics Endpoint", + "description": "Provides system metrics, including GPU details if available, or CPU and memory usage otherwise.\nUseful for monitoring the resource utilization of the server running the ML models.", + "operationId": "get_metrics_metrics_get", + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/MetricsResponse" + }, + "examples": { + "gpu_metrics": { + "summary": "Example when GPUs are available", + "value": { + "gpu_info": [ + { + "id": "GPU-1234", + "name": "GeForce GTX 950", + "load": "25.00%", + "temperature": "55 C", + "memory": { + "used": "1.00 GB", + "total": "2.00 GB" } + } + ] + } + }, 
+ "cpu_metrics": { + "summary": "Example when only CPU is available", + "value": { + "cpu_info": { + "load_percentage": 20, + "physical_cores": 4, + "total_cores": 8, + "memory": { + "used": "4.00 GB", + "total": "16.00 GB" + } } + } } + } + } + } + }, + "500": { + "description": "Internal Server Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ErrorResponse" + } } + } } + } } - }, - "components": { - "schemas": { - "CPUInfo": { - "properties": { - "load_percentage": { - "type": "number", - "title": "Load Percentage" - }, - "physical_cores": { - "type": "integer", - "title": "Physical Cores" - }, - "total_cores": { - "type": "integer", - "title": "Total Cores" - }, - "memory": { - "$ref": "#/components/schemas/MemoryInfo" + }, + "/version": { + "get": { + "summary": "Get Model Information", + "description": "Reads and returns model version information from a predefined JSON file.", + "operationId": "get_version_version_get", + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": {}, + "examples": { + "model_info": { + "summary": "Model Information Response", + "value": { + "Model Type": "Your Model Type", + "Version": "1.0.0", + "Image Name": "model_image_name", + "Model Version URL": "http://example.com/model/version", + "REVISION_ID": "revision_hash" + } } - }, - "type": "object", - "required": [ - "load_percentage", - "physical_cores", - "total_cores", - "memory" - ], - "title": "CPUInfo" - }, - "ErrorResponse": { - "properties": { - "detail": { - "type": "string", - "title": "Detail" - } - }, - "type": "object", - "required": [ - "detail" - ], - "title": "ErrorResponse" - }, - "GPUInfo": { - "properties": { - "id": { - "type": "string", - "title": "Id" - }, - "name": { - "type": "string", - "title": "Name" - }, - "load": { - "type": "string", - "title": "Load" - }, - "temperature": { - "type": "string", - "title": "Temperature" - }, - "memory": { - "$ref": "#/components/schemas/MemoryInfo" - } - }, - "type": "object", - "required": [ - "id", - "name", - "load", - "temperature", - "memory" - ], - "title": "GPUInfo" - }, - "GenerateKwargs": { - "properties": { - "max_length": { - "type": "integer", - "title": "Max Length", - "default": 200 - }, - "min_length": { - "type": "integer", - "title": "Min Length", - "default": 0 - }, - "do_sample": { - "type": "boolean", - "title": "Do Sample", - "default": true - }, - "early_stopping": { - "type": "boolean", - "title": "Early Stopping", - "default": false - }, - "num_beams": { - "type": "integer", - "title": "Num Beams", - "default": 1 - }, - "temperature": { - "type": "number", - "title": "Temperature", - "default": 1 - }, - "top_k": { - "type": "integer", - "title": "Top K", - "default": 10 - }, - "top_p": { - "type": "number", - "title": "Top P", - "default": 1 - }, - "typical_p": { - "type": "number", - "title": "Typical P", - "default": 1 - }, - "repetition_penalty": { - "type": "number", - "title": "Repetition Penalty", - "default": 1 - }, - "pad_token_id": { - "type": "integer", - "title": "Pad Token Id" - }, - "eos_token_id": { - "type": "integer", - "title": "Eos Token Id", - "default": 11 - } - }, - "type": "object", - "title": "GenerateKwargs", - "example": { - "max_length": 200, - "temperature": 0.7, - "top_p": 0.9, - "additional_param": "Example value" + } } + } }, - "HTTPValidationError": { - "properties": { - "detail": { - "items": { - "$ref": "#/components/schemas/ValidationError" - }, - "type": "array", - "title": 
"Detail" - } - }, - "type": "object", - "title": "HTTPValidationError" - }, - "HealthStatus": { - "properties": { - "status": { - "type": "string", - "title": "Status", - "example": "Healthy" - } - }, - "type": "object", - "required": [ - "status" - ], - "title": "HealthStatus" - }, - "HomeResponse": { - "properties": { - "message": { - "type": "string", - "title": "Message", - "example": "Server is running" - } - }, - "type": "object", - "required": [ - "message" - ], - "title": "HomeResponse" - }, - "MemoryInfo": { - "properties": { - "used": { - "type": "string", - "title": "Used" - }, - "total": { - "type": "string", - "title": "Total" + "404": { + "description": "Model Info Not Found", + "content": { + "application/json": { + "examples": { + "file_not_found": { + "summary": "Model Info File Not Found", + "value": { + "detail": "/workspace/tfs/model_info.json file not found." + } } - }, - "type": "object", - "required": [ - "used", - "total" - ], - "title": "MemoryInfo" - }, - "Message": { - "properties": { - "role": { - "type": "string", - "title": "Role" - }, - "content": { - "type": "string", - "title": "Content" - } - }, - "type": "object", - "required": [ - "role", - "content" - ], - "title": "Message" - }, - "MetricsResponse": { - "properties": { - "gpu_info": { - "items": { - "$ref": "#/components/schemas/GPUInfo" - }, - "type": "array", - "title": "Gpu Info" - }, - "cpu_info": { - "$ref": "#/components/schemas/CPUInfo" - } - }, - "type": "object", - "title": "MetricsResponse" - }, - "UnifiedRequestModel": { - "properties": { - "prompt": { - "type": "string", - "title": "Prompt", - "description": "Prompt for text generation. Required for text-generation pipeline. Do not use with 'messages'." - }, - "return_full_text": { - "type": "boolean", - "title": "Return Full Text", - "description": "Return full text if True, else only added text", - "default": true - }, - "clean_up_tokenization_spaces": { - "type": "boolean", - "title": "Clean Up Tokenization Spaces", - "description": "Clean up extra spaces in text output", - "default": false - }, - "prefix": { - "type": "string", - "title": "Prefix", - "description": "Prefix added to prompt" - }, - "handle_long_generation": { - "type": "string", - "title": "Handle Long Generation", - "description": "Strategy to handle long generation" - }, - "generate_kwargs": { - "allOf": [ - { - "$ref": "#/components/schemas/GenerateKwargs" - } - ], - "title": "Generate Kwargs", - "description": "Additional kwargs for generate method" - }, - "messages": { - "items": { - "$ref": "#/components/schemas/Message" - }, - "type": "array", - "title": "Messages", - "description": "Messages for conversational model. Required for conversational pipeline. Do not use with 'prompt'." - } - }, - "type": "object", - "title": "UnifiedRequestModel" - }, - "ValidationError": { - "properties": { - "loc": { - "items": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "integer" - } - ] - }, - "type": "array", - "title": "Location" - }, - "msg": { - "type": "string", - "title": "Message" - }, - "type": { - "type": "string", - "title": "Error Type" + } + } + } + }, + "500": { + "description": "Internal Server Error", + "content": { + "application/json": { + "examples": { + "unexpected_error": { + "summary": "Unexpected Error", + "value": { + "detail": "An unexpected error occurred on the server." 
+ } } - }, - "type": "object", - "required": [ - "loc", - "msg", - "type" - ], - "title": "ValidationError" + } + } + } + } + } + } + } + }, + "components": { + "schemas": { + "CPUInfo": { + "properties": { + "load_percentage": { + "type": "number", + "title": "Load Percentage" + }, + "physical_cores": { + "type": "integer", + "title": "Physical Cores" + }, + "total_cores": { + "type": "integer", + "title": "Total Cores" + }, + "memory": { + "$ref": "#/components/schemas/MemoryInfo" + } + }, + "type": "object", + "required": [ + "load_percentage", + "physical_cores", + "total_cores", + "memory" + ], + "title": "CPUInfo" + }, + "ErrorResponse": { + "properties": { + "detail": { + "type": "string", + "title": "Detail" + } + }, + "type": "object", + "required": [ + "detail" + ], + "title": "ErrorResponse" + }, + "GPUInfo": { + "properties": { + "id": { + "type": "string", + "title": "Id" + }, + "name": { + "type": "string", + "title": "Name" + }, + "load": { + "type": "string", + "title": "Load" + }, + "temperature": { + "type": "string", + "title": "Temperature" + }, + "memory": { + "$ref": "#/components/schemas/MemoryInfo" + } + }, + "type": "object", + "required": [ + "id", + "name", + "load", + "temperature", + "memory" + ], + "title": "GPUInfo" + }, + "GenerateKwargs": { + "properties": { + "max_length": { + "type": "integer", + "title": "Max Length", + "default": 200 + }, + "min_length": { + "type": "integer", + "title": "Min Length", + "default": 0 + }, + "do_sample": { + "type": "boolean", + "title": "Do Sample", + "default": true + }, + "early_stopping": { + "type": "boolean", + "title": "Early Stopping", + "default": false + }, + "num_beams": { + "type": "integer", + "title": "Num Beams", + "default": 1 + }, + "temperature": { + "type": "number", + "title": "Temperature", + "default": 1 + }, + "top_k": { + "type": "integer", + "title": "Top K", + "default": 10 + }, + "top_p": { + "type": "number", + "title": "Top P", + "default": 1 + }, + "typical_p": { + "type": "number", + "title": "Typical P", + "default": 1 + }, + "repetition_penalty": { + "type": "number", + "title": "Repetition Penalty", + "default": 1 + }, + "pad_token_id": { + "type": "integer", + "title": "Pad Token Id" + }, + "eos_token_id": { + "type": "integer", + "title": "Eos Token Id", + "default": 11 + } + }, + "type": "object", + "title": "GenerateKwargs", + "example": { + "max_length": 200, + "temperature": 0.7, + "top_p": 0.9, + "additional_param": "Example value" + } + }, + "HTTPValidationError": { + "properties": { + "detail": { + "items": { + "$ref": "#/components/schemas/ValidationError" + }, + "type": "array", + "title": "Detail" + } + }, + "type": "object", + "title": "HTTPValidationError" + }, + "HealthStatus": { + "properties": { + "status": { + "type": "string", + "title": "Status", + "example": "Healthy" + } + }, + "type": "object", + "required": [ + "status" + ], + "title": "HealthStatus" + }, + "HomeResponse": { + "properties": { + "message": { + "type": "string", + "title": "Message", + "example": "Server is running" + } + }, + "type": "object", + "required": [ + "message" + ], + "title": "HomeResponse" + }, + "MemoryInfo": { + "properties": { + "used": { + "type": "string", + "title": "Used" + }, + "total": { + "type": "string", + "title": "Total" + } + }, + "type": "object", + "required": [ + "used", + "total" + ], + "title": "MemoryInfo" + }, + "Message": { + "properties": { + "role": { + "type": "string", + "title": "Role" + }, + "content": { + "type": "string", + "title": "Content" + } + }, + 
"type": "object", + "required": [ + "role", + "content" + ], + "title": "Message" + }, + "MetricsResponse": { + "properties": { + "gpu_info": { + "items": { + "$ref": "#/components/schemas/GPUInfo" + }, + "type": "array", + "title": "Gpu Info" + }, + "cpu_info": { + "$ref": "#/components/schemas/CPUInfo" + } + }, + "type": "object", + "title": "MetricsResponse" + }, + "UnifiedRequestModel": { + "properties": { + "prompt": { + "type": "string", + "title": "Prompt", + "description": "Prompt for text generation. Required for text-generation pipeline. Do not use with 'messages'." + }, + "return_full_text": { + "type": "boolean", + "title": "Return Full Text", + "description": "Return full text if True, else only added text", + "default": true + }, + "clean_up_tokenization_spaces": { + "type": "boolean", + "title": "Clean Up Tokenization Spaces", + "description": "Clean up extra spaces in text output", + "default": false + }, + "prefix": { + "type": "string", + "title": "Prefix", + "description": "Prefix added to prompt" + }, + "handle_long_generation": { + "type": "string", + "title": "Handle Long Generation", + "description": "Strategy to handle long generation" + }, + "generate_kwargs": { + "allOf": [ + { + "$ref": "#/components/schemas/GenerateKwargs" + } + ], + "title": "Generate Kwargs", + "description": "Additional kwargs for generate method" + }, + "messages": { + "items": { + "$ref": "#/components/schemas/Message" + }, + "type": "array", + "title": "Messages", + "description": "Messages for conversational model. Required for conversational pipeline. Do not use with 'prompt'." + } + }, + "type": "object", + "title": "UnifiedRequestModel" + }, + "ValidationError": { + "properties": { + "loc": { + "items": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "integer" + } + ] + }, + "type": "array", + "title": "Location" + }, + "msg": { + "type": "string", + "title": "Message" + }, + "type": { + "type": "string", + "title": "Error Type" } + }, + "type": "object", + "required": [ + "loc", + "msg", + "type" + ], + "title": "ValidationError" } + } } -} \ No newline at end of file + } \ No newline at end of file diff --git a/presets/inference/text-generation/inference_api.py b/presets/inference/text-generation/inference_api.py index c23a15c6b..23dccba5d 100644 --- a/presets/inference/text-generation/inference_api.py +++ b/presets/inference/text-generation/inference_api.py @@ -1,5 +1,6 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT license. 
+import json
 import os
 from dataclasses import asdict, dataclass, field
 from typing import Annotated, Any, Dict, List, Optional
@@ -15,6 +16,10 @@
 from transformers import (AutoModelForCausalLM, AutoTokenizer,
                           GenerationConfig, HfArgumentParser)
 
+# Constants
+APP_DIR = "/workspace/tfs"
+WEIGHTS_DIR = f"{APP_DIR}/weights"
+MODEL_INFO_FILE = f"{APP_DIR}/model_info.json"
 
 @dataclass
 class ModelConfig:
@@ -22,7 +27,7 @@ class ModelConfig:
     Transformers Model Configuration Parameters
     """
     pipeline: str = field(metadata={"help": "The model pipeline for the pre-trained model"})
-    pretrained_model_name_or_path: Optional[str] = field(default="/workspace/tfs/weights", metadata={"help": "Path to the pretrained model or model identifier from huggingface.co/models"})
+    pretrained_model_name_or_path: Optional[str] = field(default=WEIGHTS_DIR, metadata={"help": "Path to the pretrained model or model identifier from huggingface.co/models"})
     state_dict: Optional[Dict[str, Any]] = field(default=None, metadata={"help": "State dictionary for the model"})
     cache_dir: Optional[str] = field(default=None, metadata={"help": "Cache directory for the model"})
     from_tf: bool = field(default=False, metadata={"help": "Load model from a TensorFlow checkpoint"})
@@ -428,6 +433,74 @@
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))
 
+@app.get(
+    "/version",
+    summary="Get Model Information",
+    response_description="Model Version Information",
+    responses={
+        200: {
+            "description": "Successful Response",
+            "content": {
+                "application/json": {
+                    "examples": {
+                        "model_info": {
+                            "summary": "Model Information Response",
+                            "value": {
+                                "Model Type": "Your Model Type",
+                                "Version": "1.0.0",
+                                "Image Name": "model_image_name",
+                                "Model Version URL": "http://example.com/model/version",
+                                "REVISION_ID": "revision_hash"
+                            }
+                        }
+                    }
+                }
+            }
+        },
+        404: {
+            "description": "Model Info Not Found",
+            "content": {
+                "application/json": {
+                    "examples": {
+                        "file_not_found": {
+                            "summary": "Model Info File Not Found",
+                            "value": {"detail": f"{MODEL_INFO_FILE} file not found."}
+                        }
+                    }
+                }
+            }
+        },
+        500: {
+            "description": "Internal Server Error",
+            "content": {
+                "application/json": {
+                    "examples": {
+                        "unexpected_error": {
+                            "summary": "Unexpected Error",
+                            "value": {
+                                "detail": "An unexpected error occurred on the server."
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }
+)
+def get_version():
+    """
+    Reads and returns model version information from a predefined JSON file.
+    """
+    try:
+        with open(MODEL_INFO_FILE, "r") as f:
+            model_info = json.load(f)
+    except FileNotFoundError:
+        raise HTTPException(status_code=404, detail=f"{MODEL_INFO_FILE} file not found.")
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+
+    return model_info
+
 if __name__ == "__main__":
     local_rank = int(os.environ.get("LOCAL_RANK", 0)) # Default to 0 if not set
     port = 5000 + local_rank # Adjust port based on local rank
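With this route in place, every preset server reports its provenance over HTTP. A minimal client-side check, sketched with `requests` (host and port below are placeholders; the e2e workflow instead hits the service IP on port 80, while a local single-rank server listens on 5000):

```python
import requests

# Placeholder address; substitute the workspace service IP when deployed.
BASE_URL = "http://localhost:5000"

resp = requests.get(f"{BASE_URL}/version", timeout=10)
resp.raise_for_status()
info = resp.json()

# Keys written by the Dockerfiles' model_info.json step.
print(info.get("Image Name"), info.get("Version"), info.get("REVISION_ID"))
```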
+ """ + try: + with open(MODEL_INFO_FILE, "r") as f: + model_info = json.load(f) + except FileNotFoundError: + raise HTTPException(status_code=404, detail="model_info.json file not found.") + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + + return model_info + if __name__ == "__main__": local_rank = int(os.environ.get("LOCAL_RANK", 0)) # Default to 0 if not set port = 5000 + local_rank # Adjust port based on local rank diff --git a/presets/models/supported_models.yaml b/presets/models/supported_models.yaml index 0441a945a..ae57ff75d 100644 --- a/presets/models/supported_models.yaml +++ b/presets/models/supported_models.yaml @@ -3,28 +3,29 @@ models: - name: llama-2-7b type: llama2-completion runtime: llama-2 - tag: 0.0.3 + tag: 0.0.4 - name: llama-2-7b-chat type: llama2-chat runtime: llama-2 - tag: 0.0.3 + tag: 0.0.4 - name: llama-2-13b type: llama2-completion runtime: llama-2 - tag: 0.0.3 + tag: 0.0.4 - name: llama-2-13b-chat type: llama2-chat runtime: llama-2 - tag: 0.0.3 + tag: 0.0.4 - name: llama-2-70b type: llama2-completion runtime: llama-2 - tag: 0.0.3 + tag: 0.0.4 - name: llama-2-70b-chat type: llama2-chat runtime: llama-2 - tag: 0.0.3 + tag: 0.0.4 # Tag history: + # 0.0.4 - Version endpoint (#297) # 0.0.3 - Inference API Cleanup (#233) # 0.0.2 - Eliminate Unnecessary Process Group Creation in Worker Initialization (#244) # 0.0.1 - Initial Release @@ -34,28 +35,31 @@ models: type: text-generation version: https://huggingface.co/tiiuae/falcon-7b/commit/898df1396f35e447d5fe44e0a3ccaaaa69f30d36 runtime: tfs - tag: 0.0.4 + tag: 0.0.5 - name: falcon-7b-instruct type: text-generation version: https://huggingface.co/tiiuae/falcon-7b-instruct/commit/cf4b3c42ce2fdfe24f753f0f0d179202fea59c99 runtime: tfs - tag: 0.0.4 + tag: 0.0.5 # Tag history: + # 0.0.5 - Version endpoint (#297) # 0.0.4 - Adjust default model params (#310) # 0.0.3 - Update Default Params (#294) # 0.0.2 - Inference API Cleanup (#233) # 0.0.1 - Initial Release + - name: falcon-40b type: text-generation version: https://huggingface.co/tiiuae/falcon-40b/commit/4a70170c215b36a3cce4b4253f6d0612bb7d4146 runtime: tfs - tag: 0.0.5 + tag: 0.0.6 - name: falcon-40b-instruct type: text-generation version: https://huggingface.co/tiiuae/falcon-40b-instruct/commit/ecb78d97ac356d098e79f0db222c9ce7c5d9ee5f runtime: tfs - tag: 0.0.5 + tag: 0.0.6 # Tag history for 40b models: + # 0.0.6 - Version endpoint (#297) # 0.0.5 - Adjust default model params (#310) # 0.0.4 - Skipped due to incomplete upload issue # 0.0.3 - Update Default Params (#294) @@ -67,13 +71,14 @@ models: type: text-generation version: https://huggingface.co/mistralai/Mistral-7B-v0.1/commit/26bca36bde8333b5d7f72e9ed20ccda6a618af24 runtime: tfs - tag: 0.0.4 + tag: 0.0.5 - name: mistral-7b-instruct type: text-generation version: https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2/commit/b70aa86578567ba3301b21c8a27bea4e8f6d6d61 runtime: tfs - tag: 0.0.4 + tag: 0.0.5 # Tag history: + # 0.0.5 - Version endpoint (#297) # 0.0.4 - Adjust default model params (#310) # 0.0.3 - Update Default Params (#294) # 0.0.2 - Inference API Cleanup (#233) @@ -84,8 +89,9 @@ models: type: text-generation version: https://huggingface.co/microsoft/phi-2/commit/b10c3eba545ad279e7208ee3a5d644566f001670 runtime: tfs - tag: 0.0.3 + tag: 0.0.4 # Tag history: + # 0.0.4 - Version endpoint (#297) # 0.0.3 - Adjust default model params (#310) # 0.0.2 - Update Default Params (#294) # 0.0.1 - Initial Release