diff --git a/.dapr/appconfig-dev.yaml b/.dapr/appconfig-dev.yaml new file mode 100644 index 000000000000..b22f553b6877 --- /dev/null +++ b/.dapr/appconfig-dev.yaml @@ -0,0 +1,19 @@ +apiVersion: dapr.io/v1alpha1 +kind: Configuration +metadata: + name: appconfig + namespace: default +spec: + tracing: + samplingRate: "1" + stdout: true + features: + - name: SchedulerReminders + enabled: true + # zipkin: + # endpointAddress: http://localhost:9411/api/v2/spans + secrets: + scopes: + - storeName: secretstore-local + defaultAccess: allow + deniedSecrets: [ ] diff --git a/.dapr/components/configstore.yaml b/.dapr/components/configstore.yaml new file mode 100644 index 000000000000..4149f5d9c955 --- /dev/null +++ b/.dapr/components/configstore.yaml @@ -0,0 +1,20 @@ +# Reference: https://docs.dapr.io/reference/components-reference/supported-configuration-stores/redis-configuration-store/ + +apiVersion: dapr.io/v1alpha1 +kind: Component +metadata: + name: configstore + namespace: development +spec: + type: configuration.redis + metadata: + - name: redisHost + secretKeyRef: + name: REDIS_URI + key: REDIS_URI + - name: redisPassword + secretKeyRef: + name: REDIS_PASSWORD + key: REDIS_PASSWORD +auth: + secretStore: secretstore-local \ No newline at end of file diff --git a/.dapr/components/pubsub-redis.yaml b/.dapr/components/pubsub-redis.yaml new file mode 100644 index 000000000000..346914a8f510 --- /dev/null +++ b/.dapr/components/pubsub-redis.yaml @@ -0,0 +1,25 @@ +# Reference: https://docs.dapr.io/reference/components-reference/supported-pubsub/setup-redis-pubsub/ + +apiVersion: dapr.io/v1alpha1 +kind: Component +metadata: + name: pubsub-redis + namespace: development +spec: + type: pubsub.redis + version: v1 + metadata: + - name: redisHost + secretKeyRef: + name: REDIS_URI + key: REDIS_URI + - name: redisPassword + secretKeyRef: + name: REDIS_PASSWORD + key: REDIS_PASSWORD + - name: consumerID + value: "{appID}" + - name: concurrency + value: "10" +auth: + secretStore: secretstore-local \ No newline at end of file diff --git a/.dapr/components/secretstore-env.yaml b/.dapr/components/secretstore-env.yaml new file mode 100644 index 000000000000..a8689cd3e6e1 --- /dev/null +++ b/.dapr/components/secretstore-env.yaml @@ -0,0 +1,13 @@ +# Reference: https://docs.dapr.io/reference/components-reference/supported-secret-stores/file-secret-store/ + +apiVersion: dapr.io/v1alpha1 +kind: Component +metadata: + name: secretstore-local + namespace: development +spec: + type: secretstores.local.env + version: v1 + metadata: + - name: prefix + value: "SECRETS_" \ No newline at end of file diff --git a/.dapr/components/statestore.yaml b/.dapr/components/statestore.yaml new file mode 100644 index 000000000000..573694eebc40 --- /dev/null +++ b/.dapr/components/statestore.yaml @@ -0,0 +1,23 @@ +# Reference: https://docs.dapr.io/reference/components-reference/supported-state-stores/setup-redis/ + +apiVersion: dapr.io/v1alpha1 +kind: Component +metadata: + name: statestore + namespace: development +spec: + type: state.redis + version: v1 + metadata: + - name: redisHost + secretKeyRef: + name: REDIS_URI + key: REDIS_URI + - name: redisPassword + secretKeyRef: + name: REDIS_PASSWORD + key: REDIS_PASSWORD + - name: actorStateStore + value: "true" +auth: + secretStore: secretstore-local \ No newline at end of file diff --git a/.dapr/components/subscriptions.yaml b/.dapr/components/subscriptions.yaml new file mode 100644 index 000000000000..7bc9ff3ca6ee --- /dev/null +++ b/.dapr/components/subscriptions.yaml @@ -0,0 +1,12 @@ 
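+# Reference: https://docs.dapr.io/developing-applications/building-blocks/pubsub/subscription-methods/
+#
+# Declarative subscription: Dapr delivers events published on the
+# budLitellmMessages topic of the pubsub-redis component to the app's
+# /notifications route, and parks undeliverable events on the poisonMessages
+# dead-letter topic. A minimal sketch of the handler this route implies
+# (assuming a FastAPI app; the handler name and body are illustrative, only
+# the route path is fixed by this file):
+#
+#   @app.post("/notifications")
+#   async def notifications(event: dict) -> dict:
+#       # Returning SUCCESS acks the event; RETRY and DROP are also honored.
+#       return {"status": "SUCCESS"}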
+apiVersion: dapr.io/v2alpha1 +kind: Subscription +metadata: + name: pubsub-subscription +spec: + topic: budLitellmMessages + routes: + default: /notifications + pubsubname: pubsub-redis + deadLetterTopic: poisonMessages +scopes: + - budlitellm \ No newline at end of file diff --git a/.dockerignore b/.dockerignore index 929eace5e343..2919658574fb 100644 --- a/.dockerignore +++ b/.dockerignore @@ -9,3 +9,4 @@ tests .devcontainer *.tgz log.txt +budlitellm diff --git a/Dockerfile.bud b/Dockerfile.bud index e64c89ba763c..3cb2b8f18d85 100644 --- a/Dockerfile.bud +++ b/Dockerfile.bud @@ -11,7 +11,7 @@ WORKDIR /app # Install build dependencies RUN apt-get clean && apt-get update && \ - apt-get install -y gcc python3-dev && \ + apt-get install -y gcc python3-dev git && \ rm -rf /var/lib/apt/lists/* RUN pip install --upgrade pip && \ @@ -21,7 +21,7 @@ RUN pip install --upgrade pip && \ COPY . . # Build Admin UI -RUN chmod +x build_admin_ui.sh && ./build_admin_ui.sh +RUN chmod +x docker/build_admin_ui.sh && ./docker/build_admin_ui.sh # Build the package RUN rm -rf dist/* && python -m build @@ -44,13 +44,13 @@ RUN pip uninstall PyJWT -y RUN pip install PyJWT --no-cache-dir # Build Admin UI -RUN chmod +x build_admin_ui.sh && ./build_admin_ui.sh +RUN chmod +x docker/build_admin_ui.sh && ./docker/build_admin_ui.sh # Runtime stage FROM $LITELLM_RUNTIME_IMAGE AS runtime # Update dependencies and clean up - handles debian security issue -RUN apt-get update && apt-get upgrade -y && rm -rf /var/lib/apt/lists/* +RUN apt-get update && apt-get install -y git && apt-get upgrade -y && rm -rf /var/lib/apt/lists/* WORKDIR /app # Copy the current directory contents into the container at /app @@ -64,10 +64,14 @@ COPY --from=builder /wheels/ /wheels/ # Install the built wheel using pip; again using a wildcard if it's the only file RUN pip install *.whl /wheels/* --no-index --find-links=/wheels/ && rm -f *.whl && rm -rf /wheels +# ensure gptcache is installed +RUN pip install git+https://github.com/BudEcosystem/BudServeGPTCache.git#egg=gptcache +RUN pip install git+https://github.com/BudEcosystem/bud-microframe.git#egg=budmicroframe + # Generate prisma client RUN prisma generate -RUN chmod +x entrypoint.sh +# RUN chmod +x entrypoint.sh -EXPOSE 4000/tcp +EXPOSE 4010/tcp -ENTRYPOINT ["sh", "-c", "cp /app/config/config.yaml /app && litellm --config /app/config.yaml --port 4000"] \ No newline at end of file +# ENTRYPOINT ["sh", "-c", "litellm --config /app/litellm_config.yaml --port 4000"] \ No newline at end of file diff --git a/deploy/docker-compose-dev.yaml b/deploy/docker-compose-dev.yaml new file mode 100644 index 000000000000..05ba6ff68c1e --- /dev/null +++ b/deploy/docker-compose-dev.yaml @@ -0,0 +1,70 @@ +include: + - ./docker-compose-redis.yaml + +services: + bud-litellm-app: + image: bud-microframe/$APP_NAME:$NAMESPACE + profiles: + - app + container_name: bud-mf-$NAMESPACE-$APP_NAME + build: + context: .. 
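+      # The build context is the repo root so the image can COPY the whole
+      # tree; Dockerfile.bud now runs docker/build_admin_ui.sh and pip-installs
+      # the BudEcosystem forks of gptcache and budmicroframe from GitHub, which
+      # is why git is installed in both the builder and runtime stages.
+      # Equivalent manual build from the repo root (a sketch):
+      #   docker build -f Dockerfile.bud -t bud-microframe/$APP_NAME:$NAMESPACE .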
+      dockerfile: ./Dockerfile.bud
+    # command: [ "litellm", "--config", "/app/litellm_config.yaml", "--port", $APP_PORT ]
+    # command: sh -c "cd litellm/proxy && litellm --config /app/litellm_config.yaml --port $APP_PORT"
+    command: sh -c "tail -f /dev/null"
+    # command: sh -c "alembic -c ./budcluster/alembic.ini upgrade head && tail -f /dev/null" # && uvicorn $APP_NAME.main:app --host 0.0.0.0 --port $APP_PORT --reload"
+    # ports:
+    #   - "$DAPR_GRPC_PORT:$DAPR_GRPC_PORT" # Dapr instances communicate over gRPC so the gRPC port needs to be exposed
+    #   - "$DAPR_HTTP_PORT:$DAPR_HTTP_PORT" # Expose Dapr HTTP port for service invocation
+    #   - "$APP_PORT:$APP_PORT" # Expose app port for debugging purposes
+    volumes:
+      - ../:/app/
+      - ../cache:/app/cache
+    env_file:
+      - path: ../.env
+        required: true
+    depends_on:
+      - bud-litellm-redis
+      - bud-litellm-placement
+    network_mode: host
+  bud-litellm-sidecar:
+    container_name: bud-mf-$NAMESPACE-$APP_NAME-dapr
+    image: "daprio/daprd:edge"
+    command: [
+      "./daprd",
+      "--app-id", "$APP_NAME",
+      "--app-port", "$APP_PORT",
+      "--dapr-http-port", "$DAPR_HTTP_PORT",
+      "--dapr-grpc-port", "$DAPR_GRPC_PORT",
+      "--placement-host-address", "$DAPR_PLACEMENT_HOST:$DAPR_PLACEMENT_PORT", # Dapr's placement service can be reached via the docker DNS entry
+      "--metrics-port", "$DAPR_METRICS_PORT",
+      "--resources-path", "/components",
+      "--config", "/config/appconfig.yaml",
+      "--log-as-json"
+    ]
+    env_file:
+      - path: ../.env
+        required: true
+    volumes:
+      # - "../crypto-keys:/crypto-keys"
+      - "${DAPR_COMPONENTS:-../.dapr/components/}:/components" # Mount the components folder for the runtime to use. The mounted location must match the --resources-path argument.
+      - "${DAPR_APP_CONFIG:-../.dapr/appconfig-dev.yaml}:/config/appconfig.yaml" # Mount the config file for the runtime to use. The mounted location must match the --config argument.
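+      # With network_mode: host the app and this sidecar share localhost, so
+      # the proxy can publish through the sidecar's HTTP API without service
+      # discovery. The mounted component specs resolve their secretKeyRefs
+      # (e.g. REDIS_URI) through secretstore-local, whose SECRETS_ prefix maps
+      # the key REDIS_URI to the env var SECRETS_REDIS_URI. A sketch, using the
+      # component and topic names from .dapr/components/:
+      #   curl -X POST "http://localhost:$DAPR_HTTP_PORT/v1.0/publish/pubsub-redis/budLitellmMessages" \
+      #     -H "Content-Type: application/json" -d '{"hello": "bud"}'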
+      - ./:/app/
+    network_mode: "host"
+    # ports:
+    #   - "$DAPR_GRPC_PORT:$DAPR_GRPC_PORT"
+    #   - "$DAPR_HTTP_PORT:$DAPR_HTTP_PORT"
+    # network_mode: "service:app"
+  bud-litellm-placement:
+    container_name: bud-mf-$NAMESPACE-$APP_NAME-placement
+    image: "daprio/placement:edge"
+    command: [ "./placement", "--port", "$DAPR_PLACEMENT_PORT" ]
+    ports:
+      - "$DAPR_PLACEMENT_PORT:$DAPR_PLACEMENT_PORT"
+    networks:
+      - bud-litellm-network
+networks:
+  bud-litellm-network:
+    name: bud-mf-$NAMESPACE-$APP_NAME
+    driver: bridge
\ No newline at end of file
diff --git a/deploy/docker-compose-redis.yaml b/deploy/docker-compose-redis.yaml
new file mode 100644
index 000000000000..8f8b892ade14
--- /dev/null
+++ b/deploy/docker-compose-redis.yaml
@@ -0,0 +1,28 @@
+services:
+  bud-litellm-redis:
+    container_name: bud-mf-$NAMESPACE-litellm-redis
+    image: redis:alpine
+    ports:
+      - "$REDIS_PORT:$REDIS_PORT"
+    command:
+      - /bin/sh
+      - -c
+      - redis-server --requirepass "${SECRETS_REDIS_PASSWORD:?SECRETS_REDIS_PASSWORD variable is not set}" --port ${REDIS_PORT}
+    sysctls:
+      net.core.somaxconn: 1024
+    healthcheck:
+      test: [ "CMD", "redis-cli", "--raw", "incr", "ping" ]
+    volumes:
+      - budlitellm_redis_data:/data
+    # networks:
+    #   - bud-microframe-nw
+    # network_mode: "host"
+    networks:
+      - bud-litellm-network
+
+volumes:
+  budlitellm_redis_data:
+networks:
+  bud-litellm-network:
+    name: bud-mf-$NAMESPACE-$APP_NAME
+    driver: bridge
diff --git a/deploy/start_dev.sh b/deploy/start_dev.sh
new file mode 100755
index 000000000000..b11453997ffb
--- /dev/null
+++ b/deploy/start_dev.sh
@@ -0,0 +1,99 @@
+#!/usr/bin/env bash
+
+#
+# -----------------------------------------------------------------------------
+# Copyright (c) 2024 Bud Ecosystem Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# -----------------------------------------------------------------------------
+#
+
+DAPR_COMPONENTS="../.dapr/components/"
+DAPR_APP_CONFIG="../.dapr/appconfig-dev.yaml"
+
+DOCKER_COMPOSE_FILE="./deploy/docker-compose-dev.yaml"
+BUILD_FLAG=""
+DETACH_FLAG=""
+
+function display_help() {
+  echo "Usage: $0 [options]"
+  echo
+  echo "Options:"
+  echo "  --dapr-components  Set the dapr components folder path, this should be relative to the deploy directory (default: $DAPR_COMPONENTS)"
+  echo "  --dapr-app-config  Set the dapr app config path, this should be relative to the deploy directory (default: $DAPR_APP_CONFIG)"
+  echo "  -f FILE            Specify the Docker Compose file to use, this should be relative to your current directory (default: $DOCKER_COMPOSE_FILE)"
+  echo "  --build            Include this flag to force a rebuild of the Docker containers"
+  echo "  --skip-app         Include this flag to skip the app container"
+  echo "  -d                 Include this flag to detach and run the containers in the background"
+  echo "  --help             Display this help message and exit"
+  echo
+  echo "Example:"
+  echo "  $0 -f docker-compose-local.yaml --build"
+  echo "  This will use 'docker-compose-local.yaml' and force a rebuild of the containers."
+  echo
+  exit 0
+}
+
+# Parse arguments
+while [[ "$#" -gt 0 ]]; do
+  case $1 in
+    --dapr-components) DAPR_COMPONENTS="$2"; shift ;;
+    --dapr-app-config) DAPR_APP_CONFIG="$2"; shift ;;
+    -f) DOCKER_COMPOSE_FILE="$2"; shift ;;
+    --build) BUILD_FLAG="--build" ;;
+    --skip-app) SKIP_APP_FLAG="true" ;;
+    -d) DETACH_FLAG="-d" ;;
+    --help) display_help ;;
+    *) echo "Unknown parameter passed: $1"; display_help ;;
+  esac
+  shift
+done
+
+set -a
+source ./.env
+set +a
+
+export REDIS_PORT=$(echo "${SECRETS_REDIS_URI:-redis:6379}" | cut -d':' -f2)
+
+: ${APP_NAME:?Application name is required, set APP_NAME in your .env file.}
+
+# Print the environment variables
+echo "****************************************************"
+echo "*                                                  *"
+echo "*        Starting Microservice Environment         *"
+echo "*                                                  *"
+echo "****************************************************"
+echo ""
+echo "🛠 App Name             : $APP_NAME"
+echo "🌐 App Port             : $APP_PORT"
+echo "🔑 Redis URI            : $SECRETS_REDIS_URI"
+echo "🌍 Dapr HTTP Port       : $DAPR_HTTP_PORT"
+echo "🌍 Dapr gRPC Port       : $DAPR_GRPC_PORT"
+echo "🛠 Namespace            : $NAMESPACE"
+echo "📊 Log Level            : $LOG_LEVEL"
+echo "🗂 Config Store Name    : $CONFIGSTORE_NAME"
+echo "🔐 Secret Store Name    : $SECRETSTORE_NAME"
+echo "🛠 Dapr Components      : $DAPR_COMPONENTS"
+echo "🛠 Dapr App Config      : $DAPR_APP_CONFIG"
+echo "🛠 Docker Compose File  : $DOCKER_COMPOSE_FILE"
+echo "🚀 Build Flag           : $BUILD_FLAG"
+echo ""
+echo "****************************************************"
+
+# Bring up Docker Compose
+echo "Bringing up Docker Compose with file: $DOCKER_COMPOSE_FILE"
+if [ -z "$SKIP_APP_FLAG" ]; then
+  docker compose --profile app -f "$DOCKER_COMPOSE_FILE" up $BUILD_FLAG $DETACH_FLAG
+else
+  docker compose -f "$DOCKER_COMPOSE_FILE" up $BUILD_FLAG $DETACH_FLAG
+fi
\ No newline at end of file
diff --git a/deploy/stop_dev.sh b/deploy/stop_dev.sh
new file mode 100755
index 000000000000..456654e9a804
--- /dev/null
+++ b/deploy/stop_dev.sh
@@ -0,0 +1,57 @@
+#!/bin/bash
+
+#
+# -----------------------------------------------------------------------------
+# Copyright (c) 2024 Bud Ecosystem Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# -----------------------------------------------------------------------------
+#
+
+# Default value for Docker Compose file
+DOCKER_COMPOSE_FILE="./deploy/docker-compose-dev.yaml"
+
+# Function to display help message
+function display_help() {
+  echo "Usage: $0 [options]"
+  echo
+  echo "Options:"
+  echo "  -f FILE       Specify the Docker Compose file to use (default: deploy/docker-compose-dev.yaml)"
+  echo "  --help        Display this help message and exit"
+  echo
+  echo "Example:"
+  echo "  $0 -f docker-compose-local.yaml"
+  echo "  This will stop the services using 'docker-compose-local.yaml'."
+ echo + exit 0 +} + +# Parse optional arguments +while [[ "$#" -gt 0 ]]; do + case $1 in + -f) DOCKER_COMPOSE_FILE="$2"; shift ;; + --help) display_help ;; + *) echo "Unknown parameter: $1"; exit 1 ;; + esac + shift +done + +set -a +source ./.env +set +a + +export REDIS_PORT=$(echo "${SECRETS_REDIS_URI:-redis:6379}" | cut -d':' -f2) + +# Stop Docker Compose services +echo "Stopping services defined in: $DOCKER_COMPOSE_FILE" +docker compose -f "$DOCKER_COMPOSE_FILE" stop \ No newline at end of file diff --git a/litellm/__about__.py b/litellm/__about__.py new file mode 100644 index 000000000000..9c3a5276d863 --- /dev/null +++ b/litellm/__about__.py @@ -0,0 +1,19 @@ +# ----------------------------------------------------------------------------- +# Copyright (c) 2024 Bud Ecosystem Inc. +# # +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# # +# http://www.apache.org/licenses/LICENSE-2.0 +# # +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ----------------------------------------------------------------------------- + +"""Contains metadata about the package, including version information and author details.""" + +__version__ = "budlitellm@0.0.1" diff --git a/litellm/commons/config.py b/litellm/commons/config.py new file mode 100644 index 000000000000..07d562fd024c --- /dev/null +++ b/litellm/commons/config.py @@ -0,0 +1,86 @@ +# ----------------------------------------------------------------------------- +# Copyright (c) 2024 Bud Ecosystem Inc. +# # +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# # +# http://www.apache.org/licenses/LICENSE-2.0 +# # +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ----------------------------------------------------------------------------- + +"""Manages application and secret configurations, utilizing environment variables and Dapr's configuration store for syncing.""" + +from pathlib import Path +from typing import Optional + +from budmicroframe.commons.config import BaseAppConfig, BaseSecretsConfig, register_settings, enable_periodic_sync_from_store +from pydantic import DirectoryPath, Field + +from litellm.__about__ import __version__ + + +class AppConfig(BaseAppConfig): + name: str = __version__.split("@")[0] + version: str = __version__.split("@")[-1] + description: str = Field("Bud-Litellm is a proxy server for LLM requests.", alias="DOCS_DESCRIPTION") + api_root: str = Field("", alias="SERVER_ROOT_PATH") + + # Base Directory + base_dir: DirectoryPath = Path(__file__).parent.parent.parent.resolve() + + # Bud-Litellm env + litellm_log: str = Field("DEBUG", alias="LITELLM_LOG") + litellm_master_key: str = Field("sk-1234", alias="LITELLM_MASTER_KEY") + litellm_salt_key: str = Field("litellm_salt_key", alias="LITELLM_SALT_KEY") + database_url: str = Field(..., alias="DATABASE_URL") + store_model_in_db: bool = Field(True, alias="STORE_MODEL_IN_DB") + budserve_app_baseurl: str = Field("http://127.0.0.1:8050", alias="BUDSERVE_APP_BASEURL") + + # Redis Config + redis_host: str = Field("localhost", alias="REDIS_HOST") + redis_port: int = Field(6379, alias="REDIS_PORT") + redis_username: str = Field("", alias="REDIS_USERNAME") + redis_password: str = Field("", alias="REDIS_PASSWORD") + redis_db: int = Field(0, alias="REDIS_DB") + + # Cache Config + enable_cache: bool = Field(False, alias="ENABLE_CACHE") + enable_cache_metric: bool = Field(False, alias="ENABLE_CACHE_METRIC") + cache_eviction_policy: str = Field("LRU", alias="CACHE_EVICTION_POLICY") + cache_max_size: int = Field(1000, alias="CACHE_MAX_SIZE") + cache_ttl: int = Field(3600, alias="CACHE_TTL") + cache_score_threshold: float = Field(0.8, alias="CACHE_SCORE_THRESHOLD") + cache_embedding_model: str = Field("sentence-transformers/all-MiniLM-L6-v2", alias="CACHE_EMBEDDING_MODEL") + + # Metrics App and Topic + budmetrics_app_name: str = Field("budMetrics", alias="BUDMETRICS_APP_NAME") + budmetrics_topic_name: str = Field("budMetricsMessages", alias="BUDMETRICS_TOPIC_NAME") + + +class SecretsConfig(BaseSecretsConfig): + name: str = __version__.split("@")[0] + version: str = __version__.split("@")[-1] + + # Database + psql_user: Optional[str] = Field( + None, + alias="PSQL_USER", + json_schema_extra=enable_periodic_sync_from_store(is_global=True), + ) + psql_password: Optional[str] = Field( + None, + alias="PSQL_PASSWORD", + json_schema_extra=enable_periodic_sync_from_store(is_global=True), + ) + + +app_settings = AppConfig() +secrets_settings = SecretsConfig() + +register_settings(app_settings, secrets_settings) \ No newline at end of file diff --git a/litellm/custom_callbacks.py b/litellm/custom_callbacks.py new file mode 100644 index 000000000000..7f6f6067c00e --- /dev/null +++ b/litellm/custom_callbacks.py @@ -0,0 +1,179 @@ +import copy +from datetime import datetime +from typing import Any, Dict, List, Optional, Union +from urllib.parse import urlparse +from uuid import UUID, uuid4 + +from litellm.integrations.custom_logger import CustomLogger +import litellm +from litellm.commons.config import app_settings +from litellm._logging import verbose_logger +from budmicroframe.commons.schemas import CloudEventBase +from budmicroframe.shared.dapr_service import DaprService + +# error in 
budserve_middleware.py +# Scenario 1: if user sends wrong api key +# Scenario 2: if user sends wrong model param + +# Keys i won't have: +# project_id, project_name, endpoint_id, endpoint_name (what user has sent), endpoint_path, +# model_id, provider, modality, model_name +# Keys i can set: +# request_arrival_time == request_forwarded_time == response_start_time == response_end_time +# request_body, response_body, cost = 0, is_cache_hit = False, is_success = False, is_streaming = False + + + +class RequestMetrics(CloudEventBase): + request_id: UUID + request_ip: Optional[str] = None + project_id: Optional[UUID] + project_name: Optional[str] + endpoint_id: Optional[UUID] + endpoint_name: Optional[str] + endpoint_path: Optional[str] + model_id: Optional[UUID] + model_name: Optional[str] + provider: Optional[str] + modality: Optional[str] + request_arrival_time: datetime + request_forwarded_time: datetime + response_start_time: datetime + response_end_time: datetime + request_body: Dict[str, Any] + response_body: Union[Dict[str, Any], List[Dict[str, Any]]] + cost: Optional[float] = None + is_cache_hit: bool + is_streaming: bool = False + is_success: bool + + def validate_intervals(self) -> "RequestMetrics": + if self.response_start_time > self.response_end_time: + raise ValueError("Response start time cannot be after response end time.") + if self.request_arrival_time > self.response_start_time: + raise ValueError("Request arrival time cannot be after response start time.") + if self.request_forwarded_time > self.response_start_time: + raise ValueError("Request forwarded time cannot be after response start time.") + if self.request_arrival_time > self.response_end_time: + raise ValueError("Request arrival time cannot be after response end time.") + return self + +# This file includes the custom callbacks for LiteLLM Proxy +# Once defined, these can be passed in proxy_config.yaml +class MyCustomHandler(CustomLogger): + def log_pre_api_call(self, model, messages, kwargs): + verbose_logger.info("Pre-API Call") + + def log_post_api_call(self, kwargs, response_obj, start_time, end_time): + verbose_logger.info("Post-API Call") + + def log_stream_event(self, kwargs, response_obj, start_time, end_time): + verbose_logger.info("On Stream") + + def log_success_event(self, kwargs, response_obj, start_time, end_time): + verbose_logger.info("On Success") + + def log_failure_event(self, kwargs, response_obj, start_time, end_time): + verbose_logger.info("On Failure") + + def get_request_metrics(self, kwargs, response_obj, start_time, end_time, failure=False) -> RequestMetrics: + # log: key, user, model, prompt, response, tokens, cost + # Access kwargs passed to litellm.completion() + verbose_logger.info(f"\nkwargs : {kwargs}") + # verbose_logger.info(f"\nresponse_obj : {response_obj}") + + model = kwargs.get("model", None) + is_cache_hit = kwargs.get("cache_hit") + response_body = copy.deepcopy(kwargs.get("standard_logging_object", {}).get("response", {})) if not failure else { + "exception": str(kwargs.get("exception", None)), + "traceback": kwargs.get("traceback_exception", None) + } + # Access litellm_params passed to litellm.completion(), example access `metadata` + litellm_params = kwargs.get("litellm_params", {}) + proxy_server_request = litellm_params.get("proxy_server_request", {}) + if proxy_server_request and proxy_server_request.get("body"): + # To handle unserializable metadata in proxy_server_request and circular dependencies + proxy_server_request["body"].pop("metadata", None) + if not 
proxy_server_request: + proxy_server_request["body"] = { + "model": model, + "messages": kwargs.get("messages", []), + "stream": kwargs.get("stream", False) + } + model_info = copy.deepcopy(litellm_params.get("model_info", {})) + metadata = litellm_params.get("metadata", {}) + endpoint = metadata.get("endpoint", "") + api_route = urlparse(str(endpoint)).path + if litellm_params.get("api_base"): + api_route = f"{litellm_params['api_base']}{api_route}" + + # Calculate cost using litellm.completion_cost() + response_obj = response_obj or {} + cost = litellm.completion_cost(completion_response=response_obj) if not failure else 0 + + usage = response_obj.get("usage", None) or {} + if isinstance(usage, litellm.Usage): + usage = dict(usage) + + metrics_data = RequestMetrics( + request_id=kwargs.get("litellm_call_id", uuid4()), + project_id=metadata.get("project_id", None), + project_name=metadata.get("project_name", None), + endpoint_id=model_info["metadata"]["endpoint_id"] if model_info else None, + endpoint_name=model, + endpoint_path=api_route, + model_id=model_info["id"] if model_info else None, + model_name=model_info["metadata"]["name"] if model_info else None, + provider=model_info["metadata"]["provider"] if model_info else None, + modality=model_info["metadata"]["modality"] if model_info else None, + request_arrival_time=start_time, + request_forwarded_time=kwargs.get("api_call_start_time") or start_time, + response_start_time=kwargs.get("completion_start_time") or end_time, + response_end_time=end_time, + request_body=proxy_server_request.get("body", {}), + response_body=response_body, + cost=cost, + is_cache_hit=is_cache_hit or False, + is_streaming=kwargs.get("stream", False), + is_success=not failure, + ) + # verbose_logger.info(f"\n\nMetrics Data: {metrics_data}\n\n") + return metrics_data + + async def async_log_success_event(self, kwargs, response_obj, start_time, end_time): + verbose_logger.info("On Async Success!") + metrics_data = self.get_request_metrics(kwargs, response_obj, start_time, end_time) + metrics_data_json = metrics_data.model_dump(mode="json") + verbose_logger.info(f"Metrics Data JSON: {metrics_data_json}") + with DaprService() as dapr_service: + dapr_service.publish_to_topic( + data=metrics_data_json, + target_topic_name=app_settings.budmetrics_topic_name, + target_name=app_settings.budmetrics_app_name, + event_type="add_request_metrics", + ) + return + + async def async_log_failure_event(self, kwargs, response_obj, start_time, end_time): + try: + verbose_logger.info("On Async Failure !") + metrics_data = self.get_request_metrics(kwargs, response_obj, start_time, end_time, failure=True) + metrics_data_json = metrics_data.model_dump(mode="json") + verbose_logger.info(f"Metrics Data JSON: {metrics_data_json}") + with DaprService() as dapr_service: + dapr_service.publish_to_topic( + data=metrics_data_json, + target_topic_name=app_settings.budmetrics_topic_name, + target_name=app_settings.budmetrics_app_name, + event_type="add_request_metrics", + ) + except Exception as e: + # TODO: what metrics data to log here? 
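+            # On this failure path get_request_metrics(..., failure=True) still
+            # builds a full RequestMetrics event: response_body carries the
+            # exception string and traceback, cost is 0, and is_success is
+            # False, so budmetrics receives the same add_request_metrics shape
+            # for errors as for successes. If building or publishing the event
+            # itself raised, we fall through to local logging only. A sketch of
+            # the payload that would have been sent:
+            #   {"request_id": "...", "is_success": false, "cost": 0,
+            #    "response_body": {"exception": "...", "traceback": "..."}, ...}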
+ import traceback + verbose_logger.info(f"Exception: {e}") + verbose_logger.info(f"Traceback: {traceback.format_exc()}") + +proxy_handler_instance = MyCustomHandler() + +# Set litellm.callbacks = [proxy_handler_instance] on the proxy +# need to set litellm.callbacks = [proxy_handler_instance] # on the proxy \ No newline at end of file diff --git a/litellm/proxy/budserve_middleware.py b/litellm/proxy/budserve_middleware.py index 776f138b2c2d..57ac2a917b5c 100644 --- a/litellm/proxy/budserve_middleware.py +++ b/litellm/proxy/budserve_middleware.py @@ -21,6 +21,13 @@ class BudServeMiddleware(BaseHTTPMiddleware): async def get_api_key(self, request): authorization_header = request.headers.get("Authorization") + if not authorization_header: + raise ProxyException( + message="Authorization header is missing", + type="unauthorized", + param="Authorization", + code=401 + ) api_key = authorization_header.split(" ")[1] return api_key @@ -83,12 +90,18 @@ async def dispatch( # get the request body request_data = await _read_request_body(request=request) + request.state.original_body = json.dumps(request_data) api_key = await self.get_api_key(request) endpoint_name = request_data.get("model") # get endpoint details to fill cache_params user_config = await self.fetch_user_config(api_key, endpoint_name) + request_data["metadata"] = { + "project_id": user_config.get("project_id"), + "project_name": user_config.get("project_name"), + } + # redis connection params we will set as kubernetes env variables # can be fetched using os.getenv request_data["user_config"] = { diff --git a/litellm/proxy/proxy_cli.py b/litellm/proxy/proxy_cli.py index 094828de17a3..dbc921ab129a 100644 --- a/litellm/proxy/proxy_cli.py +++ b/litellm/proxy/proxy_cli.py @@ -631,6 +631,7 @@ def _make_openai_completion(): ): try: from litellm.secret_managers.main import get_secret + from litellm._logging import verbose_proxy_logger if os.getenv("DATABASE_URL", None) is not None: ### add connection pool + pool timeout args @@ -651,10 +652,14 @@ def _make_openai_completion(): modified_url = append_query_params(database_url, params) os.environ["DIRECT_URL"] = modified_url ### + verbose_proxy_logger.info("Running prisma db push") subprocess.run(["prisma"], capture_output=True) + verbose_proxy_logger.info("Prisma db push complete") is_prisma_runnable = True except FileNotFoundError: is_prisma_runnable = False + except Exception as e: + print(e) if is_prisma_runnable: from litellm.proxy.db.check_migration import check_prisma_schema_diff diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py index a41491227715..1117b8c5c725 100644 --- a/litellm/proxy/proxy_server.py +++ b/litellm/proxy/proxy_server.py @@ -25,6 +25,9 @@ get_type_hints, ) +from budmicroframe.main import configure_app +from litellm.commons.config import app_settings, secrets_settings + if TYPE_CHECKING: from opentelemetry.trace import Span as _Span @@ -364,15 +367,16 @@ def generate_feedback_box(): else f"Proxy Server to call 100+ LLMs in the OpenAI format. 
{custom_swagger_message}\n\n{ui_message}" ) -app = FastAPI( - docs_url=_get_docs_url(), - redoc_url=_get_redoc_url(), - title=_title, - description=_description, - version=version, - root_path=server_root_path, # check if user passed root path, FastAPI defaults this value to "" -) +# app = FastAPI( +# docs_url=_get_docs_url(), +# redoc_url=_get_redoc_url(), +# title=_title, +# description=_description, +# version=version, +# root_path=server_root_path, # check if user passed root path, FastAPI defaults this value to "" +# ) +app = configure_app(app_settings, secrets_settings) ### CUSTOM API DOCS [ENTERPRISE FEATURE] ### # Custom OpenAPI schema generator to include only selected routes @@ -470,9 +474,13 @@ async def redirect_ui_middleware(request: Request, call_next): app.add_middleware(BudServeMiddleware) @app.middleware("http") async def catch_exceptions_middleware(request: Request, call_next): + start_time = time.time() try: return await call_next(request) except Exception as e: + # Handle the same way as the exception handler + import traceback + from litellm.custom_callbacks import proxy_handler_instance # Convert to ProxyException if needed if not isinstance(e, ProxyException): e = ProxyException( @@ -481,7 +489,27 @@ async def catch_exceptions_middleware(request: Request, call_next): param=None, code=500 ) - # Handle the same way as the exception handler + # TODO: send error to budmetrics + end_time = time.time() + original_body = getattr(request.state, "original_body", None) + if original_body is not None: + request_body = json.loads(original_body) + else: + request_body = {} + kwargs = { + "model": request_body.get("model", None), + "cache_hit": False, + "exception": e, + "traceback_exception": traceback.format_exc(), + "litellm_params": { + "proxy_server_request": {"body": request_body}, + "metadata": { + "endpoint": request.url + } + }, + "stream": request_body.get("stream", False), + } + await proxy_handler_instance.async_log_failure_event(kwargs, None, start_time, end_time) return JSONResponse( status_code=int(e.code) if e.code else status.HTTP_500_INTERNAL_SERVER_ERROR, content={ diff --git a/litellm/router.py b/litellm/router.py index 17af0e6193cc..ca49b0ddaca7 100644 --- a/litellm/router.py +++ b/litellm/router.py @@ -3003,7 +3003,11 @@ async def make_call(self, original_function: Any, *args, **kwargs): response = await response ## PROCESS RESPONSE HEADERS await self.set_response_headers(response=response, model_group=model_group) - + verbose_router_logger.info(f"TYPE OF MAKE CALL RESPONSE : {type(response)}") + if hasattr(response, "model"): + response.model = model_group + if isinstance(response, dict): + response["model"] = model_group return response def _handle_mock_testing_rate_limit_error( diff --git a/litellm_config.yaml b/litellm_config.yaml index 01422da9f8f7..91d57221fce2 100644 --- a/litellm_config.yaml +++ b/litellm_config.yaml @@ -5,4 +5,6 @@ router_settings: cache_responses: False redis_host: "os.environ/REDIS_HOST" redis_port: "os.environ/REDIS_PORT" - redis_password: "os.environ/REDIS_PASSWORD" \ No newline at end of file + redis_password: "os.environ/REDIS_PASSWORD" +litellm_settings: + callbacks: litellm.custom_callbacks.proxy_handler_instance # sets litellm.callbacks = [proxy_handler_instance] \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 7b1abcdc6828..a93b24d234a3 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,6 @@ +langchain_huggingface +gptcache +langchain_community # LITELLM PROXY DEPENDENCIES # 
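+# The three unpinned additions at the top of this file back the semantic-cache
+# path (litellm/commons/config.py defaults CACHE_EMBEDDING_MODEL to a
+# sentence-transformers model, presumably loaded via langchain_huggingface).
+# Note that Dockerfile.bud later overrides gptcache with the
+# BudEcosystem/BudServeGPTCache fork, and that leaving these entries unpinned
+# diverges from the pinned style of the rest of this file.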
anyio==4.4.0 # openai + http req. httpx==0.27.0 # Pin Httpx dependency @@ -38,7 +41,7 @@ cryptography==42.0.7 python-dotenv==1.0.0 # for env tiktoken==0.7.0 # for calculating usage importlib-metadata==6.8.0 # for random utils -tokenizers==0.14.0 # for calculating usage +tokenizers # for calculating usage click==8.1.7 # for proxy cli jinja2==3.1.4 # for prompt templates certifi==2024.7.4 # [TODO] clean up @@ -48,4 +51,4 @@ tenacity==8.2.3 # for retrying requests, when litellm.num_retries set pydantic==2.7.1 # proxy + openai req. jsonschema==4.22.0 # validating json schema websockets==10.4 # for realtime API -#### \ No newline at end of file +####
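
A minimal sketch of constructing and validating the RequestMetrics event that
litellm/custom_callbacks.py publishes to budmetrics. This assumes budmicroframe
is installed and the proxy's required env vars (e.g. DATABASE_URL) are set,
since importing the module instantiates AppConfig; all field values below are
illustrative:

    from datetime import datetime, timedelta
    from uuid import uuid4

    from litellm.custom_callbacks import RequestMetrics

    now = datetime.now()
    metrics = RequestMetrics(
        request_id=uuid4(),
        project_id=None,
        project_name=None,
        endpoint_id=None,
        endpoint_name="gpt-4o",  # the model name the caller sent
        endpoint_path="/v1/chat/completions",
        model_id=None,
        model_name=None,
        provider=None,
        modality=None,
        request_arrival_time=now,
        request_forwarded_time=now,
        response_start_time=now + timedelta(milliseconds=120),
        response_end_time=now + timedelta(milliseconds=950),
        request_body={"model": "gpt-4o", "messages": [], "stream": False},
        response_body={},
        cost=0.0,
        is_cache_hit=False,
        is_streaming=False,
        is_success=True,
    )
    metrics.validate_intervals()            # raises ValueError on inconsistent timestamps
    print(metrics.model_dump(mode="json"))  # the add_request_metrics payload shape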