diff --git a/.dapr/appconfig-dev.yaml b/.dapr/appconfig-dev.yaml new file mode 100644 index 000000000000..b22f553b6877 --- /dev/null +++ b/.dapr/appconfig-dev.yaml @@ -0,0 +1,19 @@ +apiVersion: dapr.io/v1alpha1 +kind: Configuration +metadata: + name: appconfig + namespace: default +spec: + tracing: + samplingRate: "1" + stdout: true + features: + - name: SchedulerReminders + enabled: true + # zipkin: + # endpointAddress: http://localhost:9411/api/v2/spans + secrets: + scopes: + - storeName: secretstore-local + defaultAccess: allow + deniedSecrets: [ ] diff --git a/.dapr/components/configstore.yaml b/.dapr/components/configstore.yaml new file mode 100644 index 000000000000..4149f5d9c955 --- /dev/null +++ b/.dapr/components/configstore.yaml @@ -0,0 +1,20 @@ +# Reference: https://docs.dapr.io/reference/components-reference/supported-configuration-stores/redis-configuration-store/ + +apiVersion: dapr.io/v1alpha1 +kind: Component +metadata: + name: configstore + namespace: development +spec: + type: configuration.redis + metadata: + - name: redisHost + secretKeyRef: + name: REDIS_URI + key: REDIS_URI + - name: redisPassword + secretKeyRef: + name: REDIS_PASSWORD + key: REDIS_PASSWORD +auth: + secretStore: secretstore-local \ No newline at end of file diff --git a/.dapr/components/pubsub-redis.yaml b/.dapr/components/pubsub-redis.yaml new file mode 100644 index 000000000000..346914a8f510 --- /dev/null +++ b/.dapr/components/pubsub-redis.yaml @@ -0,0 +1,25 @@ +# Reference: https://docs.dapr.io/reference/components-reference/supported-pubsub/setup-redis-pubsub/ + +apiVersion: dapr.io/v1alpha1 +kind: Component +metadata: + name: pubsub-redis + namespace: development +spec: + type: pubsub.redis + version: v1 + metadata: + - name: redisHost + secretKeyRef: + name: REDIS_URI + key: REDIS_URI + - name: redisPassword + secretKeyRef: + name: REDIS_PASSWORD + key: REDIS_PASSWORD + - name: consumerID + value: "{appID}" + - name: concurrency + value: "10" +auth: + secretStore: secretstore-local \ No newline at end of file diff --git a/.dapr/components/secretstore-env.yaml b/.dapr/components/secretstore-env.yaml new file mode 100644 index 000000000000..a8689cd3e6e1 --- /dev/null +++ b/.dapr/components/secretstore-env.yaml @@ -0,0 +1,13 @@ +# Reference: https://docs.dapr.io/reference/components-reference/supported-secret-stores/file-secret-store/ + +apiVersion: dapr.io/v1alpha1 +kind: Component +metadata: + name: secretstore-local + namespace: development +spec: + type: secretstores.local.env + version: v1 + metadata: + - name: prefix + value: "SECRETS_" \ No newline at end of file diff --git a/.dapr/components/statestore.yaml b/.dapr/components/statestore.yaml new file mode 100644 index 000000000000..573694eebc40 --- /dev/null +++ b/.dapr/components/statestore.yaml @@ -0,0 +1,23 @@ +# Reference: https://docs.dapr.io/reference/components-reference/supported-state-stores/setup-redis/ + +apiVersion: dapr.io/v1alpha1 +kind: Component +metadata: + name: statestore + namespace: development +spec: + type: state.redis + version: v1 + metadata: + - name: redisHost + secretKeyRef: + name: REDIS_URI + key: REDIS_URI + - name: redisPassword + secretKeyRef: + name: REDIS_PASSWORD + key: REDIS_PASSWORD + - name: actorStateStore + value: "true" +auth: + secretStore: secretstore-local \ No newline at end of file diff --git a/.dapr/components/subscriptions.yaml b/.dapr/components/subscriptions.yaml new file mode 100644 index 000000000000..7bc9ff3ca6ee --- /dev/null +++ b/.dapr/components/subscriptions.yaml @@ -0,0 +1,12 @@ 
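+# Reference: https://docs.dapr.io/developing-applications/building-blocks/pubsub/subscription-methods/
+#
+# Declarative subscription: Dapr delivers events published on the
+# budLitellmMessages topic of the pubsub-redis component to the app's
+# /notifications route, and parks undeliverable events on the poisonMessages
+# dead-letter topic. A minimal sketch of the handler this route implies
+# (assuming a FastAPI app; the handler name and body are illustrative, only
+# the route path is fixed by this file):
+#
+#   @app.post("/notifications")
+#   async def notifications(event: dict) -> dict:
+#       # Returning SUCCESS acks the event; RETRY and DROP are also honored.
+#       return {"status": "SUCCESS"}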
+apiVersion: dapr.io/v2alpha1 +kind: Subscription +metadata: + name: pubsub-subscription +spec: + topic: budLitellmMessages + routes: + default: /notifications + pubsubname: pubsub-redis + deadLetterTopic: poisonMessages +scopes: + - budlitellm \ No newline at end of file diff --git a/.dockerignore b/.dockerignore index 929eace5e343..2919658574fb 100644 --- a/.dockerignore +++ b/.dockerignore @@ -9,3 +9,4 @@ tests .devcontainer *.tgz log.txt +budlitellm diff --git a/Dockerfile.bud b/Dockerfile.bud index e64c89ba763c..3cb2b8f18d85 100644 --- a/Dockerfile.bud +++ b/Dockerfile.bud @@ -11,7 +11,7 @@ WORKDIR /app # Install build dependencies RUN apt-get clean && apt-get update && \ - apt-get install -y gcc python3-dev && \ + apt-get install -y gcc python3-dev git && \ rm -rf /var/lib/apt/lists/* RUN pip install --upgrade pip && \ @@ -21,7 +21,7 @@ RUN pip install --upgrade pip && \ COPY . . # Build Admin UI -RUN chmod +x build_admin_ui.sh && ./build_admin_ui.sh +RUN chmod +x docker/build_admin_ui.sh && ./docker/build_admin_ui.sh # Build the package RUN rm -rf dist/* && python -m build @@ -44,13 +44,13 @@ RUN pip uninstall PyJWT -y RUN pip install PyJWT --no-cache-dir # Build Admin UI -RUN chmod +x build_admin_ui.sh && ./build_admin_ui.sh +RUN chmod +x docker/build_admin_ui.sh && ./docker/build_admin_ui.sh # Runtime stage FROM $LITELLM_RUNTIME_IMAGE AS runtime # Update dependencies and clean up - handles debian security issue -RUN apt-get update && apt-get upgrade -y && rm -rf /var/lib/apt/lists/* +RUN apt-get update && apt-get install -y git && apt-get upgrade -y && rm -rf /var/lib/apt/lists/* WORKDIR /app # Copy the current directory contents into the container at /app @@ -64,10 +64,14 @@ COPY --from=builder /wheels/ /wheels/ # Install the built wheel using pip; again using a wildcard if it's the only file RUN pip install *.whl /wheels/* --no-index --find-links=/wheels/ && rm -f *.whl && rm -rf /wheels +# ensure gptcache is installed +RUN pip install git+https://github.com/BudEcosystem/BudServeGPTCache.git#egg=gptcache +RUN pip install git+https://github.com/BudEcosystem/bud-microframe.git#egg=budmicroframe + # Generate prisma client RUN prisma generate -RUN chmod +x entrypoint.sh +# RUN chmod +x entrypoint.sh -EXPOSE 4000/tcp +EXPOSE 4010/tcp -ENTRYPOINT ["sh", "-c", "cp /app/config/config.yaml /app && litellm --config /app/config.yaml --port 4000"] \ No newline at end of file +# ENTRYPOINT ["sh", "-c", "litellm --config /app/litellm_config.yaml --port 4000"] \ No newline at end of file diff --git a/deploy/docker-compose-dev.yaml b/deploy/docker-compose-dev.yaml new file mode 100644 index 000000000000..05ba6ff68c1e --- /dev/null +++ b/deploy/docker-compose-dev.yaml @@ -0,0 +1,70 @@ +include: + - ./docker-compose-redis.yaml + +services: + bud-litellm-app: + image: bud-microframe/$APP_NAME:$NAMESPACE + profiles: + - app + container_name: bud-mf-$NAMESPACE-$APP_NAME + build: + context: .. 
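+      # The build context is the repo root so the image can COPY the whole
+      # tree; Dockerfile.bud now runs docker/build_admin_ui.sh and pip-installs
+      # the BudEcosystem forks of gptcache and budmicroframe from GitHub, which
+      # is why git is installed in both the builder and runtime stages.
+      # Equivalent manual build from the repo root (a sketch):
+      #   docker build -f Dockerfile.bud -t bud-microframe/$APP_NAME:$NAMESPACE .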
+      dockerfile: ./Dockerfile.bud
+    # command: [ "litellm", "--config", "/app/litellm_config.yaml", "--port", $APP_PORT ]
+    # command: sh -c "cd litellm/proxy && litellm --config /app/litellm_config.yaml --port $APP_PORT"
+    command: sh -c "tail -f /dev/null"
+    # command: sh -c "alembic -c ./budcluster/alembic.ini upgrade head && tail -f /dev/null" # && uvicorn $APP_NAME.main:app --host 0.0.0.0 --port $APP_PORT --reload"
+    # ports:
+    #   - "$DAPR_GRPC_PORT:$DAPR_GRPC_PORT" # Dapr instances communicate over gRPC so the gRPC port needs to be exposed
+    #   - "$DAPR_HTTP_PORT:$DAPR_HTTP_PORT" # Expose Dapr HTTP port for service invocation
+    #   - "$APP_PORT:$APP_PORT" # Expose app port for debugging purposes
+    volumes:
+      - ../:/app/
+      - ../cache:/app/cache
+    env_file:
+      - path: ../.env
+        required: true
+    depends_on:
+      - bud-litellm-redis
+      - bud-litellm-placement
+    network_mode: host
+  bud-litellm-sidecar:
+    container_name: bud-mf-$NAMESPACE-$APP_NAME-dapr
+    image: "daprio/daprd:edge"
+    command: [
+      "./daprd",
+      "--app-id", "$APP_NAME",
+      "--app-port", "$APP_PORT",
+      "--dapr-http-port", "$DAPR_HTTP_PORT",
+      "--dapr-grpc-port", "$DAPR_GRPC_PORT",
+      "--placement-host-address", "$DAPR_PLACEMENT_HOST:$DAPR_PLACEMENT_PORT", # Dapr's placement service can be reached via the docker DNS entry
+      "--metrics-port", "$DAPR_METRICS_PORT",
+      "--resources-path", "/components",
+      "--config", "/config/appconfig.yaml",
+      "--log-as-json"
+    ]
+    env_file:
+      - path: ../.env
+        required: true
+    volumes:
+      # - "../crypto-keys:/crypto-keys"
+      - "${DAPR_COMPONENTS:-../.dapr/components/}:/components" # Mount the components folder for the runtime to use. The mounted location must match the --resources-path argument.
+      - "${DAPR_APP_CONFIG:-../.dapr/appconfig-dev.yaml}:/config/appconfig.yaml" # Mount the config file for the runtime to use. The mounted location must match the --config argument.
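+      # With network_mode: host the app and this sidecar share localhost, so
+      # the proxy can publish through the sidecar's HTTP API without service
+      # discovery. The mounted component specs resolve their secretKeyRefs
+      # (e.g. REDIS_URI) through secretstore-local, whose SECRETS_ prefix maps
+      # the key REDIS_URI to the env var SECRETS_REDIS_URI. A sketch, using the
+      # component and topic names from .dapr/components/:
+      #   curl -X POST "http://localhost:$DAPR_HTTP_PORT/v1.0/publish/pubsub-redis/budLitellmMessages" \
+      #     -H "Content-Type: application/json" -d '{"hello": "bud"}'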
+      - ./:/app/
+    network_mode: "host"
+    # ports:
+    #   - "$DAPR_GRPC_PORT:$DAPR_GRPC_PORT"
+    #   - "$DAPR_HTTP_PORT:$DAPR_HTTP_PORT"
+    # network_mode: "service:app"
+  bud-litellm-placement:
+    container_name: bud-mf-$NAMESPACE-$APP_NAME-placement
+    image: "daprio/placement:edge"
+    command: [ "./placement", "--port", "$DAPR_PLACEMENT_PORT" ]
+    ports:
+      - "$DAPR_PLACEMENT_PORT:$DAPR_PLACEMENT_PORT"
+    networks:
+      - bud-litellm-network
+networks:
+  bud-litellm-network:
+    name: bud-mf-$NAMESPACE-$APP_NAME
+    driver: bridge
\ No newline at end of file
diff --git a/deploy/docker-compose-redis.yaml b/deploy/docker-compose-redis.yaml
new file mode 100644
index 000000000000..8f8b892ade14
--- /dev/null
+++ b/deploy/docker-compose-redis.yaml
@@ -0,0 +1,28 @@
+services:
+  bud-litellm-redis:
+    container_name: bud-mf-$NAMESPACE-litellm-redis
+    image: redis:alpine
+    ports:
+      - "$REDIS_PORT:$REDIS_PORT"
+    command:
+      - /bin/sh
+      - -c
+      - redis-server --requirepass "${SECRETS_REDIS_PASSWORD:?SECRETS_REDIS_PASSWORD variable is not set}" --port ${REDIS_PORT}
+    sysctls:
+      net.core.somaxconn: 1024
+    healthcheck:
+      test: [ "CMD", "redis-cli", "--raw", "incr", "ping" ]
+    volumes:
+      - budlitellm_redis_data:/data
+    # networks:
+    #   - bud-microframe-nw
+    # network_mode: "host"
+    networks:
+      - bud-litellm-network
+
+volumes:
+  budlitellm_redis_data:
+networks:
+  bud-litellm-network:
+    name: bud-mf-$NAMESPACE-$APP_NAME
+    driver: bridge
diff --git a/deploy/start_dev.sh b/deploy/start_dev.sh
new file mode 100755
index 000000000000..b11453997ffb
--- /dev/null
+++ b/deploy/start_dev.sh
@@ -0,0 +1,99 @@
+#!/usr/bin/env bash
+
+#
+# -----------------------------------------------------------------------------
+# Copyright (c) 2024 Bud Ecosystem Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# -----------------------------------------------------------------------------
+#
+
+DAPR_COMPONENTS="../.dapr/components/"
+DAPR_APP_CONFIG="../.dapr/appconfig-dev.yaml"
+
+DOCKER_COMPOSE_FILE="./deploy/docker-compose-dev.yaml"
+BUILD_FLAG=""
+DETACH_FLAG=""
+
+function display_help() {
+  echo "Usage: $0 [options]"
+  echo
+  echo "Options:"
+  echo "  --dapr-components  Set the dapr components folder path, this should be relative to the deploy directory (default: $DAPR_COMPONENTS)"
+  echo "  --dapr-app-config  Set the dapr app config path, this should be relative to the deploy directory (default: $DAPR_APP_CONFIG)"
+  echo "  -f FILE            Specify the Docker Compose file to use, this should be relative to your current directory (default: $DOCKER_COMPOSE_FILE)"
+  echo "  --build            Include this flag to force a rebuild of the Docker containers"
+  echo "  --skip-app         Include this flag to skip the app container"
+  echo "  -d                 Include this flag to detach and run the containers in the background"
+  echo "  --help             Display this help message and exit"
+  echo
+  echo "Example:"
+  echo "  $0 -f docker-compose-local.yaml --build"
+  echo "  This will use 'docker-compose-local.yaml' and force a rebuild of the containers."
+  echo
+  exit 0
+}
+
+# Parse arguments
+while [[ "$#" -gt 0 ]]; do
+  case $1 in
+    --dapr-components) DAPR_COMPONENTS="$2"; shift ;;
+    --dapr-app-config) DAPR_APP_CONFIG="$2"; shift ;;
+    -f) DOCKER_COMPOSE_FILE="$2"; shift ;;
+    --build) BUILD_FLAG="--build" ;;
+    --skip-app) SKIP_APP_FLAG="true" ;;
+    -d) DETACH_FLAG="-d" ;;
+    --help) display_help ;;
+    *) echo "Unknown parameter passed: $1"; display_help ;;
+  esac
+  shift
+done
+
+set -a
+source ./.env
+set +a
+
+export REDIS_PORT=$(echo "${SECRETS_REDIS_URI:-redis:6379}" | cut -d':' -f2)
+
+: ${APP_NAME:?Application name is required, set APP_NAME in your .env file.}
+
+# Print the environment variables
+echo "****************************************************"
+echo "*                                                  *"
+echo "*        Starting Microservice Environment         *"
+echo "*                                                  *"
+echo "****************************************************"
+echo ""
+echo "🛠 App Name             : $APP_NAME"
+echo "🌐 App Port             : $APP_PORT"
+echo "🔑 Redis URI            : $SECRETS_REDIS_URI"
+echo "🌍 Dapr HTTP Port       : $DAPR_HTTP_PORT"
+echo "🌍 Dapr gRPC Port       : $DAPR_GRPC_PORT"
+echo "🛠 Namespace            : $NAMESPACE"
+echo "📊 Log Level            : $LOG_LEVEL"
+echo "🗂 Config Store Name    : $CONFIGSTORE_NAME"
+echo "🔐 Secret Store Name    : $SECRETSTORE_NAME"
+echo "🛠 Dapr Components      : $DAPR_COMPONENTS"
+echo "🛠 Dapr App Config      : $DAPR_APP_CONFIG"
+echo "🛠 Docker Compose File  : $DOCKER_COMPOSE_FILE"
+echo "🚀 Build Flag           : $BUILD_FLAG"
+echo ""
+echo "****************************************************"
+
+# Bring up Docker Compose
+echo "Bringing up Docker Compose with file: $DOCKER_COMPOSE_FILE"
+if [ -z "$SKIP_APP_FLAG" ]; then
+  docker compose --profile app -f "$DOCKER_COMPOSE_FILE" up $BUILD_FLAG $DETACH_FLAG
+else
+  docker compose -f "$DOCKER_COMPOSE_FILE" up $BUILD_FLAG $DETACH_FLAG
+fi
\ No newline at end of file
diff --git a/deploy/stop_dev.sh b/deploy/stop_dev.sh
new file mode 100755
index 000000000000..456654e9a804
--- /dev/null
+++ b/deploy/stop_dev.sh
@@ -0,0 +1,57 @@
+#!/bin/bash
+
+#
+# -----------------------------------------------------------------------------
+# Copyright (c) 2024 Bud Ecosystem Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# -----------------------------------------------------------------------------
+#
+
+# Default value for Docker Compose file
+DOCKER_COMPOSE_FILE="./deploy/docker-compose-dev.yaml"
+
+# Function to display help message
+function display_help() {
+  echo "Usage: $0 [options]"
+  echo
+  echo "Options:"
+  echo "  -f FILE       Specify the Docker Compose file to use (default: deploy/docker-compose-dev.yaml)"
+  echo "  --help        Display this help message and exit"
+  echo
+  echo "Example:"
+  echo "  $0 -f docker-compose-local.yaml"
+  echo "  This will stop the services using 'docker-compose-local.yaml'."
+ echo + exit 0 +} + +# Parse optional arguments +while [[ "$#" -gt 0 ]]; do + case $1 in + -f) DOCKER_COMPOSE_FILE="$2"; shift ;; + --help) display_help ;; + *) echo "Unknown parameter: $1"; exit 1 ;; + esac + shift +done + +set -a +source ./.env +set +a + +export REDIS_PORT=$(echo "${SECRETS_REDIS_URI:-redis:6379}" | cut -d':' -f2) + +# Stop Docker Compose services +echo "Stopping services defined in: $DOCKER_COMPOSE_FILE" +docker compose -f "$DOCKER_COMPOSE_FILE" stop \ No newline at end of file diff --git a/litellm/__about__.py b/litellm/__about__.py new file mode 100644 index 000000000000..9c3a5276d863 --- /dev/null +++ b/litellm/__about__.py @@ -0,0 +1,19 @@ +# ----------------------------------------------------------------------------- +# Copyright (c) 2024 Bud Ecosystem Inc. +# # +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# # +# http://www.apache.org/licenses/LICENSE-2.0 +# # +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ----------------------------------------------------------------------------- + +"""Contains metadata about the package, including version information and author details.""" + +__version__ = "budlitellm@0.0.1" diff --git a/litellm/commons/config.py b/litellm/commons/config.py new file mode 100644 index 000000000000..07d562fd024c --- /dev/null +++ b/litellm/commons/config.py @@ -0,0 +1,86 @@ +# ----------------------------------------------------------------------------- +# Copyright (c) 2024 Bud Ecosystem Inc. +# # +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# # +# http://www.apache.org/licenses/LICENSE-2.0 +# # +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ----------------------------------------------------------------------------- + +"""Manages application and secret configurations, utilizing environment variables and Dapr's configuration store for syncing.""" + +from pathlib import Path +from typing import Optional + +from budmicroframe.commons.config import BaseAppConfig, BaseSecretsConfig, register_settings, enable_periodic_sync_from_store +from pydantic import DirectoryPath, Field + +from litellm.__about__ import __version__ + + +class AppConfig(BaseAppConfig): + name: str = __version__.split("@")[0] + version: str = __version__.split("@")[-1] + description: str = Field("Bud-Litellm is a proxy server for LLM requests.", alias="DOCS_DESCRIPTION") + api_root: str = Field("", alias="SERVER_ROOT_PATH") + + # Base Directory + base_dir: DirectoryPath = Path(__file__).parent.parent.parent.resolve() + + # Bud-Litellm env + litellm_log: str = Field("DEBUG", alias="LITELLM_LOG") + litellm_master_key: str = Field("sk-1234", alias="LITELLM_MASTER_KEY") + litellm_salt_key: str = Field("litellm_salt_key", alias="LITELLM_SALT_KEY") + database_url: str = Field(..., alias="DATABASE_URL") + store_model_in_db: bool = Field(True, alias="STORE_MODEL_IN_DB") + budserve_app_baseurl: str = Field("http://127.0.0.1:8050", alias="BUDSERVE_APP_BASEURL") + + # Redis Config + redis_host: str = Field("localhost", alias="REDIS_HOST") + redis_port: int = Field(6379, alias="REDIS_PORT") + redis_username: str = Field("", alias="REDIS_USERNAME") + redis_password: str = Field("", alias="REDIS_PASSWORD") + redis_db: int = Field(0, alias="REDIS_DB") + + # Cache Config + enable_cache: bool = Field(False, alias="ENABLE_CACHE") + enable_cache_metric: bool = Field(False, alias="ENABLE_CACHE_METRIC") + cache_eviction_policy: str = Field("LRU", alias="CACHE_EVICTION_POLICY") + cache_max_size: int = Field(1000, alias="CACHE_MAX_SIZE") + cache_ttl: int = Field(3600, alias="CACHE_TTL") + cache_score_threshold: float = Field(0.8, alias="CACHE_SCORE_THRESHOLD") + cache_embedding_model: str = Field("sentence-transformers/all-MiniLM-L6-v2", alias="CACHE_EMBEDDING_MODEL") + + # Metrics App and Topic + budmetrics_app_name: str = Field("budMetrics", alias="BUDMETRICS_APP_NAME") + budmetrics_topic_name: str = Field("budMetricsMessages", alias="BUDMETRICS_TOPIC_NAME") + + +class SecretsConfig(BaseSecretsConfig): + name: str = __version__.split("@")[0] + version: str = __version__.split("@")[-1] + + # Database + psql_user: Optional[str] = Field( + None, + alias="PSQL_USER", + json_schema_extra=enable_periodic_sync_from_store(is_global=True), + ) + psql_password: Optional[str] = Field( + None, + alias="PSQL_PASSWORD", + json_schema_extra=enable_periodic_sync_from_store(is_global=True), + ) + + +app_settings = AppConfig() +secrets_settings = SecretsConfig() + +register_settings(app_settings, secrets_settings) \ No newline at end of file diff --git a/litellm/custom_callbacks.py b/litellm/custom_callbacks.py new file mode 100644 index 000000000000..7f6f6067c00e --- /dev/null +++ b/litellm/custom_callbacks.py @@ -0,0 +1,179 @@ +import copy +from datetime import datetime +from typing import Any, Dict, List, Optional, Union +from urllib.parse import urlparse +from uuid import UUID, uuid4 + +from litellm.integrations.custom_logger import CustomLogger +import litellm +from litellm.commons.config import app_settings +from litellm._logging import verbose_logger +from budmicroframe.commons.schemas import CloudEventBase +from budmicroframe.shared.dapr_service import DaprService + +# error in 
budserve_middleware.py +# Scenario 1: if user sends wrong api key +# Scenario 2: if user sends wrong model param + +# Keys i won't have: +# project_id, project_name, endpoint_id, endpoint_name (what user has sent), endpoint_path, +# model_id, provider, modality, model_name +# Keys i can set: +# request_arrival_time == request_forwarded_time == response_start_time == response_end_time +# request_body, response_body, cost = 0, is_cache_hit = False, is_success = False, is_streaming = False + + + +class RequestMetrics(CloudEventBase): + request_id: UUID + request_ip: Optional[str] = None + project_id: Optional[UUID] + project_name: Optional[str] + endpoint_id: Optional[UUID] + endpoint_name: Optional[str] + endpoint_path: Optional[str] + model_id: Optional[UUID] + model_name: Optional[str] + provider: Optional[str] + modality: Optional[str] + request_arrival_time: datetime + request_forwarded_time: datetime + response_start_time: datetime + response_end_time: datetime + request_body: Dict[str, Any] + response_body: Union[Dict[str, Any], List[Dict[str, Any]]] + cost: Optional[float] = None + is_cache_hit: bool + is_streaming: bool = False + is_success: bool + + def validate_intervals(self) -> "RequestMetrics": + if self.response_start_time > self.response_end_time: + raise ValueError("Response start time cannot be after response end time.") + if self.request_arrival_time > self.response_start_time: + raise ValueError("Request arrival time cannot be after response start time.") + if self.request_forwarded_time > self.response_start_time: + raise ValueError("Request forwarded time cannot be after response start time.") + if self.request_arrival_time > self.response_end_time: + raise ValueError("Request arrival time cannot be after response end time.") + return self + +# This file includes the custom callbacks for LiteLLM Proxy +# Once defined, these can be passed in proxy_config.yaml +class MyCustomHandler(CustomLogger): + def log_pre_api_call(self, model, messages, kwargs): + verbose_logger.info("Pre-API Call") + + def log_post_api_call(self, kwargs, response_obj, start_time, end_time): + verbose_logger.info("Post-API Call") + + def log_stream_event(self, kwargs, response_obj, start_time, end_time): + verbose_logger.info("On Stream") + + def log_success_event(self, kwargs, response_obj, start_time, end_time): + verbose_logger.info("On Success") + + def log_failure_event(self, kwargs, response_obj, start_time, end_time): + verbose_logger.info("On Failure") + + def get_request_metrics(self, kwargs, response_obj, start_time, end_time, failure=False) -> RequestMetrics: + # log: key, user, model, prompt, response, tokens, cost + # Access kwargs passed to litellm.completion() + verbose_logger.info(f"\nkwargs : {kwargs}") + # verbose_logger.info(f"\nresponse_obj : {response_obj}") + + model = kwargs.get("model", None) + is_cache_hit = kwargs.get("cache_hit") + response_body = copy.deepcopy(kwargs.get("standard_logging_object", {}).get("response", {})) if not failure else { + "exception": str(kwargs.get("exception", None)), + "traceback": kwargs.get("traceback_exception", None) + } + # Access litellm_params passed to litellm.completion(), example access `metadata` + litellm_params = kwargs.get("litellm_params", {}) + proxy_server_request = litellm_params.get("proxy_server_request", {}) + if proxy_server_request and proxy_server_request.get("body"): + # To handle unserializable metadata in proxy_server_request and circular dependencies + proxy_server_request["body"].pop("metadata", None) + if not 
proxy_server_request: + proxy_server_request["body"] = { + "model": model, + "messages": kwargs.get("messages", []), + "stream": kwargs.get("stream", False) + } + model_info = copy.deepcopy(litellm_params.get("model_info", {})) + metadata = litellm_params.get("metadata", {}) + endpoint = metadata.get("endpoint", "") + api_route = urlparse(str(endpoint)).path + if litellm_params.get("api_base"): + api_route = f"{litellm_params['api_base']}{api_route}" + + # Calculate cost using litellm.completion_cost() + response_obj = response_obj or {} + cost = litellm.completion_cost(completion_response=response_obj) if not failure else 0 + + usage = response_obj.get("usage", None) or {} + if isinstance(usage, litellm.Usage): + usage = dict(usage) + + metrics_data = RequestMetrics( + request_id=kwargs.get("litellm_call_id", uuid4()), + project_id=metadata.get("project_id", None), + project_name=metadata.get("project_name", None), + endpoint_id=model_info["metadata"]["endpoint_id"] if model_info else None, + endpoint_name=model, + endpoint_path=api_route, + model_id=model_info["id"] if model_info else None, + model_name=model_info["metadata"]["name"] if model_info else None, + provider=model_info["metadata"]["provider"] if model_info else None, + modality=model_info["metadata"]["modality"] if model_info else None, + request_arrival_time=start_time, + request_forwarded_time=kwargs.get("api_call_start_time") or start_time, + response_start_time=kwargs.get("completion_start_time") or end_time, + response_end_time=end_time, + request_body=proxy_server_request.get("body", {}), + response_body=response_body, + cost=cost, + is_cache_hit=is_cache_hit or False, + is_streaming=kwargs.get("stream", False), + is_success=not failure, + ) + # verbose_logger.info(f"\n\nMetrics Data: {metrics_data}\n\n") + return metrics_data + + async def async_log_success_event(self, kwargs, response_obj, start_time, end_time): + verbose_logger.info("On Async Success!") + metrics_data = self.get_request_metrics(kwargs, response_obj, start_time, end_time) + metrics_data_json = metrics_data.model_dump(mode="json") + verbose_logger.info(f"Metrics Data JSON: {metrics_data_json}") + with DaprService() as dapr_service: + dapr_service.publish_to_topic( + data=metrics_data_json, + target_topic_name=app_settings.budmetrics_topic_name, + target_name=app_settings.budmetrics_app_name, + event_type="add_request_metrics", + ) + return + + async def async_log_failure_event(self, kwargs, response_obj, start_time, end_time): + try: + verbose_logger.info("On Async Failure !") + metrics_data = self.get_request_metrics(kwargs, response_obj, start_time, end_time, failure=True) + metrics_data_json = metrics_data.model_dump(mode="json") + verbose_logger.info(f"Metrics Data JSON: {metrics_data_json}") + with DaprService() as dapr_service: + dapr_service.publish_to_topic( + data=metrics_data_json, + target_topic_name=app_settings.budmetrics_topic_name, + target_name=app_settings.budmetrics_app_name, + event_type="add_request_metrics", + ) + except Exception as e: + # TODO: what metrics data to log here? 
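+            # On this failure path get_request_metrics(..., failure=True) still
+            # builds a full RequestMetrics event: response_body carries the
+            # exception string and traceback, cost is 0, and is_success is
+            # False, so budmetrics receives the same add_request_metrics shape
+            # for errors as for successes. If building or publishing the event
+            # itself raised, we fall through to local logging only. A sketch of
+            # the payload that would have been sent:
+            #   {"request_id": "...", "is_success": false, "cost": 0,
+            #    "response_body": {"exception": "...", "traceback": "..."}, ...}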
+ import traceback + verbose_logger.info(f"Exception: {e}") + verbose_logger.info(f"Traceback: {traceback.format_exc()}") + +proxy_handler_instance = MyCustomHandler() + +# Set litellm.callbacks = [proxy_handler_instance] on the proxy +# need to set litellm.callbacks = [proxy_handler_instance] # on the proxy \ No newline at end of file diff --git a/litellm/proxy/budserve_middleware.py b/litellm/proxy/budserve_middleware.py index 776f138b2c2d..57ac2a917b5c 100644 --- a/litellm/proxy/budserve_middleware.py +++ b/litellm/proxy/budserve_middleware.py @@ -21,6 +21,13 @@ class BudServeMiddleware(BaseHTTPMiddleware): async def get_api_key(self, request): authorization_header = request.headers.get("Authorization") + if not authorization_header: + raise ProxyException( + message="Authorization header is missing", + type="unauthorized", + param="Authorization", + code=401 + ) api_key = authorization_header.split(" ")[1] return api_key @@ -83,12 +90,18 @@ async def dispatch( # get the request body request_data = await _read_request_body(request=request) + request.state.original_body = json.dumps(request_data) api_key = await self.get_api_key(request) endpoint_name = request_data.get("model") # get endpoint details to fill cache_params user_config = await self.fetch_user_config(api_key, endpoint_name) + request_data["metadata"] = { + "project_id": user_config.get("project_id"), + "project_name": user_config.get("project_name"), + } + # redis connection params we will set as kubernetes env variables # can be fetched using os.getenv request_data["user_config"] = { diff --git a/litellm/proxy/proxy_cli.py b/litellm/proxy/proxy_cli.py index 094828de17a3..dbc921ab129a 100644 --- a/litellm/proxy/proxy_cli.py +++ b/litellm/proxy/proxy_cli.py @@ -631,6 +631,7 @@ def _make_openai_completion(): ): try: from litellm.secret_managers.main import get_secret + from litellm._logging import verbose_proxy_logger if os.getenv("DATABASE_URL", None) is not None: ### add connection pool + pool timeout args @@ -651,10 +652,14 @@ def _make_openai_completion(): modified_url = append_query_params(database_url, params) os.environ["DIRECT_URL"] = modified_url ### + verbose_proxy_logger.info("Running prisma db push") subprocess.run(["prisma"], capture_output=True) + verbose_proxy_logger.info("Prisma db push complete") is_prisma_runnable = True except FileNotFoundError: is_prisma_runnable = False + except Exception as e: + print(e) if is_prisma_runnable: from litellm.proxy.db.check_migration import check_prisma_schema_diff diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py index a41491227715..1117b8c5c725 100644 --- a/litellm/proxy/proxy_server.py +++ b/litellm/proxy/proxy_server.py @@ -25,6 +25,9 @@ get_type_hints, ) +from budmicroframe.main import configure_app +from litellm.commons.config import app_settings, secrets_settings + if TYPE_CHECKING: from opentelemetry.trace import Span as _Span @@ -364,15 +367,16 @@ def generate_feedback_box(): else f"Proxy Server to call 100+ LLMs in the OpenAI format. 
{custom_swagger_message}\n\n{ui_message}" ) -app = FastAPI( - docs_url=_get_docs_url(), - redoc_url=_get_redoc_url(), - title=_title, - description=_description, - version=version, - root_path=server_root_path, # check if user passed root path, FastAPI defaults this value to "" -) +# app = FastAPI( +# docs_url=_get_docs_url(), +# redoc_url=_get_redoc_url(), +# title=_title, +# description=_description, +# version=version, +# root_path=server_root_path, # check if user passed root path, FastAPI defaults this value to "" +# ) +app = configure_app(app_settings, secrets_settings) ### CUSTOM API DOCS [ENTERPRISE FEATURE] ### # Custom OpenAPI schema generator to include only selected routes @@ -470,9 +474,13 @@ async def redirect_ui_middleware(request: Request, call_next): app.add_middleware(BudServeMiddleware) @app.middleware("http") async def catch_exceptions_middleware(request: Request, call_next): + start_time = time.time() try: return await call_next(request) except Exception as e: + # Handle the same way as the exception handler + import traceback + from litellm.custom_callbacks import proxy_handler_instance # Convert to ProxyException if needed if not isinstance(e, ProxyException): e = ProxyException( @@ -481,7 +489,27 @@ async def catch_exceptions_middleware(request: Request, call_next): param=None, code=500 ) - # Handle the same way as the exception handler + # TODO: send error to budmetrics + end_time = time.time() + original_body = getattr(request.state, "original_body", None) + if original_body is not None: + request_body = json.loads(original_body) + else: + request_body = {} + kwargs = { + "model": request_body.get("model", None), + "cache_hit": False, + "exception": e, + "traceback_exception": traceback.format_exc(), + "litellm_params": { + "proxy_server_request": {"body": request_body}, + "metadata": { + "endpoint": request.url + } + }, + "stream": request_body.get("stream", False), + } + await proxy_handler_instance.async_log_failure_event(kwargs, None, start_time, end_time) return JSONResponse( status_code=int(e.code) if e.code else status.HTTP_500_INTERNAL_SERVER_ERROR, content={ diff --git a/litellm/router.py b/litellm/router.py index 17af0e6193cc..ca49b0ddaca7 100644 --- a/litellm/router.py +++ b/litellm/router.py @@ -3003,7 +3003,11 @@ async def make_call(self, original_function: Any, *args, **kwargs): response = await response ## PROCESS RESPONSE HEADERS await self.set_response_headers(response=response, model_group=model_group) - + verbose_router_logger.info(f"TYPE OF MAKE CALL RESPONSE : {type(response)}") + if hasattr(response, "model"): + response.model = model_group + if isinstance(response, dict): + response["model"] = model_group return response def _handle_mock_testing_rate_limit_error( diff --git a/litellm_config.yaml b/litellm_config.yaml index 01422da9f8f7..91d57221fce2 100644 --- a/litellm_config.yaml +++ b/litellm_config.yaml @@ -5,4 +5,6 @@ router_settings: cache_responses: False redis_host: "os.environ/REDIS_HOST" redis_port: "os.environ/REDIS_PORT" - redis_password: "os.environ/REDIS_PASSWORD" \ No newline at end of file + redis_password: "os.environ/REDIS_PASSWORD" +litellm_settings: + callbacks: litellm.custom_callbacks.proxy_handler_instance # sets litellm.callbacks = [proxy_handler_instance] \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 7b1abcdc6828..a93b24d234a3 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,6 @@ +langchain_huggingface +gptcache +langchain_community # LITELLM PROXY DEPENDENCIES # 
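+# The three unpinned additions at the top of this file back the semantic-cache
+# path (litellm/commons/config.py defaults CACHE_EMBEDDING_MODEL to a
+# sentence-transformers model, presumably loaded via langchain_huggingface).
+# Note that Dockerfile.bud later overrides gptcache with the
+# BudEcosystem/BudServeGPTCache fork, and that leaving these entries unpinned
+# diverges from the pinned style of the rest of this file.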
anyio==4.4.0 # openai + http req. httpx==0.27.0 # Pin Httpx dependency @@ -38,7 +41,7 @@ cryptography==42.0.7 python-dotenv==1.0.0 # for env tiktoken==0.7.0 # for calculating usage importlib-metadata==6.8.0 # for random utils -tokenizers==0.14.0 # for calculating usage +tokenizers # for calculating usage click==8.1.7 # for proxy cli jinja2==3.1.4 # for prompt templates certifi==2024.7.4 # [TODO] clean up @@ -48,4 +51,4 @@ tenacity==8.2.3 # for retrying requests, when litellm.num_retries set pydantic==2.7.1 # proxy + openai req. jsonschema==4.22.0 # validating json schema websockets==10.4 # for realtime API -#### \ No newline at end of file +####
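
A minimal sketch of constructing and validating the RequestMetrics event that
litellm/custom_callbacks.py publishes to budmetrics. This assumes budmicroframe
is installed and the proxy's required env vars (e.g. DATABASE_URL) are set,
since importing the module instantiates AppConfig; all field values below are
illustrative:

    from datetime import datetime, timedelta
    from uuid import uuid4

    from litellm.custom_callbacks import RequestMetrics

    now = datetime.now()
    metrics = RequestMetrics(
        request_id=uuid4(),
        project_id=None,
        project_name=None,
        endpoint_id=None,
        endpoint_name="gpt-4o",  # the model name the caller sent
        endpoint_path="/v1/chat/completions",
        model_id=None,
        model_name=None,
        provider=None,
        modality=None,
        request_arrival_time=now,
        request_forwarded_time=now,
        response_start_time=now + timedelta(milliseconds=120),
        response_end_time=now + timedelta(milliseconds=950),
        request_body={"model": "gpt-4o", "messages": [], "stream": False},
        response_body={},
        cost=0.0,
        is_cache_hit=False,
        is_streaming=False,
        is_success=True,
    )
    metrics.validate_intervals()            # raises ValueError on inconsistent timestamps
    print(metrics.model_dump(mode="json"))  # the add_request_metrics payload shape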