From 720116132f2cefd95c02f008e89faaaa31395f1c Mon Sep 17 00:00:00 2001 From: Ishaan Sehgal Date: Fri, 2 Feb 2024 15:47:23 -0600 Subject: [PATCH] fix: Reduce model image sizes (#225) This PR proposes changing the base image from `nvcr.io/nvidia/pytorch:23.10-py3` to the smaller `python:3.8-slim`, aiming to reduce our container image sizes. I came to this conclusion because: 1. AKS Preinstallation: On each GPU node, AKS preinstalls NVIDIA Driver (which comes with basic CUDA runtime functionality). AKS also preinstalls nvidia-container-toolkit (which includes nvidia-container-runtime), and a couple of other necessary nvidia libraries that can be verified on an AKS node using: ``` ls /usr/bin | grep nvidia ``` 2. NVIDIA Device Plugin for K8s: Enabled by default on AKS clusters with N-series node pools, this DaemonSet advertises GPU resources to the K8s scheduler, allowing pods to request and be allocated GPUs. It's important to note that this plugin doesn't install NVIDIA drivers, CUDA, or the NVIDIA Container Toolkit on the nodes; these components are preconfigured by AKS (part 1). 3. Container Runtime: AKS nodes come preinstalled with `nvidia-container-toolkit` which includes `nvidia-container-runtime`. This supports GPU passthrough to containers via containerd. I learned this configuration allows containers to access the necessary NVIDIA drivers and libraries at the host level, thereby removing the need to bundle these components within individual container images. This is confirmed via the container runtime configuration (`cat /etc/containerd/config.toml`). 4. PyTorch Installation: I learned that PyTorch, when installed via pip, automatically includes additional essential GPU acceleration libraries within its binaries (CUDA, cuBLAS, cuDNN, NCCL, etc.), eliminating the reliance on the nvcr.io/nvidia/pytorch image for these. 5. dockerignore: Added a `.dockerignore` to exclude `.git` LFS files, which were taking too much space. 
Based on these findings, I learned the `python:3.8-slim` base image should suffice for our requirements. I have validated this working locally, with further testing planned for built images. Aside: I found the NVIDIA GPU Operator offers additional functionalities like DCGM metrics, runtime validation, and dynamic MIG profile management, though not required for our current needs. Sources: https://docs.nvidia.com/datacenter/cloud-native/gpu-operator/latest/microsoft-aks.html https://catalog.ngc.nvidia.com/orgs/nvidia/containers/gpu-operator https://github.com/Azure/aks-engine/blob/master/examples/addons/nvidia-device-plugin/README.md https://hub.docker.com/r/nvidia/cuda/tags?page=1&name=%25-base https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-23-10.html https://stackoverflow.com/questions/45293580/whats-the-relation-between-nvidia-driver-cuda-driver-and-cuda-toolkit https://earthly.dev/blog/buildingrunning-nvidiacontainer/ https://discuss.pytorch.org/t/how-to-check-if-torch-uses-cudnn/21933 https://discuss.pytorch.org/t/is-nvidia-driver-already-included-cuda-and-cuda-toolkit/184411 https://learn.microsoft.com/en-us/azure/aks/gpu-cluster?tabs=add-ubuntu-gpu-node-pool --- .github/workflows/e2e-preset-test.yml | 36 ++++++++++++++-- .../kind-cluster/determine_models.py | 3 +- .github/workflows/kind-cluster/main.py | 42 ++++++++++++------- docker/presets/llama-2/Dockerfile | 4 +- docker/presets/tfs/Dockerfile | 2 +- .../text-generation/requirements.txt | 2 +- 6 files changed, 66 insertions(+), 23 deletions(-) diff --git a/.github/workflows/e2e-preset-test.yml b/.github/workflows/e2e-preset-test.yml index 7c1f4526b..aa8253d06 100644 --- a/.github/workflows/e2e-preset-test.yml +++ b/.github/workflows/e2e-preset-test.yml @@ -276,13 +276,43 @@ jobs: } }' \ http://${{ steps.get_ip.outputs.SERVICE_IP }}:80/generate - elif [[ "${{ matrix.model.name }}" == *"falcon"* ]]; then + else echo "Testing inference for ${{ matrix.model.name }}" curl -X POST \ -H 
"accept: application/json" \ -H "Content-Type: application/json" \ - -d '{"prompt":"Girafatron is obsessed with giraffes, the most glorious animal on the face of this Earth. Giraftron believes all other animals are irrelevant when compared to the glorious majesty of the giraffe.\nDaniel: Hello, Girafatron!\nGirafatron:","max_length":200,"min_length":0,"do_sample":true,"early_stopping":false,"num_beams":1,"num_beam_groups":1,"diversity_penalty":0.0,"temperature":1.0,"top_k":10,"top_p":1,"typical_p":1,"repetition_penalty":1,"length_penalty":1,"no_repeat_ngram_size":0,"encoder_no_repeat_ngram_size":0,"bad_words_ids":null,"num_return_sequences":1,"output_scores":false,"return_dict_in_generate":false,"forced_bos_token_id":null,"forced_eos_token_id":null,"remove_invalid_values":null}' \ - http://${{ steps.get_ip.outputs.SERVICE_IP }}:80/chat + -d '{ + "prompt":"Girafatron is obsessed with giraffes, the most glorious animal on the face of this Earth. Giraftron believes all other animals are irrelevant when compared to the glorious majesty of the giraffe.\nDaniel: Hello, Girafatron!\nGirafatron:", + "return_full_text": false, + "clean_up_tokenization_spaces": false, + "prefix": null, + "handle_long_generation": null, + "generate_kwargs": { + "max_length":200, + "min_length":0, + "do_sample":true, + "early_stopping":false, + "num_beams":1, + "num_beam_groups":1, + "diversity_penalty":0.0, + "temperature":1.0, + "top_k":10, + "top_p":1, + "typical_p":1, + "repetition_penalty":1, + "length_penalty":1, + "no_repeat_ngram_size":0, + "encoder_no_repeat_ngram_size":0, + "bad_words_ids":null, + "num_return_sequences":1, + "output_scores":false, + "return_dict_in_generate":false, + "forced_bos_token_id":null, + "forced_eos_token_id":null, + "remove_invalid_values":null + } + }' \ + http://${{ steps.get_ip.outputs.SERVICE_IP }}:80/chat fi - name: Move from Test to Prod ACR diff --git a/.github/workflows/kind-cluster/determine_models.py 
b/.github/workflows/kind-cluster/determine_models.py index 8309c4a55..537595055 100644 --- a/.github/workflows/kind-cluster/determine_models.py +++ b/.github/workflows/kind-cluster/determine_models.py @@ -105,7 +105,8 @@ def check_modified_models(pr_branch): run_command(f"git fetch origin {pr_branch}:{pr_branch}") run_command(f"git checkout {pr_branch}") - files = run_command("git diff --name-only origin/main") + files = run_command("git diff --name-only origin/main") # Returns each file on newline + files = files.split("\n") os.chdir(Path.cwd().parent) modified_models = models_to_build(files) diff --git a/.github/workflows/kind-cluster/main.py b/.github/workflows/kind-cluster/main.py index 2ce57231e..1868e0843 100644 --- a/.github/workflows/kind-cluster/main.py +++ b/.github/workflows/kind-cluster/main.py @@ -37,42 +37,54 @@ def run_command(command): def get_model_git_info(model_version): """Get model Git Repo link and commit ID""" url_parts = model_version.split('/') - model_url = '/'.join([url_parts[:-2]]) + model_url = '/'.join(url_parts[:-2]) commit_id = url_parts[-1] return model_url, commit_id -def update_model(model_name, model_commit): - """Using Git Update Model""" +def update_model(model_name, model_commit): + """Update the model to a specific commit, including LFS files.""" weights_path = get_weights_path(model_name) + git_files_path = os.path.join(weights_path, "..", "git_files", ".git") start_dir = os.getcwd() try: # Change to weights directory os.chdir(weights_path) - run_command("git checkout main") - run_command("git pull origin main") - run_command(f"git checkout {model_commit}") + # Allow current runner access to git dir + run_command(f"git config --global --add safe.directory {weights_path}") + run_command(f"git config --global --add safe.directory {git_files_path}") + + run_command(f"git --git-dir={git_files_path} checkout main") + run_command(f"git --git-dir={git_files_path} pull origin main") + # Checkout to the specific commit + 
run_command(f"git --git-dir={git_files_path} checkout {model_commit}") + # Pull LFS files for the checked-out commit + run_command(f"git --git-dir={git_files_path} lfs pull") + # Remove the cached .git/lfs directory to save space (Optimization) + # run_command(f"rm -rf {os.path.join(git_files_path, 'lfs')}") except Exception as e: print(f"An error occurred: {e}") - finally: + finally: # Change back to the original directory os.chdir(start_dir) def download_new_model(model_name, model_url): - """Given URL download new model""" + """Given URL download new model.""" weights_path = get_weights_path(model_name) + git_files_path = os.path.join(weights_path, "..", "git_files") # Path for git_files directory start_dir = os.getcwd() - # If a new model then download it - if not os.path.exists(weights_path) and model_url: + + if not os.path.exists(weights_path) and model_url: try: os.makedirs(weights_path, exist_ok=True) - # Change to weights directory os.chdir(weights_path) - # Clone the repo - run_command(f"git clone {model_url}") + run_command(f"git clone {model_url} .") + + # Create git_files directory and move .git there + os.makedirs(git_files_path, exist_ok=True) + shutil.move(os.path.join(weights_path, ".git"), git_files_path) except Exception as e: print(f"An error occurred: {e}") - finally: - # Change back to the original directory + finally: os.chdir(start_dir) def main(): diff --git a/docker/presets/llama-2/Dockerfile b/docker/presets/llama-2/Dockerfile index 2b3d21924..5d888cc29 100644 --- a/docker/presets/llama-2/Dockerfile +++ b/docker/presets/llama-2/Dockerfile @@ -4,7 +4,7 @@ # --build-arg VERSION={{VERSION}} \ # --build-arg MODEL_TYPE={{MODEL_TYPE}} \ -FROM nvcr.io/nvidia/pytorch:23.10-py3 +FROM python:3.8-slim WORKDIR /workspace RUN git clone https://github.com/facebookresearch/llama @@ -14,7 +14,7 @@ WORKDIR /workspace/llama RUN sed -i $'/torch.distributed.init_process_group("nccl")/c\\ import datetime\\\n torch.distributed.init_process_group("nccl", 
timeout=datetime.timedelta(days=365*100))' /workspace/llama/llama/generation.py RUN pip install -e . -RUN pip install fastapi pydantic gputil +RUN pip install torch==2.2.0 fastapi==0.103.2 pydantic==1.10.9 gputil==1.4.0 RUN pip install 'uvicorn[standard]' ARG WEIGHTS_PATH diff --git a/docker/presets/tfs/Dockerfile b/docker/presets/tfs/Dockerfile index 768f9726f..e5826027b 100644 --- a/docker/presets/tfs/Dockerfile +++ b/docker/presets/tfs/Dockerfile @@ -1,4 +1,4 @@ -FROM nvcr.io/nvidia/pytorch:23.10-py3 +FROM python:3.8-slim ARG WEIGHTS_PATH ARG MODEL_TYPE diff --git a/presets/inference/text-generation/requirements.txt b/presets/inference/text-generation/requirements.txt index c97d0ca03..2e8a5e33e 100644 --- a/presets/inference/text-generation/requirements.txt +++ b/presets/inference/text-generation/requirements.txt @@ -1,6 +1,6 @@ # Dependencies for TFS transformers==4.36.0 -# torch==2.1.0a0+4136153 Already included in base image +torch==2.2.0 accelerate==0.23.0 fastapi==0.103.2 pydantic==1.10.9