From 91da3da69bef8b7f03141f852d45a942bb8627fb Mon Sep 17 00:00:00 2001
From: Manvenddra Rawat
Date: Fri, 27 Oct 2023 10:24:12 +0530
Subject: [PATCH] Added torch-gpu Dockerfile in packaging/dockers (#2531)

* Added torch-gpu Dockerfile in packaging/dockers

Signed-off-by: Manvenddra Rawat
---
 packaging/dockers/Dockerfile.torch_gpu |  74 +++++++++++++
 packaging/dockers/README.md            | 137 +++++++++++++++++++++++++
 2 files changed, 211 insertions(+)
 create mode 100644 packaging/dockers/Dockerfile.torch_gpu
 create mode 100644 packaging/dockers/README.md

diff --git a/packaging/dockers/Dockerfile.torch_gpu b/packaging/dockers/Dockerfile.torch_gpu
new file mode 100644
index 00000000000..a78fff18177
--- /dev/null
+++ b/packaging/dockers/Dockerfile.torch_gpu
@@ -0,0 +1,74 @@
+FROM docker-registry.qualcomm.com/library/nvidia/cuda:11.6.2-cudnn8-devel-ubuntu20.04
+
+ARG DEBIAN_FRONTEND=noninteractive
+ARG APT_KEY_DONT_WARN_ON_DANGEROUS_USAGE=DontWarn
+
+# Refresh the NVIDIA CUDA apt signing keys; cuda.list is renamed first so apt skips that repo
+RUN mv /etc/apt/sources.list.d/cuda.list /etc/apt/sources.list.d/cuda.list.orig && \
+    apt-get update > /dev/null && \
+    apt-get install -y --no-install-recommends apt-utils && \
+    apt-key del --no-tty 7fa2af80 && \
+    apt-key adv --no-tty --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/3bf863cc.pub && \
+    apt-key adv --no-tty --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/7fa2af80.pub && \
+    apt-get update > /dev/null && \
+    rm -rf /var/lib/apt/lists/*
+
+RUN apt-get update > /dev/null && \
+    apt-get install --no-install-recommends -y \
+    # Bare minimum Packages
+    ca-certificates \
+    git \
+    ssh \
+    sudo \
+    wget \
+    xterm \
+    xauth > /dev/null && \
+    rm -rf /var/lib/apt/lists/*
+
+# Add sudo support: members of the "users" group get passwordless sudo
+RUN echo "%users ALL = (ALL) NOPASSWD: ALL" >> /etc/sudoers
+
+RUN apt-get update -y > /dev/null && \
+    apt-get install --no-install-recommends -y \
+    python3.8 \
+    python3-pip && \
+    rm -rf /var/lib/apt/lists/*
+
+# Register the version in alternatives
+RUN update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.8 1
+# Set python 3.8 as the default python
+RUN update-alternatives --set python3 /usr/bin/python3.8
+
+# Upgrade Python3 pip once, before any pip installs below
+RUN python3 -m pip --no-cache-dir install --upgrade pip
+
+EXPOSE 25000
+RUN apt-get update && apt-get install -y --no-install-recommends openssh-server && rm -rf /var/lib/apt/lists/*
+RUN mkdir /var/run/sshd
+
+RUN apt-get update && apt-get install -y --no-install-recommends liblapacke liblapacke-dev && rm -rf /var/lib/apt/lists/*
+
+RUN apt-get update && apt-get install -y --no-install-recommends libjpeg8-dev && \
+    rm -rf /var/lib/apt/lists/*
+
+# Set up symlink to point to the correct python version
+RUN ln -sf /usr/bin/python3.8 /usr/bin/python
+RUN ln -s /usr/lib/x86_64-linux-gnu/libjpeg.so /usr/lib
+
+# Ubuntu's stock sshd_config has both directives commented out ("#Port 22"), so the
+# leading "#" is consumed too; otherwise the edited lines would remain comments.
+RUN sed -i 's/^#\?PermitRootLogin prohibit-password/PermitRootLogin yes/' /etc/ssh/sshd_config && \
+    sed -i 's/^#\?Port 22/Port 25000/' /etc/ssh/sshd_config
+
+# SSH login fix. Otherwise user is kicked off after login
+RUN sed 's@session\s*required\s*pam_loginuid.so@session optional pam_loginuid.so@g' -i /etc/pam.d/sshd
+
+# Install the AIMET package wheel files
+COPY *.whl /tmp/
+RUN cd /tmp && python3 -m pip install *.whl -f https://download.pytorch.org/whl/torch_stable.html && rm -f /tmp/*.whl
+
+# Remove onnxruntime install in order to fix onnxruntime-gpu
+RUN export ONNXRUNTIME_VER=$(python3 -c 'import onnxruntime; print(onnxruntime.__version__)') && \
+    python3 -m pip uninstall -y onnxruntime && \
+    python3 -m pip --no-cache-dir install onnxruntime-gpu==$ONNXRUNTIME_VER
+
diff --git a/packaging/dockers/README.md b/packaging/dockers/README.md
new file mode 100644
index 00000000000..d2085e09b3d
--- /dev/null
+++ b/packaging/dockers/README.md
@@ -0,0 +1,137 @@
+AIMET Docker creation
+=====================
+
+This page provides instructions to build a docker image with AIMET packages and start the development docker container.
+
+Setup workspace
+---------------
+
+```console
+WORKSPACE="<absolute_path_to_workspace>"
+mkdir $WORKSPACE && cd $WORKSPACE
+git clone https://github.com/quic/aimet.git
+cd aimet/packaging/dockers
+```
+
+Make sure no wheel file is present in the current working directory:
+```console
+rm -rf *.whl
+```
+
+Set variant
+------------
+
+Set the `<variant_string>` to ONE of the following depending on your desired variant
+
+* For the PyTorch 1.13 GPU variant, use **torch_gpu**
+* For the PyTorch 1.13 CPU variant, use **torch_cpu**
+* For the PyTorch 1.9 GPU variant, use **torch_gpu_pt19**
+* For the PyTorch 1.9 CPU variant, use **torch_cpu_pt19**
+* For the TensorFlow GPU variant, use **tf_gpu**
+* For the TensorFlow CPU variant, use **tf_cpu**
+* For the ONNX GPU variant, use **onnx_gpu**
+* For the ONNX CPU variant, use **onnx_cpu**
+
+```console
+export AIMET_VARIANT=<variant_string>
+```
+
+Download AIMET packages
+------------------------
+
+Go to https://github.com/quic/aimet/releases and identify the release tag of the package you want to install.
+
+
+Replace `<release_tag>` in the steps below with the appropriate tag:
+
+```console
+export release_tag=<release_tag>
+```
+
+Set the package download URL as follows:
+
+```console
+export download_url="https://github.com/quic/aimet/releases/download/${release_tag}"
+```
+
+Set the common suffix for the package files as follows:
+
+```console
+export wheel_file_suffix="cp38-cp38-linux_x86_64.whl"
+```
+
+Download the AIMET packages in the order specified below:
+
+```console
+wget ${download_url}/AimetCommon-${AIMET_VARIANT}_${release_tag}-${wheel_file_suffix}
+
+
+# Download ONE of the following depending on the variant
+wget ${download_url}/AimetTorch-${AIMET_VARIANT}_${release_tag}-${wheel_file_suffix}
+
+# OR
+
+wget ${download_url}/AimetTensorflow-${AIMET_VARIANT}_${release_tag}-${wheel_file_suffix}
+
+# OR
+
+wget ${download_url}/AimetOnnx-${AIMET_VARIANT}_${release_tag}-${wheel_file_suffix}
+
+
+wget ${download_url}/Aimet-${AIMET_VARIANT}_${release_tag}-${wheel_file_suffix}
+```
+
+Build docker image
+------------------
+
+Follow these instructions in order to build the docker image locally. If not, skip to the next section.
+
+```console
+docker_image_name="aimet-prod-docker-${AIMET_VARIANT}:<any_tag>"
+docker_container_name="aimet-prod-${AIMET_VARIANT}-<any_name>"
+
+docker build -t ${docker_image_name} -f Dockerfile.${AIMET_VARIANT} .
+```
+
+**NOTE:** Feel free to modify the *docker_image_name* and *docker_container_name* as needed.
+
+Start docker container
+-----------------------
+
+Ensure that a docker container named *$docker_container_name* is not already running; otherwise remove the existing container and then start a new container as follows:
+
+```console
+docker ps -a | grep ${docker_container_name} && docker kill ${docker_container_name}
+
+docker run --rm -it -u $(id -u ${USER}):$(id -g ${USER}) \
+-v /etc/passwd:/etc/passwd:ro -v /etc/group:/etc/group:ro \
+-v ${HOME}:${HOME} -v ${WORKSPACE}:${WORKSPACE} \
+-v "/local/mnt/workspace":"/local/mnt/workspace" \
+--entrypoint /bin/bash -w ${WORKSPACE} --hostname ${docker_container_name} ${docker_image_name}
+```
+
+**NOTE:**
+* Feel free to modify the above *docker run* command based on the environment and filesystem on your host machine.
+* If nvidia-docker 2.0 is installed, then add *--gpus all* to the *docker run* commands in order to enable GPU access inside the docker container.
+* If nvidia-docker 1.0 is installed, then replace *docker run* with *nvidia-docker run* in order to enable GPU access inside the docker container.
+* Port forwarding needs to be done in order to run the Visualization APIs from docker container. This can be achieved by running the docker container as follows:
+
+```console
+
+port_id="<any-port-number>"
+
+docker run -p ${port_id}:${port_id} --rm -it -u $(id -u ${USER}):$(id -g ${USER}) \
+-v /etc/passwd:/etc/passwd:ro -v /etc/group:/etc/group:ro \
+-v ${HOME}:${HOME} -v ${WORKSPACE}:${WORKSPACE} \
+-v "/local/mnt/workspace":"/local/mnt/workspace" \
+--entrypoint /bin/bash -w ${WORKSPACE} --hostname ${docker_container_name} ${docker_image_name}
+```
+
+Environment setup
+------------------
+
+Set the common environment variables as follows:
+
+```console
+source /usr/local/lib/python3.8/dist-packages/aimet_common/bin/envsetup.sh
+```