diff --git a/.github/workflows/docker_publish.yml b/.github/workflows/docker_publish.yml new file mode 100644 index 000000000..520045d86 --- /dev/null +++ b/.github/workflows/docker_publish.yml @@ -0,0 +1,91 @@ +# Check this guide for more information about publishing to ghcr.io with GitHub Actions: +# https://docs.github.com/en/packages/managing-github-packages-using-github-actions-workflows/publishing-and-installing-a-package-with-github-actions#upgrading-a-workflow-that-accesses-ghcrio + +# Build the Docker image and push it to the registry +name: docker_publish + +on: + # Trigger the workflow on tag pushes that match the pattern v*, for example v1.0.0 + push: + tags: + - "v*" + + # Allows you to run this workflow manually from the Actions tab + workflow_dispatch: + +jobs: + # Only run this job on tags + docker-tag: + runs-on: ubuntu-latest + if: startsWith(github.ref, 'refs/tags/') + + # Sets the permissions granted to the GITHUB_TOKEN for the actions in this job. + permissions: + contents: read + packages: write + + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + submodules: true + + # We require additional space due to the large size of our image. 
(~10GB) + - name: Free Disk Space (Ubuntu) + uses: jlumbroso/free-disk-space@main + with: + tool-cache: true + android: true + dotnet: true + haskell: true + large-packages: true + docker-images: true + swap-storage: true + + - name: Docker meta:${{ github.ref_name }} + id: meta + uses: docker/metadata-action@v5 + with: + images: ghcr.io/${{ github.repository_owner }}/kohya-ss-gui + flavor: | + latest=auto + prefix= + suffix= + # https://github.com/docker/metadata-action/tree/v5/?tab=readme-ov-file#tags-input + tags: | + type=semver,pattern=v{{major}} + type=semver,pattern={{raw}} + + - name: Set up QEMU + uses: docker/setup-qemu-action@v3 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + # You may need to manage write and read access of GitHub Actions for repositories in the container settings. + - name: Login to GitHub Container Registry + uses: docker/login-action@v3 + with: + registry: ghcr.io + username: ${{ github.repository_owner }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Build and push + uses: docker/build-push-action@v5 + id: publish + with: + context: . + file: ./Dockerfile + push: true + target: final + tags: ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels }} + build-args: | + VERSION=${{ github.ref_name }} + RELEASE=${{ github.run_number }} + platforms: linux/amd64 + # Cache to registry instead of gha to avoid the capacity limit. 
+ cache-from: type=registry,ref=ghcr.io/${{ github.repository_owner }}/kohya-ss-gui:cache + cache-to: type=registry,ref=ghcr.io/${{ github.repository_owner }}/kohya-ss-gui:cache,mode=max + sbom: true + provenance: true diff --git a/Dockerfile b/Dockerfile index 0ff872a34..6607b5c25 100644 --- a/Dockerfile +++ b/Dockerfile @@ -22,15 +22,13 @@ RUN --mount=type=cache,id=apt-$TARGETARCH$TARGETVARIANT,sharing=locked,target=/v apt-get update && apt-get upgrade -y && \ apt-get install -y --no-install-recommends python3-launchpadlib git curl -# Install PyTorch and TensorFlow +# Install PyTorch # The versions must align and be in sync with the requirements_linux_docker.txt # hadolint ignore=SC2102 RUN --mount=type=cache,id=pip-$TARGETARCH$TARGETVARIANT,sharing=locked,target=/root/.cache/pip \ pip install -U --extra-index-url https://download.pytorch.org/whl/cu121 --extra-index-url https://pypi.nvidia.com \ torch==2.1.2 torchvision==0.16.2 \ xformers==0.0.23.post1 \ - # Why [and-cuda]: https://github.com/tensorflow/tensorflow/issues/61468#issuecomment-1759462485 - tensorflow[and-cuda]==2.15.0.post1 \ ninja \ pip setuptools wheel @@ -114,14 +112,17 @@ ENV PYTHONPATH="${PYTHONPATH}:/home/$UID/.local/lib/python3.10/site-packages" ENV LD_LIBRARY_PATH="/usr/local/cuda/lib:/usr/local/cuda/lib64:${LD_LIBRARY_PATH}" ENV LD_PRELOAD=libtcmalloc.so ENV PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python +# Rich logging +# https://rich.readthedocs.io/en/stable/console.html#interactive-mode +ENV FORCE_COLOR="true" +ENV COLUMNS="100" WORKDIR /app VOLUME [ "/dataset" ] # 7860: Kohya GUI -# 6006: TensorBoard -EXPOSE 7860 6006 +EXPOSE 7860 USER $UID diff --git a/README.md b/README.md index b6b71f9e6..f7d35906f 100644 --- a/README.md +++ b/README.md @@ -22,6 +22,9 @@ The GUI allows you to set the training parameters and generate and run the requi - [Manual installation](#manual-installation) - [Pre-built Runpod template](#pre-built-runpod-template) - [Docker](#docker) + - [Get your Docker ready for 
GPU support](#get-your-docker-ready-for-gpu-support) + - [Design of our Dockerfile](#design-of-our-dockerfile) + - [Use the pre-built Docker image](#use-the-pre-built-docker-image) - [Local docker build](#local-docker-build) - [ashleykleynhans runpod docker builds](#ashleykleynhans-runpod-docker-builds) - [Upgrading](#upgrading) @@ -229,34 +232,67 @@ To run from a pre-built Runpod template, you can: ### Docker -#### Local docker build +#### Get your Docker ready for GPU support -If you prefer to use Docker, follow the instructions below: +##### Windows -1. Ensure that you have Git and Docker installed on your Windows or Linux system. +Once you have installed [**Docker Desktop**](https://www.docker.com/products/docker-desktop/), [**CUDA Toolkit**](https://developer.nvidia.com/cuda-downloads), [**NVIDIA Windows Driver**](https://www.nvidia.com.tw/Download/index.aspx), and ensured that your Docker is running with [**WSL2**](https://docs.docker.com/desktop/wsl/#turn-on-docker-desktop-wsl-2), you are ready to go. -2. Open your OS shell (Command Prompt or Terminal) and run the following commands: +Here is the official documentation for further reference. + + - ```bash - git clone --recursive https://github.com/bmaltais/kohya_ss.git - cd kohya_ss - docker compose up -d --build - ``` +##### Linux, OSX + +Install an NVIDIA GPU Driver if you do not already have one installed. + + +Install the NVIDIA Container Toolkit with this guide. + + +#### Design of our Dockerfile + +- It is required that all training data is stored in the `dataset` subdirectory, which is mounted into the container at `/dataset`. +- Please note that the file picker functionality is not available. Instead, you will need to manually input the folder path and configuration file path. +- TensorBoard has been separated from the project. + - TensorBoard is not included in the Docker image. + - The "Start TensorBoard" button has been hidden. 
+ - TensorBoard is launched from a distinct container [as shown here](/docker-compose.yaml#L41). +- The browser won't be launched automatically. You will need to manually open the browser and navigate to [http://localhost:7860/](http://localhost:7860/) and [http://localhost:6006/](http://localhost:6006/) +- This Dockerfile has been designed to be easily disposable. You can discard the container at any time and restart it with the new code version. + +#### Use the pre-built Docker image - Note: The initial run may take up to 20 minutes to complete. +```bash +git clone https://github.com/bmaltais/kohya_ss.git +cd kohya_ss +docker compose up -d +``` + +To update the system, do `docker compose down && docker compose up -d --pull always` + +#### Local docker build + +> [!IMPORTANT] +> Clone the Git repository ***recursively*** to include submodules: +> `git clone --recursive https://github.com/bmaltais/kohya_ss.git` + +```bash +git clone --recursive https://github.com/bmaltais/kohya_ss.git +cd kohya_ss +docker compose up -d --build +``` - Please be aware of the following limitations when using Docker: +> [!NOTE] +> Building the image may take up to 20 minutes to complete. - - All training data must be placed in the `dataset` subdirectory, as the Docker container cannot access files from other directories. - - The file picker feature is not functional. You need to manually set the folder path and config file path. - - Dialogs may not work as expected, and it is recommended to use unique file names to avoid conflicts. - - This Dockerfile has been designed to be easily disposable. You can discard the container at any time and docker build it with a new version of the code. To update the system, run update scripts outside of Docker and rebuild using `docker compose down && docker compose up -d --build`. 
+To update the system, ***check out the new code version*** and rebuild using `docker compose down && docker compose up -d --build --pull always` - If you are running Linux, an alternative Docker container port with fewer limitations is available [here](https://github.com/P2Enjoy/kohya_ss-docker). +> If you are running on Linux, an alternative Docker container port with fewer limitations is available [here](https://github.com/P2Enjoy/kohya_ss-docker). #### ashleykleynhans runpod docker builds -You may want to use the following Dockerfile repositories to build the images: +You may want to use the following repositories when running on runpod: - Standalone Kohya_ss template: - Auto1111 + Kohya_ss GUI template: diff --git a/docker-compose.yaml b/docker-compose.yaml index e042f2670..ddd20f227 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -1,17 +1,18 @@ -version: "3.8" - services: kohya-ss-gui: container_name: kohya-ss-gui - image: kohya-ss-gui:latest + image: ghcr.io/bmaltais/kohya-ss-gui:latest user: 1000:0 build: context: . 
args: - UID=1000 + cache_from: + - ghcr.io/bmaltais/kohya-ss-gui:cache + cache_to: + - type=inline ports: - 7860:7860 - - 6006:6006 environment: SAFETENSORS_FAST_GPU: 1 tmpfs: @@ -35,4 +36,20 @@ services: devices: - driver: nvidia capabilities: [gpu] - device_ids: ['all'] + device_ids: ["all"] + + tensorboard: + container_name: tensorboard + image: tensorflow/tensorflow:latest-gpu + ports: + - 6006:6006 + volumes: + - ./dataset/logs:/app/logs + command: tensorboard --logdir=/app/logs --bind_all + deploy: + resources: + reservations: + devices: + - driver: nvidia + capabilities: [gpu] + device_ids: ["all"] diff --git a/requirements_linux_docker.txt b/requirements_linux_docker.txt index d818d846f..779ed6d8b 100644 --- a/requirements_linux_docker.txt +++ b/requirements_linux_docker.txt @@ -1,5 +1,4 @@ xformers>=0.0.20 bitsandbytes==0.43.0 accelerate==0.25.0 -tensorboard==2.15.2 -tensorflow==2.15.0.post1 \ No newline at end of file +tensorboard \ No newline at end of file