From 60077ff42946a65b99479f04f54f65a4364981c7 Mon Sep 17 00:00:00 2001
From: Adam Bouhenguel <adam@bouhenguel.com>
Date: Thu, 14 Dec 2023 16:00:45 -0500
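Subject: [PATCH] llamafile 0.4

Upgrade the pinned llamafile release from 0.2.1 to 0.4. The main binary is
now installed as /usr/bin/llamafile (previously /usr/bin/llamafile-main), and
the release version and checksums are declared as build args with defaults.
The devel-llamafile and llamafile-gguf-cuda stages copy the binaries from an
earlier stage instead of downloading them again.
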
---
Note: hunks revised by hand; the blob ids on the index line were not regenerated.

 Dockerfile | 22 ++++++++++++++--------
 1 file changed, 14 insertions(+), 8 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index 3705a44..9754c1e 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -108,8 +108,16 @@ ENTRYPOINT ["/bin/sh", "-c", "exec \"$@\"", "sh", "/usr/bin/llamafile"]
 
+# Fetch the llamafile release binaries once, in their own stage, so that other
+# stages can copy them without also depending on the model download below.
+FROM scratch AS llamafile-bin
+ARG LLAMAFILE_VERSION=0.4
+ARG LLAMAFILE_CHECKSUM=sha256:0751da4dacf8c71707ec57eb1f456dc4fc4959928ae7e25e3bbc505849227a9e
+ARG LLAMAFILE_SERVER_CHECKSUM=sha256:af25718508cbc6af659366058abb9c106bbdac8220babd8a2e2176b07cfd2544
+ADD --checksum=${LLAMAFILE_CHECKSUM} --chmod=0755 https://github.com/Mozilla-Ocho/llamafile/releases/download/${LLAMAFILE_VERSION}/llamafile-${LLAMAFILE_VERSION} /usr/bin/llamafile
+ADD --checksum=${LLAMAFILE_SERVER_CHECKSUM} --chmod=0755 https://github.com/Mozilla-Ocho/llamafile/releases/download/${LLAMAFILE_VERSION}/llamafile-server-${LLAMAFILE_VERSION} /usr/bin/llamafile-server
+
 FROM cosmos-scratch as llamafile-gguf
 LABEL org.opencontainers.image.source https://github.com/ajbouh/cosmos
-ADD --checksum=sha256:c7151d48677e352e492731bd999d9d74c792fa1440715a858dbf3b92ee274abe --chmod=0755 https://github.com/Mozilla-Ocho/llamafile/releases/download/0.2.1/llamafile-main-0.2.1 /usr/bin/llamafile-main
-ADD --checksum=sha256:2b3c692e50d903cbf6ac3d8908f8394101b5be5f8a4573b472975fa8c9f09e68 --chmod=0755 https://github.com/Mozilla-Ocho/llamafile/releases/download/0.2.1/llamafile-server-0.2.1 /usr/bin/llamafile-server
+COPY --from=llamafile-bin /usr/bin/llamafile /usr/bin/llamafile-server /usr/bin/
 ARG GGUF_URL
 ARG GGUF_CHECKSUM
 ADD --checksum=${GGUF_CHECKSUM} --chmod=0755 ${GGUF_URL} /model.gguf
@@ -117,13 +125,12 @@ EXPOSE 8080
 ENTRYPOINT ["/bin/sh", "-c", "exec \"$@\"", "sh", "/usr/bin/llamafile-server", "-m", "/model.gguf", "--port", "8080", "--host", "0.0.0.0", "--nobrowser"]
 
 FROM nvidia/cuda:12.1.1-devel-ubuntu22.04 as devel-llamafile
-ADD --checksum=sha256:c7151d48677e352e492731bd999d9d74c792fa1440715a858dbf3b92ee274abe --chmod=0755 https://github.com/Mozilla-Ocho/llamafile/releases/download/0.2.1/llamafile-main-0.2.1 /usr/bin/llamafile-main
-ADD --checksum=sha256:2b3c692e50d903cbf6ac3d8908f8394101b5be5f8a4573b472975fa8c9f09e68 --chmod=0755 https://github.com/Mozilla-Ocho/llamafile/releases/download/0.2.1/llamafile-server-0.2.1 /usr/bin/llamafile-server
+COPY --from=llamafile-bin /usr/bin/llamafile /usr/bin/llamafile-server /usr/bin/
 # HACK we need to assimilate so this can run on github actions...
 COPY --from=unpack-cosmos /usr/bin/assimilate /usr/bin/
-RUN /usr/bin/assimilate -c /usr/bin/llamafile-main
+RUN /usr/bin/assimilate -c /usr/bin/llamafile
 # HACK get llamafile to build stubs we can use at runtime. would be better to use a "only compile stubs" entrypoint
-RUN (/usr/bin/llamafile-main -m /dev/null --n-gpu-layers 1 || true) \
+RUN (/usr/bin/llamafile -m /dev/null --n-gpu-layers 1 || true) \
   && [ -e /root/.cosmo ] && [ -e /root/.llamafile ]
 
 FROM cosmos-scratch as llamafile-cuda-scratch
@@ -153,8 +160,7 @@ ENTRYPOINT ["/bin/sh", "-c", "exec \"$@\" --n-gpu-layers $LLAMAFILE_N_GPU_LAYERS
 
 FROM llamafile-cuda-scratch as llamafile-gguf-cuda
 LABEL org.opencontainers.image.source https://github.com/ajbouh/cosmos
-ADD --checksum=sha256:c7151d48677e352e492731bd999d9d74c792fa1440715a858dbf3b92ee274abe --chmod=0755 https://github.com/Mozilla-Ocho/llamafile/releases/download/0.2.1/llamafile-main-0.2.1 /usr/bin/llamafile-main
-ADD --checksum=sha256:2b3c692e50d903cbf6ac3d8908f8394101b5be5f8a4573b472975fa8c9f09e68 --chmod=0755 https://github.com/Mozilla-Ocho/llamafile/releases/download/0.2.1/llamafile-server-0.2.1 /usr/bin/llamafile-server
+COPY --from=llamafile-bin /usr/bin/llamafile /usr/bin/llamafile-server /usr/bin/
 ARG GGUF_URL
 ARG GGUF_CHECKSUM
 ADD --checksum=${GGUF_CHECKSUM} --chmod=0755 ${GGUF_URL} /model.gguf