# Apply the kustomization in ./deploy to the current cluster.
oc apply -k deploy
# Build the base URL from the host of the route found in the "ollama" namespace.
# NOTE(review): assumes exactly one route exists there; with several routes the
# value would contain one host per line. Confirm against the deployed manifests.
OLLAMA_HOST="http://$(oc get route -n ollama --output=custom-columns=':.spec.host' --no-headers)"
# Quoted so the value is printed exactly as captured.
echo "${OLLAMA_HOST}"
Pull and test the all-minilm
model. It is used to convert words to vectors.
# Ask the server to pull the all-minilm model, then request an embedding for
# the input "hello". URLs are quoted so the host value is never word-split.
curl -sL "${OLLAMA_HOST}/api/pull" -d '{"name": "all-minilm"}'
curl -sL "${OLLAMA_HOST}/api/embed" -d '{ "model": "all-minilm", "input": "hello" }'
Pull and test the granite3-dense:8b
large language model.
# Prompt text sent to the model; may contain spaces, quotes or backslashes.
PROMPT="hello"
# Ask the server to pull the granite3-dense:8b model.
curl -sL "${OLLAMA_HOST}/api/pull" -d '{"name": "granite3-dense:8b"}'
# Build the request body with jq so the prompt is JSON-escaped and passed to
# curl as a single argument (an unquoted ${PROMPT} spliced into the string
# would be word-split as soon as the prompt contains whitespace).
GENERATE_PAYLOAD="$(jq -cn --arg prompt "${PROMPT}" \
  '{model: "granite3-dense:8b", prompt: $prompt, stream: false}')"
# Send the non-streaming generate request and print the "response" field.
curl -sL "${OLLAMA_HOST}/api/generate" -d "${GENERATE_PAYLOAD}" | jq .response
View available cached models.
# Fetch /api/tags and pretty-print the JSON reply.
# -sL matches the other curl calls in this walkthrough (no progress meter,
# follow redirects); the explicit "." filter keeps jq working on versions that
# require a program argument.
curl -sL "${OLLAMA_HOST}/api/tags" | jq .
# Start the compose stack from the ollama/ directory. Chaining with && keeps
# podman-compose from running in the wrong directory when the cd fails.
# NOTE(review): without a detach flag this presumably stays in the foreground,
# so later commands need a second terminal. Confirm the intended workflow.
cd ollama && podman-compose up
Run the Gradio chat client.
# Point at the local server and launch the chat client.
# The variable is exported because a plain assignment is not passed to child
# processes, so the python script could not see the value set here.
# NOTE(review): assumes the client reads OLLAMA_HOST from its environment.
# Confirm in client/00-ollama-chat.py.
export OLLAMA_HOST=http://localhost:11434
python client/00-ollama-chat.py
Localhost (compose)