    ialacol (l-o-c-a-l-a-i)

    Docker Repository on Quay

    Introduction

ialacol (pronounced "localai") is a lightweight drop-in replacement for the OpenAI API.

It is an OpenAI API-compatible wrapper around ctransformers, supporting GGML/GPTQ models with optional CUDA/Metal acceleration.

    ialacol is inspired by other similar projects like LocalAI, privateGPT, local.ai, llama-cpp-python, closedai, and mlc-llm, with a specific focus on Kubernetes deployment.

    Features

• Compatible with the OpenAI APIs and with langchain.
• Lightweight, easy deployment on Kubernetes clusters with a 1-click Helm installation.
• Streaming first, for better UX.
• Optional CUDA acceleration.
• Compatible with the GitHub Copilot VSCode extension, see Copilot.

    Supported Models

See Recipes below for deployment instructions.

All LLMs supported by ctransformers are also supported.

    Blogs

    Quick Start

    Kubernetes

ialacol offers first-class support for Kubernetes, meaning everything can be automated and configured, compared to running it without Kubernetes.

    To quickly get started with ialacol on Kubernetes, follow the steps below:

    helm repo add ialacol https://chenhunghan.github.io/ialacol
    helm repo update
    helm install llama-2-7b-chat ialacol/ialacol

By default, it deploys Meta's Llama 2 Chat model quantized by TheBloke.

    Port-forward

    kubectl port-forward svc/llama-2-7b-chat 8000:8000

    Chat with the default model llama-2-7b-chat.ggmlv3.q4_0.bin using curl

    curl -X POST \
         -H 'Content-Type: application/json' \
         -d '{ "messages": [{"role": "user", "content": "How are you?"}], "model": "llama-2-7b-chat.ggmlv3.q4_0.bin", "stream": false}' \
         http://localhost:8000/v1/chat/completions

Alternatively, use the openai CLI that ships with OpenAI's client library (see more examples in the examples/openai folder).

    openai -k "sk-fake" \
         -b http://localhost:8000/v1 -vvvvv \
         api chat_completions.create -m llama-2-7b-chat.ggmlv3.q4_0.bin \
         -g user "Hello world!"

    Configuration

All configuration is done via environment variables:

    • DEFAULT_MODEL_HG_REPO_ID: The Hugging Face repo id to download the model. Default: None. Example: TheBloke/orca_mini_3B-GGML
    • DEFAULT_MODEL_HG_REPO_REVISION: The Hugging Face repo revision. Default: main. Example: gptq-4bit-32g-actorder_True
    • DEFAULT_MODEL_FILE: The file name to download from the repo; optional for GPTQ models. Default: None. Example: orca-mini-3b.ggmlv3.q4_0.bin
    • MODEL_TYPE: Model type to override the automatic model type detection. Default: None. Examples: gptq, gpt_bigcode, llama, mpt, replit, falcon, gpt_neox, gptj
    • LOGGING_LEVEL: Logging level. Default: INFO. Example: DEBUG
    • TOP_K: top-k for sampling. Default: 40. Example: Integers
    • TOP_P: top-p for sampling. Default: 1.0. Example: Floats
    • REPETITION_PENALTY: Repetition penalty for sampling. Default: 1.1. Example: Floats
    • LAST_N_TOKENS: The number of last tokens considered for the repetition penalty. Default: 64. Example: Integers
    • SEED: The seed for sampling. Default: -1. Example: Integers
    • BATCH_SIZE: The batch size for evaluating tokens; only for GGUF/GGML models. Default: 8. Example: Integers
    • THREADS: Number of threads, overriding the auto-detected value (CPU count / 2); set to 1 for GPTQ models. Default: Auto. Example: Integers
    • MAX_TOKENS: The maximum number of tokens to generate. Default: 512. Example: Integers
    • STOP: The token that stops the generation. Default: None. Example: <|endoftext|>
    • CONTEXT_LENGTH: Override the auto-detected context length. Default: 512. Example: Integers
    • GPU_LAYERS: The number of layers to offload to the GPU. Default: 0. Example: Integers
    • TRUNCATE_PROMPT_LENGTH: Truncate the prompt to this length, if set. Default: 0. Example: Integers

Sampling parameters including TOP_K, TOP_P, REPETITION_PENALTY, LAST_N_TOKENS, SEED, MAX_TOKENS, and STOP can be overridden per request via the request body, for example:

    curl -X POST \
         -H 'Content-Type: application/json' \
         -d '{ "messages": [{"role": "user", "content": "Tell me a story."}], "model": "llama-2-7b-chat.ggmlv3.q4_0.bin", "stream": false, "temperature": "2", "top_p": "1.0", "top_k": "0" }' \
         http://localhost:8000/v1/chat/completions

will use temperature=2, top_p=1 and top_k=0 for this request.
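
    The same per-request overrides can be sent from Python. A minimal sketch, again assuming the pre-1.0 openai package; stream=True exercises the streaming-first behaviour, and non-standard parameters such as top_k are assumed to be forwarded as-is in the JSON request body:

    import openai

    openai.api_key = "sk-fake"
    openai.api_base = "http://localhost:8000/v1"

    # Per-request sampling overrides; extra keyword arguments (e.g. top_k)
    # are included in the request body sent to ialacol.
    stream = openai.ChatCompletion.create(
        model="llama-2-7b-chat.ggmlv3.q4_0.bin",
        messages=[{"role": "user", "content": "Tell me a story."}],
        temperature=2,
        top_p=1.0,
        top_k=0,
        stream=True,
    )
    for chunk in stream:
        # Each streamed chunk carries a delta with a piece of the reply.
        delta = chunk.choices[0].delta
        print(delta.get("content", ""), end="", flush=True)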

    Run in Container

    Image from Github Registry

There is an image hosted on ghcr.io (with CUDA11, CUDA12, Metal and GPTQ variants).

    docker run --rm -it -p 8000:8000 \
         -e DEFAULT_MODEL_HG_REPO_ID="TheBloke/Llama-2-7B-Chat-GGML" \
         -e DEFAULT_MODEL_FILE="llama-2-7b-chat.ggmlv3.q4_0.bin" \
         ghcr.io/chenhunghan/ialacol:latest
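
    Once the container is up, a quick smoke test can be run against it. A minimal sketch using only the Python standard library, posting the same body as the curl example in the Quick Start:

    import json
    import urllib.request

    # Same payload as the curl example above.
    body = json.dumps({
        "messages": [{"role": "user", "content": "How are you?"}],
        "model": "llama-2-7b-chat.ggmlv3.q4_0.bin",
        "stream": False,
    }).encode("utf-8")

    request = urllib.request.Request(
        "http://localhost:8000/v1/chat/completions",
        data=body,
        headers={"Content-Type": "application/json"},
    )
    with urllib.request.urlopen(request) as response:
        reply = json.load(response)
    print(reply["choices"][0]["message"]["content"])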

    From Source

    For developers/contributors

    Python
    python3 -m venv .venv
    source .venv/bin/activate
    python3 -m pip install -r requirements.txt
    DEFAULT_MODEL_HG_REPO_ID="TheBloke/stablecode-completion-alpha-3b-4k-GGML" DEFAULT_MODEL_FILE="stablecode-completion-alpha-3b-4k.ggmlv1.q4_0.bin" LOGGING_LEVEL="DEBUG" THREAD=4 uvicorn main:app --reload --host 0.0.0.0 --port 9999
    Docker

    Build image

    docker build --file ./Dockerfile -t ialacol .

    Run container

    export DEFAULT_MODEL_HG_REPO_ID="TheBloke/orca_mini_3B-GGML"
    export DEFAULT_MODEL_FILE="orca-mini-3b.ggmlv3.q4_0.bin"
    docker run --rm -it -p 8000:8000 \
         -e DEFAULT_MODEL_HG_REPO_ID=$DEFAULT_MODEL_HG_REPO_ID \
         -e DEFAULT_MODEL_FILE=$DEFAULT_MODEL_FILE ialacol

    GPU Acceleration

To enable GPU/CUDA acceleration, you need to use the container image built for GPU and set the GPU_LAYERS environment variable. GPU_LAYERS is determined by the size of your GPU memory; see the PR/discussion in llama.cpp to find the best value.

    CUDA 11

    • deployment.image = ghcr.io/chenhunghan/ialacol-cuda11:latest
• deployment.env.GPU_LAYERS is the number of layers to offload to the GPU.

    CUDA 12

    • deployment.image = ghcr.io/chenhunghan/ialacol-cuda12:latest
• deployment.env.GPU_LAYERS is the number of layers to offload to the GPU.

Only llama, falcon, mpt and gpt_bigcode (StarCoder/StarChat) models support CUDA.

    Llama with CUDA12

    helm install llama2-7b-chat-cuda12 ialacol/ialacol -f examples/values/llama2-7b-chat-cuda12.yaml

Deploys the Llama 2 7B Chat model with 40 layers offloaded to the GPU. Inference is accelerated by CUDA 12.

    StarCoderPlus with CUDA12

    helm install starcoderplus-guanaco-cuda12 ialacol/ialacol -f examples/values/starcoderplus-guanaco-cuda12.yaml

Deploys the Starcoderplus-Guanaco-GPT4-15B-V1.0 model with 40 layers offloaded to the GPU. Inference is accelerated by CUDA 12.

    CUDA Driver Issues

If you see CUDA driver version is insufficient for CUDA runtime version when making a request, you are likely using an NVIDIA driver that is not compatible with the CUDA version.

Upgrade the driver manually on the node (see here if you are using CUDA11 + AMI), or try a different version of CUDA.

    Metal

To enable Metal support, use the ialacol-metal image built for Metal.

    • deployment.image = ghcr.io/chenhunghan/ialacol-metal:latest

    For example

helm install llama2-7b-chat-metal ialacol/ialacol -f examples/values/llama2-7b-chat-metal.yaml

    GPTQ

To use GPTQ, you must set:

    • deployment.image = ghcr.io/chenhunghan/ialacol-gptq:latest
    • deployment.env.MODEL_TYPE = gptq

    For example

helm install llama2-7b-chat-gptq ialacol/ialacol -f examples/values/llama2-7b-chat-gptq.yaml
    kubectl port-forward svc/llama2-7b-chat-gptq 8000:8000
    openai -k "sk-fake" -b http://localhost:8000/v1 -vvvvv api chat_completions.create -m gptq_model-4bit-128g.safetensors -g user "Hello world!"

    Tips

    Copilot

ialacol can be used as a Copilot backend, since GitHub Copilot's API is almost identical to the OpenAI completion API.

However, a few things need to be kept in mind:

1. The Copilot client sends a lengthy prompt that includes all the related context for code completion (see copilot-explorer), which puts a heavy load on the server. If you are trying to run ialacol locally, opt in to the TRUNCATE_PROMPT_LENGTH environment variable to truncate the prompt from the beginning and reduce the workload.

2. Copilot sends requests in parallel. To increase throughput, you probably need a queue such as text-inference-batcher.

    Start two instances of ialacol:

    gh repo clone chenhunghan/ialacol && cd ialacol && python3 -m venv .venv && source .venv/bin/activate && python3 -m pip install -r requirements.txt
    LOGGING_LEVEL="DEBUG"
    THREAD=2
    DEFAULT_MODEL_HG_REPO_ID="TheBloke/stablecode-completion-alpha-3b-4k-GGML"
    DEFAULT_MODEL_FILE="stablecode-completion-alpha-3b-4k.ggmlv1.q4_0.bin"
    TRUNCATE_PROMPT_LENGTH=100 # optional
    uvicorn main:app --host 0.0.0.0 --port 9998
    uvicorn main:app --host 0.0.0.0 --port 9999

    Start tib, pointing to upstream ialacol instances.

    gh repo clone ialacol/text-inference-batcher && cd text-inference-batcher && npm install
    UPSTREAMS="http://localhost:9998,http://localhost:9999" npm start

Configure the VSCode GitHub Copilot extension to use tib.

    "github.copilot.advanced": {
         "debug.overrideEngine": "stablecode-completion-alpha-3b-4k.ggmlv1.q4_0.bin",
         "debug.testOverrideProxyUrl": "http://localhost:8000",
         "debug.overrideProxyUrl": "http://localhost:8000"
    }

Creative vs. Conservative

LLMs are known to be sensitive to sampling parameters: a higher temperature leads to more "randomness", making the LLM more "creative"; top_p and top_k also contribute to this "randomness".

If you want to make the LLM more creative:

    curl -X POST \
         -H 'Content-Type: application/json' \
         -d '{ "messages": [{"role": "user", "content": "Tell me a story."}], "model": "llama-2-7b-chat.ggmlv3.q4_0.bin", "stream": false, "temperature": "2", "top_p": "1.0", "top_k": "0" }' \
         http://localhost:8000/v1/chat/completions

If you want to make the LLM more consistent and generate the same result for the same input:

    curl -X POST \
         -H 'Content-Type: application/json' \
         -d '{ "messages": [{"role": "user", "content": "Tell me a story."}], "model": "llama-2-7b-chat.ggmlv3.q4_0.bin", "stream": false, "temperature": "0.1", "top_p": "0.1", "top_k": "40" }' \
         http://localhost:8000/v1/chat/completions

    Roadmap

    Star History

    Star History Chart

Recipes

    Llama-2

    Deploy Meta's Llama 2 Chat model quantized by TheBloke.

    7B Chat

    helm repo add ialacol https://chenhunghan.github.io/ialacol
    helm repo update
    helm install llama2-7b-chat ialacol/ialacol -f examples/values/llama2-7b-chat.yaml

    13B Chat

    helm repo add ialacol https://chenhunghan.github.io/ialacol
    helm repo update
    helm install llama2-13b-chat ialacol/ialacol -f examples/values/llama2-13b-chat.yaml

    70B Chat

    helm repo add ialacol https://chenhunghan.github.io/ialacol
    helm repo update
    helm install llama2-70b-chat ialacol/ialacol -f examples/values/llama2-70b-chat.yaml

    OpenLM Research's OpenLLaMA Models

    Deploy OpenLLaMA 7B model quantized by rustformers.

    ℹ️ This is a base model, likely only useful for text completion.

    helm repo add ialacol https://chenhunghan.github.io/ialacol
    helm repo update
    helm install openllama-7b ialacol/ialacol -f examples/values/openllama-7b.yaml

VMware's OpenLLaMA 13B Open Instruct

    Deploy OpenLLaMA 13B Open Instruct model quantized by TheBloke.

    helm repo add ialacol https://chenhunghan.github.io/ialacol
    helm repo update
    helm install openllama-13b-instruct ialacol/ialacol -f examples/values/openllama-13b-instruct.yaml

    Mosaic's MPT Models

    Deploy MosaicML's MPT-7B model quantized by rustformers. ℹ️ This is a base model, likely only useful for text completion.

    helm repo add ialacol https://chenhunghan.github.io/ialacol
    helm repo update
    helm install mpt-7b ialacol/ialacol -f examples/values/mpt-7b.yaml

    Deploy MosaicML's MPT-30B Chat model quantized by TheBloke.

    helm repo add ialacol https://chenhunghan.github.io/ialacol
    helm repo update
    helm install mpt-30b-chat ialacol/ialacol -f examples/values/mpt-30b-chat.yaml

    Falcon Models

    Deploy Uncensored Falcon 7B model quantized by TheBloke.

    helm repo add ialacol https://chenhunghan.github.io/ialacol
    helm repo update
    helm install falcon-7b ialacol/ialacol -f examples/values/falcon-7b.yaml

    Deploy Uncensored Falcon 40B model quantized by TheBloke.

    helm repo add ialacol https://chenhunghan.github.io/ialacol
    helm repo update
    helm install falcon-40b ialacol/ialacol -f examples/values/falcon-40b.yaml

StarCoder Models (starcoder, starchat, starcoderplus, WizardCoder)

    Deploy starchat-beta model quantized by TheBloke.

helm repo add ialacol https://chenhunghan.github.io/ialacol
    helm repo update
    helm install starchat-beta ialacol/ialacol -f examples/values/starchat-beta.yaml

    Deploy WizardCoder model quantized by TheBloke.

helm repo add ialacol https://chenhunghan.github.io/ialacol
    helm repo update
    helm install wizard-coder-15b ialacol/ialacol -f examples/values/wizard-coder-15b.yaml

    Pythia Models

Deploy the lightweight pythia-70m model, with only 70 million parameters (~40MB), quantized by rustformers.

    helm repo add ialacol https://chenhunghan.github.io/ialacol
    helm repo update
    helm install pythia70m ialacol/ialacol -f examples/values/pythia-70m.yaml

    RedPajama Models

    Deploy RedPajama 3B model

    helm repo add ialacol https://chenhunghan.github.io/ialacol
    helm repo update
    helm install redpajama-3b ialacol/ialacol -f examples/values/redpajama-3b.yaml

    StableLM Models

Deploy the StableLM 7B model

    helm repo add ialacol https://chenhunghan.github.io/ialacol
    helm repo update
    helm install stablelm-7b ialacol/ialacol -f examples/values/stablelm-7b.yaml

    Development

    python3 -m venv .venv
    source .venv/bin/activate
    python3 -m pip install -r requirements.txt
pip freeze > requirements.txt # update requirements.txt after adding new dependencies