diff --git a/README.md b/README.md
index 1c3439d9b701795e3bbb305d092a97f85a3d4d8a..1c518f76a81e4218684cadcccd82398c50b7faa9 100644
--- a/README.md
+++ b/README.md
@@ -86,13 +86,37 @@ To enable GPU/CUDA acceleration, you need to use the container image built for G
 - `deployment.image` = `ghcr.io/chenhunghan/ialacol-cuda12:latest`
 - `deployment.env.GPU_LAYERS` sets the number of layers to offload to the GPU (see the sketch below).
 
-For example
+Only the `llama`, `falcon`, `mpt`, and `gpt_bigcode` (StarCoder/StarChat) model types support CUDA acceleration.
+
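+A minimal install combining the two settings above could look like this sketch (`my-llm` is an arbitrary release name, and 40 layers is illustrative; tune it to your GPU's memory):
+
+```sh
+helm install my-llm ialacol/ialacol \
+  --set deployment.image=ghcr.io/chenhunghan/ialacol-cuda12:latest \
+  --set deployment.env.GPU_LAYERS=40
+```
+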
+#### Llama with CUDA12
+
+```sh
+helm install llama2-7b-chat-cuda12 ialacol/ialacol -f examples/values/llama2-7b-chat-cuda12.yaml
+```
+
+Deploys the Llama 2 7B chat model with 40 layers offloaded to the GPU. Inference is accelerated by CUDA 12.
+
+#### StarCoderPlus with CUDA12
 
 ```sh
-helm install llama2-7b-chat-cuda11 ialacol/ialacol -f examples/values/llama2-7b-chat-cuda11.yaml
+helm install starcoderplus-guanaco-cuda12 ialacol/ialacol -f examples/values/starcoderplus-guanaco-cuda12.yaml
 ```
 
-Deploys llama2 7b model with 40 layers offloadind to GPU. The inference is accelerated by CUDA 11.
+Deploys the [Starcoderplus-Guanaco-GPT4-15B-V1.0 model](https://huggingface.co/LoupGarou/Starcoderplus-Guanaco-GPT4-15B-V1.0) with 40 layers offloaded to the GPU. Inference is accelerated by CUDA 12.
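+
+After deploying, you can tail the pod logs to watch the model download and layer offloading (the label selector below is an assumption about the chart's labels; adjust it to your release):
+
+```sh
+kubectl logs -l app.kubernetes.io/name=ialacol --tail=100
+```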
 
 ### CUDA Driver Issues
 
diff --git a/charts/ialacol/Chart.yaml b/charts/ialacol/Chart.yaml
index 736a142c132da9c97b1acea524ea3ef8972c245a..38c8bb01f8dc7f72804112532c79e7776d33ede6 100644
--- a/charts/ialacol/Chart.yaml
+++ b/charts/ialacol/Chart.yaml
@@ -1,6 +1,6 @@
 apiVersion: v2
-appVersion: 0.9.0
+appVersion: 0.10.0
 description: A Helm chart for ialacol
 name: ialacol
 type: application
-version: 0.9.0
+version: 0.10.0
diff --git a/charts/ialacol/values.yaml b/charts/ialacol/values.yaml
index 31519fb846dc992772910d8dd39fb66e910fd407..4e16a4a3c4e55d0ca9b315091ffde1cfdbd26b0f 100644
--- a/charts/ialacol/values.yaml
+++ b/charts/ialacol/values.yaml
@@ -2,7 +2,7 @@ replicas: 1
 
 deployment:
   image: quay.io/chenhunghan/ialacol:latest
-  # or use CUDA11 image `ghcr.io/chenhunghan/ialacol-cuda11:latest`
+  # or use the CUDA 12 image `ghcr.io/chenhunghan/ialacol-cuda12:latest`
   # env:
     # DEFAULT_MODEL_HG_REPO_ID: TheBloke/Llama-2-7B-Chat-GGML
     # DEFAULT_MODEL_FILE: llama-2-7b-chat.ggmlv3.q4_0.bin
diff --git a/examples/values/llama2-7b-chat-cuda11.yaml b/examples/values/llama2-7b-chat-cuda12.yaml
similarity index 91%
rename from examples/values/llama2-7b-chat-cuda11.yaml
rename to examples/values/llama2-7b-chat-cuda12.yaml
index 81dfe01ecd626aa63a0e54bafd2a75c635d1181f..5fb64da584f9caf1e57a9753aa8fe8119a4a1439 100644
--- a/examples/values/llama2-7b-chat-cuda11.yaml
+++ b/examples/values/llama2-7b-chat-cuda12.yaml
@@ -1,6 +1,6 @@
 replicas: 1
 deployment:
-  image: ghcr.io/chenhunghan/ialacol-cuda11:latest
+  image: ghcr.io/chenhunghan/ialacol-cuda12:latest
   env:
     DEFAULT_MODEL_HG_REPO_ID: TheBloke/Llama-2-7B-Chat-GGML
     DEFAULT_MODEL_FILE: llama-2-7b-chat.ggmlv3.q4_0.bin
diff --git a/examples/values/starcoderplus-guanaco-cuda12.yaml b/examples/values/starcoderplus-guanaco-cuda12.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..74ca5833f0ceced1cd3f4edcb0e1311ea782a71a
--- /dev/null
+++ b/examples/values/starcoderplus-guanaco-cuda12.yaml
@@ -0,0 +1,30 @@
+replicas: 1
+deployment:
+  image: ghcr.io/chenhunghan/ialacol-cuda12:latest
+  env:
+    DEFAULT_MODEL_HG_REPO_ID: TheBloke/Starcoderplus-Guanaco-GPT4-15B-V1.0-GGML
+    DEFAULT_MODEL_FILE: starcoderplus-guanaco-gpt4.ggmlv1.q4_0.bin
+    GPU_LAYERS: 40
+resources:
+  {}
+cache:
+  persistence:
+    size: 20Gi
+    accessModes:
+      - ReadWriteOnce
+    storageClassName: ~
+cacheMountPath: /app/cache
+model:
+  persistence:
+    size: 20Gi
+    accessModes:
+      - ReadWriteOnce
+    storageClassName: ~
+modelMountPath: /app/models
+service:
+  type: ClusterIP
+  port: 8000
+  annotations: {}
+nodeSelector: {}
+tolerations: []
+affinity: {}
diff --git a/get_llm.py b/get_llm.py
index b82a73d233721e64bd4cb388bd3b39237589ccff..6159ef51deea688e72670ad7fb9386bee3da6c00 100644
--- a/get_llm.py
+++ b/get_llm.py
@@ -25,7 +25,8 @@ async def get_llm(
         or "WizardCoder" in body.model
         or "minotaur-15" in body.model
     ):
-        ctransformer_model_type = "starcoder"
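+        # StarCoder-family models map to ctransformers' "gpt_bigcode" model
+        # type, which is the one with CUDA support (see README)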
+        ctransformer_model_type = "gpt_bigcode"
     if "llama" in body.model:
         ctransformer_model_type = "llama"
     if "mpt" in body.model:
diff --git a/requirements.txt b/requirements.txt
index 8e9d76531253e000c571e335d9b60a729ea8a4c8..1c96282fffda11266fa46a13c75d68a5d5fb4199 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -3,7 +3,7 @@ blake3==0.3.3
 certifi==2023.7.22
 charset-normalizer==3.1.0
 click==8.1.3
-ctransformers==0.2.21
+ctransformers==0.2.22
 fastapi==0.95.2
 filelock==3.12.0
 fsspec==2023.5.0