diff --git a/examples/values/falcon-40b.yaml b/examples/values/falcon-40b.yaml
index 8fed4f11a4bcfc9b220df63c967e32a0b008e3e8..1c309d350a57bbdf0ff4c3ec5a50adddcd5087d1 100644
--- a/examples/values/falcon-40b.yaml
+++ b/examples/values/falcon-40b.yaml
@@ -3,7 +3,7 @@ deployment:
image: quay.io/chenhunghan/ialacol:latest
env:
DEFAULT_MODEL_HG_REPO_ID: TheBloke/WizardLM-Uncensored-Falcon-40B-GGML
- DEFAULT_MODEL_FILE: wizard-falcon40b.ggmlv3.q4_K_S.bin
+ DEFAULT_MODEL_FILE: wizardlm-uncensored-falcon-40b.ggccv1.q4_0.bin
resources:
{}
cache:
diff --git a/examples/values/falcon-7b.yaml b/examples/values/falcon-7b.yaml
index 1b59128d62222beb673ef88b53fc4a2d99c63997..d7b8d4803a720cf4998ee05f6460fdfed5fd8a3b 100644
--- a/examples/values/falcon-7b.yaml
+++ b/examples/values/falcon-7b.yaml
@@ -3,7 +3,9 @@ deployment:
image: quay.io/chenhunghan/ialacol:latest
env:
DEFAULT_MODEL_HG_REPO_ID: TheBloke/WizardLM-Uncensored-Falcon-7B-GGML
- DEFAULT_MODEL_FILE: wizard-falcon-7b.ggmlv3.q4_1.bin
+ DEFAULT_MODEL_FILE: wizardlm-7b-uncensored.ggccv1.q4_0.bin
+    # the model file name does not contain `falcon`, so we must set the model type explicitly
+ MODE_TYPE: falcon
resources:
{}
cache:
diff --git a/get_llm.py b/get_llm.py
index eef6bd544bd7ee71955463ce244d96387e8cc366..f112c98aeb4fbfa8e4b4c23f19b98e2ac5f442eb 100644
--- a/get_llm.py
+++ b/get_llm.py
@@ -38,7 +38,10 @@ async def get_llm(
ctransformer_model_type = "dolly-v2"
if "stablelm" in body.model:
ctransformer_model_type = "gpt_neox"
-
+
+ MODE_TYPE = get_env("MODE_TYPE", "")
+ if len(MODE_TYPE) > 0:
+ ctransformer_model_type = MODE_TYPE
MODELS_FOLDER = get_env("MODELS_FOLDER", "models")
return AutoModelForCausalLM.from_pretrained(