Fixes for falcon model(s) (#40)

Signed-off-by: Hung-Han (Henry) Chen <chenhungh@gmail.com>

Fixes for falcon model(s) (#40)
f81a5d25 · Henry Chen · GitHub · e1bbb6e3 · f81a5d25 · f81a5d25
Unverified commit f81a5d25, authored 1 year ago by Henry Chen; committed by GitHub 1 year ago
--- a/examples/values/falcon-40b.yaml
+++ b/examples/values/falcon-40b.yaml
@@ -3,7 +3,7 @@ deployment:
  image: quay.io/chenhunghan/ialacol:latest
  env:
    DEFAULT_MODEL_HG_REPO_ID: TheBloke/WizardLM-Uncensored-Falcon-40B-GGML
-    DEFAULT_MODEL_FILE: wizard-falcon40b.ggmlv3.q4_K_S.bin
+    DEFAULT_MODEL_FILE: wizardlm-uncensored-falcon-40b.ggccv1.q4_0.bin
 resources:
  {}
 cache:

--- a/examples/values/falcon-7b.yaml
+++ b/examples/values/falcon-7b.yaml
@@ -3,7 +3,9 @@ deployment:
  image: quay.io/chenhunghan/ialacol:latest
  env:
    DEFAULT_MODEL_HG_REPO_ID: TheBloke/WizardLM-Uncensored-Falcon-7B-GGML
-    DEFAULT_MODEL_FILE: wizard-falcon-7b.ggmlv3.q4_1.bin
+    DEFAULT_MODEL_FILE: wizardlm-7b-uncensored.ggccv1.q4_0.bin
+    # The model file name does not contain `falcon`, so the model type must be set explicitly.
+    MODE_TYPE: falcon
 resources:
  {}
 cache:

--- a/get_llm.py
+++ b/get_llm.py
@@ -38,7 +38,10 @@ async def get_llm(
        ctransformer_model_type = "dolly-v2"
    if "stablelm" in body.model:
        ctransformer_model_type = "gpt_neox"
+    MODE_TYPE = get_env("MODE_TYPE", "")
+    if len(MODE_TYPE) > 0:
+        ctransformer_model_type = MODE_TYPE
    MODELS_FOLDER = get_env("MODELS_FOLDER", "models")
    return AutoModelForCausalLM.from_pretrained(