Commit 28d515db authored by Henry Chen, committed by GitHub

Add experimental support for GPTQ models (#50)

parent c1fa9ba3
# .github/workflows/gptq_image.yaml
name: Build and Push GPTQ Image to Github Container Registry
on:
  push:
    branches:
      - main
    paths:
      - '**.py'
      - 'requirements.txt'
      - 'Dockerfile.gptq'
      - '.github/workflows/gptq_image.yaml'
env:
  REGISTRY: ghcr.io
  GPTQ_IMAGE_NAME: ialacol-gptq
jobs:
  gptq_image_to_gcr:
    runs-on: ubuntu-latest
    permissions:
      contents: read
      packages: write
    steps:
      - name: Checkout
        uses: actions/checkout@v3
        with:
          fetch-depth: 0
      - uses: docker/login-action@v2
        with:
          registry: ${{ env.REGISTRY }}
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}
      - name: Extract metadata (tags, labels) for Docker
        id: meta
        uses: docker/metadata-action@v4
        with:
          images: ${{ env.REGISTRY }}/${{ env.GPTQ_IMAGE_NAME }}
      - name: Build and push Docker image
        uses: docker/build-push-action@v4
        with:
          context: .
          file: ./Dockerfile.gptq
          push: true
          tags: |
            ${{ env.REGISTRY }}/${{ github.repository_owner }}/${{ env.GPTQ_IMAGE_NAME }}:${{ github.sha }}
            ${{ env.REGISTRY }}/${{ github.repository_owner }}/${{ env.GPTQ_IMAGE_NAME }}:latest
          labels: ${{ steps.meta.outputs.labels }}
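Once this workflow has run on `main`, the pushed image should be pullable from GHCR; a quick check, assuming the `chenhunghan` repository owner used in the README below:

```sh
docker pull ghcr.io/chenhunghan/ialacol-gptq:latest
```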
# syntax=docker/dockerfile:1
FROM python:3.11-slim

WORKDIR /app

# Install the base dependencies first so this layer is cached independently
COPY requirements.txt requirements.txt
RUN pip3 install -r requirements.txt

# Install ctransformers with GPTQ support
# https://github.com/marella/ctransformers#gptq
RUN pip3 install ctransformers[gptq]

COPY . .

EXPOSE 8000
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]
@@ -103,6 +103,24 @@ For example
helm install llama2-7b-chat-metal ialacol/ialacol -f examples/values/llama2-7b-chat-metal.yaml
```
### GPTQ

To use GPTQ, you must set

- `deployment.image` = `ghcr.io/chenhunghan/ialacol-gptq:latest`
- `deployment.env.MODEL_TYPE` = `gptq`

For example
```sh
helm install llama2-7b-chat-gptq ialacol/ialacol -f examples/values/llama2-7b-chat-gptq.yaml
```
```sh
kubectl port-forward svc/llama2-7b-chat-gptq 8000:8000
openai -k "sk-fake" -b http://localhost:8000/v1 -vvvvv api chat_completions.create -m gptq_model-4bit-128g.safetensors -g user "Hello world!"
```
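The same request can also be sent with plain `curl` against the OpenAI-compatible endpoint; a minimal sketch, assuming the default service port `8000` and the model file name from the example values:

```sh
curl http://localhost:8000/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{
        "model": "gptq_model-4bit-128g.safetensors",
        "messages": [{"role": "user", "content": "Hello world!"}]
      }'
```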
## Tips

### Creative v.s. Conservative
# Chart.yaml: appVersion and version bumped from 0.8.0 to 0.9.0
apiVersion: v2
appVersion: 0.9.0
description: A Helm chart for ialacol
name: ialacol
type: application
version: 0.9.0
# examples/values/llama2-7b-chat-gptq.yaml
replicas: 1
deployment:
  image: ghcr.io/chenhunghan/ialacol-gptq:latest
  env:
    DEFAULT_MODEL_HG_REPO_ID: TheBloke/Llama-2-7b-Chat-GPTQ
    DEFAULT_MODEL_FILE: gptq_model-4bit-128g.safetensors
    MODEL_TYPE: "gptq"
resources:
  {}
cache:
  persistence:
    size: 5Gi
    accessModes:
      - ReadWriteOnce
    storageClassName: ~
cacheMountPath: /app/cache
model:
  persistence:
    size: 5Gi
    accessModes:
      - ReadWriteOnce
    storageClassName: ~
modelMountPath: /app/models
service:
  type: ClusterIP
  port: 8000
  annotations: {}
nodeSelector: {}
tolerations: []
affinity: {}
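If you prefer not to keep a separate values file, the same settings can be passed as `--set` overrides on the command line; a sketch using only the keys shown above:

```sh
helm install llama2-7b-chat-gptq ialacol/ialacol \
  --set deployment.image=ghcr.io/chenhunghan/ialacol-gptq:latest \
  --set deployment.env.MODEL_TYPE=gptq \
  --set deployment.env.DEFAULT_MODEL_HG_REPO_ID=TheBloke/Llama-2-7b-Chat-GPTQ \
  --set deployment.env.DEFAULT_MODEL_FILE=gptq_model-4bit-128g.safetensors
```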