diff --git a/.github/workflows/gptq_image.yaml b/.github/workflows/gptq_image.yaml new file mode 100644 index 0000000000000000000000000000000000000000..35c973c59a619e0b233c49e054ae233f67179858 --- /dev/null +++ b/.github/workflows/gptq_image.yaml @@ -0,0 +1,46 @@ +name: Build and Push GPTQ Image to Github Container Registry + +on: + push: + branches: + - main + paths: + - '**.py' + - 'requirements.txt' + - 'Dockerfile.gptq' + - '.github/workflows/gptq_image.yaml' + +env: + REGISTRY: ghcr.io + GPTQ_IMAGE_NAME: ialacol-gptq +jobs: + gptq_image_to_gcr: + runs-on: ubuntu-latest + permissions: + contents: read + packages: write + steps: + - name: Checkout + uses: actions/checkout@v3 + with: + fetch-depth: 0 + - uses: docker/login-action@v2 + with: + registry: ${{ env.REGISTRY }} + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + - name: Extract metadata (tags, labels) for Docker + id: meta + uses: docker/metadata-action@v4 + with: + images: ${{ env.REGISTRY }}/${{ github.repository_owner }}/${{ env.GPTQ_IMAGE_NAME }} + - name: Build and push Docker image + uses: docker/build-push-action@v4 + with: + context: . + file: ./Dockerfile.gptq + push: true + tags: | + ${{ env.REGISTRY }}/${{ github.repository_owner }}/${{ env.GPTQ_IMAGE_NAME }}:${{ github.sha }} + ${{ env.REGISTRY }}/${{ github.repository_owner }}/${{ env.GPTQ_IMAGE_NAME }}:latest + labels: ${{ steps.meta.outputs.labels }} diff --git a/Dockerfile.gptq b/Dockerfile.gptq new file mode 100644 index 0000000000000000000000000000000000000000..964a3b7dc57d9600640a60d5566a4367e099cc73 --- /dev/null +++ b/Dockerfile.gptq @@ -0,0 +1,11 @@ +# syntax=docker/dockerfile:1 + +FROM python:3.11-slim +WORKDIR /app +COPY requirements.txt requirements.txt +RUN pip3 install -r requirements.txt +# https://github.com/marella/ctransformers#gptq +RUN pip3 install ctransformers[gptq] +COPY . . 
+EXPOSE 8000 +CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"] diff --git a/README.md b/README.md index 876c20d6fcc4a53a596c6024ff778553988fd329..5e7a9d6f1b005c0440c1ce15e186c7a98c3a9839 100644 --- a/README.md +++ b/README.md @@ -103,6 +103,24 @@ For example helm install llama2-7b-chat-metal ialacol/ialacol -f examples/values/llama2-7b-chat-metal.yaml.yaml ``` +### GPTQ + +To use GPTQ, you must + +- `deployment.image` = `ghcr.io/chenhunghan/ialacol-gptq:latest` +- `deployment.env.MODEL_TYPE` = `gptq` + +For example + +```sh +helm install llama2-7b-chat-gptq ialacol/ialacol -f examples/values/llama2-7b-chat-gptq.yaml +``` + +```sh +kubectl port-forward svc/llama2-7b-chat-gptq 8000:8000 +openai -k "sk-fake" -b http://localhost:8000/v1 -vvvvv api chat_completions.create -m gptq_model-4bit-128g.safetensors -g user "Hello world!" +``` + ## Tips ### Creative v.s. Conservative diff --git a/charts/ialacol/Chart.yaml b/charts/ialacol/Chart.yaml index 316ae40c241b4fc1503a9878973eaaa2fd01122e..736a142c132da9c97b1acea524ea3ef8972c245a 100644 --- a/charts/ialacol/Chart.yaml +++ b/charts/ialacol/Chart.yaml @@ -1,6 +1,6 @@ apiVersion: v2 -appVersion: 0.8.0 +appVersion: 0.9.0 description: A Helm chart for ialacol name: ialacol type: application -version: 0.8.0 +version: 0.9.0 diff --git a/examples/values/llama2-7b-chat-gptq.yaml b/examples/values/llama2-7b-chat-gptq.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c3184357438713d088090ff36861d362ee5fba2f --- /dev/null +++ b/examples/values/llama2-7b-chat-gptq.yaml @@ -0,0 +1,30 @@ +replicas: 1 +deployment: + image: ghcr.io/chenhunghan/ialacol-gptq:latest + env: + DEFAULT_MODEL_HG_REPO_ID: TheBloke/Llama-2-7b-Chat-GPTQ + DEFAULT_MODEL_FILE: gptq_model-4bit-128g.safetensors + MODEL_TYPE: "gptq" +resources: + {} +cache: + persistence: + size: 5Gi + accessModes: + - ReadWriteOnce + storageClassName: ~ +cacheMountPath: /app/cache +model: + persistence: + size: 5Gi + accessModes: + - 
ReadWriteOnce + storageClassName: ~ +modelMountPath: /app/models +service: + type: ClusterIP + port: 8000 + annotations: {} +nodeSelector: {} +tolerations: [] +affinity: {}