From c1fa9ba3046e5ba85cc7d79f60f8cf5dd4aa451b Mon Sep 17 00:00:00 2001
From: Henry Chen <1474479+chenhunghan@users.noreply.github.com>
Date: Wed, 9 Aug 2023 19:32:21 +0300
Subject: [PATCH] Add experimental metal support (#49)

Signed-off-by: Hung-Han (Henry) Chen <chenhungh@gmail.com>
---
 .github/workflows/metal_image.yaml        | 46 +++++++++++++++++++++++
 Dockerfile.metal                          | 11 ++++++
 README.md                                 | 12 ++++++
 charts/ialacol/Chart.yaml                 |  4 +-
 examples/values/llama2-7b-chat-metal.yaml | 29 ++++++++++++++
 5 files changed, 100 insertions(+), 2 deletions(-)
 create mode 100644 .github/workflows/metal_image.yaml
 create mode 100644 Dockerfile.metal
 create mode 100644 examples/values/llama2-7b-chat-metal.yaml
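Testing note (kept below the diffstat, outside the commit message): a minimal way to smoke-test this patch end to end is sketched below. The Helm repository URL and the release name are assumptions for illustration and are not introduced by this change; the values file path is the one added in this patch.

```sh
# Hypothetical smoke test: install the chart using the new Metal example values.
# The repo URL and release name are assumptions, not part of this patch.
helm repo add ialacol https://chenhunghan.github.io/ialacol
helm repo update
helm install llama2-7b-chat-metal ialacol/ialacol \
  -f examples/values/llama2-7b-chat-metal.yaml
```
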
diff --git a/.github/workflows/metal_image.yaml b/.github/workflows/metal_image.yaml
new file mode 100644
index 0000000..81a1cf4
--- /dev/null
+++ b/.github/workflows/metal_image.yaml
@@ -0,0 +1,46 @@
+name: Build and Push Metal Image to GitHub Container Registry
+
+on:
+  push:
+    branches:
+      - main
+    paths:
+      - '**.py'
+      - 'requirements.txt'
+      - 'Dockerfile.metal'
+      - '.github/workflows/metal_image.yaml'
+
+env:
+  REGISTRY: ghcr.io
+  METAL_IMAGE_NAME: ialacol-metal
+jobs:
+  metal_image_to_gcr:
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      packages: write
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v3
+        with:
+          fetch-depth: 0
+      - uses: docker/login-action@v2
+        with:
+          registry: ${{ env.REGISTRY }}
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+      - name: Extract metadata (tags, labels) for Docker
+        id: meta
+        uses: docker/metadata-action@v4
+        with:
+          images: ${{ env.REGISTRY }}/${{ env.METAL_IMAGE_NAME }}
+      - name: Build and push Docker image
+        uses: docker/build-push-action@v4
+        with:
+          context: .
+          file: ./Dockerfile.metal
+          push: true
+          tags: |
+            ${{ env.REGISTRY }}/${{ github.repository_owner }}/${{ env.METAL_IMAGE_NAME }}:${{ github.sha }}
+            ${{ env.REGISTRY }}/${{ github.repository_owner }}/${{ env.METAL_IMAGE_NAME }}:latest
+          labels: ${{ steps.meta.outputs.labels }}
diff --git a/Dockerfile.metal b/Dockerfile.metal
new file mode 100644
index 0000000..1257b29
--- /dev/null
+++ b/Dockerfile.metal
@@ -0,0 +1,11 @@
+# syntax=docker/dockerfile:1
+
+FROM python:3.11-slim
+WORKDIR /app
+COPY requirements.txt requirements.txt
+RUN pip3 install -r requirements.txt
+# https://github.com/marella/ctransformers#metal
+RUN CT_METAL=1 pip3 install ctransformers --no-binary ctransformers
+COPY . .
+EXPOSE 8000
+CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]
diff --git a/README.md b/README.md
index 32f23b7..876c20d 100644
--- a/README.md
+++ b/README.md
@@ -91,6 +91,18 @@ If you see `CUDA driver version is insufficient for CUDA runtime version` when m
 
 Upgrade the driver manually on the node (See [here](https://github.com/awslabs/amazon-eks-ami/issues/1060) if you are using CUDA11 + AMI). Or try different version of CUDA.
 
+### Metal
+
+To enable Metal support, use the image `ialacol-metal` built for Metal.
+
+- `deployment.image` = `ghcr.io/chenhunghan/ialacol-metal:latest`
+
+For example:
+
+```sh
+helm install llama2-7b-chat-metal ialacol/ialacol -f examples/values/llama2-7b-chat-metal.yaml
+```
+
 ## Tips
 
 ### Creative v.s. Conservative
diff --git a/charts/ialacol/Chart.yaml b/charts/ialacol/Chart.yaml
index 0350174..316ae40 100644
--- a/charts/ialacol/Chart.yaml
+++ b/charts/ialacol/Chart.yaml
@@ -1,6 +1,6 @@
 apiVersion: v2
-appVersion: 0.7.2
+appVersion: 0.8.0
 description: A Helm chart for ialacol
 name: ialacol
 type: application
-version: 0.7.3
+version: 0.8.0
diff --git a/examples/values/llama2-7b-chat-metal.yaml b/examples/values/llama2-7b-chat-metal.yaml
new file mode 100644
index 0000000..82c0854
--- /dev/null
+++ b/examples/values/llama2-7b-chat-metal.yaml
@@ -0,0 +1,29 @@
+replicas: 1
+deployment:
+  image: ghcr.io/chenhunghan/ialacol-metal:latest
+  env:
+    DEFAULT_MODEL_HG_REPO_ID: TheBloke/Llama-2-7B-Chat-GGML
+    DEFAULT_MODEL_FILE: llama-2-7b-chat.ggmlv3.q4_0.bin
+resources:
+  {}
+cache:
+  persistence:
+    size: 5Gi
+    accessModes:
+      - ReadWriteOnce
+    storageClassName: ~
+cacheMountPath: /app/cache
+model:
+  persistence:
+    size: 5Gi
+    accessModes:
+      - ReadWriteOnce
+    storageClassName: ~
+modelMountPath: /app/models
+service:
+  type: ClusterIP
+  port: 8000
+  annotations: {}
+nodeSelector: {}
+tolerations: []
+affinity: {}
-- 
GitLab
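
For local experimentation, the Metal image can also be built directly from `Dockerfile.metal`, roughly mirroring the build step in `metal_image.yaml`; the local tag below is illustrative, whereas CI pushes the commit SHA and `latest` tags to ghcr.io.

```sh
# Sketch of a local build equivalent to the workflow's docker/build-push-action step
# (context: ., file: ./Dockerfile.metal); the tag is an assumption for local use only.
docker build -f Dockerfile.metal -t ialacol-metal:dev .
```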