From c1fa9ba3046e5ba85cc7d79f60f8cf5dd4aa451b Mon Sep 17 00:00:00 2001
From: Henry Chen <1474479+chenhunghan@users.noreply.github.com>
Date: Wed, 9 Aug 2023 19:32:21 +0300
Subject: [PATCH] Add experimental metal support (#49)

Signed-off-by: Hung-Han (Henry) Chen <chenhungh@gmail.com>
---
 .github/workflows/metal_image.yaml        | 46 +++++++++++++++++++++++
 Dockerfile.metal                          | 11 ++++++
 README.md                                 | 12 ++++++
 charts/ialacol/Chart.yaml                 |  4 +-
 examples/values/llama2-7b-chat-metal.yaml | 29 ++++++++++++++
 5 files changed, 100 insertions(+), 2 deletions(-)
 create mode 100644 .github/workflows/metal_image.yaml
 create mode 100644 Dockerfile.metal
 create mode 100644 examples/values/llama2-7b-chat-metal.yaml
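Testing note (kept below the diffstat, outside the commit message): a minimal way to smoke-test this patch end to end is sketched below. The Helm repository URL and the release name are assumptions for illustration and are not introduced by this change; the values file path is the one added in this patch.

```sh
# Hypothetical smoke test: install the chart using the new Metal example values.
# The repo URL and release name are assumptions, not part of this patch.
helm repo add ialacol https://chenhunghan.github.io/ialacol
helm repo update
helm install llama2-7b-chat-metal ialacol/ialacol \
  -f examples/values/llama2-7b-chat-metal.yaml
```
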
diff --git a/.github/workflows/metal_image.yaml b/.github/workflows/metal_image.yaml
new file mode 100644
index 0000000..81a1cf4
--- /dev/null
+++ b/.github/workflows/metal_image.yaml
@@ -0,0 +1,46 @@
+name: Build and Push Metal Image to GitHub Container Registry
+
+on:
+  push:
+    branches:
+      - main
+    paths:
+      - '**.py'
+      - 'requirements.txt'
+      - 'Dockerfile.metal'
+      - '.github/workflows/metal_image.yaml'
+
+env:
+  REGISTRY: ghcr.io
+  METAL_IMAGE_NAME: ialacol-metal
+jobs:
+  metal_image_to_gcr:
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      packages: write
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v3
+        with:
+          fetch-depth: 0
+      - uses: docker/login-action@v2
+        with:
+          registry: ${{ env.REGISTRY }}
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+      - name: Extract metadata (tags, labels) for Docker
+        id: meta
+        uses: docker/metadata-action@v4
+        with:
+          images: ${{ env.REGISTRY }}/${{ env.METAL_IMAGE_NAME }}
+      - name: Build and push Docker image
+        uses: docker/build-push-action@v4
+        with:
+          context: .
+          file: ./Dockerfile.metal
+          push: true
+          tags: |
+            ${{ env.REGISTRY }}/${{ github.repository_owner }}/${{ env.METAL_IMAGE_NAME }}:${{ github.sha }}
+            ${{ env.REGISTRY }}/${{ github.repository_owner }}/${{ env.METAL_IMAGE_NAME }}:latest
+          labels: ${{ steps.meta.outputs.labels }}
diff --git a/Dockerfile.metal b/Dockerfile.metal
new file mode 100644
index 0000000..1257b29
--- /dev/null
+++ b/Dockerfile.metal
@@ -0,0 +1,11 @@
+# syntax=docker/dockerfile:1
+
+FROM python:3.11-slim
+WORKDIR /app
+COPY requirements.txt requirements.txt
+RUN pip3 install -r requirements.txt
+# https://github.com/marella/ctransformers#metal
+RUN CT_METAL=1 pip3 install ctransformers --no-binary ctransformers
+COPY . .
+EXPOSE 8000
+CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]
diff --git a/README.md b/README.md
index 32f23b7..876c20d 100644
--- a/README.md
+++ b/README.md
@@ -91,6 +91,18 @@ If you see `CUDA driver version is insufficient for CUDA runtime version` when m
 
 Upgrade the driver manually on the node (See [here](https://github.com/awslabs/amazon-eks-ami/issues/1060) if you are using CUDA11 + AMI). Or try different version of CUDA.
 
+### Metal
+
+To enable Metal support, use the image `ialacol-metal` built for Metal.
+
+- `deployment.image` = `ghcr.io/chenhunghan/ialacol-metal:latest`
+
+For example:
+
+```sh
+helm install llama2-7b-chat-metal ialacol/ialacol -f examples/values/llama2-7b-chat-metal.yaml
+```
+
 ## Tips
 
 ### Creative v.s. Conservative
diff --git a/charts/ialacol/Chart.yaml b/charts/ialacol/Chart.yaml
index 0350174..316ae40 100644
--- a/charts/ialacol/Chart.yaml
+++ b/charts/ialacol/Chart.yaml
@@ -1,6 +1,6 @@
 apiVersion: v2
-appVersion: 0.7.2
+appVersion: 0.8.0
 description: A Helm chart for ialacol
 name: ialacol
 type: application
-version: 0.7.3
+version: 0.8.0
diff --git a/examples/values/llama2-7b-chat-metal.yaml b/examples/values/llama2-7b-chat-metal.yaml
new file mode 100644
index 0000000..82c0854
--- /dev/null
+++ b/examples/values/llama2-7b-chat-metal.yaml
@@ -0,0 +1,29 @@
+replicas: 1
+deployment:
+  image: ghcr.io/chenhunghan/ialacol-metal:latest
+  env:
+    DEFAULT_MODEL_HG_REPO_ID: TheBloke/Llama-2-7B-Chat-GGML
+    DEFAULT_MODEL_FILE: llama-2-7b-chat.ggmlv3.q4_0.bin
+resources:
+  {}
+cache:
+  persistence:
+    size: 5Gi
+    accessModes:
+      - ReadWriteOnce
+    storageClassName: ~
+cacheMountPath: /app/cache
+model:
+  persistence:
+    size: 5Gi
+    accessModes:
+      - ReadWriteOnce
+    storageClassName: ~
+modelMountPath: /app/models
+service:
+  type: ClusterIP
+  port: 8000
+  annotations: {}
+nodeSelector: {}
+tolerations: []
+affinity: {}
-- 
GitLab
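
For local experimentation, the Metal image can also be built directly from `Dockerfile.metal`, roughly mirroring the build step in `metal_image.yaml`; the local tag below is illustrative, whereas CI pushes the commit SHA and `latest` tags to ghcr.io.

```sh
# Sketch of a local build equivalent to the workflow's docker/build-push-action step
# (context: ., file: ./Dockerfile.metal); the tag is an assumption for local use only.
docker build -f Dockerfile.metal -t ialacol-metal:dev .
```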