diff --git a/.github/workflows/metal_image.yaml b/.github/workflows/metal_image.yaml new file mode 100644 index 0000000000000000000000000000000000000000..81a1cf4287840a7dc004bb169394f17b5b940e45 --- /dev/null +++ b/.github/workflows/metal_image.yaml @@ -0,0 +1,46 @@ +name: Build and Push Metal Image to Github Container Registry + +on: + push: + branches: + - main + paths: + - '**.py' + - 'requirements.txt' + - 'Dockerfile.metal' + - '.github/workflows/metal_image.yaml' + +env: + REGISTRY: ghcr.io + METAL_IMAGE_NAME: ialacol-metal +jobs: + metal_image_to_gcr: + runs-on: ubuntu-latest + permissions: + contents: read + packages: write + steps: + - name: Checkout + uses: actions/checkout@v3 + with: + fetch-depth: 0 + - uses: docker/login-action@v2 + with: + registry: ${{ env.REGISTRY }} + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + - name: Extract metadata (tags, labels) for Docker + id: meta + uses: docker/metadata-action@v4 + with: + images: ${{ env.REGISTRY }}/${{ github.repository_owner }}/${{ env.METAL_IMAGE_NAME }} + - name: Build and push Docker image + uses: docker/build-push-action@v4 + with: + context: . + file: ./Dockerfile.metal + push: true + tags: | + ${{ env.REGISTRY }}/${{ github.repository_owner }}/${{ env.METAL_IMAGE_NAME }}:${{ github.sha }} + ${{ env.REGISTRY }}/${{ github.repository_owner }}/${{ env.METAL_IMAGE_NAME }}:latest + labels: ${{ steps.meta.outputs.labels }} diff --git a/Dockerfile.metal b/Dockerfile.metal new file mode 100644 index 0000000000000000000000000000000000000000..1257b2992470b0377743d3ccba1419f245a1f6e9 --- /dev/null +++ b/Dockerfile.metal @@ -0,0 +1,11 @@ +# syntax=docker/dockerfile:1 + +FROM python:3.11-slim +WORKDIR /app +COPY requirements.txt requirements.txt +RUN pip3 install -r requirements.txt +# https://github.com/marella/ctransformers#metal +RUN CT_METAL=1 pip3 install ctransformers --no-binary ctransformers +COPY . . 
+EXPOSE 8000 +CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"] diff --git a/README.md b/README.md index 32f23b7c89919f9436b323e7cbdf2f5401073015..876c20d6fcc4a53a596c6024ff778553988fd329 100644 --- a/README.md +++ b/README.md @@ -91,6 +91,18 @@ If you see `CUDA driver version is insufficient for CUDA runtime version` when m Upgrade the driver manually on the node (See [here](https://github.com/awslabs/amazon-eks-ami/issues/1060) if you are using CUDA11 + AMI). Or try different version of CUDA. +### Metal + +To enable Metal support, use the image `ialacol-metal` built for metal. + +- `deployment.image` = `ghcr.io/chenhunghan/ialacol-metal:latest` + +For example + +```sh +helm install llama2-7b-chat-metal ialacol/ialacol -f examples/values/llama2-7b-chat-metal.yaml +``` + ## Tips ### Creative v.s. Conservative diff --git a/charts/ialacol/Chart.yaml b/charts/ialacol/Chart.yaml index 0350174a13f458d4f01a0f1701122af8bd638493..316ae40c241b4fc1503a9878973eaaa2fd01122e 100644 --- a/charts/ialacol/Chart.yaml +++ b/charts/ialacol/Chart.yaml @@ -1,6 +1,6 @@ apiVersion: v2 -appVersion: 0.7.2 +appVersion: 0.8.0 description: A Helm chart for ialacol name: ialacol type: application -version: 0.7.3 +version: 0.8.0 diff --git a/examples/values/llama2-7b-chat-metal.yaml b/examples/values/llama2-7b-chat-metal.yaml new file mode 100644 index 0000000000000000000000000000000000000000..82c08547ab0b3413ed3c066e715fdce046d771c4 --- /dev/null +++ b/examples/values/llama2-7b-chat-metal.yaml @@ -0,0 +1,29 @@ +replicas: 1 +deployment: + image: ghcr.io/chenhunghan/ialacol-metal:latest + env: + DEFAULT_MODEL_HG_REPO_ID: TheBloke/Llama-2-7B-Chat-GGML + DEFAULT_MODEL_FILE: llama-2-7b-chat.ggmlv3.q4_0.bin +resources: + {} +cache: + persistence: + size: 5Gi + accessModes: + - ReadWriteOnce + storageClassName: ~ +cacheMountPath: /app/cache +model: + persistence: + size: 5Gi + accessModes: + - ReadWriteOnce + storageClassName: ~ +modelMountPath: /app/models +service: + type: 
ClusterIP + port: 8000 + annotations: {} +nodeSelector: {} +tolerations: [] +affinity: {}