From f76e8981fc28da26850982bb1b3a16fd94919a7f Mon Sep 17 00:00:00 2001 From: Henry Chen <1474479+chenhunghan@users.noreply.github.com> Date: Tue, 8 Aug 2023 21:51:38 +0300 Subject: [PATCH] Fix the cuda 12 base image (#46) Signed-off-by: Hung-Han (Henry) Chen <chenhungh@gmail.com> --- Dockerfile.cuda12 | 2 +- README.md | 6 ++++++ charts/ialacol/Chart.yaml | 4 ++-- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/Dockerfile.cuda12 b/Dockerfile.cuda12 index dee0612..3fcf01f 100644 --- a/Dockerfile.cuda12 +++ b/Dockerfile.cuda12 @@ -1,6 +1,6 @@ # syntax=docker/dockerfile:1 -FROM nvidia/cuda:12.2.0-base-ubuntu20.04 +FROM nvidia/cuda:12.2.0-base-ubuntu22.04 RUN apt-get update && apt-get install -y -q python3 python3-pip WORKDIR /app COPY requirements.txt requirements.txt diff --git a/README.md b/README.md index d43acff..32f23b7 100644 --- a/README.md +++ b/README.md @@ -85,6 +85,12 @@ helm install llama2-7b-chat-cuda11 ialacol/ialacol -f examples/values/llama2-7b- Deploys llama2 7b model with 40 layers offloadind to GPU. The inference is accelerated by CUDA 11. +### CUDA Driver Issues + +If you see `CUDA driver version is insufficient for CUDA runtime version` when making a request, you are likely using an NVIDIA driver that is not [compatible with the CUDA version](https://docs.nvidia.com/cuda/cuda-toolkit-release-notes/index.html). + +Upgrade the driver manually on the node (see [here](https://github.com/awslabs/amazon-eks-ami/issues/1060) if you are using CUDA11 + AMI), or try a different version of CUDA. + ## Tips ### Creative v.s. Conservative diff --git a/charts/ialacol/Chart.yaml b/charts/ialacol/Chart.yaml index e5a02a2..1d2d517 100644 --- a/charts/ialacol/Chart.yaml +++ b/charts/ialacol/Chart.yaml @@ -1,6 +1,6 @@ apiVersion: v2 -appVersion: 0.7.0 +appVersion: 0.7.1 description: A Helm chart for ialacol name: ialacol type: application -version: 0.7.1 +version: 0.7.2 -- GitLab