diff --git a/Dockerfile.cuda12 b/Dockerfile.cuda12 index dee0612f9e13daa213563e9e63daf329aa28f422..3fcf01f3b3c9e8a79feb4b8f9eaadcc8840ec592 100644 --- a/Dockerfile.cuda12 +++ b/Dockerfile.cuda12 @@ -1,6 +1,6 @@ # syntax=docker/dockerfile:1 -FROM nvidia/cuda:12.2.0-base-ubuntu20.04 +FROM nvidia/cuda:12.2.0-base-ubuntu22.04 RUN apt-get update && apt-get install -y -q python3 python3-pip WORKDIR /app COPY requirements.txt requirements.txt diff --git a/README.md b/README.md index d43acff7ce181be0df7ee6db2acd3631585ae20e..32f23b7c89919f9436b323e7cbdf2f5401073015 100644 --- a/README.md +++ b/README.md @@ -85,6 +85,12 @@ helm install llama2-7b-chat-cuda11 ialacol/ialacol -f examples/values/llama2-7b- Deploys llama2 7b model with 40 layers offloadind to GPU. The inference is accelerated by CUDA 11. +### CUDA Driver Issues + +If you see `CUDA driver version is insufficient for CUDA runtime version` when making a request, you are likely using an NVIDIA driver that is not [compatible with the CUDA version](https://docs.nvidia.com/cuda/cuda-toolkit-release-notes/index.html). + +Upgrade the driver manually on the node (see [here](https://github.com/awslabs/amazon-eks-ami/issues/1060) if you are using CUDA 11 on the Amazon EKS AMI), or try a different version of CUDA. + ## Tips ### Creative v.s. Conservative diff --git a/charts/ialacol/Chart.yaml b/charts/ialacol/Chart.yaml index e5a02a2766c2e93d436841e1d597015b8cbfda7a..1d2d517811bcf575cee38c7bfedf8aa163509588 100644 --- a/charts/ialacol/Chart.yaml +++ b/charts/ialacol/Chart.yaml @@ -1,6 +1,6 @@ apiVersion: v2 -appVersion: 0.7.0 +appVersion: 0.7.1 description: A Helm chart for ialacol name: ialacol type: application -version: 0.7.1 +version: 0.7.2