1
0
mirror of https://github.com/moby/buildkit.git synced 2025-04-18 18:04:03 +03:00

contrib: check if nvidia drivers are already installed

Signed-off-by: CrazyMax <1951866+crazy-max@users.noreply.github.com>
This commit is contained in:
CrazyMax 2025-03-27 15:28:11 +01:00
parent 7f1278d3e8
commit f85a66c6a5
No known key found for this signature in database
GPG Key ID: ADE44D8C9D44FBE4

View File

@ -25,10 +25,13 @@ import (
// This is an example of an experimental on-demand setup of CDI devices.
// This code is not currently shipping with BuildKit and will probably change.
const (
cdiKind = "nvidia.com/gpu"
defaultVersion = "570.0"
)
const cdiKind = "nvidia.com/gpu"
// libcudaGlobs lists the glob patterns that are searched for the libcuda
// shared library. hasLibsInstalled treats a match for any of these patterns
// as a sign that the NVIDIA user-space libraries are already present.
// Reference for the search locations:
// https://github.com/ollama/ollama/blob/b816ff86c923e0290f58f2275e831fc17c29ba37/discover/gpu_linux.go#L33-L43
var libcudaGlobs = []string{
"/usr/lib/*-linux-gnu/libcuda.so*",
"/usr/lib/wsl/drivers/*/libcuda.so*",
}
func init() {
cdidevices.Register(cdiKind, &setup{})
@ -92,51 +95,32 @@ func (s *setup) Run(ctx context.Context) (err error) {
return errors.Errorf("NVIDIA setup is currently only supported on Debian/Ubuntu")
}
var needsDriver bool
if nvidiaSmi, err := exec.LookPath("nvidia-smi"); err == nil && nvidiaSmi != "" {
if err := run(ctx, []string{nvidiaSmi, "-L"}, pw, dgst); err != nil {
needsDriver = true
needsDriver := true
if _, err := os.Stat("/proc/driver/nvidia"); err == nil {
needsDriver = false
} else if nvidiaSmi, err := exec.LookPath("nvidia-smi"); err == nil && nvidiaSmi != "" {
if err := run(ctx, []string{nvidiaSmi, "-L"}, pw, dgst); err == nil {
needsDriver = false
}
} else if _, err := os.Stat("/proc/driver/nvidia"); err != nil {
needsDriver = true
}
var arch string
switch runtime.GOARCH {
case "amd64":
arch = "x86_64"
case "arm64":
arch = "sbsa"
// for non-sbsa could use https://nvidia.github.io/libnvidia-container/stable/deb
}
if arch == "" {
return errors.Errorf("unsupported architecture: %s", runtime.GOARCH)
}
if needsDriver {
pw.Write(identity.NewID(), client.VertexWarning{
Vertex: dgst,
Short: []byte("NVIDIA Drivers not found. Installing prebuilt drivers is not recommended"),
})
if hasWSLGPU() {
return errors.Errorf("NVIDIA drivers are required for WSL with non PCI-based GPUs")
}
return errors.Errorf("NVIDIA drivers are required. Try loading NVIDIA kernel module with \"modprobe nvidia\" command")
}
var dv string
if !hasWSLGPU() {
if !hasLibsInstalled() && !hasWSLGPU() {
version, err := readVersion()
if err != nil && !needsDriver {
if err != nil {
return errors.Wrapf(err, "failed to read NVIDIA driver version")
}
if version == "" {
version = defaultVersion
}
var ok bool
dv, _, ok = strings.Cut(version, ".")
if !ok {
return errors.Errorf("failed to parse NVIDIA driver version %q", version)
}
} else if needsDriver {
return errors.Errorf("NVIDIA drivers are required for WSL with non PCI-based GPUs")
}
if err := run(ctx, []string{"apt-get", "update"}, pw, dgst); err != nil {
@ -147,67 +131,7 @@ func (s *setup) Run(ctx context.Context) (err error) {
return err
}
const aptDistro = "ubuntu2404"
aptURL := "https://developer.download.nvidia.com/compute/cuda/repos/" + aptDistro + "/" + arch + "/"
keyTarget := "/usr/share/keyrings/nvidia-cuda-keyring.gpg"
if _, err := os.Stat(keyTarget); err != nil {
fmt.Fprintf(newStream(pw, 2, dgst), "Downloading NVIDIA GPG key\n")
req, err := http.NewRequestWithContext(ctx, http.MethodGet, aptURL+"3bf863cc.pub", nil)
if err != nil {
return errors.Wrapf(err, "failed to create request for NVIDIA GPG key")
}
resp, err := http.DefaultClient.Do(req)
if err != nil {
return errors.Wrapf(err, "failed to download NVIDIA GPG key")
}
cmd := exec.CommandContext(ctx, "gpg", "--dearmor", "-o", keyTarget)
cmd.Stdin = resp.Body
cmd.Stderr = newStream(pw, 2, dgst)
if err := cmd.Run(); err != nil {
return errors.Wrapf(err, "failed to install NVIDIA GPG key")
}
resp.Body.Close()
}
if err := os.WriteFile("/etc/apt/sources.list.d/nvidia-cuda.list", []byte("deb [signed-by="+keyTarget+"] "+aptURL+" /"), 0644); err != nil {
return errors.Wrapf(err, "failed to add NVIDIA apt repo")
}
if err := run(ctx, []string{"apt-get", "update"}, pw, dgst); err != nil {
return err
}
if needsDriver && dv != "" {
// this pretty much never works, is it even worth having?
// better approach could be to try to create another chroot/container that is built with same kernel packages as the host
// could nvidia-headless-no-dkms- be reusable
if err := run(ctx, []string{"apt-get", "install", "-y", "nvidia-driver-" + dv}, pw, dgst); err != nil {
return err
}
_, err := os.Stat("/proc/driver/nvidia")
if err != nil {
return errors.Wrapf(err, "failed to install NVIDIA kernel module. Please install NVIDIA drivers manually")
}
}
pkgs := []string{
"nvidia-container-toolkit-base",
}
if dv != "" {
pkgs = append(pkgs, []string{
"libnvidia-compute-" + dv,
"libnvidia-extra-" + dv,
"libnvidia-gl-" + dv,
"nvidia-utils-" + dv,
}...)
}
if err := run(ctx, append([]string{"apt-get", "install", "-y", "--no-install-recommends"}, pkgs...), pw, dgst); err != nil {
if err := installPackages(ctx, dv, pw, dgst); err != nil {
return err
}
@ -243,6 +167,67 @@ func run(ctx context.Context, args []string, pw progress.Writer, dgst digest.Dig
return cmd.Run()
}
// installPackages registers the NVIDIA CUDA apt repository and installs
// nvidia-container-toolkit-base from it. When dv (the driver major version)
// is non-empty, the matching user-space driver packages
// (libnvidia-compute, libnvidia-extra, libnvidia-gl, nvidia-utils) are
// installed as well.
//
// The repository signing key is downloaded only when the keyring file does
// not exist yet. Command output is forwarded to pw under the vertex dgst.
// An error is returned for unsupported architectures and for any failing
// download, file write or apt-get invocation.
func installPackages(ctx context.Context, dv string, pw progress.Writer, dgst digest.Digest) error {
	const aptDistro = "ubuntu2404"

	// Map the Go architecture name to the directory name used by the
	// NVIDIA CUDA repository.
	var arch string
	switch runtime.GOARCH {
	case "amd64":
		arch = "x86_64"
	case "arm64":
		arch = "sbsa"
		// for non-sbsa could use https://nvidia.github.io/libnvidia-container/stable/deb
	}
	if arch == "" {
		return errors.Errorf("unsupported architecture: %s", runtime.GOARCH)
	}

	aptURL := "https://developer.download.nvidia.com/compute/cuda/repos/" + aptDistro + "/" + arch + "/"
	keyTarget := "/usr/share/keyrings/nvidia-cuda-keyring.gpg"

	// Only fetch the signing key when the keyring is not already in place.
	if _, err := os.Stat(keyTarget); err != nil {
		if err := installNVIDIAKey(ctx, aptURL+"3bf863cc.pub", keyTarget, pw, dgst); err != nil {
			return err
		}
	}

	if err := os.WriteFile("/etc/apt/sources.list.d/nvidia-cuda.list", []byte("deb [signed-by="+keyTarget+"] "+aptURL+" /"), 0644); err != nil {
		return errors.Wrapf(err, "failed to add NVIDIA apt repo")
	}

	// Refresh the package index so the newly added repository is visible.
	if err := run(ctx, []string{"apt-get", "update"}, pw, dgst); err != nil {
		return err
	}

	pkgs := []string{"nvidia-container-toolkit-base"}
	if dv != "" {
		pkgs = append(pkgs, []string{
			"libnvidia-compute-" + dv,
			"libnvidia-extra-" + dv,
			"libnvidia-gl-" + dv,
			"nvidia-utils-" + dv,
		}...)
	}

	return run(ctx, append([]string{"apt-get", "install", "-y", "--no-install-recommends"}, pkgs...), pw, dgst)
}

// installNVIDIAKey downloads the ASCII-armored GPG key at keyURL and writes
// its dearmored form to keyTarget by piping the response through
// "gpg --dearmor". The response body is always closed, a non-200 response is
// rejected before gpg runs, and a partially written keyring is removed when
// gpg fails so that a later attempt downloads the key again.
func installNVIDIAKey(ctx context.Context, keyURL, keyTarget string, pw progress.Writer, dgst digest.Digest) error {
	fmt.Fprintf(newStream(pw, 2, dgst), "Downloading NVIDIA GPG key\n")

	req, err := http.NewRequestWithContext(ctx, http.MethodGet, keyURL, nil)
	if err != nil {
		return errors.Wrapf(err, "failed to create request for NVIDIA GPG key")
	}
	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		return errors.Wrapf(err, "failed to download NVIDIA GPG key")
	}
	defer resp.Body.Close()

	// Do not feed an HTTP error page to gpg.
	if resp.StatusCode != http.StatusOK {
		return errors.Errorf("failed to download NVIDIA GPG key: unexpected status %s", resp.Status)
	}

	cmd := exec.CommandContext(ctx, "gpg", "--dearmor", "-o", keyTarget)
	cmd.Stdin = resp.Body
	cmd.Stderr = newStream(pw, 2, dgst)
	if err := cmd.Run(); err != nil {
		// Best-effort cleanup: the caller skips the download whenever
		// keyTarget exists, so a truncated file must not be left behind.
		// The removal error is ignored because the gpg failure is the
		// error worth reporting.
		_ = os.Remove(keyTarget)
		return errors.Wrapf(err, "failed to install NVIDIA GPG key")
	}
	return nil
}
func readVersion() (string, error) {
dt, err := os.ReadFile("/proc/driver/nvidia/version")
if err != nil {
@ -326,3 +311,13 @@ func hasWSLGPU() bool {
_, err := os.Stat("/dev/dxg")
return err == nil
}
// hasLibsInstalled reports whether at least one file matches any of the
// patterns in libcudaGlobs, i.e. whether a libcuda library is present in one
// of the searched locations.
func hasLibsInstalled() bool {
	for i := range libcudaGlobs {
		found, globErr := filepath.Glob(libcudaGlobs[i])
		// Skip patterns that are malformed or match nothing.
		if globErr != nil || len(found) == 0 {
			continue
		}
		return true
	}
	return false
}