mirror of
https://github.com/moby/buildkit.git
synced 2025-04-18 18:04:03 +03:00
contrib: check if nvidia drivers are already installed
Signed-off-by: CrazyMax <1951866+crazy-max@users.noreply.github.com>
This commit is contained in:
parent
7f1278d3e8
commit
f85a66c6a5
@ -25,10 +25,13 @@ import (
|
||||
// This is an example of an experimental on-demand setup of CDI devices.
|
||||
// This code is not currently shipping with BuildKit and will probably change.
|
||||
|
||||
const (
|
||||
cdiKind = "nvidia.com/gpu"
|
||||
defaultVersion = "570.0"
|
||||
)
|
||||
const cdiKind = "nvidia.com/gpu"
|
||||
|
||||
// https://github.com/ollama/ollama/blob/b816ff86c923e0290f58f2275e831fc17c29ba37/discover/gpu_linux.go#L33-L43
|
||||
var libcudaGlobs = []string{
|
||||
"/usr/lib/*-linux-gnu/libcuda.so*",
|
||||
"/usr/lib/wsl/drivers/*/libcuda.so*",
|
||||
}
|
||||
|
||||
func init() {
|
||||
cdidevices.Register(cdiKind, &setup{})
|
||||
@ -92,51 +95,32 @@ func (s *setup) Run(ctx context.Context) (err error) {
|
||||
return errors.Errorf("NVIDIA setup is currently only supported on Debian/Ubuntu")
|
||||
}
|
||||
|
||||
var needsDriver bool
|
||||
if nvidiaSmi, err := exec.LookPath("nvidia-smi"); err == nil && nvidiaSmi != "" {
|
||||
if err := run(ctx, []string{nvidiaSmi, "-L"}, pw, dgst); err != nil {
|
||||
needsDriver = true
|
||||
needsDriver := true
|
||||
if _, err := os.Stat("/proc/driver/nvidia"); err == nil {
|
||||
needsDriver = false
|
||||
} else if nvidiaSmi, err := exec.LookPath("nvidia-smi"); err == nil && nvidiaSmi != "" {
|
||||
if err := run(ctx, []string{nvidiaSmi, "-L"}, pw, dgst); err == nil {
|
||||
needsDriver = false
|
||||
}
|
||||
} else if _, err := os.Stat("/proc/driver/nvidia"); err != nil {
|
||||
needsDriver = true
|
||||
}
|
||||
|
||||
var arch string
|
||||
switch runtime.GOARCH {
|
||||
case "amd64":
|
||||
arch = "x86_64"
|
||||
case "arm64":
|
||||
arch = "sbsa"
|
||||
// for non-sbsa could use https://nvidia.github.io/libnvidia-container/stable/deb
|
||||
}
|
||||
|
||||
if arch == "" {
|
||||
return errors.Errorf("unsupported architecture: %s", runtime.GOARCH)
|
||||
}
|
||||
|
||||
if needsDriver {
|
||||
pw.Write(identity.NewID(), client.VertexWarning{
|
||||
Vertex: dgst,
|
||||
Short: []byte("NVIDIA Drivers not found. Installing prebuilt drivers is not recommended"),
|
||||
})
|
||||
if hasWSLGPU() {
|
||||
return errors.Errorf("NVIDIA drivers are required for WSL with non PCI-based GPUs")
|
||||
}
|
||||
return errors.Errorf("NVIDIA drivers are required. Try loading NVIDIA kernel module with \"modprobe nvidia\" command")
|
||||
}
|
||||
|
||||
var dv string
|
||||
if !hasWSLGPU() {
|
||||
if !hasLibsInstalled() && !hasWSLGPU() {
|
||||
version, err := readVersion()
|
||||
if err != nil && !needsDriver {
|
||||
if err != nil {
|
||||
return errors.Wrapf(err, "failed to read NVIDIA driver version")
|
||||
}
|
||||
if version == "" {
|
||||
version = defaultVersion
|
||||
}
|
||||
var ok bool
|
||||
dv, _, ok = strings.Cut(version, ".")
|
||||
if !ok {
|
||||
return errors.Errorf("failed to parse NVIDIA driver version %q", version)
|
||||
}
|
||||
} else if needsDriver {
|
||||
return errors.Errorf("NVIDIA drivers are required for WSL with non PCI-based GPUs")
|
||||
}
|
||||
|
||||
if err := run(ctx, []string{"apt-get", "update"}, pw, dgst); err != nil {
|
||||
@ -147,67 +131,7 @@ func (s *setup) Run(ctx context.Context) (err error) {
|
||||
return err
|
||||
}
|
||||
|
||||
const aptDistro = "ubuntu2404"
|
||||
aptURL := "https://developer.download.nvidia.com/compute/cuda/repos/" + aptDistro + "/" + arch + "/"
|
||||
|
||||
keyTarget := "/usr/share/keyrings/nvidia-cuda-keyring.gpg"
|
||||
|
||||
if _, err := os.Stat(keyTarget); err != nil {
|
||||
fmt.Fprintf(newStream(pw, 2, dgst), "Downloading NVIDIA GPG key\n")
|
||||
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodGet, aptURL+"3bf863cc.pub", nil)
|
||||
if err != nil {
|
||||
return errors.Wrapf(err, "failed to create request for NVIDIA GPG key")
|
||||
}
|
||||
|
||||
resp, err := http.DefaultClient.Do(req)
|
||||
if err != nil {
|
||||
return errors.Wrapf(err, "failed to download NVIDIA GPG key")
|
||||
}
|
||||
|
||||
cmd := exec.CommandContext(ctx, "gpg", "--dearmor", "-o", keyTarget)
|
||||
cmd.Stdin = resp.Body
|
||||
cmd.Stderr = newStream(pw, 2, dgst)
|
||||
if err := cmd.Run(); err != nil {
|
||||
return errors.Wrapf(err, "failed to install NVIDIA GPG key")
|
||||
}
|
||||
resp.Body.Close()
|
||||
}
|
||||
|
||||
if err := os.WriteFile("/etc/apt/sources.list.d/nvidia-cuda.list", []byte("deb [signed-by="+keyTarget+"] "+aptURL+" /"), 0644); err != nil {
|
||||
return errors.Wrapf(err, "failed to add NVIDIA apt repo")
|
||||
}
|
||||
|
||||
if err := run(ctx, []string{"apt-get", "update"}, pw, dgst); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if needsDriver && dv != "" {
|
||||
// this pretty much never works, is it even worth having?
|
||||
// better approach could be to try to create another chroot/container that is built with same kernel packages as the host
|
||||
// could nvidia-headless-no-dkms- be reusable
|
||||
if err := run(ctx, []string{"apt-get", "install", "-y", "nvidia-driver-" + dv}, pw, dgst); err != nil {
|
||||
return err
|
||||
}
|
||||
_, err := os.Stat("/proc/driver/nvidia")
|
||||
if err != nil {
|
||||
return errors.Wrapf(err, "failed to install NVIDIA kernel module. Please install NVIDIA drivers manually")
|
||||
}
|
||||
}
|
||||
|
||||
pkgs := []string{
|
||||
"nvidia-container-toolkit-base",
|
||||
}
|
||||
if dv != "" {
|
||||
pkgs = append(pkgs, []string{
|
||||
"libnvidia-compute-" + dv,
|
||||
"libnvidia-extra-" + dv,
|
||||
"libnvidia-gl-" + dv,
|
||||
"nvidia-utils-" + dv,
|
||||
}...)
|
||||
}
|
||||
|
||||
if err := run(ctx, append([]string{"apt-get", "install", "-y", "--no-install-recommends"}, pkgs...), pw, dgst); err != nil {
|
||||
if err := installPackages(ctx, dv, pw, dgst); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
@ -243,6 +167,67 @@ func run(ctx context.Context, args []string, pw progress.Writer, dgst digest.Dig
|
||||
return cmd.Run()
|
||||
}
|
||||
|
||||
func installPackages(ctx context.Context, dv string, pw progress.Writer, dgst digest.Digest) error {
|
||||
const aptDistro = "ubuntu2404"
|
||||
|
||||
var arch string
|
||||
switch runtime.GOARCH {
|
||||
case "amd64":
|
||||
arch = "x86_64"
|
||||
case "arm64":
|
||||
arch = "sbsa"
|
||||
// for non-sbsa could use https://nvidia.github.io/libnvidia-container/stable/deb
|
||||
}
|
||||
if arch == "" {
|
||||
return errors.Errorf("unsupported architecture: %s", runtime.GOARCH)
|
||||
}
|
||||
|
||||
aptURL := "https://developer.download.nvidia.com/compute/cuda/repos/" + aptDistro + "/" + arch + "/"
|
||||
keyTarget := "/usr/share/keyrings/nvidia-cuda-keyring.gpg"
|
||||
|
||||
if _, err := os.Stat(keyTarget); err != nil {
|
||||
fmt.Fprintf(newStream(pw, 2, dgst), "Downloading NVIDIA GPG key\n")
|
||||
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodGet, aptURL+"3bf863cc.pub", nil)
|
||||
if err != nil {
|
||||
return errors.Wrapf(err, "failed to create request for NVIDIA GPG key")
|
||||
}
|
||||
|
||||
resp, err := http.DefaultClient.Do(req)
|
||||
if err != nil {
|
||||
return errors.Wrapf(err, "failed to download NVIDIA GPG key")
|
||||
}
|
||||
|
||||
cmd := exec.CommandContext(ctx, "gpg", "--dearmor", "-o", keyTarget)
|
||||
cmd.Stdin = resp.Body
|
||||
cmd.Stderr = newStream(pw, 2, dgst)
|
||||
if err := cmd.Run(); err != nil {
|
||||
return errors.Wrapf(err, "failed to install NVIDIA GPG key")
|
||||
}
|
||||
resp.Body.Close()
|
||||
}
|
||||
|
||||
if err := os.WriteFile("/etc/apt/sources.list.d/nvidia-cuda.list", []byte("deb [signed-by="+keyTarget+"] "+aptURL+" /"), 0644); err != nil {
|
||||
return errors.Wrapf(err, "failed to add NVIDIA apt repo")
|
||||
}
|
||||
|
||||
if err := run(ctx, []string{"apt-get", "update"}, pw, dgst); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
pkgs := []string{"nvidia-container-toolkit-base"}
|
||||
if dv != "" {
|
||||
pkgs = append(pkgs, []string{
|
||||
"libnvidia-compute-" + dv,
|
||||
"libnvidia-extra-" + dv,
|
||||
"libnvidia-gl-" + dv,
|
||||
"nvidia-utils-" + dv,
|
||||
}...)
|
||||
}
|
||||
|
||||
return run(ctx, append([]string{"apt-get", "install", "-y", "--no-install-recommends"}, pkgs...), pw, dgst)
|
||||
}
|
||||
|
||||
func readVersion() (string, error) {
|
||||
dt, err := os.ReadFile("/proc/driver/nvidia/version")
|
||||
if err != nil {
|
||||
@ -326,3 +311,13 @@ func hasWSLGPU() bool {
|
||||
_, err := os.Stat("/dev/dxg")
|
||||
return err == nil
|
||||
}
|
||||
|
||||
func hasLibsInstalled() bool {
|
||||
// Check for libcuda in the standard locations to confirm NVIDIA GPU drivers
|
||||
for _, p := range libcudaGlobs {
|
||||
if matches, err := filepath.Glob(p); err == nil && len(matches) > 0 {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user