1
0
mirror of https://github.com/vladmandic/sdnext.git synced 2026-01-29 05:02:09 +03:00
Files
sdnext/modules/api/nvml.py
Vladimir Mandic ceaf0238fa update nvml
Signed-off-by: Vladimir Mandic <mandic00@live.com>
2025-02-01 10:36:12 -05:00

95 lines
3.7 KiB
Python

try:
    from installer import install, log
except Exception:
    # `installer` could not be imported: substitute a do-nothing `install`
    # and a standard-library logger so the rest of this module still works
    def install(*args, **kwargs): # pylint: disable=unused-argument
        """Fallback for `installer.install`: accept any arguments and do nothing."""

    import logging
    log = logging.getLogger(__name__)

# flipped to True once NVML has been initialised, so get_nvml() only runs
# its install + nvmlInit step a single time
nvml_initialized = False
def get_reason(val: int) -> str:
    """Translate a clocks-throttle-reasons bitmask into readable text.

    Args:
        val: Integer bitmask; every set bit selects one entry of the table
            below. Bits that are not in the table are ignored.

    Returns:
        The descriptions of all matching bits joined with ', ' in ascending
        bit order, or 'ok' when no known bit is set.
    """
    throttle = {
        1: 'gpu idle',
        2: 'applications clocks setting',
        4: 'sw power cap',
        8: 'hw slowdown',
        16: 'sync boost',
        32: 'sw thermal slowdown',
        64: 'hw thermal slowdown',
        128: 'hw power brake slowdown',
        256: 'display clock setting',
    }
    active = [text for bit, text in throttle.items() if bit & val]
    return ', '.join(active) if active else 'ok'
def get_nvml():
    """Collect a status snapshot for every GPU visible through NVML.

    The first call requests `pynvml` through `install` and initialises NVML;
    the module-level `nvml_initialized` flag makes later calls skip that step.

    Returns:
        A list with one dict per device, holding the keys `name`, `version`,
        `pci`, `memory`, `clock`, `load`, `power` and `state`. If anything
        raises during initialisation or while querying, the error is logged
        and an empty list is returned instead.
    """
    global nvml_initialized # pylint: disable=global-statement
    try:
        if not nvml_initialized:
            install('pynvml', quiet=True)
            import pynvml # pylint: disable=redefined-outer-name
            pynvml.nvmlInit()
            log.debug('NVML initialized')
            nvml_initialized = True
        else:
            import pynvml

        def clock_pair(kind):
            # [current, maximum] clock for one clock domain
            return [pynvml.nvmlDeviceGetClockInfo(dev, kind), pynvml.nvmlDeviceGetMaxClockInfo(dev, kind)]

        count = pynvml.nvmlDeviceGetCount()
        # system-wide values take no device argument: query them once, not per GPU
        cuda_version = pynvml.nvmlSystemGetCudaDriverVersion()
        driver_version = pynvml.nvmlSystemGetDriverVersion()
        devices = []
        for i in range(count):
            dev = pynvml.nvmlDeviceGetHandleByIndex(i)
            try:
                name = pynvml.nvmlDeviceGetName(dev)
            except Exception:
                name = '' # a missing name alone should not discard the whole report
            # fetch each info struct once so related fields come from the same reading
            pci = pynvml.nvmlDeviceGetPciInfo(dev)
            mem = pynvml.nvmlDeviceGetMemoryInfo(dev)
            load = pynvml.nvmlDeviceGetUtilizationRates(dev)
            device = {
                'name': name,
                'version': {
                    'cuda': cuda_version,
                    'driver': driver_version,
                    'vbios': pynvml.nvmlDeviceGetVbiosVersion(dev),
                    'rom': pynvml.nvmlDeviceGetInforomImageVersion(dev),
                    'capabilities': pynvml.nvmlDeviceGetCudaComputeCapability(dev),
                },
                'pci': {
                    'link': pynvml.nvmlDeviceGetCurrPcieLinkGeneration(dev),
                    'width': pynvml.nvmlDeviceGetCurrPcieLinkWidth(dev),
                    'busid': pci.busId,
                    'deviceid': pci.pciDeviceId,
                },
                'memory': { # NVML reports bytes; divided twice by 1024 for display
                    'total': round(mem.total / 1024 / 1024, 2),
                    'free': round(mem.free / 1024 / 1024, 2),
                    'used': round(mem.used / 1024 / 1024, 2),
                },
                'clock': { # NVML clock types: 0=graphics, 1=sm, 2=memory
                    'gpu': clock_pair(0),
                    'sm': clock_pair(1),
                    'memory': clock_pair(2),
                },
                'load': {
                    'gpu': round(load.gpu),
                    'memory': round(load.memory),
                    'temp': pynvml.nvmlDeviceGetTemperature(dev, 0), # sensor 0 is the gpu die sensor
                    'fan': pynvml.nvmlDeviceGetFanSpeed(dev),
                },
                # [current draw, enforced limit]; NVML reports milliwatts, divided by 1000
                'power': [round(pynvml.nvmlDeviceGetPowerUsage(dev) / 1000, 2), round(pynvml.nvmlDeviceGetEnforcedPowerLimit(dev) / 1000, 2)],
                'state': get_reason(pynvml.nvmlDeviceGetCurrentClocksThrottleReasons(dev)),
            }
            devices.append(device)
        return devices
    except Exception as e:
        log.error(f'NVML: {e}')
        return []
if __name__ == '__main__':
    # manual check when the file is executed directly: initialise NVML here
    # and set the flag first so get_nvml() skips its own install + init step
    nvml_initialized = True
    import pynvml # pylint: disable=redefined-outer-name
    from rich import print as rprint
    pynvml.nvmlInit()
    try:
        for gpu in get_nvml():
            rprint(gpu)
    finally:
        # pair the nvmlInit() above with a shutdown, even if printing fails
        pynvml.nvmlShutdown()