From e0f6516826fec2bb24bbb09a2419b2880127fe3f Mon Sep 17 00:00:00 2001 From: Evan Lezar Date: Wed, 11 Oct 2023 22:06:09 +0200 Subject: [PATCH] Generate updated code Signed-off-by: Evan Lezar --- pkg/nvml/init.go | 172 +------------------------- pkg/nvml/lib.go | 273 ++++++++++++++++++++++++++++++++++++++++++ pkg/nvml/types_gen.go | 2 +- 3 files changed, 277 insertions(+), 170 deletions(-) create mode 100644 pkg/nvml/lib.go diff --git a/pkg/nvml/init.go b/pkg/nvml/init.go index 1572f81..3d52c1b 100644 --- a/pkg/nvml/init.go +++ b/pkg/nvml/init.go @@ -16,43 +16,23 @@ package nvml import ( "fmt" - - "github.com/NVIDIA/go-nvml/pkg/dl" ) -import "C" - -const ( - nvmlLibraryName = "libnvidia-ml.so.1" - nvmlLibraryLoadFlags = dl.RTLD_LAZY | dl.RTLD_GLOBAL -) - -var nvml *dl.DynamicLibrary - // nvml.Init() func Init() Return { - lib := dl.New(nvmlLibraryName, nvmlLibraryLoadFlags) - err := lib.Open() - if err != nil { + if err := nvml.Open(); err != nil { return ERROR_LIBRARY_NOT_FOUND } - nvml = lib - updateVersionedSymbols() - return nvmlInit() } // nvml.InitWithFlags() func InitWithFlags(Flags uint32) Return { - lib := dl.New(nvmlLibraryName, nvmlLibraryLoadFlags) - err := lib.Open() - if err != nil { + if err := nvml.Open(); err != nil { return ERROR_LIBRARY_NOT_FOUND } - nvml = lib - return nvmlInitWithFlags(Flags) } @@ -65,154 +45,8 @@ func Shutdown() Return { err := nvml.Close() if err != nil { - panic(fmt.Sprintf("error closing %s: %v", nvmlLibraryName, err)) + panic(fmt.Sprintf("error closing %s: %v", nvml.Config.Name, err)) // pass the name only: nvml is a struct holding a mutex and must not be copied into Sprintf } return ret } - -// Default all versioned APIs to v1 (to infer the types) -var nvmlInit = nvmlInit_v1 -var nvmlDeviceGetPciInfo = nvmlDeviceGetPciInfo_v1 -var nvmlDeviceGetCount = nvmlDeviceGetCount_v1 -var nvmlDeviceGetHandleByIndex = nvmlDeviceGetHandleByIndex_v1 -var nvmlDeviceGetHandleByPciBusId = nvmlDeviceGetHandleByPciBusId_v1 -var nvmlDeviceGetNvLinkRemotePciInfo = nvmlDeviceGetNvLinkRemotePciInfo_v1 -var nvmlDeviceRemoveGpu =
nvmlDeviceRemoveGpu_v1 -var nvmlDeviceGetGridLicensableFeatures = nvmlDeviceGetGridLicensableFeatures_v1 -var nvmlEventSetWait = nvmlEventSetWait_v1 -var nvmlDeviceGetAttributes = nvmlDeviceGetAttributes_v1 -var nvmlComputeInstanceGetInfo = nvmlComputeInstanceGetInfo_v1 -var DeviceGetComputeRunningProcesses = deviceGetComputeRunningProcesses_v1 -var DeviceGetGraphicsRunningProcesses = deviceGetGraphicsRunningProcesses_v1 -var DeviceGetMPSComputeRunningProcesses = deviceGetMPSComputeRunningProcesses_v1 -var GetBlacklistDeviceCount = GetExcludedDeviceCount -var GetBlacklistDeviceInfoByIndex = GetExcludedDeviceInfoByIndex -var nvmlDeviceGetGpuInstancePossiblePlacements = nvmlDeviceGetGpuInstancePossiblePlacements_v1 -var nvmlVgpuInstanceGetLicenseInfo = nvmlVgpuInstanceGetLicenseInfo_v1 - -type BlacklistDeviceInfo = ExcludedDeviceInfo -type ProcessInfo_v1Slice []ProcessInfo_v1 -type ProcessInfo_v2Slice []ProcessInfo_v2 - -func (pis ProcessInfo_v1Slice) ToProcessInfoSlice() []ProcessInfo { - var newInfos []ProcessInfo - for _, pi := range pis { - info := ProcessInfo{ - Pid: pi.Pid, - UsedGpuMemory: pi.UsedGpuMemory, - GpuInstanceId: 0xFFFFFFFF, // GPU instance ID is invalid in v1 - ComputeInstanceId: 0xFFFFFFFF, // Compute instance ID is invalid in v1 - } - newInfos = append(newInfos, info) - } - return newInfos -} - -func (pis ProcessInfo_v2Slice) ToProcessInfoSlice() []ProcessInfo { - var newInfos []ProcessInfo - for _, pi := range pis { - info := ProcessInfo{ - Pid: pi.Pid, - UsedGpuMemory: pi.UsedGpuMemory, - GpuInstanceId: pi.GpuInstanceId, - ComputeInstanceId: pi.ComputeInstanceId, - } - newInfos = append(newInfos, info) - } - return newInfos -} - -// updateVersionedSymbols() -func updateVersionedSymbols() { - err := nvml.Lookup("nvmlInit_v2") - if err == nil { - nvmlInit = nvmlInit_v2 - } - err = nvml.Lookup("nvmlDeviceGetPciInfo_v2") - if err == nil { - nvmlDeviceGetPciInfo = nvmlDeviceGetPciInfo_v2 - } - err = nvml.Lookup("nvmlDeviceGetPciInfo_v3") - if err == 
nil { - nvmlDeviceGetPciInfo = nvmlDeviceGetPciInfo_v3 - } - err = nvml.Lookup("nvmlDeviceGetCount_v2") - if err == nil { - nvmlDeviceGetCount = nvmlDeviceGetCount_v2 - } - err = nvml.Lookup("nvmlDeviceGetHandleByIndex_v2") - if err == nil { - nvmlDeviceGetHandleByIndex = nvmlDeviceGetHandleByIndex_v2 - } - err = nvml.Lookup("nvmlDeviceGetHandleByPciBusId_v2") - if err == nil { - nvmlDeviceGetHandleByPciBusId = nvmlDeviceGetHandleByPciBusId_v2 - } - err = nvml.Lookup("nvmlDeviceGetNvLinkRemotePciInfo_v2") - if err == nil { - nvmlDeviceGetNvLinkRemotePciInfo = nvmlDeviceGetNvLinkRemotePciInfo_v2 - } - // Unable to overwrite nvmlDeviceRemoveGpu() because the v2 function takes - // a different set of parameters than the v1 function. - //err = nvml.Lookup("nvmlDeviceRemoveGpu_v2") - //if err == nil { - // nvmlDeviceRemoveGpu = nvmlDeviceRemoveGpu_v2 - //} - err = nvml.Lookup("nvmlDeviceGetGridLicensableFeatures_v2") - if err == nil { - nvmlDeviceGetGridLicensableFeatures = nvmlDeviceGetGridLicensableFeatures_v2 - } - err = nvml.Lookup("nvmlDeviceGetGridLicensableFeatures_v3") - if err == nil { - nvmlDeviceGetGridLicensableFeatures = nvmlDeviceGetGridLicensableFeatures_v3 - } - err = nvml.Lookup("nvmlDeviceGetGridLicensableFeatures_v4") - if err == nil { - nvmlDeviceGetGridLicensableFeatures = nvmlDeviceGetGridLicensableFeatures_v4 - } - err = nvml.Lookup("nvmlEventSetWait_v2") - if err == nil { - nvmlEventSetWait = nvmlEventSetWait_v2 - } - err = nvml.Lookup("nvmlDeviceGetAttributes_v2") - if err == nil { - nvmlDeviceGetAttributes = nvmlDeviceGetAttributes_v2 - } - err = nvml.Lookup("nvmlComputeInstanceGetInfo_v2") - if err == nil { - nvmlComputeInstanceGetInfo = nvmlComputeInstanceGetInfo_v2 - } - err = nvml.Lookup("nvmlDeviceGetComputeRunningProcesses_v2") - if err == nil { - DeviceGetComputeRunningProcesses = deviceGetComputeRunningProcesses_v2 - } - err = nvml.Lookup("nvmlDeviceGetComputeRunningProcesses_v3") - if err == nil { - DeviceGetComputeRunningProcesses = 
deviceGetComputeRunningProcesses_v3 - } - err = nvml.Lookup("nvmlDeviceGetGraphicsRunningProcesses_v2") - if err == nil { - DeviceGetGraphicsRunningProcesses = deviceGetGraphicsRunningProcesses_v2 - } - err = nvml.Lookup("nvmlDeviceGetGraphicsRunningProcesses_v3") - if err == nil { - DeviceGetGraphicsRunningProcesses = deviceGetGraphicsRunningProcesses_v3 - } - err = nvml.Lookup("nvmlDeviceGetMPSComputeRunningProcesses_v2") - if err == nil { - DeviceGetMPSComputeRunningProcesses = deviceGetMPSComputeRunningProcesses_v2 - } - err = nvml.Lookup("nvmlDeviceGetMPSComputeRunningProcesses_v3") - if err == nil { - DeviceGetMPSComputeRunningProcesses = deviceGetMPSComputeRunningProcesses_v3 - } - err = nvml.Lookup("nvmlDeviceGetGpuInstancePossiblePlacements_v2") - if err == nil { - nvmlDeviceGetGpuInstancePossiblePlacements = nvmlDeviceGetGpuInstancePossiblePlacements_v2 - } - err = nvml.Lookup("nvmlVgpuInstanceGetLicenseInfo_v2") - if err == nil { - nvmlVgpuInstanceGetLicenseInfo = nvmlVgpuInstanceGetLicenseInfo_v2 - } -} diff --git a/pkg/nvml/lib.go b/pkg/nvml/lib.go new file mode 100644 index 0000000..4b9ec24 --- /dev/null +++ b/pkg/nvml/lib.go @@ -0,0 +1,273 @@ +/** +# Copyright 2023 NVIDIA CORPORATION +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+**/ + +package nvml + +import ( + "fmt" + "sync" + + "github.com/NVIDIA/go-nvml/pkg/dl" +) + +import "C" + +const ( + defaultNvmlLibraryName = "libnvidia-ml.so.1" + defaultNvmlLibraryLoadFlags = dl.RTLD_LAZY | dl.RTLD_GLOBAL +) + +type nvmlLib struct { + dl.Config + mu sync.Mutex // locked by Open, Close and SetLibOptions + lib *dl.DynamicLibrary // nil until Open succeeds; reset to nil by Close +} + +// nvml represents the global nvml library. +var nvml = nvmlLib{ + Config: dl.Config{ + Name: defaultNvmlLibraryName, + Flags: defaultNvmlLibraryLoadFlags, + }, +} + +// Open ensures that the nvml library is initialized. +// If it is not initialized, the library is opened using dlopen and the versioned +// symbols are updated. +func (l *nvmlLib) Open() error { + l.mu.Lock() + defer l.mu.Unlock() + + if l.lib != nil { + return nil + } + + lib := dl.New(l.Config.Name, l.Config.Flags) + err := lib.Open() + if err != nil { + return fmt.Errorf("error opening %s: %v", l.Config.Name, err) + } + l.lib = lib + + l.updateVersionedSymbols() + + return nil +} + +// Close uninitializes the nvml library if required. +// This closes the underlying library and ensures that the global pointer to the +// library is set to nil to ensure that subsequent calls to init will reinitialize it. +func (l *nvmlLib) Close() error { + l.mu.Lock() + defer l.mu.Unlock() + + if l.lib == nil { + return nil + } + + err := l.lib.Close() + if err != nil { + return fmt.Errorf("error closing %s: %v", l.Config.Name, err) + } + l.lib = nil + return nil +} + +// Lookup delegates to the underlying library if it is initialized. +// If the library is uninitialized, an error is returned. It does not lock l.mu itself; Open calls it (via updateVersionedSymbols) with the lock already held.
+func (l *nvmlLib) Lookup(name string) error { + if l.lib == nil { + return fmt.Errorf("error looking up %s: library not initialized", name) + } + return l.lib.Lookup(name) // delegate to the dl handle, not back into this method +} + +// Default all versioned APIs to v1 (to infer the types) +var nvmlInit = nvmlInit_v1 +var nvmlDeviceGetPciInfo = nvmlDeviceGetPciInfo_v1 +var nvmlDeviceGetCount = nvmlDeviceGetCount_v1 +var nvmlDeviceGetHandleByIndex = nvmlDeviceGetHandleByIndex_v1 +var nvmlDeviceGetHandleByPciBusId = nvmlDeviceGetHandleByPciBusId_v1 +var nvmlDeviceGetNvLinkRemotePciInfo = nvmlDeviceGetNvLinkRemotePciInfo_v1 +var nvmlDeviceRemoveGpu = nvmlDeviceRemoveGpu_v1 +var nvmlDeviceGetGridLicensableFeatures = nvmlDeviceGetGridLicensableFeatures_v1 +var nvmlEventSetWait = nvmlEventSetWait_v1 +var nvmlDeviceGetAttributes = nvmlDeviceGetAttributes_v1 +var nvmlComputeInstanceGetInfo = nvmlComputeInstanceGetInfo_v1 +var DeviceGetComputeRunningProcesses = deviceGetComputeRunningProcesses_v1 +var DeviceGetGraphicsRunningProcesses = deviceGetGraphicsRunningProcesses_v1 +var DeviceGetMPSComputeRunningProcesses = deviceGetMPSComputeRunningProcesses_v1 +var GetBlacklistDeviceCount = GetExcludedDeviceCount +var GetBlacklistDeviceInfoByIndex = GetExcludedDeviceInfoByIndex +var nvmlDeviceGetGpuInstancePossiblePlacements = nvmlDeviceGetGpuInstancePossiblePlacements_v1 +var nvmlVgpuInstanceGetLicenseInfo = nvmlVgpuInstanceGetLicenseInfo_v1 + +// BlacklistDeviceInfo was replaced by ExcludedDeviceInfo +type BlacklistDeviceInfo = ExcludedDeviceInfo + +type ProcessInfo_v1Slice []ProcessInfo_v1 +type ProcessInfo_v2Slice []ProcessInfo_v2 + +func (pis ProcessInfo_v1Slice) ToProcessInfoSlice() []ProcessInfo { + var newInfos []ProcessInfo + for _, pi := range pis { + info := ProcessInfo{ + Pid: pi.Pid, + UsedGpuMemory: pi.UsedGpuMemory, + GpuInstanceId: 0xFFFFFFFF, // GPU instance ID is invalid in v1 + ComputeInstanceId: 0xFFFFFFFF, // Compute instance ID is invalid in v1 + } + newInfos = append(newInfos, info) + } + return newInfos +} + +func
(pis ProcessInfo_v2Slice) ToProcessInfoSlice() []ProcessInfo { + var newInfos []ProcessInfo + for _, pi := range pis { + info := ProcessInfo{ + Pid: pi.Pid, + UsedGpuMemory: pi.UsedGpuMemory, + GpuInstanceId: pi.GpuInstanceId, + ComputeInstanceId: pi.ComputeInstanceId, + } + newInfos = append(newInfos, info) + } + return newInfos +} + +// updateVersionedSymbols checks for versioned symbols in the loaded dynamic library. +// If newer versioned symbols exist, these replace the default `v1` symbols initialized above. +// When new versioned symbols are added, these would have to be initialized above and have +// corresponding checks and subsequent assignments added below. +func (l *nvmlLib) updateVersionedSymbols() { + err := l.Lookup("nvmlInit_v2") + if err == nil { + nvmlInit = nvmlInit_v2 + } + err = l.Lookup("nvmlDeviceGetPciInfo_v2") + if err == nil { + nvmlDeviceGetPciInfo = nvmlDeviceGetPciInfo_v2 + } + err = l.Lookup("nvmlDeviceGetPciInfo_v3") + if err == nil { + nvmlDeviceGetPciInfo = nvmlDeviceGetPciInfo_v3 + } + err = l.Lookup("nvmlDeviceGetCount_v2") + if err == nil { + nvmlDeviceGetCount = nvmlDeviceGetCount_v2 + } + err = l.Lookup("nvmlDeviceGetHandleByIndex_v2") + if err == nil { + nvmlDeviceGetHandleByIndex = nvmlDeviceGetHandleByIndex_v2 + } + err = l.Lookup("nvmlDeviceGetHandleByPciBusId_v2") + if err == nil { + nvmlDeviceGetHandleByPciBusId = nvmlDeviceGetHandleByPciBusId_v2 + } + err = l.Lookup("nvmlDeviceGetNvLinkRemotePciInfo_v2") + if err == nil { + nvmlDeviceGetNvLinkRemotePciInfo = nvmlDeviceGetNvLinkRemotePciInfo_v2 + } + // Unable to overwrite nvmlDeviceRemoveGpu() because the v2 function takes + // a different set of parameters than the v1 function. 
+ //err = l.Lookup("nvmlDeviceRemoveGpu_v2") + //if err == nil { + // nvmlDeviceRemoveGpu = nvmlDeviceRemoveGpu_v2 + //} + err = l.Lookup("nvmlDeviceGetGridLicensableFeatures_v2") + if err == nil { + nvmlDeviceGetGridLicensableFeatures = nvmlDeviceGetGridLicensableFeatures_v2 + } + err = l.Lookup("nvmlDeviceGetGridLicensableFeatures_v3") + if err == nil { + nvmlDeviceGetGridLicensableFeatures = nvmlDeviceGetGridLicensableFeatures_v3 + } + err = l.Lookup("nvmlDeviceGetGridLicensableFeatures_v4") + if err == nil { + nvmlDeviceGetGridLicensableFeatures = nvmlDeviceGetGridLicensableFeatures_v4 + } + err = l.Lookup("nvmlEventSetWait_v2") + if err == nil { + nvmlEventSetWait = nvmlEventSetWait_v2 + } + err = l.Lookup("nvmlDeviceGetAttributes_v2") + if err == nil { + nvmlDeviceGetAttributes = nvmlDeviceGetAttributes_v2 + } + err = l.Lookup("nvmlComputeInstanceGetInfo_v2") + if err == nil { + nvmlComputeInstanceGetInfo = nvmlComputeInstanceGetInfo_v2 + } + err = l.Lookup("nvmlDeviceGetComputeRunningProcesses_v2") + if err == nil { + DeviceGetComputeRunningProcesses = deviceGetComputeRunningProcesses_v2 + } + err = l.Lookup("nvmlDeviceGetComputeRunningProcesses_v3") + if err == nil { + DeviceGetComputeRunningProcesses = deviceGetComputeRunningProcesses_v3 + } + err = l.Lookup("nvmlDeviceGetGraphicsRunningProcesses_v2") + if err == nil { + DeviceGetGraphicsRunningProcesses = deviceGetGraphicsRunningProcesses_v2 + } + err = l.Lookup("nvmlDeviceGetGraphicsRunningProcesses_v3") + if err == nil { + DeviceGetGraphicsRunningProcesses = deviceGetGraphicsRunningProcesses_v3 + } + err = l.Lookup("nvmlDeviceGetMPSComputeRunningProcesses_v2") + if err == nil { + DeviceGetMPSComputeRunningProcesses = deviceGetMPSComputeRunningProcesses_v2 + } + err = l.Lookup("nvmlDeviceGetMPSComputeRunningProcesses_v3") + if err == nil { + DeviceGetMPSComputeRunningProcesses = deviceGetMPSComputeRunningProcesses_v3 + } + err = l.Lookup("nvmlDeviceGetGpuInstancePossiblePlacements_v2") + if err == nil { 
+ nvmlDeviceGetGpuInstancePossiblePlacements = nvmlDeviceGetGpuInstancePossiblePlacements_v2 + } + err = l.Lookup("nvmlVgpuInstanceGetLicenseInfo_v2") + if err == nil { + nvmlVgpuInstanceGetLicenseInfo = nvmlVgpuInstanceGetLicenseInfo_v2 + } +} + +// Option represents a functional option to configure the underlying NVML library. +type Option func(*nvmlLib) + +// WithLibName provides an option to set the library name to be used by the NVML library. +func WithLibName(libName string) Option { + return func(l *nvmlLib) { + l.Config.Name = libName + } +} + +// SetLibOptions applies the specified options to the NVML library. An empty name or zero flags left after applying them fall back to the defaults. Only the stored configuration changes; a library that Open has already loaded is not reopened. +func SetLibOptions(opts ...Option) { + nvml.mu.Lock() + defer nvml.mu.Unlock() + for _, opt := range opts { + opt(&nvml) + } + + if nvml.Config.Name == "" { + nvml.Config.Name = defaultNvmlLibraryName + } + if nvml.Config.Flags == 0 { + nvml.Config.Flags = defaultNvmlLibraryLoadFlags + } +} diff --git a/pkg/nvml/types_gen.go b/pkg/nvml/types_gen.go index 396886d..6144bfe 100644 --- a/pkg/nvml/types_gen.go +++ b/pkg/nvml/types_gen.go @@ -1,5 +1,5 @@ // Code generated by cmd/cgo -godefs; DO NOT EDIT. -// cgo -godefs types.go +// cgo -godefs /Users/elezar/src/go-nvml/pkg/nvml/types.go package nvml