Skip to content

Commit 291a1bc

Browse files
committed
Add optional resource prefix support to "fake-mode"
Adding a prefix (e.g. "fake_") to resource names has pros and cons. With a different resource name, real GPU workloads do not end up on faked devices, so one can easily run both the real GPU plugin and its workloads in the same cluster as the fake ones. However, GAS hard-codes the resource names, i.e. changing the name(s) with a prefix will break GPU plugin resource management, so one should use a prefix only when fractional resources are not needed for faked devices. Signed-off-by: Eero Tamminen <eero.t.tamminen@intel.com>
1 parent c011712 commit 291a1bc

File tree

1 file changed

+20
-9
lines changed

1 file changed

+20
-9
lines changed

cmd/gpu_plugin/gpu_plugin.go

Lines changed: 20 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ const (
5555

5656
type cliOptions struct {
5757
preferredAllocationPolicy string
58+
resourcePrefix string
5859
sharedDevNum int
5960
enableMonitoring bool
6061
resourceManagement bool
@@ -176,7 +177,7 @@ func newDevicePlugin(sysfsDir, devfsDir string, options cliOptions) *devicePlugi
176177
if options.resourceManagement {
177178
var err error
178179

179-
dp.resMan, err = rm.NewResourceManager(monitorID, namespace+"/"+deviceType)
180+
dp.resMan, err = rm.NewResourceManager(monitorID, namespace+"/"+options.resourcePrefix+deviceType)
180181
if err != nil {
181182
klog.Errorf("Failed to create resource manager: %+v", err)
182183
return nil
@@ -330,7 +331,8 @@ func (dp *devicePlugin) scan() (dpapi.DeviceTree, error) {
330331
}
331332

332333
if dp.options.enableMonitoring {
333-
klog.V(4).Infof("Adding %s to GPU %s/%s", devPath, monitorType, monitorID)
334+
klog.V(4).Infof("Adding %s to GPU resource %s%s/%s", devPath,
335+
dp.options.resourcePrefix, monitorType, monitorID)
334336

335337
monitor = append(monitor, devSpec)
336338
}
@@ -343,7 +345,7 @@ func (dp *devicePlugin) scan() (dpapi.DeviceTree, error) {
343345
devID := fmt.Sprintf("%s-%d", f.Name(), i)
344346
// Currently only one device type (i915) is supported.
345347
// TODO: check model ID to differentiate device models.
346-
devTree.AddDevice(deviceType, devID, deviceInfo)
348+
devTree.AddDevice(dp.options.resourcePrefix+deviceType, devID, deviceInfo)
347349

348350
rmDevInfos[devID] = rm.NewDeviceInfo(nodes, nil, nil)
349351
}
@@ -352,7 +354,7 @@ func (dp *devicePlugin) scan() (dpapi.DeviceTree, error) {
352354
// all Intel GPUs are under single monitoring resource
353355
if len(monitor) > 0 {
354356
deviceInfo := dpapi.NewDeviceInfo(pluginapi.Healthy, monitor, nil, nil, nil)
355-
devTree.AddDevice(monitorType, monitorID, deviceInfo)
357+
devTree.AddDevice(dp.options.resourcePrefix+monitorType, monitorID, deviceInfo)
356358
}
357359

358360
if dp.resMan != nil {
@@ -371,10 +373,10 @@ func (dp *devicePlugin) Allocate(request *pluginapi.AllocateRequest) (*pluginapi
371373
}
372374

373375
func main() {
374-
var prefix string
376+
var faked string
375377
var opts cliOptions
376378

377-
flag.StringVar(&fprefix, "fake-mode", "", "Prefix for devfs & sysfs paths")
379+
flag.StringVar(&faked, "fake-mode", "", "Comma separated prefix for devfs & sysfs paths + prefix for resources names")
378380
flag.BoolVar(&opts.enableMonitoring, "enable-monitoring", false, "whether to enable 'i915_monitoring' (= all GPUs) resource")
379381
flag.BoolVar(&opts.resourceManagement, "resource-manager", false, "fractional GPU resource management")
380382
flag.IntVar(&opts.sharedDevNum, "shared-dev-num", 1, "number of containers sharing the same GPU device")
@@ -400,12 +402,21 @@ func main() {
400402
klog.V(1).Infof("GPU device plugin started with %s preferred allocation policy", opts.preferredAllocationPolicy)
401403

402404
var sysfs, devfs string
403-
if prefix != "" {
404-
sysfs = prefix + sysfsDrmDirectory
405-
devfs = prefix + devfsDriDirectory
405+
if faked != "" {
406+
prefixes := strings.Split(faked, ",")
407+
if len(prefixes) != 2 {
408+
klog.Fatalf("%d commas in fake-mode option value, not one", len(prefixes)-1)
409+
}
410+
sysfs = prefixes[0] + sysfsDrmDirectory
411+
devfs = prefixes[0] + devfsDriDirectory
412+
opts.resourcePrefix = prefixes[1]
413+
if opts.resourceManagement && opts.resourcePrefix != "" {
414+
klog.Warning("Resource name prefix breaks resource management as it hard-codes their names")
415+
}
406416
} else {
407417
sysfs = sysfsDrmDirectory
408418
devfs = devfsDriDirectory
419+
opts.resourcePrefix = ""
409420
}
410421
plugin := newDevicePlugin(sysfs, devfs, opts)
411422
manager := dpapi.NewManager(namespace, plugin)

0 commit comments

Comments
 (0)