From 1e57cf7e9e752041dc13b71c72d76d0dff3692ac Mon Sep 17 00:00:00 2001 From: Tim Serong Date: Wed, 11 Sep 2024 15:21:30 +1000 Subject: [PATCH] fix: use BusPath to resolve dm devices to real devices If multipathd is running and has taken over a device, ResolvePersistentDevPath() will end up returning a "/dev/dm-*" device path, which we don't want. We want the real underyling device (e.g. "/dev/sda"). If we take a "/dev/dm-*" path and later update the blockdevice CR with it, we lose all the interesting DeviceStatus information, like the WWN. Happily, in this case, we can figure out the right path to use by checking "/dev/disk/by-path/" + the device's bus path. This has the added advantage of also working for block devices that have no WWN. Related issue: https://github.com/harvester/harvester/issues/6531 Signed-off-by: Tim Serong --- pkg/controller/blockdevice/controller.go | 3 - pkg/provisioner/common.go | 98 ++++++++++++++++++------ 2 files changed, 74 insertions(+), 27 deletions(-) diff --git a/pkg/controller/blockdevice/controller.go b/pkg/controller/blockdevice/controller.go index 8ef4c184..f06e3c79 100644 --- a/pkg/controller/blockdevice/controller.go +++ b/pkg/controller/blockdevice/controller.go @@ -137,9 +137,6 @@ func (c *Controller) OnBlockDeviceChange(_ string, device *diskv1.BlockDevice) ( if err != nil { return nil, err } - if devPath == "" { - return nil, fmt.Errorf("failed to resolve persistent dev path for block device %s", device.Name) - } logrus.Debugf("Checking to format device %s", device.Name) if formatted, requeue, err := provisionerInst.Format(devPath); !formatted { diff --git a/pkg/provisioner/common.go b/pkg/provisioner/common.go index faa3a1ce..145e8b48 100644 --- a/pkg/provisioner/common.go +++ b/pkg/provisioner/common.go @@ -5,6 +5,7 @@ import ( "fmt" "os" "path/filepath" + "strings" "sync" ghwutil "github.com/jaypipes/ghw/pkg/util" @@ -200,24 +201,83 @@ func convertMountStr(mountOP NeedMountUpdateOP) string { } } +// ResolvePersistentDevPath tries to determine the currently active short +// device path (e.g. "/dev/sda") for a given block device. When the scanner +// first finds a new block device, device.Spec.DevPath is set to the short +// device path at that time, and device.Status.DeviceStatus.Details is filled +// in with data that uniquely identifies the device (e.g.: WWN). It's possible +// that on subsequent reboots, the short path will change, for example if +// devices are added or removed, so we have this function to try to figure +// out the _current_ short device path based on the unique identifying +// information in device.Status.DeviceStatus.Details. func ResolvePersistentDevPath(device *diskv1.BlockDevice) (string, error) { switch device.Status.DeviceStatus.Details.DeviceType { case diskv1.DeviceTypeDisk: - // Disk naming priority. - // #1 WWN (REF: https://en.wikipedia.org/wiki/World_Wide_Name#Formats) - // #2 filesystem UUID (UUID) (REF: https://wiki.archlinux.org/title/Persistent_block_device_naming#by-uuid) - // #3 partition table UUID (PTUUID) (DEPRECATED) - // #4 PtUUID as UUID to query disk info (DEPRECATED) - // (NDM might reuse PtUUID as UUID to format a disk) - if wwn := device.Status.DeviceStatus.Details.WWN; valueExists(wwn) { - if device.Status.DeviceStatus.Details.StorageController == string(diskv1.StorageControllerNVMe) { - return filepath.EvalSymlinks("/dev/disk/by-id/nvme-" + wwn) + // The following closure is the original implementation to resolve disk + // device paths, but there is a problem: it multipathd has taken over a + // device, the calls to filepath.EvalSymlinks() with "/dev/disk/by-id/" + // or "/dev/disk/by-uuid" paths will actually return a "/dev/dm-*" + // device, which we don't want. We want the real underlying device + // (e.g. "/dev/sda"). If we take a "/dev/dm-*" path and later update + // the blockdevice CR with it, we lose all the interesting DeviceStatus + // information, like the WWN. + path, err := func() (string, error) { + // Disk naming priority. + // #1 WWN (REF: https://en.wikipedia.org/wiki/World_Wide_Name#Formats) + // #2 filesystem UUID (UUID) (REF: https://wiki.archlinux.org/title/Persistent_block_device_naming#by-uuid) + // #3 partition table UUID (PTUUID) (DEPRECATED) + // #4 PtUUID as UUID to query disk info (DEPRECATED) + // (NDM might reuse PtUUID as UUID to format a disk) + if wwn := device.Status.DeviceStatus.Details.WWN; valueExists(wwn) { + if device.Status.DeviceStatus.Details.StorageController == string(diskv1.StorageControllerNVMe) { + return filepath.EvalSymlinks("/dev/disk/by-id/nvme-" + wwn) + } + return filepath.EvalSymlinks("/dev/disk/by-id/wwn-" + wwn) } - return filepath.EvalSymlinks("/dev/disk/by-id/wwn-" + wwn) + if fsUUID := device.Status.DeviceStatus.Details.UUID; valueExists(fsUUID) { + path, err := filepath.EvalSymlinks("/dev/disk/by-uuid/" + fsUUID) + if err == nil { + return path, nil + } + if !errors.Is(err, os.ErrNotExist) { + return "", err + } + } + + if ptUUID := device.Status.DeviceStatus.Details.PtUUID; valueExists(ptUUID) { + path, err := block.GetDevPathByPTUUID(ptUUID) + if err != nil { + return "", err + } + if path != "" { + return path, nil + } + return filepath.EvalSymlinks("/dev/disk/by-uuid/" + ptUUID) + } + + // If we haven't resolved a path by now, it means the device has no WWN, + // no FSUUID and no PTUUID, but there is still one more thing we can try... + return "", nil + }() + + if err != nil { + return "", err + } + + // ...at this point, if there's no error, we've either got the device we're + // interested in (e.g. "/dev/sda", "/dev/nvme0n1", etc.), _or_ we've got a + // "/dev/dm-*" device, _or_ we've got no path... + if path != "" && !strings.HasPrefix(path, "/dev/dm-") { + logrus.Debugf("Resolved device path %s for %s", path, device.Name) + return path, nil } - if fsUUID := device.Status.DeviceStatus.Details.UUID; valueExists(fsUUID) { - path, err := filepath.EvalSymlinks("/dev/disk/by-uuid/" + fsUUID) + // ...in the latter two cases, we can try to resolve via "/dev/disk/by-path/...", + // which works for devices that don't have a WWN, and also in the dm case will + // return the path to the underlying device that we're actually interested in. + if busPath := device.Status.DeviceStatus.Details.BusPath; valueExists(busPath) { + path, err = filepath.EvalSymlinks("/dev/disk/by-path/" + busPath) if err == nil { + logrus.Debugf("Resolved BusPath %s to %s for %s", busPath, path, device.Name) return path, nil } if !errors.Is(err, os.ErrNotExist) { @@ -225,17 +285,7 @@ func ResolvePersistentDevPath(device *diskv1.BlockDevice) (string, error) { } } - if ptUUID := device.Status.DeviceStatus.Details.PtUUID; valueExists(ptUUID) { - path, err := block.GetDevPathByPTUUID(ptUUID) - if err != nil { - return "", err - } - if path != "" { - return path, nil - } - return filepath.EvalSymlinks("/dev/disk/by-uuid/" + ptUUID) - } - return "", fmt.Errorf("WWN/UUID/PTUUID was not found on device %s", device.Name) + return "", fmt.Errorf("WWN/UUID/PTUUID/BusPath was not found on device %s", device.Name) case diskv1.DeviceTypePart: partUUID := device.Status.DeviceStatus.Details.PartUUID if partUUID == "" { @@ -243,6 +293,6 @@ func ResolvePersistentDevPath(device *diskv1.BlockDevice) (string, error) { } return filepath.EvalSymlinks("/dev/disk/by-partuuid/" + partUUID) default: - return "", nil + return "", fmt.Errorf("failed to resolve persistent dev path for block device %s", device.Name) } }