From 3cd51a309eed5c6ce9584d0698deefb03844c7fa Mon Sep 17 00:00:00 2001
From: Tim Serong
Date: Mon, 21 Oct 2024 19:38:22 +1100
Subject: [PATCH] fix: Call `wipefs -a` in LonghornV2Provisioner.Format()

Format should really be a no-op for V2 disks given they just take the
whole device, but for NVMe devices where Longhorn decides to use the
nvme bdev driver, device activation will fail if there's an existing
filesystem on the device, so we need to make sure to wipe before use.

Without this, we'll potentially have devices stuck unschedulable with
an obscure error message like this:

Disk c1fd02d173a3f4b9176705e9b2e37d39(0003:03:00.0) on node altra is not ready: failed to generate disk config: error: rpc error: code = Internal desc = rpc error: code = Internal desc = failed to add disk block device: failed to create disk bdev: failed to attach NVMe disk 0003:03:00.0: error sending message, id 3126, method bdev_nvme_attach_controller, params {c1fd02d173a3f4b9176705e9b2e37d39 {PCIe 0003:03:00.0 } 30 2 15 disable}: {"code": -19,"message": "No such device"}

Related issue: https://github.com/harvester/harvester/issues/6828

Signed-off-by: Tim Serong
(cherry picked from commit b25858097ae3c56d555be134a3d8a3559040d2c3)
---
 pkg/provisioner/longhornv2.go | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/pkg/provisioner/longhornv2.go b/pkg/provisioner/longhornv2.go
index 2b7dee9d..788018b6 100644
--- a/pkg/provisioner/longhornv2.go
+++ b/pkg/provisioner/longhornv2.go
@@ -13,6 +13,7 @@ import (
 	diskv1 "github.com/harvester/node-disk-manager/pkg/apis/harvesterhci.io/v1beta1"
 	"github.com/harvester/node-disk-manager/pkg/block"
 	ctllonghornv1 "github.com/harvester/node-disk-manager/pkg/generated/controllers/longhorn.io/v1beta2"
+	"github.com/harvester/node-disk-manager/pkg/utils"
 )
 
 type LonghornV2Provisioner struct {
@@ -55,11 +56,15 @@ func NewLHV2Provisioner(
 	}, nil
 }
 
-// Format is a no-op for V2 disks, but we still need to return
-// isFormatComplete == true to indicate the disk is ready for use.
-func (p *LonghornV2Provisioner) Format(string) (isFormatComplete, isRequeueNeeded bool, err error) {
-	isFormatComplete = true
-	return
+// Format should really be a no-op for V2 disks given they just take the
+// whole device, but for NVMe devices where Longhorn decides to use the
+// nvme bdev driver, device activation will fail if there's an existing
+// filesystem on the device, so we need to make sure to wipe before use.
+func (p *LonghornV2Provisioner) Format(devPath string) (isFormatComplete, isRequeueNeeded bool, err error) {
+	if _, err = utils.NewExecutor().Execute("wipefs", []string{"-a", devPath}); err != nil {
+		return false, false, err
+	}
+	return true, false, nil
 }
 
 // UnFormat is a no-op for V2 disks