diff --git a/pkg/daemon/daemon.go b/pkg/daemon/daemon.go index 8e8e0a10fb..9e3b2ef9cc 100644 --- a/pkg/daemon/daemon.go +++ b/pkg/daemon/daemon.go @@ -561,10 +561,13 @@ func (dn *Daemon) shouldSkipReconciliation(latestState *sriovnetworkv1.SriovNetw // Verify changes in the spec of the SriovNetworkNodeState CR. if dn.currentNodeState.GetGeneration() == latestState.GetGeneration() && !dn.isDrainCompleted() { - genericPlugin, ok := dn.loadedPlugins[GenericPluginName] - if ok { + for _, p := range dn.loadedPlugins { // Verify changes in the status of the SriovNetworkNodeState CR. - if genericPlugin.CheckStatusChanges(latestState) { + changed, err := p.CheckStatusChanges(latestState) + if err != nil { + return false, err + } + if changed { return false, nil } } diff --git a/pkg/plugins/fake/fake_plugin.go b/pkg/plugins/fake/fake_plugin.go index 11c30d6eac..0aa218f284 100644 --- a/pkg/plugins/fake/fake_plugin.go +++ b/pkg/plugins/fake/fake_plugin.go @@ -21,8 +21,8 @@ func (f *FakePlugin) OnNodeStateChange(new *sriovnetworkv1.SriovNetworkNodeState return false, false, nil } -func (f *FakePlugin) CheckStatusChanges(new *sriovnetworkv1.SriovNetworkNodeState) bool { - return false +func (f *FakePlugin) CheckStatusChanges(new *sriovnetworkv1.SriovNetworkNodeState) (bool, error) { + return false, nil } func (f *FakePlugin) Apply() error { diff --git a/pkg/plugins/generic/generic_plugin.go b/pkg/plugins/generic/generic_plugin.go index 20ef4a49c7..64619838fa 100644 --- a/pkg/plugins/generic/generic_plugin.go +++ b/pkg/plugins/generic/generic_plugin.go @@ -140,28 +140,33 @@ func (p *GenericPlugin) OnNodeStateChange(new *sriovnetworkv1.SriovNetworkNodeSt } // CheckStatusChanges verify whether SriovNetworkNodeState CR status present changes on configured VFs. -func (p *GenericPlugin) CheckStatusChanges(new *sriovnetworkv1.SriovNetworkNodeState) bool { +func (p *GenericPlugin) CheckStatusChanges(current *sriovnetworkv1.SriovNetworkNodeState) (bool, error) { log.Log.Info("generic-plugin CheckStatusChanges()") - changed := false - for _, iface := range new.Spec.Interfaces { + for _, iface := range current.Spec.Interfaces { found := false - for _, ifaceStatus := range new.Status.Interfaces { + for _, ifaceStatus := range current.Status.Interfaces { if iface.PciAddress == ifaceStatus.PciAddress && !iface.ExternallyManaged { found = true if sriovnetworkv1.NeedToUpdateSriov(&iface, &ifaceStatus) { - changed = true log.Log.Info("CheckStatusChanges(): status changed for interface", "address", iface.PciAddress) + return true, nil } break } } - if !found { log.Log.Info("CheckStatusChanges(): no status found for interface", "address", iface.PciAddress) } } - return changed + + missingKernelArgs, err := p.getMissingKernelArgs() + if err != nil { + log.Log.Error(err, "generic-plugin CheckStatusChanges(): failed to verify missing kernel arguments") + return false, err + } + + return len(missingKernelArgs) != 0, nil } func (p *GenericPlugin) syncDriverState() error { @@ -266,37 +271,48 @@ func (p *GenericPlugin) addToDesiredKernelArgs(karg string) { } } -// syncDesiredKernelArgs Should be called to set all the kernel arguments. Returns bool if node update is needed. -func (p *GenericPlugin) syncDesiredKernelArgs() (bool, error) { - needReboot := false +// getMissingKernelArgs gets Kernel arguments that have not been set. +func (p *GenericPlugin) getMissingKernelArgs() ([]string, error) { + missingArgs := make([]string, 0, len(p.DesiredKernelArgs)) if len(p.DesiredKernelArgs) == 0 { - return false, nil + return nil, nil } + kargs, err := p.helpers.GetCurrentKernelArgs() if err != nil { - return false, err + return nil, err } - for desiredKarg, attempted := range p.DesiredKernelArgs { - set := p.helpers.IsKernelArgsSet(kargs, desiredKarg) - if !set { - if attempted { - log.Log.V(2).Info("generic plugin syncDesiredKernelArgs(): previously attempted to set kernel arg", - "karg", desiredKarg) - } - // There is a case when we try to set the kernel argument here, the daemon could decide to not reboot because - // the daemon encountered a potentially one-time error. However we always want to make sure that the kernel - // argument is set once the daemon goes through node state sync again. - update, err := setKernelArg(desiredKarg) - if err != nil { - log.Log.Error(err, "generic plugin syncDesiredKernelArgs(): fail to set kernel arg", "karg", desiredKarg) - return false, err - } - if update { - needReboot = true - log.Log.V(2).Info("generic plugin syncDesiredKernelArgs(): need reboot for setting kernel arg", "karg", desiredKarg) - } - p.DesiredKernelArgs[desiredKarg] = true + + for desiredKarg := range p.DesiredKernelArgs { + if !p.helpers.IsKernelArgsSet(kargs, desiredKarg) { + missingArgs = append(missingArgs, desiredKarg) + } + } + return missingArgs, nil +} + +// syncDesiredKernelArgs should be called to set all the kernel arguments. Returns bool if node update is needed. +func (p *GenericPlugin) syncDesiredKernelArgs(kargs []string) (bool, error) { + needReboot := false + + for _, karg := range kargs { + if p.DesiredKernelArgs[karg] { + log.Log.V(2).Info("generic-plugin syncDesiredKernelArgs(): previously attempted to set kernel arg", + "karg", karg) + } + // There is a case when we try to set the kernel argument here, the daemon could decide to not reboot because + // the daemon encountered a potentially one-time error. However we always want to make sure that the kernel + // argument is set once the daemon goes through node state sync again. + update, err := setKernelArg(karg) + if err != nil { + log.Log.Error(err, "generic-plugin syncDesiredKernelArgs(): fail to set kernel arg", "karg", karg) + return false, err + } + if update { + needReboot = true + log.Log.V(2).Info("generic-plugin syncDesiredKernelArgs(): need reboot for setting kernel arg", "karg", karg) } + p.DesiredKernelArgs[karg] = true } return needReboot, nil } @@ -365,19 +381,28 @@ func (p *GenericPlugin) addVfioDesiredKernelArg(state *sriovnetworkv1.SriovNetwo } } -func (p *GenericPlugin) needRebootNode(state *sriovnetworkv1.SriovNetworkNodeState) (needReboot bool, err error) { - needReboot = false +func (p *GenericPlugin) needRebootNode(state *sriovnetworkv1.SriovNetworkNodeState) (bool, error) { + needReboot := false + p.addVfioDesiredKernelArg(state) - updateNode, err := p.syncDesiredKernelArgs() + missingKernelArgs, err := p.getMissingKernelArgs() if err != nil { - log.Log.Error(err, "generic plugin needRebootNode(): failed to set the desired kernel arguments") + log.Log.Error(err, "generic-plugin needRebootNode(): failed to verify missing kernel arguments") return false, err } - if updateNode { - log.Log.V(2).Info("generic plugin needRebootNode(): need reboot for updating kernel arguments") - needReboot = true + + if len(missingKernelArgs) != 0 { + needReboot, err = p.syncDesiredKernelArgs(missingKernelArgs) + if err != nil { + log.Log.Error(err, "generic-plugin needRebootNode(): failed to set the desired kernel arguments") + return false, err + } + if needReboot { + log.Log.V(2).Info("generic-plugin needRebootNode(): need reboot for updating kernel arguments") + } } + return needReboot, nil } diff --git a/pkg/plugins/generic/generic_plugin_test.go b/pkg/plugins/generic/generic_plugin_test.go index 6b75535bb1..d2e9c1bbee 100644 --- a/pkg/plugins/generic/generic_plugin_test.go +++ b/pkg/plugins/generic/generic_plugin_test.go @@ -8,6 +8,7 @@ import ( . "github.com/onsi/gomega" sriovnetworkv1 "github.com/k8snetworkplumbingwg/sriov-network-operator/api/v1" + "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/consts" mock_helper "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/helper/mock" plugin "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/plugins" ) @@ -178,12 +179,12 @@ var _ = Describe("Generic plugin", func() { }, } - changed := genericPlugin.CheckStatusChanges(networkNodeState) + changed, err := genericPlugin.CheckStatusChanges(networkNodeState) Expect(err).ToNot(HaveOccurred()) Expect(changed).To(BeFalse()) }) - It("should detect changes on status", func() { + It("should detect changes on status due to spec mismatch", func() { networkNodeState := &sriovnetworkv1.SriovNetworkNodeState{ Spec: sriovnetworkv1.SriovNetworkNodeStateSpec{ Interfaces: sriovnetworkv1.Interfaces{{ @@ -232,7 +233,65 @@ var _ = Describe("Generic plugin", func() { }, } - changed := genericPlugin.CheckStatusChanges(networkNodeState) + changed, err := genericPlugin.CheckStatusChanges(networkNodeState) + Expect(err).ToNot(HaveOccurred()) + Expect(changed).To(BeTrue()) + }) + + It("should detect changes on status due to missing kernel args", func() { + networkNodeState := &sriovnetworkv1.SriovNetworkNodeState{ + Spec: sriovnetworkv1.SriovNetworkNodeStateSpec{ + Interfaces: sriovnetworkv1.Interfaces{{ + PciAddress: "0000:00:00.0", + NumVfs: 2, + Mtu: 1500, + VfGroups: []sriovnetworkv1.VfGroup{{ + DeviceType: "vfio-pci", + PolicyName: "policy-1", + ResourceName: "resource-1", + VfRange: "0-1", + Mtu: 1500, + }}}}, + }, + Status: sriovnetworkv1.SriovNetworkNodeStateStatus{ + Interfaces: sriovnetworkv1.InterfaceExts{{ + PciAddress: "0000:00:00.0", + NumVfs: 2, + TotalVfs: 2, + DeviceID: "159b", + Vendor: "8086", + Name: "sriovif1", + Mtu: 1500, + Mac: "0c:42:a1:55:ee:46", + Driver: "ice", + EswitchMode: "legacy", + LinkSpeed: "25000 Mb/s", + LinkType: "ETH", + VFs: []sriovnetworkv1.VirtualFunction{{ + PciAddress: "0000:00:00.1", + DeviceID: "1889", + Vendor: "8086", + VfID: 0, + Driver: "vfio-pci", + }, { + PciAddress: "0000:00:00.2", + DeviceID: "1889", + Vendor: "8086", + VfID: 1, + Driver: "vfio-pci", + }}, + }}, + }, + } + + // Load required kernel args. + genericPlugin.(*GenericPlugin).addVfioDesiredKernelArg(networkNodeState) + + hostHelper.EXPECT().GetCurrentKernelArgs().Return("", nil) + hostHelper.EXPECT().IsKernelArgsSet("", consts.KernelArgIntelIommu).Return(false) + hostHelper.EXPECT().IsKernelArgsSet("", consts.KernelArgIommuPt).Return(false) + + changed, err := genericPlugin.CheckStatusChanges(networkNodeState) Expect(err).ToNot(HaveOccurred()) Expect(changed).To(BeTrue()) }) diff --git a/pkg/plugins/intel/intel_plugin.go b/pkg/plugins/intel/intel_plugin.go index a9174bdba2..e88a186c94 100644 --- a/pkg/plugins/intel/intel_plugin.go +++ b/pkg/plugins/intel/intel_plugin.go @@ -41,8 +41,8 @@ func (p *IntelPlugin) OnNodeStateChange(*sriovnetworkv1.SriovNetworkNodeState) ( } // OnNodeStatusChange verify whether SriovNetworkNodeState CR status present changes on configured VFs. -func (p *IntelPlugin) CheckStatusChanges(*sriovnetworkv1.SriovNetworkNodeState) bool { - return false +func (p *IntelPlugin) CheckStatusChanges(*sriovnetworkv1.SriovNetworkNodeState) (bool, error) { + return false, nil } // Apply config change diff --git a/pkg/plugins/k8s/k8s_plugin.go b/pkg/plugins/k8s/k8s_plugin.go index c2dad85b56..dbc09a9f8e 100644 --- a/pkg/plugins/k8s/k8s_plugin.go +++ b/pkg/plugins/k8s/k8s_plugin.go @@ -155,8 +155,8 @@ func (p *K8sPlugin) OnNodeStateChange(new *sriovnetworkv1.SriovNetworkNodeState) } // OnNodeStatusChange verify whether SriovNetworkNodeState CR status present changes on configured VFs. -func (p *K8sPlugin) CheckStatusChanges(*sriovnetworkv1.SriovNetworkNodeState) bool { - return false +func (p *K8sPlugin) CheckStatusChanges(*sriovnetworkv1.SriovNetworkNodeState) (bool, error) { + return false, nil } // Apply config change diff --git a/pkg/plugins/mellanox/mellanox_plugin.go b/pkg/plugins/mellanox/mellanox_plugin.go index aabc4073ac..8043494057 100644 --- a/pkg/plugins/mellanox/mellanox_plugin.go +++ b/pkg/plugins/mellanox/mellanox_plugin.go @@ -172,8 +172,8 @@ func (p *MellanoxPlugin) OnNodeStateChange(new *sriovnetworkv1.SriovNetworkNodeS } // OnNodeStatusChange verify whether SriovNetworkNodeState CR status present changes on configured VFs. -func (p *MellanoxPlugin) CheckStatusChanges(*sriovnetworkv1.SriovNetworkNodeState) bool { - return false +func (p *MellanoxPlugin) CheckStatusChanges(*sriovnetworkv1.SriovNetworkNodeState) (bool, error) { + return false, nil } // Apply config change diff --git a/pkg/plugins/mock/mock_plugin.go b/pkg/plugins/mock/mock_plugin.go index 044d30f0e6..a6ae38202f 100644 --- a/pkg/plugins/mock/mock_plugin.go +++ b/pkg/plugins/mock/mock_plugin.go @@ -49,11 +49,12 @@ func (mr *MockVendorPluginMockRecorder) Apply() *gomock.Call { } // CheckStatusChanges mocks base method. -func (m *MockVendorPlugin) CheckStatusChanges(arg0 *v1.SriovNetworkNodeState) bool { +func (m *MockVendorPlugin) CheckStatusChanges(arg0 *v1.SriovNetworkNodeState) (bool, error) { m.ctrl.T.Helper() ret := m.ctrl.Call(m, "CheckStatusChanges", arg0) ret0, _ := ret[0].(bool) - return ret0 + ret1, _ := ret[1].(error) + return ret0, ret1 } // CheckStatusChanges indicates an expected call of CheckStatusChanges. diff --git a/pkg/plugins/plugin.go b/pkg/plugins/plugin.go index 3136dfd46f..830f7dd681 100644 --- a/pkg/plugins/plugin.go +++ b/pkg/plugins/plugin.go @@ -15,5 +15,5 @@ type VendorPlugin interface { // Apply config change Apply() error // CheckStatusChanges checks status changes on the SriovNetworkNodeState CR for configured VFs. - CheckStatusChanges(*sriovnetworkv1.SriovNetworkNodeState) bool + CheckStatusChanges(*sriovnetworkv1.SriovNetworkNodeState) (bool, error) } diff --git a/pkg/plugins/virtual/virtual_plugin.go b/pkg/plugins/virtual/virtual_plugin.go index cd43b120b8..211cbee2c5 100644 --- a/pkg/plugins/virtual/virtual_plugin.go +++ b/pkg/plugins/virtual/virtual_plugin.go @@ -68,8 +68,8 @@ func (p *VirtualPlugin) OnNodeStateChange(new *sriovnetworkv1.SriovNetworkNodeSt } // OnNodeStatusChange verify whether SriovNetworkNodeState CR status present changes on configured VFs. -func (p *VirtualPlugin) CheckStatusChanges(*sriovnetworkv1.SriovNetworkNodeState) bool { - return false +func (p *VirtualPlugin) CheckStatusChanges(*sriovnetworkv1.SriovNetworkNodeState) (bool, error) { + return false, nil } // Apply config change