From fad55a8611ff6231546ff16d5194116cf7d31b11 Mon Sep 17 00:00:00 2001 From: Marcelo Guerrero Date: Mon, 30 Oct 2023 16:58:16 +0100 Subject: [PATCH] Verify status changes on managed interfaces Users could modify the settings of VFs which have been configured by the sriov operator. This PR starts the reonciliation loop when this happens. Signed-off-by: Marcelo Guerrero --- pkg/daemon/daemon.go | 113 +++++++++++++++--------- pkg/plugins/fake/fake_plugin.go | 4 + pkg/plugins/generic/generic_plugin.go | 25 ++++++ pkg/plugins/intel/intel_plugin.go | 8 +- pkg/plugins/k8s/k8s_plugin.go | 6 ++ pkg/plugins/mellanox/mellanox_plugin.go | 6 ++ pkg/plugins/plugin.go | 10 ++- pkg/plugins/virtual/virtual_plugin.go | 6 ++ 8 files changed, 131 insertions(+), 47 deletions(-) diff --git a/pkg/daemon/daemon.go b/pkg/daemon/daemon.go index 3ec5719240..1f1fbd9d7b 100644 --- a/pkg/daemon/daemon.go +++ b/pkg/daemon/daemon.go @@ -455,8 +455,8 @@ func (dn *Daemon) nodeStateSyncHandler() error { log.Log.Error(err, "nodeStateSyncHandler(): Failed to fetch node state", "name", dn.name) return err } - latest := latestState.GetGeneration() - log.Log.V(0).Info("nodeStateSyncHandler(): new generation", "generation", latest) + + log.Log.V(0).Info("nodeStateSyncHandler(): new generation", "generation", latestState.GetGeneration()) if utils.ClusterType == utils.ClusterTypeOpenshift && !dn.openshiftContext.IsHypershift() { if err = dn.getNodeMachinePool(); err != nil { @@ -464,7 +464,17 @@ func (dn *Daemon) nodeStateSyncHandler() error { } } - if dn.nodeState.GetGeneration() == latest { + // load plugins if it has not loaded + if len(dn.enabledPlugins) == 0 { + dn.enabledPlugins, err = enablePlugins(dn.platform, dn.useSystemdService, latestState, dn.hostManager, dn.storeManager) + if err != nil { + log.Log.Error(err, "nodeStateSyncHandler(): failed to enable vendor plugins") + return err + } + } + + // Do not reconcile if spec hasn't been updated or if status of interfaces have not been externally modified. + if dn.nodeState.GetGeneration() == latestState.GetGeneration() { if dn.useSystemdService { serviceEnabled, err := dn.serviceManager.IsServiceEnabled(systemd.SriovServicePath) if err != nil { @@ -497,38 +507,14 @@ func (dn *Daemon) nodeStateSyncHandler() error { return nil } } - log.Log.V(0).Info("nodeStateSyncHandler(): Interface not changed") - if latestState.Status.LastSyncError != "" || - latestState.Status.SyncStatus != syncStatusSucceeded { - dn.refreshCh <- Message{ - syncStatus: syncStatusSucceeded, - lastSyncError: "", - } - // wait for writer to refresh the status - <-dn.syncCh - } - - return nil } - if latestState.GetGeneration() == 1 && len(latestState.Spec.Interfaces) == 0 { - err = dn.storeManager.ClearPCIAddressFolder() - if err != nil { - log.Log.Error(err, "failed to clear the PCI address configuration") - return err - } - - log.Log.V(0).Info( - "nodeStateSyncHandler(): interface policy spec not yet set by controller for sriovNetworkNodeState", - "name", latestState.Name) - if latestState.Status.SyncStatus != "Succeeded" { - dn.refreshCh <- Message{ - syncStatus: "Succeeded", - lastSyncError: "", - } - // wait for writer to refresh status - <-dn.syncCh - } + skip, err := dn.skipReconciliation(latestState) + if err != nil { + return err + } + // Reconcile only when there are changes in the spec or status of managed VFs. + if skip { return nil } @@ -549,15 +535,6 @@ func (dn *Daemon) nodeStateSyncHandler() error { } latestState.Status = updatedState.Status - // load plugins if it has not loaded - if len(dn.enabledPlugins) == 0 { - dn.enabledPlugins, err = enablePlugins(dn.platform, dn.useSystemdService, latestState, dn.hostManager, dn.storeManager) - if err != nil { - log.Log.Error(err, "nodeStateSyncHandler(): failed to enable vendor plugins") - return err - } - } - reqReboot := false reqDrain := false @@ -724,6 +701,58 @@ func (dn *Daemon) nodeStateSyncHandler() error { return nil } +func (dn *Daemon) skipReconciliation(latestState *sriovnetworkv1.SriovNetworkNodeState) (bool, error) { + var err error + + // Skip when SriovNetworkNodeState object has just been created. + if latestState.GetGeneration() == 1 && len(latestState.Spec.Interfaces) == 0 { + err = dn.storeManager.ClearPCIAddressFolder() + if err != nil { + log.Log.Error(err, "failed to clear the PCI address configuration") + return false, err + } + + log.Log.V(0).Info( + "nodeStateSyncHandler(): interface policy spec not yet set by controller for sriovNetworkNodeState", + "name", latestState.Name) + if latestState.Status.SyncStatus != "Succeeded" { + dn.refreshCh <- Message{ + syncStatus: "Succeeded", + lastSyncError: "", + } + // wait for writer to refresh status + <-dn.syncCh + } + return true, nil + } + + // Verify changes in the spec of the SriovNetworkNodeState CR. + if dn.nodeState.GetGeneration() == latestState.GetGeneration() { + genericPlugin, ok := dn.enabledPlugins[GenericPluginName] + if ok { + // Verify changes in the status of the SriovNetworkNodeState CR. + if genericPlugin.OnNodeStatusChange(latestState) { + return false, nil + } + } + + log.Log.V(0).Info("nodeStateSyncHandler(): Interface not changed") + if latestState.Status.LastSyncError != "" || + latestState.Status.SyncStatus != syncStatusSucceeded { + dn.refreshCh <- Message{ + syncStatus: syncStatusSucceeded, + lastSyncError: "", + } + // wait for writer to refresh the status + <-dn.syncCh + } + + return true, nil + } + + return false, nil +} + func (dn *Daemon) nodeHasAnnotation(annoKey string, value string) bool { // Check if node already contains annotation if anno, ok := dn.node.Annotations[annoKey]; ok && (anno == value) { diff --git a/pkg/plugins/fake/fake_plugin.go b/pkg/plugins/fake/fake_plugin.go index ca85768c17..8956ebaf32 100644 --- a/pkg/plugins/fake/fake_plugin.go +++ b/pkg/plugins/fake/fake_plugin.go @@ -19,6 +19,10 @@ func (f *FakePlugin) OnNodeStateChange(new *sriovnetworkv1.SriovNetworkNodeState return false, false, nil } +func (f *FakePlugin) OnNodeStatusChange(new *sriovnetworkv1.SriovNetworkNodeState) bool { + return false +} + func (f *FakePlugin) Apply() error { return nil } diff --git a/pkg/plugins/generic/generic_plugin.go b/pkg/plugins/generic/generic_plugin.go index e21b0ff588..f899c3c6d4 100644 --- a/pkg/plugins/generic/generic_plugin.go +++ b/pkg/plugins/generic/generic_plugin.go @@ -124,6 +124,31 @@ func (p *GenericPlugin) OnNodeStateChange(new *sriovnetworkv1.SriovNetworkNodeSt return } +// OnNodeStatusChange verify whether SriovNetworkNodeState CR status present changes on configured VFs. +func (p *GenericPlugin) OnNodeStatusChange(new *sriovnetworkv1.SriovNetworkNodeState) bool { + log.Log.Info("generic-plugin OnNodeStatusChange()") + + changed := false + for _, iface := range new.Spec.Interfaces { + found := false + for _, ifaceStatus := range new.Status.Interfaces { + if iface.PciAddress == ifaceStatus.PciAddress { + found = true + if utils.NeedUpdate(&iface, &ifaceStatus) { + changed = true + log.Log.Info("OnNodeStatusChange(): status changed for interface", "address", iface.PciAddress) + } + break + } + } + + if !found { + log.Log.Info("OnNodeStatusChange(): no status found for interface", "address", iface.PciAddress) + } + } + return changed +} + func (p *GenericPlugin) syncDriverState() error { for _, driverState := range p.DriverStateMap { if !driverState.DriverLoaded && driverState.NeedDriverFunc(p.DesireState, driverState) { diff --git a/pkg/plugins/intel/intel_plugin.go b/pkg/plugins/intel/intel_plugin.go index bf032652e7..e7af908263 100644 --- a/pkg/plugins/intel/intel_plugin.go +++ b/pkg/plugins/intel/intel_plugin.go @@ -34,11 +34,17 @@ func (p *IntelPlugin) Spec() string { } // OnNodeStateChange Invoked when SriovNetworkNodeState CR is created or updated, return if need dain and/or reboot node -func (p *IntelPlugin) OnNodeStateChange(new *sriovnetworkv1.SriovNetworkNodeState) (needDrain bool, needReboot bool, err error) { +func (p *IntelPlugin) OnNodeStateChange(*sriovnetworkv1.SriovNetworkNodeState) (needDrain bool, needReboot bool, err error) { log.Log.Info("intel-plugin OnNodeStateChange()") return false, false, nil } +// OnNodeStatusChange verify whether SriovNetworkNodeState CR status present changes on configured VFs. +func (p *IntelPlugin) OnNodeStatusChange(*sriovnetworkv1.SriovNetworkNodeState) bool { + log.Log.Info("intel-plugin OnNodeStatusChange()") + return false +} + // Apply config change func (p *IntelPlugin) Apply() error { log.Log.Info("intel-plugin Apply()") diff --git a/pkg/plugins/k8s/k8s_plugin.go b/pkg/plugins/k8s/k8s_plugin.go index 4cbcc818eb..4d6961c49b 100644 --- a/pkg/plugins/k8s/k8s_plugin.go +++ b/pkg/plugins/k8s/k8s_plugin.go @@ -163,6 +163,12 @@ func (p *K8sPlugin) OnNodeStateChange(new *sriovnetworkv1.SriovNetworkNodeState) return } +// OnNodeStatusChange verify whether SriovNetworkNodeState CR status present changes on configured VFs. +func (p *K8sPlugin) OnNodeStatusChange(*sriovnetworkv1.SriovNetworkNodeState) bool { + log.Log.Info("k8s-plugin OnNodeStatusChange()") + return false +} + // Apply config change func (p *K8sPlugin) Apply() error { log.Log.Info("k8s-plugin Apply()") diff --git a/pkg/plugins/mellanox/mellanox_plugin.go b/pkg/plugins/mellanox/mellanox_plugin.go index 4be72f1a6c..ca38c61451 100644 --- a/pkg/plugins/mellanox/mellanox_plugin.go +++ b/pkg/plugins/mellanox/mellanox_plugin.go @@ -180,6 +180,12 @@ func (p *MellanoxPlugin) OnNodeStateChange(new *sriovnetworkv1.SriovNetworkNodeS return } +// OnNodeStatusChange verify whether SriovNetworkNodeState CR status present changes on configured VFs. +func (p *MellanoxPlugin) OnNodeStatusChange(*sriovnetworkv1.SriovNetworkNodeState) bool { + log.Log.Info("mellanox-Plugin OnNodeStatusChange()") + return false +} + // Apply config change func (p *MellanoxPlugin) Apply() error { if utils.IsKernelLockdownMode(false) { diff --git a/pkg/plugins/plugin.go b/pkg/plugins/plugin.go index 1723ca6109..083346014b 100644 --- a/pkg/plugins/plugin.go +++ b/pkg/plugins/plugin.go @@ -5,12 +5,14 @@ import ( ) type VendorPlugin interface { - // Return the name of plugin + // Name returns the name of plugin Name() string - // Return the SpecVersion followed by plugin + // Spec returns the SpecVersion followed by plugin Spec() string - // Invoked when SriovNetworkNodeState CR is created or updated, return if need dain and/or reboot node - OnNodeStateChange(new *sriovnetworkv1.SriovNetworkNodeState) (bool, bool, error) + // OnNodeStateChange is invoked when SriovNetworkNodeState CR is created or updated, return if need dain and/or reboot node + OnNodeStateChange(*sriovnetworkv1.SriovNetworkNodeState) (bool, bool, error) // Apply config change Apply() error + // OnNodeStatusChange verify whether SriovNetworkNodeState CR status present changes on configured VFs. + OnNodeStatusChange(*sriovnetworkv1.SriovNetworkNodeState) bool } diff --git a/pkg/plugins/virtual/virtual_plugin.go b/pkg/plugins/virtual/virtual_plugin.go index 9fb6cb7745..8172da8fc4 100644 --- a/pkg/plugins/virtual/virtual_plugin.go +++ b/pkg/plugins/virtual/virtual_plugin.go @@ -69,6 +69,12 @@ func (p *VirtualPlugin) OnNodeStateChange(new *sriovnetworkv1.SriovNetworkNodeSt return } +// OnNodeStatusChange verify whether SriovNetworkNodeState CR status present changes on configured VFs. +func (p *VirtualPlugin) OnNodeStatusChange(*sriovnetworkv1.SriovNetworkNodeState) bool { + log.Log.Info("virtual-plugin OnNodeStatusChange()") + return false +} + // Apply config change func (p *VirtualPlugin) Apply() error { log.Log.Info("virtual-plugin Apply()", "desired-state", p.DesireState.Spec)