Skip to content

Commit

Permalink
reset admission check after deactivation (#3350)
Browse files Browse the repository at this point in the history
* reset admission check after deactivation

Signed-off-by: Kevin <kpostlet@redhat.com>

* adjust unit tests

Signed-off-by: Kevin <kpostlet@redhat.com>

* use deactivation as prefix

Signed-off-by: Kevin <kpostlet@redhat.com>

* use realClock in admissionCheck package

Signed-off-by: Kevin <kpostlet@redhat.com>

* remove rejected check because it's transitory

Signed-off-by: Kevin <kpostlet@redhat.com>

* swap order to avoid short circuit

Signed-off-by: Kevin <kpostlet@redhat.com>

* dont duplicate eviction function

Signed-off-by: Kevin <kpostlet@redhat.com>

* address comments

Signed-off-by: Kevin <kpostlet@redhat.com>

* add unit test for admission check reset

Signed-off-by: Kevin <kpostlet@redhat.com>

* adjust test description to reflect transitory nature

Signed-off-by: Kevin <kpostlet@redhat.com>

---------

Signed-off-by: Kevin <kpostlet@redhat.com>
  • Loading branch information
KPostOffice authored Nov 12, 2024
1 parent f9460bf commit 43b7374
Show file tree
Hide file tree
Showing 6 changed files with 142 additions and 66 deletions.
12 changes: 9 additions & 3 deletions pkg/controller/core/workload_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,7 @@ func (r *WorkloadReconciler) Reconcile(ctx context.Context, req ctrl.Request) (c
wl.Status.RequeueState = nil
updated = true
}
updated = workload.ResetChecksOnEviction(&wl, r.clock.Now()) || updated
if updated {
if err := workload.ApplyAdmissionStatus(ctx, r.client, &wl, true); err != nil {
return ctrl.Result{}, fmt.Errorf("setting eviction: %w", err)
Expand Down Expand Up @@ -368,10 +369,15 @@ func (r *WorkloadReconciler) reconcileCheckBasedEviction(ctx context.Context, wl
log := ctrl.LoggerFrom(ctx)
log.V(3).Info("Workload is evicted due to admission checks")
if workload.HasRejectedChecks(wl) {
wl.Spec.Active = ptr.To(false)
if err := r.client.Update(ctx, wl); err != nil {
return false, err
var rejectedCheckNames []string
for _, check := range workload.RejectedChecks(wl) {
rejectedCheckNames = append(rejectedCheckNames, check.Name)
}
workload.SetDeactivationTarget(wl, kueue.WorkloadEvictedByAdmissionCheck, fmt.Sprintf("Admission check(s): %v, were rejected", rejectedCheckNames))
if err := workload.ApplyAdmissionStatus(ctx, r.client, wl, true); err != nil {
return false, client.IgnoreNotFound(err)
}
log.V(3).Info("Workload is evicted due to rejected admission checks", "workload", klog.KObj(wl), "rejectedChecks", rejectedCheckNames)
rejectedCheck := workload.RejectedChecks(wl)[0]
r.recorder.Eventf(wl, corev1.EventTypeWarning, "AdmissionCheckRejected", "Deactivating workload because AdmissionCheck for %v was Rejected: %s", rejectedCheck.Name, rejectedCheck.Message)
return true, nil
Expand Down
103 changes: 94 additions & 9 deletions pkg/controller/core/workload_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -470,11 +470,24 @@ func TestReconcile(t *testing.T) {
wantWorkload: utiltesting.MakeWorkload("wl", "ns").
ControllerReference(batchv1.SchemeGroupVersion.WithKind("Job"), "ownername", "owneruid").
ReserveQuota(utiltesting.MakeAdmission("q1").Obj()).
Active(false).
AdmissionCheck(kueue.AdmissionCheckState{
Name: "check",
State: kueue.CheckStateRejected,
}).
Conditions(
metav1.Condition{
Type: kueue.WorkloadQuotaReserved,
Status: metav1.ConditionTrue,
Reason: "AdmittedByTest",
Message: "Admitted by ClusterQueue q1",
},
metav1.Condition{
Type: kueue.WorkloadDeactivationTarget,
Status: metav1.ConditionTrue,
Reason: "AdmissionCheck",
Message: "Admission check(s): [check], were rejected",
},
).
Obj(),
wantEvents: []utiltesting.EventRecord{
{
Expand All @@ -498,24 +511,31 @@ func TestReconcile(t *testing.T) {
wantWorkload: utiltesting.MakeWorkload("wl", "ns").
ReserveQuota(utiltesting.MakeAdmission("q1").Obj()).
Admitted(true).
Active(false).
ControllerReference(batchv1.SchemeGroupVersion.WithKind("Job"), "ownername", "owneruid").
AdmissionCheck(kueue.AdmissionCheckState{
Name: "check",
State: kueue.CheckStateRejected,
}).
Conditions(metav1.Condition{
Type: kueue.WorkloadQuotaReserved,
Status: metav1.ConditionTrue,
Reason: "AdmittedByTest",
Message: "Admitted by ClusterQueue q1",
},
Conditions(
metav1.Condition{
Type: kueue.WorkloadQuotaReserved,
Status: metav1.ConditionTrue,
Reason: "AdmittedByTest",
Message: "Admitted by ClusterQueue q1",
},
metav1.Condition{
Type: kueue.WorkloadAdmitted,
Status: metav1.ConditionTrue,
Reason: "ByTest",
Message: "Admitted by ClusterQueue q1",
}).
},
metav1.Condition{
Type: kueue.WorkloadDeactivationTarget,
Status: metav1.ConditionTrue,
Reason: "AdmissionCheck",
Message: "Admission check(s): [check], were rejected",
},
).
Obj(),
wantEvents: []utiltesting.EventRecord{
{
Expand All @@ -526,6 +546,71 @@ func TestReconcile(t *testing.T) {
},
},
},
"workload with deactivation target condition should be deactivated and admission checks reset": {
workload: utiltesting.MakeWorkload("wl", "ns").
ReserveQuota(utiltesting.MakeAdmission("q1").Obj()).
Admitted(true).
Active(false).
ControllerReference(batchv1.SchemeGroupVersion.WithKind("Job"), "ownername", "owneruid").
AdmissionChecks(
kueue.AdmissionCheckState{
Name: "check-1",
State: kueue.CheckStateRejected,
},
kueue.AdmissionCheckState{
Name: "check-2",
State: kueue.CheckStateRetry,
},
).
Conditions(metav1.Condition{
Type: kueue.WorkloadDeactivationTarget,
Status: metav1.ConditionTrue,
Reason: kueue.WorkloadEvictedByAdmissionCheck,
Message: "Admission check(s): [check-1], were rejected",
}).
Obj(),
wantWorkload: utiltesting.MakeWorkload("wl", "ns").
ReserveQuota(utiltesting.MakeAdmission("q1").Obj()).
Admitted(true).
Active(false).
ControllerReference(batchv1.SchemeGroupVersion.WithKind("Job"), "ownername", "owneruid").
AdmissionChecks(
kueue.AdmissionCheckState{
Name: "check-1",
State: kueue.CheckStatePending,
Message: "Reset to Pending after eviction. Previously: Rejected",
},
kueue.AdmissionCheckState{
Name: "check-2",
State: kueue.CheckStatePending,
Message: "Reset to Pending after eviction. Previously: Retry",
},
).
Conditions(
metav1.Condition{
Type: kueue.WorkloadEvicted,
Status: metav1.ConditionTrue,
Reason: kueue.WorkloadEvictedByDeactivation + kueue.WorkloadEvictedByAdmissionCheck,
Message: "The workload is deactivated due to Admission check(s): [check-1], were rejected",
},
// In a real cluster this condition would be removed but it cant be in the fake cluster
metav1.Condition{
Type: kueue.WorkloadDeactivationTarget,
Status: metav1.ConditionTrue,
Reason: kueue.WorkloadEvictedByAdmissionCheck,
Message: "Admission check(s): [check-1], were rejected",
},
).
Obj(),
wantEvents: []utiltesting.EventRecord{
{
Key: types.NamespacedName{Namespace: "ns", Name: "wl"},
EventType: "Normal",
Reason: "EvictedDueToInactiveWorkloadAdmissionCheck",
Message: "The workload is deactivated due to Admission check(s): [check-1], were rejected",
},
},
},
"workload with retry checks should be evicted and checks should be pending": {
workload: utiltesting.MakeWorkload("wl", "ns").
ReserveQuota(utiltesting.MakeAdmission("q1").Obj()).
Expand Down
8 changes: 7 additions & 1 deletion pkg/workload/admissionchecks.go
Original file line number Diff line number Diff line change
Expand Up @@ -87,16 +87,22 @@ func FindAdmissionCheck(checks []kueue.AdmissionCheckState, checkName string) *k
}

// ResetChecksOnEviction sets all AdmissionChecks to Pending
func ResetChecksOnEviction(w *kueue.Workload, now time.Time) {
func ResetChecksOnEviction(w *kueue.Workload, now time.Time) bool {
checks := w.Status.AdmissionChecks
updated := false
for i := range checks {
if checks[i].State == kueue.CheckStatePending {
continue
}
checks[i] = kueue.AdmissionCheckState{
Name: checks[i].Name,
State: kueue.CheckStatePending,
LastTransitionTime: metav1.NewTime(now),
Message: "Reset to Pending after eviction. Previously: " + string(checks[i].State),
}
updated = true
}
return updated
}

// SetAdmissionCheckState - adds or updates newCheck in the provided checks list.
Expand Down
2 changes: 1 addition & 1 deletion pkg/workload/workload.go
Original file line number Diff line number Diff line change
Expand Up @@ -768,7 +768,7 @@ func IsActive(w *kueue.Workload) bool {
// IsEvictedByDeactivation returns true if the workload is evicted by deactivation.
func IsEvictedByDeactivation(w *kueue.Workload) bool {
cond := apimeta.FindStatusCondition(w.Status.Conditions, kueue.WorkloadEvicted)
return cond != nil && cond.Status == metav1.ConditionTrue && cond.Reason == kueue.WorkloadEvictedByDeactivation
return cond != nil && cond.Status == metav1.ConditionTrue && strings.HasPrefix(cond.Reason, kueue.WorkloadEvictedByDeactivation)
}

func IsEvictedByPodsReadyTimeout(w *kueue.Workload) (*metav1.Condition, bool) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -411,22 +411,14 @@ var _ = ginkgo.Describe("Provisioning", ginkgo.Ordered, ginkgo.ContinueOnFailure
}, util.Timeout, util.Interval).Should(gomega.Succeed())
})

ginkgo.By("Checking the admission check", func() {
gomega.Eventually(func(g gomega.Gomega) {
g.Expect(k8sClient.Get(ctx, wlKey, &updatedWl)).To(gomega.Succeed())
state := workload.FindAdmissionCheck(updatedWl.Status.AdmissionChecks, ac.Name)
g.Expect(state).NotTo(gomega.BeNil())
g.Expect(state.State).To(gomega.Equal(kueue.CheckStateRejected))
}, util.Timeout, util.Interval).Should(gomega.Succeed())
})

ginkgo.By("Checking if workload is deactivated, has Rejected status in the status.admissionCheck[*] field, an event is emitted and a metric is increased", func() {
ginkgo.By("Checking if workload is deactivated, Rejected status was once in the status.admissionCheck[*] field, an event is emitted and a metric is increased", func() {
gomega.Eventually(func(g gomega.Gomega) {
g.Expect(k8sClient.Get(ctx, wlKey, &updatedWl)).To(gomega.Succeed())
g.Expect(updatedWl.Status.AdmissionChecks).To(gomega.ContainElement(gomega.BeComparableTo(
kueue.AdmissionCheckState{
Name: ac.Name,
State: kueue.CheckStateRejected,
Name: ac.Name,
State: kueue.CheckStatePending,
Message: "Reset to Pending after eviction. Previously: Rejected",
},
cmpopts.IgnoreFields(kueue.AdmissionCheckState{}, "LastTransitionTime", "PodSetUpdates"))))
g.Expect(workload.IsActive(&updatedWl)).To(gomega.BeFalse())
Expand All @@ -443,7 +435,7 @@ var _ = ginkgo.Describe("Provisioning", ginkgo.Ordered, ginkgo.ContinueOnFailure
gomega.Eventually(func(g gomega.Gomega) {
g.Expect(k8sClient.Get(ctx, wlKey, &updatedWl)).To(gomega.Succeed())
g.Expect(workload.IsEvictedByDeactivation(&updatedWl)).To(gomega.BeTrue())
util.ExpectEvictedWorkloadsTotalMetric(cq.Name, kueue.WorkloadEvictedByDeactivation, 1)
util.ExpectEvictedWorkloadsTotalMetric(cq.Name, kueue.WorkloadEvictedByDeactivation+kueue.WorkloadEvictedByAdmissionCheck, 1)
}, util.Timeout, util.Interval).Should(gomega.Succeed())
})
})
Expand Down Expand Up @@ -495,8 +487,9 @@ var _ = ginkgo.Describe("Provisioning", ginkgo.Ordered, ginkgo.ContinueOnFailure
g.Expect(k8sClient.Get(ctx, wlKey, &updatedWl)).To(gomega.Succeed())
g.Expect(updatedWl.Status.AdmissionChecks).To(gomega.ContainElement(gomega.BeComparableTo(
kueue.AdmissionCheckState{
Name: ac.Name,
State: kueue.CheckStateRejected,
Name: ac.Name,
State: kueue.CheckStatePending,
Message: "Reset to Pending after eviction. Previously: Rejected",
},
cmpopts.IgnoreFields(kueue.AdmissionCheckState{}, "LastTransitionTime", "PodSetUpdates"))))
g.Expect(workload.IsActive(&updatedWl)).To(gomega.BeFalse())
Expand All @@ -516,7 +509,7 @@ var _ = ginkgo.Describe("Provisioning", ginkgo.Ordered, ginkgo.ContinueOnFailure
g.Expect(workload.IsEvictedByDeactivation(&updatedWl)).To(gomega.BeTrue())
}, util.Timeout, util.Interval).Should(gomega.Succeed())

util.ExpectEvictedWorkloadsTotalMetric(cq.Name, kueue.WorkloadEvictedByDeactivation, 1)
util.ExpectEvictedWorkloadsTotalMetric(cq.Name, kueue.WorkloadEvictedByDeactivation+kueue.WorkloadEvictedByAdmissionCheck, 1)
})
})

Expand Down Expand Up @@ -548,14 +541,15 @@ var _ = ginkgo.Describe("Provisioning", ginkgo.Ordered, ginkgo.ContinueOnFailure
}, util.Timeout, util.Interval).Should(gomega.Succeed())
})

ginkgo.By("Checking if workload is deactivated, has Rejected status in the status.admissionCheck[*] field, an event is emitted and a metric is increased", func() {
ginkgo.By("Checking if workload is deactivated, once had Rejected status in the status.admissionCheck[*] field, an event is emitted and a metric is increased", func() {
gomega.Eventually(func(g gomega.Gomega) {
g.Expect(k8sClient.Get(ctx, wlKey, &updatedWl)).To(gomega.Succeed())

g.Expect(updatedWl.Status.AdmissionChecks).To(gomega.ContainElement(gomega.BeComparableTo(
kueue.AdmissionCheckState{
Name: ac.Name,
State: kueue.CheckStateRejected,
Name: ac.Name,
State: kueue.CheckStatePending,
Message: "Reset to Pending after eviction. Previously: Rejected",
},
cmpopts.IgnoreFields(kueue.AdmissionCheckState{}, "LastTransitionTime", "PodSetUpdates"))))
g.Expect(workload.IsActive(&updatedWl)).To(gomega.BeFalse())
Expand All @@ -575,7 +569,7 @@ var _ = ginkgo.Describe("Provisioning", ginkgo.Ordered, ginkgo.ContinueOnFailure
g.Expect(workload.IsEvictedByDeactivation(&updatedWl)).To(gomega.BeTrue())
}, util.Timeout, util.Interval).Should(gomega.Succeed())

util.ExpectEvictedWorkloadsTotalMetric(cq.Name, kueue.WorkloadEvictedByDeactivation, 1)
util.ExpectEvictedWorkloadsTotalMetric(cq.Name, kueue.WorkloadEvictedByDeactivation+kueue.WorkloadEvictedByAdmissionCheck, 1)
})
})

Expand Down Expand Up @@ -1135,13 +1129,14 @@ var _ = ginkgo.Describe("Provisioning", ginkgo.Ordered, ginkgo.ContinueOnFailure
}, util.Timeout, util.Interval).Should(gomega.Succeed())
})

ginkgo.By("Checking if workload is deactivated, has Rejected status in the status.admissionCheck[*] field, an event is emitted and a metric is increased", func() {
ginkgo.By("Checking if workload is deactivated, once had Rejected status in the status.admissionCheck[*] field, an event is emitted and a metric is increased", func() {
gomega.Eventually(func(g gomega.Gomega) {
g.Expect(k8sClient.Get(ctx, wlKey, &updatedWl)).To(gomega.Succeed())
g.Expect(updatedWl.Status.AdmissionChecks).To(gomega.ContainElement(gomega.BeComparableTo(
kueue.AdmissionCheckState{
Name: ac.Name,
State: kueue.CheckStateRejected,
Name: ac.Name,
State: kueue.CheckStatePending,
Message: "Reset to Pending after eviction. Previously: Rejected",
},
cmpopts.IgnoreFields(kueue.AdmissionCheckState{}, "LastTransitionTime", "PodSetUpdates"))))
g.Expect(workload.IsActive(&updatedWl)).To(gomega.BeFalse(), "The workload should be deactivated")
Expand All @@ -1161,7 +1156,7 @@ var _ = ginkgo.Describe("Provisioning", ginkgo.Ordered, ginkgo.ContinueOnFailure
g.Expect(workload.IsEvictedByDeactivation(&updatedWl)).To(gomega.BeTrue(), "The workload should be evicted by deactivation")
}, util.Timeout, util.Interval).Should(gomega.Succeed())

util.ExpectEvictedWorkloadsTotalMetric(cq.Name, kueue.WorkloadEvictedByDeactivation, 1)
util.ExpectEvictedWorkloadsTotalMetric(cq.Name, kueue.WorkloadEvictedByDeactivation+kueue.WorkloadEvictedByAdmissionCheck, 1)
})
})
})
Expand Down Expand Up @@ -1416,23 +1411,15 @@ var _ = ginkgo.Describe("Provisioning", ginkgo.Ordered, ginkgo.ContinueOnFailure
}, util.Timeout, util.Interval).Should(gomega.Succeed())
})

ginkgo.By("Checking the admission check is rejected", func() {
gomega.Eventually(func(g gomega.Gomega) {
g.Expect(k8sClient.Get(ctx, wlKey, &updatedWl)).To(gomega.Succeed())
state := workload.FindAdmissionCheck(updatedWl.Status.AdmissionChecks, ac.Name)
g.Expect(state).NotTo(gomega.BeNil())
g.Expect(state.State).To(gomega.Equal(kueue.CheckStateRejected))
}, util.Timeout, util.Interval).Should(gomega.Succeed())
})

ginkgo.By("Checking if workload is deactivated, has Rejected status in the status.admissionCheck[*] field, an event is emitted and a metric is increased", func() {
ginkgo.By("Checking if workload is deactivated, once had Rejected status in the status.admissionCheck[*] field, an event is emitted and a metric is increased", func() {
gomega.Eventually(func(g gomega.Gomega) {
g.Expect(k8sClient.Get(ctx, wlKey, &updatedWl)).To(gomega.Succeed())

g.Expect(updatedWl.Status.AdmissionChecks).To(gomega.ContainElement(gomega.BeComparableTo(
kueue.AdmissionCheckState{
Name: ac.Name,
State: kueue.CheckStateRejected,
Name: ac.Name,
State: kueue.CheckStatePending,
Message: "Reset to Pending after eviction. Previously: Rejected",
},
cmpopts.IgnoreFields(kueue.AdmissionCheckState{}, "LastTransitionTime", "PodSetUpdates"))))
g.Expect(workload.IsActive(&updatedWl)).To(gomega.BeFalse())
Expand All @@ -1450,7 +1437,7 @@ var _ = ginkgo.Describe("Provisioning", ginkgo.Ordered, ginkgo.ContinueOnFailure
g.Expect(k8sClient.Get(ctx, wlKey, &updatedWl)).To(gomega.Succeed())

g.Expect(workload.IsEvictedByDeactivation(&updatedWl)).To(gomega.BeTrue())
util.ExpectEvictedWorkloadsTotalMetric(cq.Name, kueue.WorkloadEvictedByDeactivation, 1)
util.ExpectEvictedWorkloadsTotalMetric(cq.Name, kueue.WorkloadEvictedByDeactivation+kueue.WorkloadEvictedByAdmissionCheck, 1)
}, util.Timeout, util.Interval).Should(gomega.Succeed())
})
})
Expand Down Expand Up @@ -1877,23 +1864,15 @@ var _ = ginkgo.Describe("Provisioning", ginkgo.Ordered, ginkgo.ContinueOnFailure
}, util.Timeout, util.Interval).Should(gomega.Succeed())
})

ginkgo.By("Checking the admission check is rejected", func() {
gomega.Eventually(func(g gomega.Gomega) {
g.Expect(k8sClient.Get(ctx, wlKey, &updatedWl)).To(gomega.Succeed())
check := workload.FindAdmissionCheck(updatedWl.Status.AdmissionChecks, ac.Name)
g.Expect(check).NotTo(gomega.BeNil())
g.Expect(check.State).To(gomega.Equal(kueue.CheckStateRejected))
}, util.Timeout, util.Interval).Should(gomega.Succeed())
})

ginkgo.By("Checking if the workload is deactivated, has Rejected status in the status.admissionCheck[*] field, an event is emitted and a metric is increased", func() {
ginkgo.By("Checking if the workload is deactivated, once had Rejected status in the status.admissionCheck[*] field, an event is emitted and a metric is increased", func() {
gomega.Eventually(func(g gomega.Gomega) {
g.Expect(k8sClient.Get(ctx, wlKey, &updatedWl)).To(gomega.Succeed())

g.Expect(updatedWl.Status.AdmissionChecks).To(gomega.ContainElement(gomega.BeComparableTo(
kueue.AdmissionCheckState{
Name: ac.Name,
State: kueue.CheckStateRejected,
Name: ac.Name,
State: kueue.CheckStatePending,
Message: "Reset to Pending after eviction. Previously: Rejected",
},
cmpopts.IgnoreFields(kueue.AdmissionCheckState{}, "LastTransitionTime", "PodSetUpdates"))))
g.Expect(workload.IsActive(&updatedWl)).To(gomega.BeFalse())
Expand All @@ -1911,7 +1890,7 @@ var _ = ginkgo.Describe("Provisioning", ginkgo.Ordered, ginkgo.ContinueOnFailure
g.Expect(k8sClient.Get(ctx, wlKey, &updatedWl)).To(gomega.Succeed())

g.Expect(workload.IsEvictedByDeactivation(&updatedWl)).To(gomega.BeTrue())
util.ExpectEvictedWorkloadsTotalMetric(cq.Name, kueue.WorkloadEvictedByDeactivation, 1)
util.ExpectEvictedWorkloadsTotalMetric(cq.Name, kueue.WorkloadEvictedByDeactivation+kueue.WorkloadEvictedByAdmissionCheck, 1)
}, util.Timeout, util.Interval).Should(gomega.Succeed())
})
})
Expand Down
Loading

0 comments on commit 43b7374

Please sign in to comment.