From daf04d33558e322e06bd6392639e6b212c1cb0a1 Mon Sep 17 00:00:00 2001 From: Georgi Sabev Date: Fri, 26 Jan 2024 14:13:50 +0000 Subject: [PATCH] Consider a container crashed when in CrashLoopBackOff Looking at the terminated state of a container is not reliable as this state does not last very long. Once a workload container keeps exiting (regardless of the exit code), k8s puts the container into `Waiting` state with reason `CrashLoopBackOff` until the kubelet restarts it. As this is an exponential backoff, this state tends to last longer and longer if the app keeps crashing. With the previous implementation of looking at the `Terminated` state, we could never see the instance as `crashed` in the `cf app` output when we intentionally kept crashing it. Issue: https://github.com/cloudfoundry/korifi/issues/3082 Co-authored-by: Danail Branekov --- api/actions/process_stats.go | 6 +++--- api/actions/process_stats_test.go | 18 ++++++++++++++---- tests/assets/golang/main.go | 25 +++++++++++++++++++++++++ 3 files changed, 42 insertions(+), 7 deletions(-) diff --git a/api/actions/process_stats.go b/api/actions/process_stats.go index e1f273105..fd9b5e379 100644 --- a/api/actions/process_stats.go +++ b/api/actions/process_stats.go @@ -213,16 +213,16 @@ func getPodState(pod corev1.Pod) string { return stateDown } - if podHasTerminatedContainer(pod) { + if podHasCrashedContainer(pod) { return stateCrashed } return stateStarting } -func podHasTerminatedContainer(pod corev1.Pod) bool { +func podHasCrashedContainer(pod corev1.Pod) bool { for _, cond := range pod.Status.ContainerStatuses { - if cond.State.Terminated != nil { + if cond.State.Waiting != nil && cond.State.Waiting.Reason == "CrashLoopBackOff" { return true } } diff --git a/api/actions/process_stats_test.go b/api/actions/process_stats_test.go index 0cfc9219a..1f34c2458 100644 --- a/api/actions/process_stats_test.go +++ b/api/actions/process_stats_test.go @@ -268,21 +268,31 @@ var _ = Describe("ProcessStats", 
func() { }) }) - When("the pod has a terminated container", func() { + When("the pod has a container in waiting state", func() { BeforeEach(func() { podMetrics[0].Pod.Status.Conditions = makeConditions("Initialized") podMetrics[0].Pod.Status.ContainerStatuses = []corev1.ContainerStatus{ { Name: "application", State: corev1.ContainerState{ - Terminated: &corev1.ContainerStateTerminated{}, + Waiting: &corev1.ContainerStateWaiting{}, }, }, } }) - It("is crashed", func() { - Expect(responseRecords[0].State).To(Equal("CRASHED")) + It("is starting", func() { + Expect(responseRecords[0].State).To(Equal("STARTING")) + }) + + When("the reason is CrashLoopBackoff", func() { + BeforeEach(func() { + podMetrics[0].Pod.Status.ContainerStatuses[0].State.Waiting.Reason = "CrashLoopBackOff" + }) + + It("is crashed", func() { + Expect(responseRecords[0].State).To(Equal("CRASHED")) + }) }) }) diff --git a/tests/assets/golang/main.go b/tests/assets/golang/main.go index e21e4e6c5..96f17c360 100644 --- a/tests/assets/golang/main.go +++ b/tests/assets/golang/main.go @@ -6,6 +6,7 @@ import ( "net/http" "os" "path/filepath" + "strconv" "strings" ) @@ -16,6 +17,7 @@ func main() { http.HandleFunc("/env.json", envJsonHandler) http.HandleFunc("/servicebindingroot", serviceBindingRootHandler) http.HandleFunc("/servicebindings", serviceBindingsHandler) + http.HandleFunc("/exit", exitHandler) port := os.Getenv("PORT") if port == "" { @@ -25,6 +27,29 @@ func main() { http.ListenAndServe(fmt.Sprintf(":%s", port), nil) } +func exitHandler(w http.ResponseWriter, r *http.Request) { + err := r.ParseForm() + if err != nil { + w.WriteHeader(http.StatusInternalServerError) // the status must be sent before the body; a Write first would implicitly send 200 + fmt.Fprintf(w, "Failed to parse form: %v", err) + return + } + + code := r.Form.Get("code") + if code == "" { + code = "0" + } + + exitCode, err := strconv.Atoi(code) + if err != nil { + w.WriteHeader(http.StatusInternalServerError) // the status must be sent before the body; a Write first would implicitly send 200 + fmt.Fprintf(w, "Failed to parse exit code: %s: %v", code, err) + return + } + + os.Exit(exitCode) +} 
+ func helloWorldHandler(w http.ResponseWriter, _ *http.Request) { fmt.Fprintln(w, "Hi, I'm Dorifi!") }