Skip to content

Commit

Permalink
Consider a container crashed when in CrashLoopBackOff
Browse files Browse the repository at this point in the history
Looking at the `Terminated` state of a container is not reliable, as this
state does not last very long. Once a workload container keeps exiting
(regardless of the exit code), k8s puts the container into the
`Waiting` state with reason `CrashLoopBackOff` until the kubelet
restarts it. As this is an exponential backoff, this state tends to
last longer and longer if the app keeps crashing.

With the previous implementation of looking at the `Terminated` state we
could never see the instance as `crashed` in the `cf app` output when we
intentionally kept crashing it.

Issue: #3082

Co-authored-by: Danail Branekov <danailster@gmail.com>
  • Loading branch information
georgethebeatle and danail-branekov committed Jan 26, 2024
1 parent bb3f7b6 commit daf04d3
Show file tree
Hide file tree
Showing 3 changed files with 42 additions and 7 deletions.
6 changes: 3 additions & 3 deletions api/actions/process_stats.go
Original file line number Diff line number Diff line change
Expand Up @@ -213,16 +213,16 @@ func getPodState(pod corev1.Pod) string {
return stateDown
}

if podHasTerminatedContainer(pod) {
if podHasCrashedContainer(pod) {
return stateCrashed
}

return stateStarting
}

func podHasTerminatedContainer(pod corev1.Pod) bool {
func podHasCrashedContainer(pod corev1.Pod) bool {
for _, cond := range pod.Status.ContainerStatuses {
if cond.State.Terminated != nil {
if cond.State.Waiting != nil && cond.State.Waiting.Reason == "CrashLoopBackOff" {
return true
}
}
Expand Down
18 changes: 14 additions & 4 deletions api/actions/process_stats_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -268,21 +268,31 @@ var _ = Describe("ProcessStats", func() {
})
})

When("the pod has a terminated container", func() {
When("the pod has a container in waiting state", func() {
BeforeEach(func() {
podMetrics[0].Pod.Status.Conditions = makeConditions("Initialized")
podMetrics[0].Pod.Status.ContainerStatuses = []corev1.ContainerStatus{
{
Name: "application",
State: corev1.ContainerState{
Terminated: &corev1.ContainerStateTerminated{},
Waiting: &corev1.ContainerStateWaiting{},
},
},
}
})

It("is crashed", func() {
Expect(responseRecords[0].State).To(Equal("CRASHED"))
It("is starting", func() {
Expect(responseRecords[0].State).To(Equal("STARTING"))
})

When("the reason is CrashLoopBackoff", func() {
BeforeEach(func() {
podMetrics[0].Pod.Status.ContainerStatuses[0].State.Waiting.Reason = "CrashLoopBackOff"
})

It("is crashed", func() {
Expect(responseRecords[0].State).To(Equal("CRASHED"))
})
})
})

Expand Down
25 changes: 25 additions & 0 deletions tests/assets/golang/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"net/http"
"os"
"path/filepath"
"strconv"
"strings"
)

Expand All @@ -16,6 +17,7 @@ func main() {
http.HandleFunc("/env.json", envJsonHandler)
http.HandleFunc("/servicebindingroot", serviceBindingRootHandler)
http.HandleFunc("/servicebindings", serviceBindingsHandler)
http.HandleFunc("/exit", exitHandler)

port := os.Getenv("PORT")
if port == "" {
Expand All @@ -25,6 +27,29 @@ func main() {
http.ListenAndServe(fmt.Sprintf(":%s", port), nil)
}

// exitHandler terminates the whole process with the exit code given in the
// "code" request parameter. A missing or empty parameter is treated as 0.
//
// If the form cannot be parsed, or the code is not a valid integer, the
// handler responds with 500 Internal Server Error plus a short explanation
// and the process keeps running.
func exitHandler(w http.ResponseWriter, r *http.Request) {
	if err := r.ParseForm(); err != nil {
		// The status must be sent before the body: the first body write
		// implicitly sends a 200 and any later WriteHeader call is ignored.
		w.WriteHeader(http.StatusInternalServerError)
		fmt.Fprintf(w, "Failed to parse form: %v", err)
		return
	}

	code := r.Form.Get("code")
	if code == "" {
		code = "0"
	}

	exitCode, err := strconv.Atoi(code)
	if err != nil {
		w.WriteHeader(http.StatusInternalServerError)
		fmt.Fprintf(w, "Failed to parse exit code: %s: %v", code, err)
		return
	}

	// os.Exit does not return, so nothing is written to w on this path.
	os.Exit(exitCode)
}

// helloWorldHandler writes a fixed greeting line to the response; the
// incoming request is not inspected.
func helloWorldHandler(w http.ResponseWriter, _ *http.Request) {
	const greeting = "Hi, I'm Dorifi!"
	fmt.Fprintln(w, greeting)
}
Expand Down

0 comments on commit daf04d3

Please sign in to comment.