Skip to content

Commit

Permalink
Comment fixes, renames, reduce critical section for synchronized code.
Browse files Browse the repository at this point in the history
  • Loading branch information
Crystal Lemire committed Nov 30, 2023
1 parent 67667b5 commit 7b8f83f
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 21 deletions.
18 changes: 9 additions & 9 deletions protocol/daemons/server/types/health_checker.go
Original file line number Diff line number Diff line change
Expand Up @@ -83,26 +83,26 @@ func (u *healthCheckerMutableState) ReportFailure(now time.Time, err error) time
return now.Sub(u.mostRecentFailureStreakError.Timestamp())
}

// healthChecker encapsulates the logic for monitoring the health of a health checkable service.
// healthChecker encapsulates the logic for monitoring the health of a health-checkable service.
type healthChecker struct {
// mutableState is the mutable state of the health checker. Access to these fields is synchronized.
mutableState *healthCheckerMutableState

// healthCheckable is the health checkable service to be monitored.
// healthCheckable is the health-checkable service to be monitored.
healthCheckable types.HealthCheckable

// pollFrequency is the frequency at which the health checkable service is polled.
// pollFrequency is the frequency at which the health-checkable service is polled.
pollFrequency time.Duration

// timer triggers a health check poll for a health checkable service.
// timer triggers a health check poll for a health-checkable service.
timer *time.Timer

// maxAcceptableUnhealthyDuration is the maximum acceptable duration for a health checkable service to
// maxAcceptableUnhealthyDuration is the maximum acceptable duration for a health-checkable service to
// remain unhealthy. If the service remains unhealthy for this duration, the monitor will execute the
// specified callback function.
maxAcceptableUnhealthyDuration time.Duration

// unhealthyCallback is the callback function to be executed if the health checkable service remains
// unhealthyCallback is the callback function to be executed if the health-checkable service remains
// unhealthy for a period of time greater than or equal to the maximum acceptable unhealthy duration.
// This callback function is executed with the error that caused the service to become unhealthy.
unhealthyCallback func(error)
Expand All @@ -113,7 +113,7 @@ type healthChecker struct {
logger log.Logger
}

// Poll executes a health check for the health checkable service. If the service has been unhealthy for longer than the
// Poll executes a health check for the health-checkable service. If the service has been unhealthy for longer than the
// maximum acceptable unhealthy duration, the callback function is executed.
// This method is publicly exposed for testing. This method is synchronized.
func (hc *healthChecker) Poll() {
Expand All @@ -137,12 +137,12 @@ func (hc *healthChecker) Poll() {
hc.timer.Reset(hc.pollFrequency)
}

// Stop stops the health checker. This method is synchronized.
// Stop stops the health checker. This method is not synchronized, as the timer does not need synchronization.
func (hc *healthChecker) Stop() {
hc.timer.Stop()
}

// StartNewHealthChecker creates and starts a new health checker for a health checkable service.
// StartNewHealthChecker creates and starts a new health checker for a health-checkable service.
func StartNewHealthChecker(
healthCheckable types.HealthCheckable,
pollFrequency time.Duration,
Expand Down
37 changes: 25 additions & 12 deletions protocol/daemons/server/types/health_monitor.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ import (
)

const (
// HealthCheckPollFrequency is the frequency at which the health checkable service is polled.
// HealthCheckPollFrequency is the frequency at which the health-checkable service is polled.
HealthCheckPollFrequency = 5 * time.Second
)

Expand All @@ -30,8 +30,8 @@ type healthMonitorMutableState struct {
disabled bool
}

// NewHealthMonitorMutableState creates a new health monitor mutable state.
func NewHealthMonitorMutableState() *healthMonitorMutableState {
// newHealthMonitorMutableState creates a new health monitor mutable state.
func newHealthMonitorMutableState() *healthMonitorMutableState {
return &healthMonitorMutableState{
serviceToHealthChecker: make(map[string]*healthChecker),
}
Expand Down Expand Up @@ -64,13 +64,29 @@ func (ms *healthMonitorMutableState) Stop() {
ms.stopped = true
}

// RegisterHealthChecker registers a new health checker for a health checkable with the health monitor. The health
// RegisterHealthChecker registers a new health checker for a health-checkable with the health monitor. The health
// checker is lazily created using the provided function if needed. This method is synchronized. It returns an error if
// the service was already registered.
func (ms *healthMonitorMutableState) RegisterHealthChecker(
checkable types.HealthCheckable,
lazyHealthCheckerCreator func() *healthChecker,
) error {
stopService := false

// If the monitor has already been stopped, we want to stop the checkable service before returning.
// However, we'd prefer not to stop the service within the critical section in order to prevent deadlocks.
// This defer will be called last, after the lock is released.
defer func() {
if stopService {
// If the service is stoppable, stop it. This helps us to clean up daemon services in test cases
// where the monitor is stopped before all daemon services have been registered.
if stoppable, ok := checkable.(Stoppable); ok {
stoppable.Stop()
}
}
}()

// Enter into the critical section.
ms.Lock()
defer ms.Unlock()

Expand All @@ -83,11 +99,8 @@ func (ms *healthMonitorMutableState) RegisterHealthChecker(
// This could be a concern for short-running integration test cases, where the network
// stops before all daemon services have been registered.
if ms.stopped {
// If the service is stoppable, stop it. This helps us to clean up daemon services in test cases
// where the monitor is stopped before all daemon services have been registered.
if stoppable, ok := checkable.(Stoppable); ok {
stoppable.Stop()
}
// Toggle the stopService flag to true so that the service is stopped after the lock is released.
stopService = true
return nil
}

Expand Down Expand Up @@ -118,7 +131,7 @@ func NewHealthMonitor(
logger log.Logger,
) *HealthMonitor {
return &HealthMonitor{
mutableState: NewHealthMonitorMutableState(),
mutableState: newHealthMonitorMutableState(),
logger: logger.With(cosmoslog.ModuleKey, "health-monitor"),
startupGracePeriod: startupGracePeriod,
pollingFrequency: pollingFrequency,
Expand Down Expand Up @@ -171,7 +184,7 @@ func PanicServiceNotResponding(hc types.HealthCheckable) func(error) {
}

// LogErrorServiceNotResponding returns a function that logs an error indicating that the specified service
// is not responding. This is ideal for creating a callback function when registering a health checkable service.
// is not responding. This is ideal for creating a callback function when registering a health-checkable service.
func LogErrorServiceNotResponding(hc types.HealthCheckable, logger log.Logger) func(error) {
return func(err error) {
logger.Error(
Expand All @@ -184,7 +197,7 @@ func LogErrorServiceNotResponding(hc types.HealthCheckable, logger log.Logger) f
}
}

// RegisterService registers a new health checkable service with the health check monitor. If the service
// RegisterService registers a new health-checkable service with the health check monitor. If the service
// is unhealthy every time it is polled for a duration greater than or equal to the maximum acceptable unhealthy
// duration, the monitor will panic.
// This method is synchronized. It returns an error if the service was already registered or the monitor has
Expand Down

0 comments on commit 7b8f83f

Please sign in to comment.