package health

import (
	"context"
	"encoding/json"
	"fmt"
	"net/http"
	"runtime"
	"sync"
	"time"

	"github.com/fiskerinc/cloud-services/pkg/logger"
	"github.com/gomodule/redigo/redis"
	"github.com/pkg/errors"
)

// Based on https://github.com/hellofresh/health-go

// Status is the textual health state reported for the service as a whole
// or for an individual check.
type Status string

// Possible health statuses.
const (
	StatusOK                 Status = "OK"
	StatusPartiallyAvailable Status = "partially available"
	StatusUnavailable        Status = "unavailable"
	StatusTimeout            Status = "timeout"
)

// CheckFunc probes a single resource. A nil error means the resource is
// healthy.
type CheckFunc func(context.Context) error

// InfoFunc receives the System snapshot built for a response so that it can
// add its own data to it.
type InfoFunc func(system *System)

// Config describes one registered health check.
type Config struct {
	// Name identifies the checked resource. It must be non-empty and unique
	// within a Health container.
	Name string
	// Timeout bounds a single run of Check. Register replaces a zero value
	// with one second.
	Timeout time.Duration
	// SkipOnErr marks the resource as non-critical: when its check fails the
	// overall status degrades to StatusPartiallyAvailable instead of
	// StatusUnavailable (unless another check already made it unavailable).
	SkipOnErr bool
	// Check is the probe that is executed.
	Check CheckFunc
	// Info, when set, is invoked with the System snapshot of every response.
	Info InfoFunc
	// Vital marks resources the service cannot work without. Only vital
	// checks are run by the liveness handler.
	Vital bool
}

// Check is the health check response body.
type Check struct {
	// Status is the aggregated status of all executed checks.
	Status Status `json:"status"`
	// Timestamp is the time at which the response was assembled.
	Timestamp time.Time `json:"timestamp"`
	// Failures maps check names to their outcome. Measure stores one entry
	// per executed check: the error text, "timeout", or "OK".
	Failures map[string]string `json:"failures,omitempty"`
	// System holds information about the Go process.
	System *System `json:"system"`
}

// System carries runtime figures about the Go process.
type System struct {
	// Version is the Go version.
	Version string `json:"version"`
	// GoroutinesCount is the number of goroutines that currently exist.
	GoroutinesCount int `json:"goroutines_count"`
	// TotalAllocBytes is the total bytes allocated.
	TotalAllocBytes int `json:"total_alloc_bytes"`
	// HeapObjectsCount is the number of objects in the Go heap.
	HeapObjectsCount int `json:"heap_objects_count"`
	// AllocBytes is the bytes allocated and not yet freed.
	AllocBytes int `json:"alloc_bytes"`
	// RedisStats holds Redis connection pool statistics and is omitted from
	// the JSON output when nil. It is not filled in by this file; presumably
	// an InfoFunc sets it.
	RedisStats *redis.PoolStats `json:"redis_stats,omitempty"`
}

// Health is the health-checks container.
type Health struct {
	// mu guards checks.
	mu     sync.Mutex
	checks map[string]Config
}

// checkResponse pairs a check's configuration with the error it produced.
type checkResponse struct {
	config Config
	err    error
}

// filterChecks selects which of the registered checks should be executed.
type filterChecks func(checks map[string]Config) map[string]Config

// New builds an empty health check container and applies the given options
// in order, returning the first option error encountered.
func New(opts ...Option) (*Health, error) {
	container := &Health{checks: map[string]Config{}}

	for i := range opts {
		if err := opts[i](container); err != nil {
			return nil, err
		}
	}

	return container, nil
}

// Register adds a check to the container. It fails when the check has no
// name or when a check with the same name is already registered. A zero
// Timeout is replaced with one second.
func (h *Health) Register(c Config) error {
	if c.Name == "" {
		return errors.New("health check must have a name to be registered")
	}
	if c.Timeout == 0 {
		c.Timeout = time.Second
	}

	h.mu.Lock()
	defer h.mu.Unlock()

	if _, exists := h.checks[c.Name]; exists {
		return fmt.Errorf("health check %q is already registered", c.Name)
	}
	h.checks[c.Name] = c

	return nil
}

// ReadinessHandler exposes ReadinessFunc as an http.Handler.
func (h *Health) ReadinessHandler() http.Handler {
	var handler http.HandlerFunc = h.ReadinessFunc
	return handler
}

// LivenessHandler exposes LivenessFunc as an http.Handler.
func (h *Health) LivenessHandler() http.Handler {
	var handler http.HandlerFunc = h.LivenessFunc
	return handler
}

// LivenessFunc is the HTTP handler function.
func (h *Health) LivenessFunc(w http.ResponseWriter, r *http.Request) { c := h.Measure(r.Context(), getLivenessCheck) w.Header().Set("Content-Type", "application/json") data, err := json.Marshal(c) if err != nil { w.WriteHeader(http.StatusInternalServerError) http.Error(w, err.Error(), http.StatusInternalServerError) return } code := http.StatusOK if c.Status == StatusUnavailable { code = http.StatusServiceUnavailable } w.WriteHeader(code) w.Write(data) } // ReadinessFunc is the HTTP handler function. func (h *Health) ReadinessFunc(w http.ResponseWriter, r *http.Request) { c := h.Measure(r.Context(), getReadinessCheck) w.Header().Set("Content-Type", "application/json") data, err := json.Marshal(c) if err != nil { w.WriteHeader(http.StatusInternalServerError) http.Error(w, err.Error(), http.StatusInternalServerError) return } code := http.StatusOK if c.Status == StatusUnavailable { code = http.StatusServiceUnavailable } w.WriteHeader(code) w.Write(data) } func (h *Health) info(system *System) { for _, c := range h.checks { if c.Info != nil { c.Info(system) } } } // Measure runs all the registered health checks and returns summary status func (h *Health) Measure(ctx context.Context, getChecks filterChecks) Check { errTimeout := errors.New("timeout error") h.mu.Lock() defer h.mu.Unlock() checksList := getChecks(h.checks) total := len(checksList) checkRespChan := make(chan checkResponse, total) var wgRes sync.WaitGroup wgRes.Add(total) go func() { wgRes.Wait() close(checkRespChan) }() for _, c := range checksList { go func(ctx context.Context, c Config, respChan chan<- checkResponse) { defer wgRes.Done() locResp := make(chan error) go func(ctx context.Context, locResp chan<- error) { defer close(locResp) locResp <- c.Check(ctx) }(ctx, locResp) select { case <-time.After(c.Timeout): respChan <- checkResponse{config: c, err: errTimeout} case err := <-locResp: respChan <- checkResponse{config: c, err: err} } }(ctx, c, checkRespChan) } status := StatusOK checks := 
make(map[string]string) for resp := range checkRespChan { if resp.err == errTimeout { checks[resp.config.Name] = string(StatusTimeout) status = getAvailability(status, resp.config) continue } if resp.err != nil { checks[resp.config.Name] = resp.err.Error() status = getAvailability(status, resp.config) logger.Error().Err(errors.WithMessage(resp.err, resp.config.Name)).Send() continue } checks[resp.config.Name] = string(StatusOK) } system := newSystemMetrics() h.info(&system) return newCheck(status, checks, &system) } func getReadinessCheck(checks map[string]Config) map[string]Config { return checks } func getLivenessCheck(checks map[string]Config) map[string]Config { rez := make(map[string]Config) for key, conf := range checks { if conf.Vital { rez[key] = conf } } return rez } func newCheck(s Status, failures map[string]string, system *System) Check { return Check{ Status: s, Timestamp: time.Now(), Failures: failures, System: system, } } func newSystemMetrics() System { s := runtime.MemStats{} runtime.ReadMemStats(&s) return System{ Version: runtime.Version(), GoroutinesCount: runtime.NumGoroutine(), TotalAllocBytes: int(s.TotalAlloc), HeapObjectsCount: int(s.HeapObjects), AllocBytes: int(s.Alloc), } } func getAvailability(s Status, c Config) Status { if c.SkipOnErr && s != StatusUnavailable { return StatusPartiallyAvailable } return StatusUnavailable }