Initial cloud-services repo - gateway service + pkg modules
This commit is contained in:
298
pkg/health/health.go
Normal file
298
pkg/health/health.go
Normal file
@@ -0,0 +1,298 @@
|
||||
package health
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"runtime"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"fiskerinc.com/modules/logger"
|
||||
"github.com/gomodule/redigo/redis"
|
||||
"github.com/pkg/errors"
|
||||
)
|
||||
|
||||
// Based on https://github.com/hellofresh/health-go
|
||||
|
||||
// Status is the textual health state. It is used both for the overall
// result of a measurement and, converted to a plain string, for the outcome
// recorded against an individual check.
type Status string
|
||||
|
||||
// Possible health statuses
|
||||
const (
|
||||
StatusOK Status = "OK"
|
||||
StatusPartiallyAvailable Status = "partially available"
|
||||
StatusUnavailable Status = "unavailable"
|
||||
StatusTimeout Status = "timeout"
|
||||
)
|
||||
|
||||
type (
|
||||
// CheckFunc is the func which executes the check.
|
||||
CheckFunc func(context.Context) error
|
||||
|
||||
// InfoFunc is the func which executes to return check info
|
||||
InfoFunc func(system *System)
|
||||
|
||||
// Config carries the parameters to run the check.
|
||||
Config struct {
|
||||
// Name is the name of the resource to be checked.
|
||||
Name string
|
||||
// Timeout is the timeout defined for every check.
|
||||
Timeout time.Duration
|
||||
// SkipOnErr if set to true, it will retrieve StatusOK providing the error message from the failed resource.
|
||||
SkipOnErr bool
|
||||
// Check is the func which executes the check.
|
||||
Check CheckFunc
|
||||
// Info func sets System information
|
||||
Info InfoFunc
|
||||
// If Vital is set to true, it means that the service won't work without this resource.
|
||||
Vital bool
|
||||
}
|
||||
|
||||
// Check represents the health check response.
|
||||
Check struct {
|
||||
// Status is the check status.
|
||||
Status Status `json:"status"`
|
||||
// Timestamp is the time in which the check occurred.
|
||||
Timestamp time.Time `json:"timestamp"`
|
||||
// Failures holds the failed checks along with their messages.
|
||||
Failures map[string]string `json:"failures,omitempty"`
|
||||
// System holds information of the go process.
|
||||
System *System `json:"system"`
|
||||
}
|
||||
|
||||
// System runtime variables about the go process.
|
||||
System struct {
|
||||
// Version is the go version.
|
||||
Version string `json:"version"`
|
||||
// GoroutinesCount is the number of the current goroutines.
|
||||
GoroutinesCount int `json:"goroutines_count"`
|
||||
// TotalAllocBytes is the total bytes allocated.
|
||||
TotalAllocBytes int `json:"total_alloc_bytes"`
|
||||
// HeapObjectsCount is the number of objects in the go heap.
|
||||
HeapObjectsCount int `json:"heap_objects_count"`
|
||||
// TotalAllocBytes is the bytes allocated and not yet freed.
|
||||
AllocBytes int `json:"alloc_bytes"`
|
||||
// RedisPoolCount is the current Redis connection pool count
|
||||
RedisStats *redis.PoolStats `json:"redis_stats,omitempty"`
|
||||
}
|
||||
|
||||
// Health is the health-checks container
|
||||
Health struct {
|
||||
mu sync.Mutex
|
||||
checks map[string]Config
|
||||
}
|
||||
|
||||
checkResponse struct {
|
||||
config Config
|
||||
err error
|
||||
}
|
||||
|
||||
filterChecks func(checks map[string]Config) map[string]Config
|
||||
)
|
||||
|
||||
// New instantiates and build new health check container
|
||||
func New(opts ...Option) (*Health, error) {
|
||||
h := &Health{
|
||||
checks: make(map[string]Config),
|
||||
}
|
||||
|
||||
for _, o := range opts {
|
||||
if err := o(h); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
return h, nil
|
||||
}
|
||||
|
||||
// Register registers a check config to be performed.
|
||||
func (h *Health) Register(c Config) error {
|
||||
if c.Timeout == 0 {
|
||||
c.Timeout = time.Second * 1
|
||||
}
|
||||
|
||||
if c.Name == "" {
|
||||
return errors.New("health check must have a name to be registered")
|
||||
}
|
||||
|
||||
h.mu.Lock()
|
||||
defer h.mu.Unlock()
|
||||
|
||||
if _, ok := h.checks[c.Name]; ok {
|
||||
return fmt.Errorf("health check %q is already registered", c.Name)
|
||||
}
|
||||
|
||||
h.checks[c.Name] = c
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// ReadinessHandler returns an HTTP handler (http.HandlerFunc).
|
||||
func (h *Health) ReadinessHandler() http.Handler {
|
||||
return http.HandlerFunc(h.ReadinessFunc)
|
||||
}
|
||||
|
||||
// LivenessHandler returns an HTTP handler (http.HandlerFunc).
|
||||
func (h *Health) LivenessHandler() http.Handler {
|
||||
return http.HandlerFunc(h.LivenessFunc)
|
||||
}
|
||||
|
||||
// LivenessFunc is the HTTP handler function.
|
||||
func (h *Health) LivenessFunc(w http.ResponseWriter, r *http.Request) {
|
||||
c := h.Measure(r.Context(), getLivenessCheck)
|
||||
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
data, err := json.Marshal(c)
|
||||
if err != nil {
|
||||
w.WriteHeader(http.StatusInternalServerError)
|
||||
http.Error(w, err.Error(), http.StatusInternalServerError)
|
||||
return
|
||||
}
|
||||
|
||||
code := http.StatusOK
|
||||
if c.Status == StatusUnavailable {
|
||||
code = http.StatusServiceUnavailable
|
||||
}
|
||||
w.WriteHeader(code)
|
||||
w.Write(data)
|
||||
}
|
||||
|
||||
// ReadinessFunc is the HTTP handler function.
|
||||
func (h *Health) ReadinessFunc(w http.ResponseWriter, r *http.Request) {
|
||||
c := h.Measure(r.Context(), getReadinessCheck)
|
||||
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
data, err := json.Marshal(c)
|
||||
if err != nil {
|
||||
w.WriteHeader(http.StatusInternalServerError)
|
||||
http.Error(w, err.Error(), http.StatusInternalServerError)
|
||||
return
|
||||
}
|
||||
|
||||
code := http.StatusOK
|
||||
if c.Status == StatusUnavailable {
|
||||
code = http.StatusServiceUnavailable
|
||||
}
|
||||
w.WriteHeader(code)
|
||||
w.Write(data)
|
||||
}
|
||||
|
||||
func (h *Health) info(system *System) {
|
||||
for _, c := range h.checks {
|
||||
if c.Info != nil {
|
||||
c.Info(system)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Measure runs all the registered health checks and returns summary status
|
||||
func (h *Health) Measure(ctx context.Context, getChecks filterChecks) Check {
|
||||
errTimeout := errors.New("timeout error")
|
||||
|
||||
h.mu.Lock()
|
||||
defer h.mu.Unlock()
|
||||
|
||||
checksList := getChecks(h.checks)
|
||||
total := len(checksList)
|
||||
|
||||
checkRespChan := make(chan checkResponse, total)
|
||||
|
||||
var wgRes sync.WaitGroup
|
||||
wgRes.Add(total)
|
||||
|
||||
go func() {
|
||||
wgRes.Wait()
|
||||
close(checkRespChan)
|
||||
}()
|
||||
|
||||
for _, c := range checksList {
|
||||
go func(ctx context.Context, c Config, respChan chan<- checkResponse) {
|
||||
defer wgRes.Done()
|
||||
|
||||
locResp := make(chan error)
|
||||
go func(ctx context.Context, locResp chan<- error) {
|
||||
defer close(locResp)
|
||||
locResp <- c.Check(ctx)
|
||||
}(ctx, locResp)
|
||||
|
||||
select {
|
||||
case <-time.After(c.Timeout):
|
||||
respChan <- checkResponse{config: c, err: errTimeout}
|
||||
case err := <-locResp:
|
||||
respChan <- checkResponse{config: c, err: err}
|
||||
}
|
||||
}(ctx, c, checkRespChan)
|
||||
}
|
||||
|
||||
status := StatusOK
|
||||
checks := make(map[string]string)
|
||||
for resp := range checkRespChan {
|
||||
if resp.err == errTimeout {
|
||||
checks[resp.config.Name] = string(StatusTimeout)
|
||||
status = getAvailability(status, resp.config)
|
||||
|
||||
continue
|
||||
}
|
||||
|
||||
if resp.err != nil {
|
||||
checks[resp.config.Name] = resp.err.Error()
|
||||
status = getAvailability(status, resp.config)
|
||||
logger.Error().Err(errors.WithMessage(resp.err, resp.config.Name)).Send()
|
||||
|
||||
continue
|
||||
}
|
||||
|
||||
checks[resp.config.Name] = string(StatusOK)
|
||||
}
|
||||
|
||||
system := newSystemMetrics()
|
||||
h.info(&system)
|
||||
|
||||
return newCheck(status, checks, &system)
|
||||
}
|
||||
|
||||
func getReadinessCheck(checks map[string]Config) map[string]Config {
|
||||
return checks
|
||||
}
|
||||
|
||||
func getLivenessCheck(checks map[string]Config) map[string]Config {
|
||||
rez := make(map[string]Config)
|
||||
for key, conf := range checks {
|
||||
if conf.Vital {
|
||||
rez[key] = conf
|
||||
}
|
||||
}
|
||||
|
||||
return rez
|
||||
}
|
||||
|
||||
func newCheck(s Status, failures map[string]string, system *System) Check {
|
||||
return Check{
|
||||
Status: s,
|
||||
Timestamp: time.Now(),
|
||||
Failures: failures,
|
||||
System: system,
|
||||
}
|
||||
}
|
||||
|
||||
func newSystemMetrics() System {
|
||||
s := runtime.MemStats{}
|
||||
runtime.ReadMemStats(&s)
|
||||
return System{
|
||||
Version: runtime.Version(),
|
||||
GoroutinesCount: runtime.NumGoroutine(),
|
||||
TotalAllocBytes: int(s.TotalAlloc),
|
||||
HeapObjectsCount: int(s.HeapObjects),
|
||||
AllocBytes: int(s.Alloc),
|
||||
}
|
||||
}
|
||||
|
||||
func getAvailability(s Status, c Config) Status {
|
||||
if c.SkipOnErr && s != StatusUnavailable {
|
||||
return StatusPartiallyAvailable
|
||||
}
|
||||
|
||||
return StatusUnavailable
|
||||
}
|
||||
Reference in New Issue
Block a user