LFG
Some checks failed
CI/CD Pipeline / Run Tests (push) Has been cancelled
CI/CD Pipeline / Build Application (push) Has been cancelled
CI/CD Pipeline / Build Docker Image (push) Has been cancelled
CI/CD Pipeline / Security Scan (push) Has been cancelled
CI/CD Pipeline / Create Release (push) Has been cancelled
Some checks failed
CI/CD Pipeline / Run Tests (push) Has been cancelled
CI/CD Pipeline / Build Application (push) Has been cancelled
CI/CD Pipeline / Build Docker Image (push) Has been cancelled
CI/CD Pipeline / Security Scan (push) Has been cancelled
CI/CD Pipeline / Create Release (push) Has been cancelled
This commit is contained in:
266
internal/monitoring/monitoring.go
Normal file
266
internal/monitoring/monitoring.go
Normal file
@@ -0,0 +1,266 @@
|
||||
package monitoring
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/iwasforcedtobehere/goRZ/internal/config"
|
||||
"github.com/iwasforcedtobehere/goRZ/internal/logger"
|
||||
)
|
||||
|
||||
// Metrics represents the application metrics
|
||||
type Metrics struct {
|
||||
RequestsTotal int64 `json:"requests_total"`
|
||||
RequestsActive int64 `json:"requests_active"`
|
||||
ResponsesByStatus map[string]int64 `json:"responses_by_status"`
|
||||
TargetMetrics map[string]*TargetMetric `json:"target_metrics"`
|
||||
StartTime time.Time `json:"start_time"`
|
||||
LastUpdated time.Time `json:"last_updated"`
|
||||
mu sync.RWMutex `json:"-"`
|
||||
}
|
||||
|
||||
// TargetMetric represents metrics for a specific target
|
||||
type TargetMetric struct {
|
||||
RequestsTotal int64 `json:"requests_total"`
|
||||
ResponsesByStatus map[string]int64 `json:"responses_by_status"`
|
||||
ResponseTimes []time.Duration `json:"response_times"`
|
||||
AvgResponseTime time.Duration `json:"avg_response_time"`
|
||||
Healthy bool `json:"healthy"`
|
||||
LastChecked time.Time `json:"last_checked"`
|
||||
}
|
||||
|
||||
// Monitor represents the monitoring service
|
||||
type Monitor struct {
|
||||
config *config.Config
|
||||
logger *logger.Logger
|
||||
metrics *Metrics
|
||||
server *http.Server
|
||||
authHandler http.Handler
|
||||
}
|
||||
|
||||
// NewMonitor creates a new monitoring service
|
||||
func NewMonitor(cfg *config.Config, logger *logger.Logger) *Monitor {
|
||||
metrics := &Metrics{
|
||||
ResponsesByStatus: make(map[string]int64),
|
||||
TargetMetrics: make(map[string]*TargetMetric),
|
||||
StartTime: time.Now(),
|
||||
LastUpdated: time.Now(),
|
||||
}
|
||||
|
||||
// Initialize target metrics
|
||||
for _, target := range cfg.Proxy.Targets {
|
||||
metrics.TargetMetrics[target.Name] = &TargetMetric{
|
||||
ResponsesByStatus: make(map[string]int64),
|
||||
ResponseTimes: make([]time.Duration, 0),
|
||||
Healthy: target.Healthy,
|
||||
LastChecked: time.Now(),
|
||||
}
|
||||
}
|
||||
|
||||
monitor := &Monitor{
|
||||
config: cfg,
|
||||
logger: logger,
|
||||
metrics: metrics,
|
||||
}
|
||||
|
||||
// Set up authentication if enabled
|
||||
if cfg.Monitor.Auth {
|
||||
monitor.authHandler = monitor.basicAuthHandler(monitor.metricsHandler)
|
||||
} else {
|
||||
monitor.authHandler = monitor.metricsHandler
|
||||
}
|
||||
|
||||
return monitor
|
||||
}
|
||||
|
||||
// Start starts the monitoring service
|
||||
func (m *Monitor) Start() error {
|
||||
if !m.config.Monitor.Enabled {
|
||||
m.logger.Info("Monitoring is disabled")
|
||||
return nil
|
||||
}
|
||||
|
||||
// Create HTTP server
|
||||
m.server = &http.Server{
|
||||
Addr: fmt.Sprintf(":%d", m.config.Monitor.Port),
|
||||
Handler: m.authHandler,
|
||||
}
|
||||
|
||||
// Start server in a goroutine
|
||||
go func() {
|
||||
m.logger.Info("Monitoring server starting", logger.String("address", m.server.Addr))
|
||||
if err := m.server.ListenAndServe(); err != nil && err != http.ErrServerClosed {
|
||||
m.logger.Error("Monitoring server failed", logger.Error(err))
|
||||
}
|
||||
}()
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Stop stops the monitoring service
|
||||
func (m *Monitor) Stop() {
|
||||
if m.server != nil {
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
|
||||
defer cancel()
|
||||
m.server.Shutdown(ctx)
|
||||
m.logger.Info("Monitoring server stopped")
|
||||
}
|
||||
}
|
||||
|
||||
// metricsHandler handles HTTP requests for metrics
|
||||
func (m *Monitor) metricsHandler(w http.ResponseWriter, r *http.Request) {
|
||||
if r.URL.Path != m.config.Monitor.Path {
|
||||
http.NotFound(w, r)
|
||||
return
|
||||
}
|
||||
|
||||
m.metrics.mu.RLock()
|
||||
defer m.metrics.mu.RUnlock()
|
||||
|
||||
// Update last updated time
|
||||
m.metrics.LastUpdated = time.Now()
|
||||
|
||||
// Set content type
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
|
||||
// Encode metrics as JSON
|
||||
if err := json.NewEncoder(w).Encode(m.metrics); err != nil {
|
||||
m.logger.Error("Failed to encode metrics", logger.Error(err))
|
||||
http.Error(w, "Internal server error", http.StatusInternalServerError)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
// basicAuthHandler wraps a handler with basic authentication
|
||||
func (m *Monitor) basicAuthHandler(next http.HandlerFunc) http.HandlerFunc {
|
||||
return func(w http.ResponseWriter, r *http.Request) {
|
||||
username, password, ok := r.BasicAuth()
|
||||
if !ok || username != m.config.Monitor.Username || password != m.config.Monitor.Password {
|
||||
w.Header().Set("WWW-Authenticate", `Basic realm="goRZ Monitor"`)
|
||||
http.Error(w, "Unauthorized", http.StatusUnauthorized)
|
||||
return
|
||||
}
|
||||
next(w, r)
|
||||
}
|
||||
}
|
||||
|
||||
// IncrementRequest increments the total request count
|
||||
func (m *Monitor) IncrementRequest() {
|
||||
m.metrics.mu.Lock()
|
||||
defer m.metrics.mu.Unlock()
|
||||
|
||||
m.metrics.RequestsTotal++
|
||||
m.metrics.LastUpdated = time.Now()
|
||||
}
|
||||
|
||||
// IncrementActiveRequest increments the active request count
|
||||
func (m *Monitor) IncrementActiveRequest() {
|
||||
m.metrics.mu.Lock()
|
||||
defer m.metrics.mu.Unlock()
|
||||
|
||||
m.metrics.RequestsActive++
|
||||
m.metrics.LastUpdated = time.Now()
|
||||
}
|
||||
|
||||
// DecrementActiveRequest decrements the active request count
|
||||
func (m *Monitor) DecrementActiveRequest() {
|
||||
m.metrics.mu.Lock()
|
||||
defer m.metrics.mu.Unlock()
|
||||
|
||||
m.metrics.RequestsActive--
|
||||
if m.metrics.RequestsActive < 0 {
|
||||
m.metrics.RequestsActive = 0
|
||||
}
|
||||
m.metrics.LastUpdated = time.Now()
|
||||
}
|
||||
|
||||
// RecordResponse records a response with the given status code
|
||||
func (m *Monitor) RecordResponse(statusCode int, targetName string, responseTime time.Duration) {
|
||||
m.metrics.mu.Lock()
|
||||
defer m.metrics.mu.Unlock()
|
||||
|
||||
status := fmt.Sprintf("%d", statusCode)
|
||||
m.metrics.ResponsesByStatus[status]++
|
||||
|
||||
// Update target metrics
|
||||
if target, exists := m.metrics.TargetMetrics[targetName]; exists {
|
||||
target.RequestsTotal++
|
||||
target.ResponsesByStatus[status]++
|
||||
|
||||
// Keep only the last 100 response times for average calculation
|
||||
if len(target.ResponseTimes) >= 100 {
|
||||
target.ResponseTimes = target.ResponseTimes[1:]
|
||||
}
|
||||
target.ResponseTimes = append(target.ResponseTimes, responseTime)
|
||||
|
||||
// Calculate average response time
|
||||
var total time.Duration
|
||||
for _, rt := range target.ResponseTimes {
|
||||
total += rt
|
||||
}
|
||||
target.AvgResponseTime = total / time.Duration(len(target.ResponseTimes))
|
||||
}
|
||||
|
||||
m.metrics.LastUpdated = time.Now()
|
||||
}
|
||||
|
||||
// UpdateTargetHealth updates the health status of a target
|
||||
func (m *Monitor) UpdateTargetHealth(targetName string, healthy bool) {
|
||||
m.metrics.mu.Lock()
|
||||
defer m.metrics.mu.Unlock()
|
||||
|
||||
if target, exists := m.metrics.TargetMetrics[targetName]; exists {
|
||||
target.Healthy = healthy
|
||||
target.LastChecked = time.Now()
|
||||
}
|
||||
|
||||
m.metrics.LastUpdated = time.Now()
|
||||
}
|
||||
|
||||
// GetMetrics returns a copy of the current metrics
|
||||
func (m *Monitor) GetMetrics() Metrics {
|
||||
m.metrics.mu.RLock()
|
||||
defer m.metrics.mu.RUnlock()
|
||||
|
||||
// Create a deep copy of the metrics
|
||||
metrics := Metrics{
|
||||
RequestsTotal: m.metrics.RequestsTotal,
|
||||
RequestsActive: m.metrics.RequestsActive,
|
||||
ResponsesByStatus: make(map[string]int64),
|
||||
TargetMetrics: make(map[string]*TargetMetric),
|
||||
StartTime: m.metrics.StartTime,
|
||||
LastUpdated: m.metrics.LastUpdated,
|
||||
}
|
||||
|
||||
// Copy response status counts
|
||||
for k, v := range m.metrics.ResponsesByStatus {
|
||||
metrics.ResponsesByStatus[k] = v
|
||||
}
|
||||
|
||||
// Copy target metrics
|
||||
for k, v := range m.metrics.TargetMetrics {
|
||||
targetMetric := &TargetMetric{
|
||||
RequestsTotal: v.RequestsTotal,
|
||||
ResponsesByStatus: make(map[string]int64),
|
||||
ResponseTimes: make([]time.Duration, len(v.ResponseTimes)),
|
||||
AvgResponseTime: v.AvgResponseTime,
|
||||
Healthy: v.Healthy,
|
||||
LastChecked: v.LastChecked,
|
||||
}
|
||||
|
||||
// Copy response status counts for target
|
||||
for rk, rv := range v.ResponsesByStatus {
|
||||
targetMetric.ResponsesByStatus[rk] = rv
|
||||
}
|
||||
|
||||
// Copy response times
|
||||
copy(targetMetric.ResponseTimes, v.ResponseTimes)
|
||||
|
||||
metrics.TargetMetrics[k] = targetMetric
|
||||
}
|
||||
|
||||
return metrics
|
||||
}
|
Reference in New Issue
Block a user