Instrument check scheduler with Prometheus metrics

Track queue depth on enqueue and pop, active worker count, check execution duration per checker, and check result status counters.
2026-03-02 00:12:28 +07:00 · 2026-03-02 00:12:28 +07:00 · 94d9d03b9d
commit 94d9d03b9d
parent e59c0a3594
1 changed file with 13 additions and 0 deletions
--- a/internal/app/checkscheduler.go
+++ b/internal/app/checkscheduler.go
@ -30,6 +30,7 @@ import (
 	"sync"
 	"time"
 	"git.happydns.org/happyDomain/internal/metrics"
 	"git.happydns.org/happyDomain/internal/storage"
 	"git.happydns.org/happyDomain/model"
 )
@ -192,6 +193,7 @@ func newCheckScheduler(
 // enqueue pushes an item to the priority queue and wakes one idle worker.
 func (s *checkScheduler) enqueue(item *queueItem) {
 	s.queue.Push(item)
 	metrics.SchedulerQueueDepth.Set(float64(s.queue.Len()))
 	select {
 	case s.workAvail <- struct{}{}:
 	default:
@ -470,6 +472,7 @@ func (w *worker) run(wg *sync.WaitGroup) {
 	for {
 		// Drain: try to grab work before blocking.
 		if item := w.scheduler.queue.Pop(); item != nil {
 			metrics.SchedulerQueueDepth.Set(float64(w.scheduler.queue.Len()))
 			w.executeCheck(item)
 			continue
 		}
@ -493,6 +496,13 @@ func (w *worker) executeCheck(item *queueItem) {
 	execution := item.execution
 	schedule := item.schedule
 	metrics.SchedulerActiveWorkers.Inc()
 	checkStart := time.Now()
 	defer func() {
 		metrics.SchedulerActiveWorkers.Dec()
 		metrics.SchedulerCheckDuration.WithLabelValues(schedule.CheckerName).Observe(time.Since(checkStart).Seconds())
 	}()
 	// Always update schedule NextRun after execution, whether it succeeds or fails.
 	// This prevents the schedule from being re-queued on the next tick if the check fails.
 	if execution.ScheduleId != nil {
@ -594,6 +604,9 @@ func (w *worker) executeCheck(item *queueItem) {
 		}
 	}
 	// Record check status metric
 	metrics.SchedulerChecksTotal.WithLabelValues(schedule.CheckerName, result.Status.String()).Inc()
 	// Save the result
 	if err := w.scheduler.resultUsecase.CreateCheckResult(result); err != nil {
 		log.Printf("Worker %d: Error saving test result: %v\n", w.id, err)