Instrument check scheduler with Prometheus metrics
Track queue depth on enqueue and pop, active worker count, check execution duration per checker, and check result status counters.
This commit is contained in:
parent
e59c0a3594
commit
94d9d03b9d
1 changed file with 13 additions and 0 deletions
|
|
@@ -30,6 +30,7 @@ import (
|
|||
"sync"
|
||||
"time"
|
||||
|
||||
"git.happydns.org/happyDomain/internal/metrics"
|
||||
"git.happydns.org/happyDomain/internal/storage"
|
||||
"git.happydns.org/happyDomain/model"
|
||||
)
|
||||
|
|
@@ -192,6 +193,7 @@ func newCheckScheduler(
|
|||
// enqueue pushes an item to the priority queue and wakes one idle worker.
|
||||
func (s *checkScheduler) enqueue(item *queueItem) {
|
||||
s.queue.Push(item)
|
||||
metrics.SchedulerQueueDepth.Set(float64(s.queue.Len()))
|
||||
select {
|
||||
case s.workAvail <- struct{}{}:
|
||||
default:
|
||||
|
|
@@ -470,6 +472,7 @@ func (w *worker) run(wg *sync.WaitGroup) {
|
|||
for {
|
||||
// Drain: try to grab work before blocking.
|
||||
if item := w.scheduler.queue.Pop(); item != nil {
|
||||
metrics.SchedulerQueueDepth.Set(float64(w.scheduler.queue.Len()))
|
||||
w.executeCheck(item)
|
||||
continue
|
||||
}
|
||||
|
|
@@ -493,6 +496,13 @@ func (w *worker) executeCheck(item *queueItem) {
|
|||
execution := item.execution
|
||||
schedule := item.schedule
|
||||
|
||||
metrics.SchedulerActiveWorkers.Inc()
|
||||
checkStart := time.Now()
|
||||
defer func() {
|
||||
metrics.SchedulerActiveWorkers.Dec()
|
||||
metrics.SchedulerCheckDuration.WithLabelValues(schedule.CheckerName).Observe(time.Since(checkStart).Seconds())
|
||||
}()
|
||||
|
||||
// Always update schedule NextRun after execution, whether it succeeds or fails.
|
||||
// This prevents the schedule from being re-queued on the next tick if the test fails.
|
||||
if execution.ScheduleId != nil {
|
||||
|
|
@@ -594,6 +604,9 @@ func (w *worker) executeCheck(item *queueItem) {
|
|||
}
|
||||
}
|
||||
|
||||
// Record check status metric
|
||||
metrics.SchedulerChecksTotal.WithLabelValues(schedule.CheckerName, result.Status.String()).Inc()
|
||||
|
||||
// Save the result
|
||||
if err := w.scheduler.resultUsecase.CreateCheckResult(result); err != nil {
|
||||
log.Printf("Worker %d: Error saving test result: %v\n", w.id, err)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue