Add admin API and frontend for scheduler management

This commit is contained in:
nemunaire 2026-02-11 11:21:20 +07:00
commit e5fa392c76
12 changed files with 823 additions and 22 deletions

View file

@ -0,0 +1,102 @@
// This file is part of the happyDomain (R) project.
// Copyright (c) 2020-2026 happyDomain
// Authors: Pierre-Olivier Mercier, et al.
//
// This program is offered under a commercial and under the AGPL license.
// For commercial licensing, contact us at <contact@happydomain.org>.
//
// For AGPL licensing:
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
package controller
import (
"net/http"
"github.com/gin-gonic/gin"
"git.happydns.org/happyDomain/model"
)
// AdminSchedulerController handles admin operations on the test scheduler
type AdminSchedulerController struct {
scheduler happydns.SchedulerUsecase
}
func NewAdminSchedulerController(scheduler happydns.SchedulerUsecase) *AdminSchedulerController {
return &AdminSchedulerController{scheduler: scheduler}
}
// GetSchedulerStatus returns the current scheduler state
//
// @Summary Get scheduler status
// @Description Returns the current state of the test scheduler including worker count, queue size, and upcoming schedules
// @Tags scheduler
// @Produce json
// @Success 200 {object} happydns.SchedulerStatus
// @Router /scheduler [get]
func (ctrl *AdminSchedulerController) GetSchedulerStatus(c *gin.Context) {
c.JSON(http.StatusOK, ctrl.scheduler.GetSchedulerStatus())
}
// EnableScheduler enables the test scheduler at runtime
//
// @Summary Enable scheduler
// @Description Enables the test scheduler at runtime without restarting the server
// @Tags scheduler
// @Success 200 {object} happydns.SchedulerStatus
// @Failure 500 {object} happydns.ErrorResponse
// @Router /scheduler/enable [post]
func (ctrl *AdminSchedulerController) EnableScheduler(c *gin.Context) {
if err := ctrl.scheduler.SetEnabled(true); err != nil {
c.JSON(http.StatusInternalServerError, happydns.ErrorResponse{Message: err.Error()})
return
}
c.JSON(http.StatusOK, ctrl.scheduler.GetSchedulerStatus())
}
// DisableScheduler disables the test scheduler at runtime
//
// @Summary Disable scheduler
// @Description Disables the test scheduler at runtime without restarting the server
// @Tags scheduler
// @Success 200 {object} happydns.SchedulerStatus
// @Failure 500 {object} happydns.ErrorResponse
// @Router /scheduler/disable [post]
func (ctrl *AdminSchedulerController) DisableScheduler(c *gin.Context) {
if err := ctrl.scheduler.SetEnabled(false); err != nil {
c.JSON(http.StatusInternalServerError, happydns.ErrorResponse{Message: err.Error()})
return
}
c.JSON(http.StatusOK, ctrl.scheduler.GetSchedulerStatus())
}
// RescheduleUpcoming randomizes the next run time of all enabled schedules
// within their respective intervals to spread load evenly.
//
// @Summary Reschedule upcoming tests
// @Description Randomizes the next run time of all enabled schedules within their intervals to spread load
// @Tags scheduler
// @Produce json
// @Success 200 {object} map[string]int
// @Failure 500 {object} happydns.ErrorResponse
// @Router /scheduler/reschedule-upcoming [post]
func (ctrl *AdminSchedulerController) RescheduleUpcoming(c *gin.Context) {
n, err := ctrl.scheduler.RescheduleUpcomingChecks()
if err != nil {
c.JSON(http.StatusInternalServerError, happydns.ErrorResponse{Message: err.Error()})
return
}
c.JSON(http.StatusOK, gin.H{"rescheduled": n})
}

View file

@ -33,6 +33,7 @@ import (
type Dependencies struct { type Dependencies struct {
AuthUser happydns.AuthUserUsecase AuthUser happydns.AuthUserUsecase
Checker happydns.CheckerUsecase Checker happydns.CheckerUsecase
CheckScheduler happydns.SchedulerUsecase
Domain happydns.DomainUsecase Domain happydns.DomainUsecase
Provider happydns.ProviderUsecase Provider happydns.ProviderUsecase
RemoteZoneImporter happydns.RemoteZoneImporterUsecase RemoteZoneImporter happydns.RemoteZoneImporterUsecase
@ -51,6 +52,7 @@ func DeclareRoutes(cfg *happydns.Options, router *gin.Engine, s storage.Storage,
declareDomainRoutes(apiRoutes, dep, s) declareDomainRoutes(apiRoutes, dep, s)
declareChecksRoutes(apiRoutes, dep) declareChecksRoutes(apiRoutes, dep)
declareProviderRoutes(apiRoutes, dep, s) declareProviderRoutes(apiRoutes, dep, s)
declareSchedulerRoutes(apiRoutes, dep)
declareSessionsRoutes(cfg, apiRoutes, s) declareSessionsRoutes(cfg, apiRoutes, s)
declareUserAuthsRoutes(apiRoutes, dep, s) declareUserAuthsRoutes(apiRoutes, dep, s)
declareUsersRoutes(apiRoutes, dep, s) declareUsersRoutes(apiRoutes, dep, s)

View file

@ -0,0 +1,38 @@
// This file is part of the happyDomain (R) project.
// Copyright (c) 2020-2026 happyDomain
// Authors: Pierre-Olivier Mercier, et al.
//
// This program is offered under a commercial and under the AGPL license.
// For commercial licensing, contact us at <contact@happydomain.org>.
//
// For AGPL licensing:
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
package route
import (
"github.com/gin-gonic/gin"
"git.happydns.org/happyDomain/internal/api-admin/controller"
)
func declareSchedulerRoutes(router *gin.RouterGroup, dep Dependencies) {
ctrl := controller.NewAdminSchedulerController(dep.CheckScheduler)
schedulerRoute := router.Group("/scheduler")
schedulerRoute.GET("", ctrl.GetSchedulerStatus)
schedulerRoute.POST("/enable", ctrl.EnableScheduler)
schedulerRoute.POST("/disable", ctrl.DisableScheduler)
schedulerRoute.POST("/reschedule-upcoming", ctrl.RescheduleUpcoming)
}

View file

@ -63,6 +63,7 @@ func NewAdmin(app *App) *Admin {
admin.Dependencies{ admin.Dependencies{
AuthUser: app.usecases.authUser, AuthUser: app.usecases.authUser,
Checker: app.usecases.checker, Checker: app.usecases.checker,
CheckScheduler: app.checkScheduler,
Domain: app.usecases.domain, Domain: app.usecases.domain,
Provider: app.usecases.providerAdmin, Provider: app.usecases.providerAdmin,
RemoteZoneImporter: app.usecases.orchestrator.RemoteZoneImporter, RemoteZoneImporter: app.usecases.orchestrator.RemoteZoneImporter,

View file

@ -265,7 +265,7 @@ func (app *App) initUsecases() {
app.usecases.resolver = usecase.NewResolverUsecase(app.cfg) app.usecases.resolver = usecase.NewResolverUsecase(app.cfg)
app.usecases.session = sessionService app.usecases.session = sessionService
app.usecases.checker = checkUC.NewCheckerUsecase(app.cfg, app.store) app.usecases.checker = checkUC.NewCheckerUsecase(app.cfg, app.store)
app.usecases.checkerSchedule = checkresultUC.NewCheckScheduleUsecase(app.store, app.cfg, app.usecases.checker) app.usecases.checkerSchedule = checkresultUC.NewCheckScheduleUsecase(app.store, app.cfg, app.store, app.usecases.checker)
app.usecases.checkResult = checkresultUC.NewCheckResultUsecase(app.store, app.cfg, app.usecases.checker, app.usecases.checkerSchedule) app.usecases.checkResult = checkresultUC.NewCheckResultUsecase(app.store, app.cfg, app.usecases.checker, app.usecases.checkerSchedule)
app.usecases.orchestrator = orchestrator.NewOrchestrator( app.usecases.orchestrator = orchestrator.NewOrchestrator(

View file

@ -35,10 +35,11 @@ import (
) )
const ( const (
SchedulerCheckInterval = 1 * time.Minute // How often to check for due tests SchedulerCheckInterval = 1 * time.Minute // How often to check for due tests
SchedulerCleanupInterval = 24 * time.Hour // How often to clean up old executions SchedulerCleanupInterval = 24 * time.Hour // How often to clean up old executions
CheckExecutionTimeout = 5 * time.Minute // Max time for a single test SchedulerDiscoveryInterval = 1 * time.Hour // How often to auto-discover new targets
MaxRetries = 3 // Max retry attempts for failed tests CheckExecutionTimeout = 5 * time.Minute // Max time for a single check
MaxRetries = 3 // Max retry attempts for failed checks
) )
// Priority levels for test execution queue // Priority levels for test execution queue
@ -64,6 +65,8 @@ type checkScheduler struct {
workers []*worker workers []*worker
mu sync.RWMutex mu sync.RWMutex
wg sync.WaitGroup wg sync.WaitGroup
runtimeEnabled bool
running bool
} }
// activeExecution tracks a running test execution // activeExecution tracks a running test execution
@ -173,6 +176,7 @@ func newCheckScheduler(
queue: newPriorityQueue(), queue: newPriorityQueue(),
activeExecutions: make(map[string]*activeExecution), activeExecutions: make(map[string]*activeExecution),
workers: make([]*worker, numWorkers), workers: make([]*worker, numWorkers),
runtimeEnabled: true,
} }
for i := 0; i < numWorkers; i++ { for i := 0; i < numWorkers; i++ {
@ -221,8 +225,27 @@ func (s *checkScheduler) Close() {
// Run starts the scheduler main loop. It must not be called more than once. // Run starts the scheduler main loop. It must not be called more than once.
func (s *checkScheduler) Run() { func (s *checkScheduler) Run() {
s.mu.Lock()
s.running = true
s.mu.Unlock()
defer func() {
s.mu.Lock()
s.running = false
s.mu.Unlock()
}()
log.Printf("Starting test scheduler with %d workers...\n", len(s.workers)) log.Printf("Starting test scheduler with %d workers...\n", len(s.workers))
// Reschedule overdue tests before starting workers so that tests missed
// during a server suspend or shutdown are spread into the near future
// instead of all firing at once.
if n, err := s.scheduleUsecase.RescheduleOverdueChecks(); err != nil {
log.Printf("Warning: failed to reschedule overdue tests: %v\n", err)
} else if n > 0 {
log.Printf("Rescheduled %d overdue test(s) into the near future\n", n)
}
// Start workers // Start workers
for _, w := range s.workers { for _, w := range s.workers {
s.wg.Add(1) s.wg.Add(1)
@ -232,9 +255,15 @@ func (s *checkScheduler) Run() {
// Main scheduling loop // Main scheduling loop
checkTicker := time.NewTicker(SchedulerCheckInterval) checkTicker := time.NewTicker(SchedulerCheckInterval)
cleanupTicker := time.NewTicker(SchedulerCleanupInterval) cleanupTicker := time.NewTicker(SchedulerCleanupInterval)
discoveryTicker := time.NewTicker(SchedulerDiscoveryInterval)
defer checkTicker.Stop() defer checkTicker.Stop()
defer cleanupTicker.Stop() defer cleanupTicker.Stop()
defer discoveryTicker.Stop()
// Initial discovery: create default schedules for all existing targets
if err := s.scheduleUsecase.DiscoverAndEnsureSchedules(); err != nil {
log.Printf("Warning: schedule discovery encountered errors: %v\n", err)
}
// Initial check // Initial check
s.checkSchedules() s.checkSchedules()
@ -246,6 +275,11 @@ func (s *checkScheduler) Run() {
case <-cleanupTicker.C: case <-cleanupTicker.C:
s.cleanup() s.cleanup()
case <-discoveryTicker.C:
if err := s.scheduleUsecase.DiscoverAndEnsureSchedules(); err != nil {
log.Printf("Warning: schedule discovery encountered errors: %v\n", err)
}
case item := <-s.runNowChan: case item := <-s.runNowChan:
s.enqueue(item) s.enqueue(item)
@ -257,6 +291,13 @@ func (s *checkScheduler) Run() {
// checkSchedules checks for due tests and queues them // checkSchedules checks for due tests and queues them
func (s *checkScheduler) checkSchedules() { func (s *checkScheduler) checkSchedules() {
s.mu.RLock()
enabled := s.runtimeEnabled
s.mu.RUnlock()
if !enabled {
return
}
dueSchedules, err := s.scheduleUsecase.ListDueSchedules() dueSchedules, err := s.scheduleUsecase.ListDueSchedules()
if err != nil { if err != nil {
log.Printf("Error listing due schedules: %v\n", err) log.Printf("Error listing due schedules: %v\n", err)
@ -354,6 +395,54 @@ func (s *checkScheduler) TriggerOnDemandCheck(checkerName string, targetType hap
return execution.Id, nil return execution.Id, nil
} }
// GetSchedulerStatus returns a snapshot of the current scheduler state
func (s *checkScheduler) GetSchedulerStatus() happydns.SchedulerStatus {
s.mu.RLock()
activeCount := len(s.activeExecutions)
running := s.running
runtimeEnabled := s.runtimeEnabled
s.mu.RUnlock()
nextSchedules, _ := s.scheduleUsecase.ListUpcomingSchedules(20)
return happydns.SchedulerStatus{
ConfigEnabled: !s.cfg.DisableScheduler,
RuntimeEnabled: runtimeEnabled,
Running: running,
WorkerCount: len(s.workers),
QueueSize: s.queue.Len(),
ActiveCount: activeCount,
NextSchedules: nextSchedules,
}
}
// SetEnabled enables or disables the scheduler at runtime
func (s *checkScheduler) SetEnabled(enabled bool) error {
s.mu.Lock()
wasEnabled := s.runtimeEnabled
s.runtimeEnabled = enabled
s.mu.Unlock()
if enabled && !wasEnabled {
// Spread out any overdue tests to avoid a thundering herd, then
// immediately enqueue whatever is now due.
if n, err := s.scheduleUsecase.RescheduleOverdueChecks(); err != nil {
log.Printf("Warning: failed to reschedule overdue tests on re-enable: %v\n", err)
} else if n > 0 {
log.Printf("Rescheduled %d overdue test(s) after scheduler re-enable\n", n)
}
s.checkSchedules()
}
return nil
}
// RescheduleUpcomingChecks randomizes the next run time of all enabled schedules
// within their respective intervals, delegating to the schedule usecase.
func (s *checkScheduler) RescheduleUpcomingChecks() (int, error) {
return s.scheduleUsecase.RescheduleUpcomingChecks()
}
// cleanup removes old execution records and expired test results // cleanup removes old execution records and expired test results
func (s *checkScheduler) cleanup() { func (s *checkScheduler) cleanup() {
log.Println("Running scheduler cleanup...") log.Println("Running scheduler cleanup...")
@ -404,10 +493,21 @@ func (w *worker) executeCheck(item *queueItem) {
execution := item.execution execution := item.execution
schedule := item.schedule schedule := item.schedule
// Always update schedule NextRun after execution, whether it succeeds or fails.
// This prevents the schedule from being re-queued on the next tick if the test fails.
if item.execution.ScheduleId != nil {
defer func() {
if err := w.scheduler.scheduleUsecase.UpdateScheduleAfterRun(*item.execution.ScheduleId); err != nil {
log.Printf("Worker %d: Error updating schedule after run: %v\n", w.id, err)
}
}()
}
// Mark execution as running // Mark execution as running
execution.Status = happydns.CheckExecutionRunning execution.Status = happydns.CheckExecutionRunning
if err := w.scheduler.resultUsecase.UpdateCheckExecution(execution); err != nil { if err := w.scheduler.resultUsecase.UpdateCheckExecution(execution); err != nil {
log.Printf("Worker %d: Error updating execution status: %v\n", w.id, err) log.Printf("Worker %d: Error updating execution status: %v\n", w.id, err)
_ = w.scheduler.resultUsecase.FailCheckExecution(execution.Id, err.Error())
return return
} }
@ -436,6 +536,13 @@ func (w *worker) executeCheck(item *queueItem) {
return return
} }
// Merge options: global defaults < user opts < domain/service opts < schedule opts
mergedOptions, err := w.scheduler.scheduleUsecase.PrepareCheckOptions(schedule)
if err != nil {
// Non-fatal: PrepareTestOptions already falls back to schedule-only options
log.Printf("Worker %d: warning, could not prepare plugin options for %s: %v\n", w.id, schedule.CheckerName, err)
}
// Prepare metadata // Prepare metadata
meta := map[string]string{ meta := map[string]string{
"target_type": schedule.TargetType.String(), "target_type": schedule.TargetType.String(),
@ -444,7 +551,7 @@ func (w *worker) executeCheck(item *queueItem) {
// Run the check synchronously with context-based timeout. // Run the check synchronously with context-based timeout.
// The checker is responsible for honouring ctx cancellation. // The checker is responsible for honouring ctx cancellation.
checkResult, testErr := w.runCheckSafe(ctx, checker, schedule.Options, meta) checkResult, testErr := w.runCheckSafe(ctx, checker, mergedOptions, meta)
duration := time.Since(startTime) duration := time.Since(startTime)
@ -491,14 +598,7 @@ func (w *worker) executeCheck(item *queueItem) {
return return
} }
// Update schedule if this was a scheduled test log.Printf("Worker %d: Completed test %s for target %s (status: %d, duration: %v)\n",
if execution.ScheduleId != nil {
if err := w.scheduler.scheduleUsecase.UpdateScheduleAfterRun(*execution.ScheduleId); err != nil {
log.Printf("Worker %d: Error updating schedule: %v\n", w.id, err)
}
}
log.Printf("Worker %d: Completed test %s for target %s (status: %s, duration: %v)\n",
w.id, schedule.CheckerName, schedule.TargetId.String(), result.Status, duration) w.id, schedule.CheckerName, schedule.TargetId.String(), result.Status, duration)
} }

View file

@ -37,3 +37,22 @@ func (d *disabledScheduler) Close() {}
func (d *disabledScheduler) TriggerOnDemandCheck(checkName string, targetType happydns.CheckScopeType, targetId happydns.Identifier, userId happydns.Identifier, options happydns.CheckerOptions) (happydns.Identifier, error) { func (d *disabledScheduler) TriggerOnDemandCheck(checkName string, targetType happydns.CheckScopeType, targetId happydns.Identifier, userId happydns.Identifier, options happydns.CheckerOptions) (happydns.Identifier, error) {
return happydns.Identifier{}, fmt.Errorf("test scheduler is disabled in configuration") return happydns.Identifier{}, fmt.Errorf("test scheduler is disabled in configuration")
} }
// GetSchedulerStatus returns a status indicating the scheduler is disabled
func (d *disabledScheduler) GetSchedulerStatus() happydns.SchedulerStatus {
return happydns.SchedulerStatus{
ConfigEnabled: false,
RuntimeEnabled: false,
Running: false,
}
}
// SetEnabled returns an error since the scheduler is disabled in configuration
func (d *disabledScheduler) SetEnabled(enabled bool) error {
return fmt.Errorf("scheduler is disabled in configuration, cannot enable at runtime")
}
// RescheduleUpcomingChecks returns an error since the scheduler is disabled
func (d *disabledScheduler) RescheduleUpcomingChecks() (int, error) {
return 0, fmt.Errorf("test scheduler is disabled in configuration")
}

View file

@ -22,7 +22,9 @@
package checkresult package checkresult
import ( import (
"errors"
"fmt" "fmt"
"math/rand"
"sort" "sort"
"time" "time"
@ -41,14 +43,16 @@ const (
type CheckScheduleUsecase struct { type CheckScheduleUsecase struct {
storage CheckResultStorage storage CheckResultStorage
options *happydns.Options options *happydns.Options
domainLister DomainLister
checkerUsecase happydns.CheckerUsecase checkerUsecase happydns.CheckerUsecase
} }
// NewCheckScheduleUsecase creates a new check schedule usecase // NewCheckScheduleUsecase creates a new check schedule usecase
func NewCheckScheduleUsecase(storage CheckResultStorage, options *happydns.Options, checkerUsecase happydns.CheckerUsecase) *CheckScheduleUsecase { func NewCheckScheduleUsecase(storage CheckResultStorage, options *happydns.Options, domainLister DomainLister, checkerUsecase happydns.CheckerUsecase) *CheckScheduleUsecase {
return &CheckScheduleUsecase{ return &CheckScheduleUsecase{
storage: storage, storage: storage,
options: options, options: options,
domainLister: domainLister,
checkerUsecase: checkerUsecase, checkerUsecase: checkerUsecase,
} }
} }
@ -80,14 +84,17 @@ func (u *CheckScheduleUsecase) CreateSchedule(schedule *happydns.CheckerSchedule
return err return err
} }
// Calculate next run time // Validate interval
if schedule.NextRun.IsZero() { if schedule.Interval < MinimumCheckInterval {
schedule.NextRun = time.Now().Add(schedule.Interval) return fmt.Errorf("check interval must be at least %v", MinimumCheckInterval)
} }
// Enable by default if not specified // Calculate next run time: pick a random offset within the interval
if !schedule.Enabled { // to spread load evenly across all schedules
schedule.Enabled = true // TODO: Use a smarter load balance function in the future
if schedule.NextRun.IsZero() {
offset := time.Duration(rand.Int63n(int64(schedule.Interval)))
schedule.NextRun = time.Now().Add(offset)
} }
return u.storage.CreateCheckerSchedule(schedule) return u.storage.CreateCheckerSchedule(schedule)
@ -181,7 +188,7 @@ func (u *CheckScheduleUsecase) ListDueSchedules() ([]*happydns.CheckerSchedule,
var dueSchedules []*happydns.CheckerSchedule var dueSchedules []*happydns.CheckerSchedule
for _, schedule := range schedules { for _, schedule := range schedules {
if schedule.Enabled && schedule.NextRun.Before(now) { if schedule.NextRun.Before(now) {
dueSchedules = append(dueSchedules, schedule) dueSchedules = append(dueSchedules, schedule)
} }
} }
@ -300,6 +307,77 @@ func (u *CheckScheduleUsecase) CreateDefaultSchedulesForTarget(
return u.CreateSchedule(schedule) return u.CreateSchedule(schedule)
} }
// rescheduleChecks reschedules each given schedule to a random time in [now, now+maxOffsetFn(schedule)].
func (u *CheckScheduleUsecase) rescheduleChecks(schedules []*happydns.CheckerSchedule, maxOffsetFn func(*happydns.CheckerSchedule) time.Duration) (int, error) {
count := 0
now := time.Now()
for _, schedule := range schedules {
maxOffset := maxOffsetFn(schedule)
if maxOffset <= 0 {
maxOffset = time.Second
}
schedule.NextRun = now.Add(time.Duration(rand.Int63n(int64(maxOffset))))
if err := u.storage.UpdateCheckerSchedule(schedule); err != nil {
return count, err
}
count++
}
return count, nil
}
// RescheduleUpcomingChecks randomizes the next run time of all enabled schedules
// within their respective intervals to spread load evenly. Useful after a restart.
func (u *CheckScheduleUsecase) RescheduleUpcomingChecks() (int, error) {
schedules, err := u.storage.ListEnabledCheckerSchedules()
if err != nil {
return 0, err
}
return u.rescheduleChecks(schedules, func(s *happydns.CheckerSchedule) time.Duration {
return s.Interval
})
}
// RescheduleOverdueChecks reschedules checks whose NextRun is in the past,
// spreading them over a short window to avoid scheduler famine (e.g. after
// a long machine suspend or server downtime).
// If there are fewer than 10 overdue checks, they are left as-is so that the
// caller's immediate checkSchedules pass enqueues them directly.
func (u *CheckScheduleUsecase) RescheduleOverdueChecks() (int, error) {
schedules, err := u.storage.ListEnabledCheckerSchedules()
if err != nil {
return 0, err
}
now := time.Now()
var overdue []*happydns.CheckerSchedule
for _, s := range schedules {
if s.NextRun.Before(now) {
overdue = append(overdue, s)
}
}
if len(overdue) == 0 {
return 0, nil
}
// Small backlog: let the caller enqueue them directly on the next
// checkSchedules pass rather than deferring them into the future.
if len(overdue) < 10 {
return 0, nil
}
// Spread overdue checks over a small window proportional to their count,
// capped at MinimumCheckInterval, to prevent all of them from running at once.
spreadWindow := time.Duration(len(overdue)) * 5 * time.Second
if spreadWindow > MinimumCheckInterval {
spreadWindow = MinimumCheckInterval
}
return u.rescheduleChecks(overdue, func(s *happydns.CheckerSchedule) time.Duration {
return spreadWindow
})
}
// DeleteSchedulesForTarget removes all schedules for a target // DeleteSchedulesForTarget removes all schedules for a target
func (u *CheckScheduleUsecase) DeleteSchedulesForTarget(targetType happydns.CheckScopeType, targetId happydns.Identifier) error { func (u *CheckScheduleUsecase) DeleteSchedulesForTarget(targetType happydns.CheckScopeType, targetId happydns.Identifier) error {
schedules, err := u.storage.ListCheckerSchedulesByTarget(targetType, targetId) schedules, err := u.storage.ListCheckerSchedulesByTarget(targetType, targetId)
@ -315,3 +393,94 @@ func (u *CheckScheduleUsecase) DeleteSchedulesForTarget(targetType happydns.Chec
return nil return nil
} }
// DiscoverAndEnsureSchedules creates default enabled schedules for all (plugin, domain)
// pairs that don't yet have an explicit schedule record. This implements the opt-out
// model: checks run automatically unless a schedule with Enabled=false has been saved.
// Non-fatal per-domain errors are collected and returned together.
func (u *CheckScheduleUsecase) DiscoverAndEnsureSchedules() error {
if u.domainLister == nil || u.checkerUsecase == nil {
return nil
}
plugins, err := u.checkerUsecase.ListCheckers()
if err != nil {
return fmt.Errorf("listing check plugins for discovery: %w", err)
}
iter, err := u.domainLister.ListAllDomains()
if err != nil {
return fmt.Errorf("listing domains for schedule discovery: %w", err)
}
defer iter.Close()
var errs []error
for iter.Next() {
domain := iter.Item()
if domain == nil {
continue
}
for checkerName, p := range *plugins {
if !p.Availability().ApplyToDomain {
continue
}
schedules, err := u.ListSchedulesByTarget(happydns.CheckScopeDomain, domain.Id)
if err != nil {
errs = append(errs, fmt.Errorf("listing schedules for domain %s: %w", domain.Id, err))
continue
}
hasSchedule := false
for _, sched := range schedules {
if sched.CheckerName == checkerName {
hasSchedule = true
break
}
}
if !hasSchedule {
if err := u.CreateSchedule(&happydns.CheckerSchedule{
CheckerName: checkerName,
OwnerId: domain.Owner,
TargetType: happydns.CheckScopeDomain,
TargetId: domain.Id,
Enabled: true,
}); err != nil {
errs = append(errs, fmt.Errorf("auto-creating schedule for domain %s / plugin %s: %w",
domain.Id, checkerName, err))
}
}
}
}
return errors.Join(errs...)
}
// PrepareCheckOptions fetches and merges plugin options for a scheduled check execution.
// It combines stored options (global/user/domain/service scopes) with the
// schedule-specific overrides, returning the final merged options.
func (u *CheckScheduleUsecase) PrepareCheckOptions(schedule *happydns.CheckerSchedule) (happydns.CheckerOptions, error) {
if u.checkerUsecase == nil {
return schedule.Options, nil
}
var domainId, serviceId *happydns.Identifier
switch schedule.TargetType {
case happydns.CheckScopeDomain:
domainId = &schedule.TargetId
case happydns.CheckScopeService:
serviceId = &schedule.TargetId
}
baseOptions, err := u.checkerUsecase.GetCheckerOptions(schedule.CheckerName, &schedule.OwnerId, domainId, serviceId)
if err != nil {
// Non-fatal: fall back to schedule-only options and surface as a warning
return schedule.Options, fmt.Errorf("could not fetch plugin options for %s: %w", schedule.CheckerName, err)
}
if baseOptions != nil {
return u.MergeCheckOptions(nil, nil, *baseOptions, schedule.Options), nil
}
return schedule.Options, nil
}

View file

@ -96,3 +96,8 @@ type CheckResultStorage interface {
// LastCheckSchedulerRun retrieves the last time the scheduler ran // LastCheckSchedulerRun retrieves the last time the scheduler ran
LastCheckSchedulerRun() (*time.Time, error) LastCheckSchedulerRun() (*time.Time, error)
} }
// DomainLister provides access to domain listings for schedule discovery.
type DomainLister interface {
ListAllDomains() (happydns.Iterator[happydns.Domain], error)
}

View file

@ -30,6 +30,9 @@ type SchedulerUsecase interface {
Run() Run()
Close() Close()
TriggerOnDemandCheck(checkerName string, targetType CheckScopeType, targetID Identifier, userID Identifier, options CheckerOptions) (Identifier, error) TriggerOnDemandCheck(checkerName string, targetType CheckScopeType, targetID Identifier, userID Identifier, options CheckerOptions) (Identifier, error)
GetSchedulerStatus() SchedulerStatus
SetEnabled(enabled bool) error
RescheduleUpcomingChecks() (int, error)
} }
// CheckerSchedule defines a recurring check schedule // CheckerSchedule defines a recurring check schedule
@ -126,4 +129,22 @@ type CheckerScheduleUsecase interface {
// DeleteSchedulesForTarget removes all schedules for a target // DeleteSchedulesForTarget removes all schedules for a target
DeleteSchedulesForTarget(targetType CheckScopeType, targetId Identifier) error DeleteSchedulesForTarget(targetType CheckScopeType, targetId Identifier) error
// ListUpcomingSchedules retrieves the next limit enabled schedules sorted by NextRun ascending.
ListUpcomingSchedules(limit int) ([]*CheckerSchedule, error)
// DiscoverAndEnsureSchedules creates default enabled schedules for all (plugin, domain) pairs
// that do not already have a schedule.
DiscoverAndEnsureSchedules() error
// RescheduleUpcomingChecks randomizes next run times for all enabled schedules
// within their respective intervals to spread load evenly.
RescheduleUpcomingChecks() (int, error)
// RescheduleOverdueTests reschedules overdue tests to run soon, spread over a
// short window to avoid scheduler famine after a suspend or server restart.
RescheduleOverdueChecks() (int, error)
// PrepareCheckOptions fetches and merges plugin options for a scheduled check execution.
PrepareCheckOptions(schedule *CheckerSchedule) (CheckerOptions, error)
} }

View file

@ -104,6 +104,9 @@
<NavItem> <NavItem>
<NavLink href="/checkers" active={page && page.url.pathname.startsWith('/checkers')}>Checkers</NavLink> <NavLink href="/checkers" active={page && page.url.pathname.startsWith('/checkers')}>Checkers</NavLink>
</NavItem> </NavItem>
<NavItem>
<NavLink href="/scheduler" active={page && page.url.pathname.startsWith('/scheduler')}>Scheduler</NavLink>
</NavItem>
</Nav> </Nav>
</Collapse> </Collapse>
</Navbar> </Navbar>

View file

@ -0,0 +1,341 @@
<!--
This file is part of the happyDomain (R) project.
Copyright (c) 2022-2026 happyDomain
Authors: Pierre-Olivier Mercier, et al.
This program is offered under a commercial and under the AGPL license.
For commercial licensing, contact us at <contact@happydomain.org>.
For AGPL licensing:
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
-->
<script lang="ts">
import { onMount } from "svelte";
import {
Badge,
Button,
Card,
CardBody,
CardHeader,
Col,
Container,
Icon,
Row,
Spinner,
Table,
} from "@sveltestrap/sveltestrap";
import { toasts } from "$lib/stores/toasts";
import {
getScheduler,
postSchedulerDisable,
postSchedulerEnable,
postSchedulerRescheduleUpcoming,
} from "$lib/api-admin/sdk.gen";
interface CheckerSchedule {
id: string;
checker_name: string;
owner_id: string;
target_type: number;
target_id: string;
interval: number;
enabled: boolean;
last_run?: string;
next_run: string;
}
interface SchedulerStatus {
config_enabled: boolean;
runtime_enabled: boolean;
running: boolean;
worker_count: number;
queue_size: number;
active_count: number;
next_schedules: CheckerSchedule[] | null;
}
let status = $state<SchedulerStatus | null>(null);
let loading = $state(true);
let actionInProgress = $state(false);
let rescheduleInProgress = $state(false);
let error = $state<string | null>(null);
async function fetchStatus() {
loading = true;
error = null;
try {
const { data, error: err } = await getScheduler();
if (err) throw new Error(String(err));
status = data as SchedulerStatus;
} catch (e: any) {
error = e.message ?? "Unknown error";
} finally {
loading = false;
}
}
async function setEnabled(enabled: boolean) {
actionInProgress = true;
const action = enabled ? "enable" : "disable";
try {
const { data, error: err } = await (enabled
? postSchedulerEnable()
: postSchedulerDisable());
if (err) {
toasts.addErrorToast({ message: `Failed to ${action} scheduler: ${err}` });
return;
}
status = data as SchedulerStatus;
toasts.addToast({ message: `Scheduler ${action}d successfully`, color: "success" });
} catch (e: any) {
toasts.addErrorToast({ message: e.message ?? `Failed to ${action} scheduler` });
} finally {
actionInProgress = false;
}
}
async function rescheduleUpcoming() {
rescheduleInProgress = true;
try {
const { data, error: err } = await postSchedulerRescheduleUpcoming();
if (err) {
toasts.addErrorToast({ message: `Failed to reschedule: ${err}` });
return;
}
toasts.addToast({
message: `Rescheduled ${(data as any).rescheduled} schedule(s) successfully`,
color: "success",
});
await fetchStatus();
} catch (e: any) {
toasts.addErrorToast({ message: e.message ?? "Failed to reschedule upcoming checks" });
} finally {
rescheduleInProgress = false;
}
}
function formatDuration(ns: number): string {
const seconds = ns / 1e9;
if (seconds < 60) return `${Math.round(seconds)}s`;
const minutes = seconds / 60;
if (minutes < 60) return `${Math.round(minutes)}m`;
const hours = minutes / 60;
if (hours < 24) return `${Math.round(hours)}h`;
return `${Math.round(hours / 24)}d`;
}
function targetTypeName(t: number): string {
const names: Record<number, string> = {
0: "instance",
1: "user",
2: "domain",
3: "zone",
4: "service",
5: "ondemand",
};
return names[t] ?? "unknown";
}
onMount(fetchStatus);
</script>
<Container class="flex-fill my-5">
<Row class="mb-4">
<Col>
<h1 class="display-5">
<Icon name="clock-history"></Icon>
Test Scheduler
</h1>
<p class="text-muted lead">Monitor and control the background test scheduler</p>
</Col>
</Row>
{#if loading}
<div class="d-flex align-items-center gap-2">
<Spinner size="sm" />
<span>Loading scheduler status...</span>
</div>
{:else if error}
<Card color="danger" body>
<Icon name="exclamation-triangle-fill"></Icon>
Error loading scheduler status: {error}
<Button class="ms-3" size="sm" color="light" onclick={fetchStatus}>Retry</Button>
</Card>
{:else if status}
<!-- Status Card -->
<Card class="mb-4">
<CardHeader>
<div class="d-flex justify-content-between align-items-center">
<span><Icon name="info-circle-fill"></Icon> Scheduler Status</span>
<Button size="sm" color="secondary" outline onclick={fetchStatus}>
<Icon name="arrow-clockwise"></Icon> Refresh
</Button>
</div>
</CardHeader>
<CardBody>
<Row class="g-3 mb-3">
<Col sm={6} md={4}>
<div class="text-muted small">Config Enabled</div>
{#if status.config_enabled}
<Badge color="success">Yes</Badge>
{:else}
<Badge color="danger">No</Badge>
{/if}
</Col>
<Col sm={6} md={4}>
<div class="text-muted small">Runtime Enabled</div>
{#if status.runtime_enabled}
<Badge color="success">Yes</Badge>
{:else}
<Badge color="warning">Disabled</Badge>
{/if}
</Col>
<Col sm={6} md={4}>
<div class="text-muted small">Running</div>
{#if status.running}
<Badge color="success"><Icon name="play-fill"></Icon> Running</Badge>
{:else}
<Badge color="secondary"><Icon name="stop-fill"></Icon> Stopped</Badge>
{/if}
</Col>
<Col sm={6} md={4}>
<div class="text-muted small">Workers</div>
<strong>{status.worker_count}</strong>
</Col>
<Col sm={6} md={4}>
<div class="text-muted small">Queue Size</div>
<strong>{status.queue_size}</strong>
</Col>
<Col sm={6} md={4}>
<div class="text-muted small">Active Executions</div>
<strong>{status.active_count}</strong>
</Col>
</Row>
{#if status.config_enabled}
<div class="d-flex gap-2">
{#if status.runtime_enabled}
<Button
color="warning"
disabled={actionInProgress}
onclick={() => setEnabled(false)}
>
{#if actionInProgress}<Spinner size="sm" />{:else}<Icon
name="pause-fill"
></Icon>{/if}
Disable Scheduler
</Button>
{:else}
<Button
color="success"
disabled={actionInProgress}
onclick={() => setEnabled(true)}
>
{#if actionInProgress}<Spinner size="sm" />{:else}<Icon
name="play-fill"
></Icon>{/if}
Enable Scheduler
</Button>
{/if}
<Button
color="secondary"
outline
disabled={rescheduleInProgress}
onclick={rescheduleUpcoming}
>
{#if rescheduleInProgress}<Spinner size="sm" />{:else}<Icon
name="shuffle"
></Icon>{/if}
Spread Upcoming Checks
</Button>
</div>
{:else}
<p class="text-muted mb-0">
<Icon name="lock-fill"></Icon>
The scheduler is disabled in the server configuration and cannot be enabled at
runtime.
</p>
{/if}
</CardBody>
</Card>
<!-- Upcoming Scheduled Checks -->
<Card>
<CardHeader>
<Icon name="calendar-event-fill"></Icon>
Upcoming Scheduled Checks
{#if status.next_schedules}
<Badge color="secondary" class="ms-2">{status.next_schedules.length}</Badge>
{/if}
</CardHeader>
<CardBody class="p-0">
<div class="table-responsive">
<Table hover class="mb-0">
<thead>
<tr>
<th>Plugin</th>
<th>Target Type</th>
<th>Target ID</th>
<th>Interval</th>
<th>Last Run</th>
<th>Next Run</th>
</tr>
</thead>
<tbody>
{#if !status.next_schedules || status.next_schedules.length === 0}
<tr>
<td colspan="6" class="text-center text-muted py-3">
No scheduled checks
</td>
</tr>
{:else}
{#each status.next_schedules as schedule}
<tr>
<td><strong>{schedule.checker_name}</strong></td>
<td
><Badge color="info"
>{targetTypeName(schedule.target_type)}</Badge
></td
>
<td><code class="small">{schedule.target_id}</code></td>
<td>{formatDuration(schedule.interval)}</td>
<td>
{#if schedule.last_run}
{new Date(schedule.last_run).toLocaleString()}
{:else}
<span class="text-muted">Never</span>
{/if}
</td>
<td>
{#if new Date(schedule.next_run) < new Date()}
<span class="text-danger">
<Icon name="exclamation-circle-fill"></Icon>
{new Date(schedule.next_run).toLocaleString()}
</span>
{:else}
{new Date(schedule.next_run).toLocaleString()}
{/if}
</td>
</tr>
{/each}
{/if}
</tbody>
</Table>
</div>
</CardBody>
</Card>
{/if}
</Container>