Add admin API and frontend for scheduler management

This commit is contained in:
nemunaire 2026-02-11 11:21:20 +07:00
commit 6ed26d00b1
11 changed files with 840 additions and 32 deletions

View file

@ -0,0 +1,102 @@
// This file is part of the happyDomain (R) project.
// Copyright (c) 2020-2026 happyDomain
// Authors: Pierre-Olivier Mercier, et al.
//
// This program is offered under a commercial and under the AGPL license.
// For commercial licensing, contact us at <contact@happydomain.org>.
//
// For AGPL licensing:
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
package controller
import (
"net/http"
"github.com/gin-gonic/gin"
"git.happydns.org/happyDomain/model"
)
// AdminSchedulerController handles admin operations on the test scheduler
type AdminSchedulerController struct {
scheduler happydns.SchedulerUsecase
}
func NewAdminSchedulerController(scheduler happydns.SchedulerUsecase) *AdminSchedulerController {
return &AdminSchedulerController{scheduler: scheduler}
}
// GetSchedulerStatus returns the current scheduler state
//
// @Summary Get scheduler status
// @Description Returns the current state of the test scheduler including worker count, queue size, and upcoming schedules
// @Tags scheduler
// @Produce json
// @Success 200 {object} happydns.SchedulerStatus
// @Router /scheduler [get]
func (ctrl *AdminSchedulerController) GetSchedulerStatus(c *gin.Context) {
c.JSON(http.StatusOK, ctrl.scheduler.GetSchedulerStatus())
}
// EnableScheduler enables the test scheduler at runtime
//
// @Summary Enable scheduler
// @Description Enables the test scheduler at runtime without restarting the server
// @Tags scheduler
// @Success 200 {object} happydns.SchedulerStatus
// @Failure 500 {object} happydns.ErrorResponse
// @Router /scheduler/enable [post]
func (ctrl *AdminSchedulerController) EnableScheduler(c *gin.Context) {
if err := ctrl.scheduler.SetEnabled(true); err != nil {
c.JSON(http.StatusInternalServerError, happydns.ErrorResponse{Message: err.Error()})
return
}
c.JSON(http.StatusOK, ctrl.scheduler.GetSchedulerStatus())
}
// DisableScheduler disables the test scheduler at runtime
//
// @Summary Disable scheduler
// @Description Disables the test scheduler at runtime without restarting the server
// @Tags scheduler
// @Success 200 {object} happydns.SchedulerStatus
// @Failure 500 {object} happydns.ErrorResponse
// @Router /scheduler/disable [post]
func (ctrl *AdminSchedulerController) DisableScheduler(c *gin.Context) {
if err := ctrl.scheduler.SetEnabled(false); err != nil {
c.JSON(http.StatusInternalServerError, happydns.ErrorResponse{Message: err.Error()})
return
}
c.JSON(http.StatusOK, ctrl.scheduler.GetSchedulerStatus())
}
// RescheduleUpcoming randomizes the next run time of all enabled schedules
// within their respective intervals to spread load evenly.
//
// @Summary Reschedule upcoming tests
// @Description Randomizes the next run time of all enabled schedules within their intervals to spread load
// @Tags scheduler
// @Produce json
// @Success 200 {object} map[string]int
// @Failure 500 {object} happydns.ErrorResponse
// @Router /scheduler/reschedule-upcoming [post]
func (ctrl *AdminSchedulerController) RescheduleUpcoming(c *gin.Context) {
n, err := ctrl.scheduler.RescheduleUpcomingChecks()
if err != nil {
c.JSON(http.StatusInternalServerError, happydns.ErrorResponse{Message: err.Error()})
return
}
c.JSON(http.StatusOK, gin.H{"rescheduled": n})
}

View file

@ -33,6 +33,7 @@ import (
type Dependencies struct {
AuthUser happydns.AuthUserUsecase
Checker happydns.CheckerUsecase
CheckScheduler happydns.SchedulerUsecase
Domain happydns.DomainUsecase
Provider happydns.ProviderUsecase
RemoteZoneImporter happydns.RemoteZoneImporterUsecase
@ -51,6 +52,7 @@ func DeclareRoutes(cfg *happydns.Options, router *gin.Engine, s storage.Storage,
declareDomainRoutes(apiRoutes, dep, s)
declareChecksRoutes(apiRoutes, dep)
declareProviderRoutes(apiRoutes, dep, s)
declareSchedulerRoutes(apiRoutes, dep)
declareSessionsRoutes(cfg, apiRoutes, s)
declareUserAuthsRoutes(apiRoutes, dep, s)
declareUsersRoutes(apiRoutes, dep, s)

View file

@ -0,0 +1,38 @@
// This file is part of the happyDomain (R) project.
// Copyright (c) 2020-2026 happyDomain
// Authors: Pierre-Olivier Mercier, et al.
//
// This program is offered under a commercial and under the AGPL license.
// For commercial licensing, contact us at <contact@happydomain.org>.
//
// For AGPL licensing:
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
package route
import (
"github.com/gin-gonic/gin"
"git.happydns.org/happyDomain/internal/api-admin/controller"
)
func declareSchedulerRoutes(router *gin.RouterGroup, dep Dependencies) {
ctrl := controller.NewAdminSchedulerController(dep.CheckScheduler)
schedulerRoute := router.Group("/scheduler")
schedulerRoute.GET("", ctrl.GetSchedulerStatus)
schedulerRoute.POST("/enable", ctrl.EnableScheduler)
schedulerRoute.POST("/disable", ctrl.DisableScheduler)
schedulerRoute.POST("/reschedule-upcoming", ctrl.RescheduleUpcoming)
}

View file

@ -59,6 +59,7 @@ func NewAdmin(app *App) *Admin {
admin.DeclareRoutes(app.cfg, router, app.store, admin.Dependencies{
AuthUser: app.usecases.authUser,
Checker: app.usecases.checker,
CheckScheduler: app.checkScheduler,
Domain: app.usecases.domain,
Provider: app.usecases.providerAdmin,
RemoteZoneImporter: app.usecases.orchestrator.RemoteZoneImporter,

View file

@ -240,7 +240,7 @@ func (app *App) initUsecases() {
app.usecases.session = sessionService
app.usecases.checker = checkUC.NewCheckerUsecase(app.cfg, app.store)
app.usecases.checkResult = checkresultUC.NewCheckResultUsecase(app.store, app.cfg)
app.usecases.checkerSchedule = checkresultUC.NewCheckScheduleUsecase(app.store, app.cfg)
app.usecases.checkerSchedule = checkresultUC.NewCheckScheduleUsecase(app.store, app.cfg, app.store, app.usecases.checker)
app.usecases.orchestrator = orchestrator.NewOrchestrator(
domainLogService,
@ -280,6 +280,7 @@ func (app *App) setupRouter() {
CaptchaVerifier: app.captchaVerifier,
Checker: app.usecases.checker,
CheckResult: app.usecases.checkResult,
CheckerSchedule: app.usecases.checkerSchedule,
CheckScheduler: app.checkScheduler,
Domain: app.usecases.domain,
DomainLog: app.usecases.domainLog,

View file

@ -36,10 +36,11 @@ import (
)
const (
SchedulerCheckInterval = 1 * time.Minute // How often to check for due tests
SchedulerCleanupInterval = 24 * time.Hour // How often to clean up old executions
CheckExecutionTimeout = 5 * time.Minute // Max time for a single test
MaxRetries = 3 // Max retry attempts for failed tests
SchedulerCheckInterval = 1 * time.Minute // How often to check for due tests
SchedulerCleanupInterval = 24 * time.Hour // How often to clean up old executions
SchedulerDiscoveryInterval = 1 * time.Hour // How often to auto-discover new targets
CheckExecutionTimeout = 5 * time.Minute // Max time for a single check
MaxRetries = 3 // Max retry attempts for failed checks
)
// Priority levels for test execution queue
@ -65,6 +66,8 @@ type checkScheduler struct {
workers []*worker
mu sync.RWMutex
wg sync.WaitGroup
runtimeEnabled bool
running bool
}
// activeExecution tracks a running test execution
@ -159,6 +162,25 @@ func (d *disabledScheduler) TriggerOnDemandCheck(checkName string, targetType ha
return happydns.Identifier{}, fmt.Errorf("test scheduler is disabled in configuration")
}
// GetSchedulerStatus returns a status indicating the scheduler is disabled
func (d *disabledScheduler) GetSchedulerStatus() happydns.SchedulerStatus {
return happydns.SchedulerStatus{
ConfigEnabled: false,
RuntimeEnabled: false,
Running: false,
}
}
// SetEnabled returns an error since the scheduler is disabled in configuration
func (d *disabledScheduler) SetEnabled(enabled bool) error {
return fmt.Errorf("scheduler is disabled in configuration, cannot enable at runtime")
}
// RescheduleUpcomingChecks returns an error since the scheduler is disabled
func (d *disabledScheduler) RescheduleUpcomingChecks() (int, error) {
return 0, fmt.Errorf("test scheduler is disabled in configuration")
}
// newCheckScheduler creates a new test scheduler
func newCheckScheduler(
cfg *happydns.Options,
@ -175,7 +197,7 @@ func newCheckScheduler(
store: store,
checkerUsecase: checkerUsecase,
resultUsecase: checkresult.NewCheckResultUsecase(store, cfg),
scheduleUsecase: checkresult.NewCheckScheduleUsecase(store, cfg),
scheduleUsecase: checkresult.NewCheckScheduleUsecase(store, cfg, store, checkerUsecase),
stop: make(chan struct{}),
stopWorkers: make(chan struct{}),
runNowChan: make(chan *queueItem, 100),
@ -183,6 +205,7 @@ func newCheckScheduler(
queue: newPriorityQueue(),
activeExecutions: make(map[string]*activeExecution),
workers: make([]*worker, numWorkers),
runtimeEnabled: true,
}
for i := 0; i < numWorkers; i++ {
@ -231,8 +254,27 @@ func (s *checkScheduler) Close() {
// Run starts the scheduler main loop. It must not be called more than once.
func (s *checkScheduler) Run() {
s.mu.Lock()
s.running = true
s.mu.Unlock()
defer func() {
s.mu.Lock()
s.running = false
s.mu.Unlock()
}()
log.Printf("Starting test scheduler with %d workers...\n", len(s.workers))
// Reschedule overdue tests before starting workers so that tests missed
// during a server suspend or shutdown are spread into the near future
// instead of all firing at once.
if n, err := s.scheduleUsecase.RescheduleOverdueChecks(); err != nil {
log.Printf("Warning: failed to reschedule overdue tests: %v\n", err)
} else if n > 0 {
log.Printf("Rescheduled %d overdue test(s) into the near future\n", n)
}
// Start workers
for _, w := range s.workers {
s.wg.Add(1)
@ -242,9 +284,15 @@ func (s *checkScheduler) Run() {
// Main scheduling loop
checkTicker := time.NewTicker(SchedulerCheckInterval)
cleanupTicker := time.NewTicker(SchedulerCleanupInterval)
discoveryTicker := time.NewTicker(SchedulerDiscoveryInterval)
defer checkTicker.Stop()
defer cleanupTicker.Stop()
defer discoveryTicker.Stop()
// Initial discovery: create default schedules for all existing targets
if err := s.scheduleUsecase.DiscoverAndEnsureSchedules(); err != nil {
log.Printf("Warning: schedule discovery encountered errors: %v\n", err)
}
// Initial check
s.checkSchedules()
@ -256,6 +304,11 @@ func (s *checkScheduler) Run() {
case <-cleanupTicker.C:
s.cleanup()
case <-discoveryTicker.C:
if err := s.scheduleUsecase.DiscoverAndEnsureSchedules(); err != nil {
log.Printf("Warning: schedule discovery encountered errors: %v\n", err)
}
case item := <-s.runNowChan:
s.enqueue(item)
@ -267,6 +320,13 @@ func (s *checkScheduler) Run() {
// checkSchedules checks for due tests and queues them
func (s *checkScheduler) checkSchedules() {
s.mu.RLock()
enabled := s.runtimeEnabled
s.mu.RUnlock()
if !enabled {
return
}
dueSchedules, err := s.scheduleUsecase.ListDueSchedules()
if err != nil {
log.Printf("Error listing due schedules: %v\n", err)
@ -362,6 +422,54 @@ func (s *checkScheduler) TriggerOnDemandCheck(checkerName string, targetType hap
return execution.Id, nil
}
// GetSchedulerStatus returns a snapshot of the current scheduler state
func (s *checkScheduler) GetSchedulerStatus() happydns.SchedulerStatus {
s.mu.RLock()
activeCount := len(s.activeExecutions)
running := s.running
runtimeEnabled := s.runtimeEnabled
s.mu.RUnlock()
nextSchedules, _ := s.scheduleUsecase.ListUpcomingSchedules(20)
return happydns.SchedulerStatus{
ConfigEnabled: !s.cfg.DisableScheduler,
RuntimeEnabled: runtimeEnabled,
Running: running,
WorkerCount: len(s.workers),
QueueSize: s.queue.Len(),
ActiveCount: activeCount,
NextSchedules: nextSchedules,
}
}
// SetEnabled enables or disables the scheduler at runtime
func (s *checkScheduler) SetEnabled(enabled bool) error {
s.mu.Lock()
wasEnabled := s.runtimeEnabled
s.runtimeEnabled = enabled
s.mu.Unlock()
if enabled && !wasEnabled {
// Spread out any overdue tests to avoid a thundering herd, then
// immediately enqueue whatever is now due.
if n, err := s.scheduleUsecase.RescheduleOverdueChecks(); err != nil {
log.Printf("Warning: failed to reschedule overdue tests on re-enable: %v\n", err)
} else if n > 0 {
log.Printf("Rescheduled %d overdue test(s) after scheduler re-enable\n", n)
}
s.checkSchedules()
}
return nil
}
// RescheduleUpcomingChecks randomizes the next run time of all enabled schedules
// within their respective intervals, delegating to the schedule usecase.
func (s *checkScheduler) RescheduleUpcomingChecks() (int, error) {
return s.scheduleUsecase.RescheduleUpcomingChecks()
}
// cleanup removes old execution records and expired test results
func (s *checkScheduler) cleanup() {
log.Println("Running scheduler cleanup...")
@ -412,10 +520,21 @@ func (w *worker) executeCheck(item *queueItem) {
execution := item.execution
schedule := item.schedule
// Always update schedule NextRun after execution, whether it succeeds or fails.
// This prevents the schedule from being re-queued on the next tick if the test fails.
if item.execution.ScheduleId != nil {
defer func() {
if err := w.scheduler.scheduleUsecase.UpdateScheduleAfterRun(*item.execution.ScheduleId); err != nil {
log.Printf("Worker %d: Error updating schedule after run: %v\n", w.id, err)
}
}()
}
// Mark execution as running
execution.Status = happydns.CheckExecutionRunning
if err := w.scheduler.resultUsecase.UpdateCheckExecution(execution); err != nil {
log.Printf("Worker %d: Error updating execution status: %v\n", w.id, err)
_ = w.scheduler.resultUsecase.FailCheckExecution(execution.Id, err.Error())
return
}
@ -443,6 +562,13 @@ func (w *worker) executeCheck(item *queueItem) {
return
}
// Merge options: global defaults < user opts < domain/service opts < schedule opts
mergedOptions, err := w.scheduler.scheduleUsecase.PrepareCheckOptions(schedule)
if err != nil {
// Non-fatal: PrepareTestOptions already falls back to schedule-only options
log.Printf("Worker %d: warning, could not prepare plugin options for %s: %v\n", w.id, schedule.CheckerName, err)
}
// Prepare metadata
meta := make(map[string]string)
meta["target_type"] = schedule.TargetType.String()
@ -459,7 +585,7 @@ func (w *worker) executeCheck(item *queueItem) {
errorChan <- fmt.Errorf("checker panicked: %v", r)
}
}()
result, err := checker.RunCheck(schedule.Options, meta)
result, err := checker.RunCheck(mergedOptions, meta)
if err != nil {
errorChan <- err
} else {
@ -522,13 +648,6 @@ func (w *worker) executeCheck(item *queueItem) {
return
}
// Update schedule if this was a scheduled test
if item.execution.ScheduleId != nil {
if err := w.scheduler.scheduleUsecase.UpdateScheduleAfterRun(*item.execution.ScheduleId); err != nil {
log.Printf("Worker %d: Error updating schedule: %v\n", w.id, err)
}
}
log.Printf("Worker %d: Completed test %s for target %s (status: %s, duration: %v)\n",
log.Printf("Worker %d: Completed test %s for target %s (status: %d, duration: %v)\n",
w.id, schedule.CheckerName, schedule.TargetId.String(), result.Status, duration)
}

View file

@ -22,7 +22,10 @@
package checkresult
import (
"errors"
"fmt"
"math/rand"
"sort"
"time"
"git.happydns.org/happyDomain/model"
@ -38,15 +41,19 @@ const (
// CheckScheduleUsecase implements business logic for check schedules
type CheckScheduleUsecase struct {
storage CheckResultStorage
options *happydns.Options
storage CheckResultStorage
options *happydns.Options
domainLister DomainLister
checkerUsecase happydns.CheckerUsecase
}
// NewCheckScheduleUsecase creates a new check schedule usecase
func NewCheckScheduleUsecase(storage CheckResultStorage, options *happydns.Options) *CheckScheduleUsecase {
func NewCheckScheduleUsecase(storage CheckResultStorage, options *happydns.Options, domainLister DomainLister, checkerUsecase happydns.CheckerUsecase) *CheckScheduleUsecase {
return &CheckScheduleUsecase{
storage: storage,
options: options,
storage: storage,
options: options,
domainLister: domainLister,
checkerUsecase: checkerUsecase,
}
}
@ -67,24 +74,22 @@ func (u *CheckScheduleUsecase) GetSchedule(scheduleId happydns.Identifier) (*hap
// CreateSchedule creates a new check schedule with validation
func (u *CheckScheduleUsecase) CreateSchedule(schedule *happydns.CheckerSchedule) error {
// Validate interval
if schedule.Interval < MinimumCheckInterval {
return fmt.Errorf("check interval must be at least %v", MinimumCheckInterval)
}
// Set default interval if not specified
if schedule.Interval == 0 {
schedule.Interval = u.getDefaultInterval(schedule.TargetType)
}
// Calculate next run time
if schedule.NextRun.IsZero() {
schedule.NextRun = time.Now().Add(schedule.Interval)
// Validate interval
if schedule.Interval < MinimumCheckInterval {
return fmt.Errorf("check interval must be at least %v", MinimumCheckInterval)
}
// Enable by default if not specified
if !schedule.Enabled {
schedule.Enabled = true
// Calculate next run time: pick a random offset within the interval
// to spread load evenly across all schedules
// TODO: Use a smarter load balance function in the future
if schedule.NextRun.IsZero() {
offset := time.Duration(rand.Int63n(int64(schedule.Interval)))
schedule.NextRun = time.Now().Add(offset)
}
return u.storage.CreateCheckerSchedule(schedule)
@ -178,7 +183,7 @@ func (u *CheckScheduleUsecase) ListDueSchedules() ([]*happydns.CheckerSchedule,
var dueSchedules []*happydns.CheckerSchedule
for _, schedule := range schedules {
if schedule.Enabled && schedule.NextRun.Before(now) {
if schedule.NextRun.Before(now) {
dueSchedules = append(dueSchedules, schedule)
}
}
@ -186,6 +191,24 @@ func (u *CheckScheduleUsecase) ListDueSchedules() ([]*happydns.CheckerSchedule,
return dueSchedules, nil
}
// ListUpcomingSchedules retrieves the next `limit` enabled schedules sorted by NextRun ascending
func (u *CheckScheduleUsecase) ListUpcomingSchedules(limit int) ([]*happydns.CheckerSchedule, error) {
schedules, err := u.storage.ListEnabledCheckerSchedules()
if err != nil {
return nil, err
}
sort.Slice(schedules, func(i, j int) bool {
return schedules[i].NextRun.Before(schedules[j].NextRun)
})
if limit > 0 && len(schedules) > limit {
schedules = schedules[:limit]
}
return schedules, nil
}
// getDefaultInterval returns the default check interval based on target type
func (u *CheckScheduleUsecase) getDefaultInterval(targetType happydns.CheckScopeType) time.Duration {
switch targetType {
@ -269,6 +292,77 @@ func (u *CheckScheduleUsecase) CreateDefaultSchedulesForTarget(
return u.CreateSchedule(schedule)
}
// rescheduleChecks reschedules each given schedule to a random time in [now, now+maxOffsetFn(schedule)].
func (u *CheckScheduleUsecase) rescheduleChecks(schedules []*happydns.CheckerSchedule, maxOffsetFn func(*happydns.CheckerSchedule) time.Duration) (int, error) {
count := 0
now := time.Now()
for _, schedule := range schedules {
maxOffset := maxOffsetFn(schedule)
if maxOffset <= 0 {
maxOffset = time.Second
}
schedule.NextRun = now.Add(time.Duration(rand.Int63n(int64(maxOffset))))
if err := u.storage.UpdateCheckerSchedule(schedule); err != nil {
return count, err
}
count++
}
return count, nil
}
// RescheduleUpcomingChecks randomizes the next run time of all enabled schedules
// within their respective intervals to spread load evenly. Useful after a restart.
func (u *CheckScheduleUsecase) RescheduleUpcomingChecks() (int, error) {
schedules, err := u.storage.ListEnabledCheckerSchedules()
if err != nil {
return 0, err
}
return u.rescheduleChecks(schedules, func(s *happydns.CheckerSchedule) time.Duration {
return s.Interval
})
}
// RescheduleOverdueChecks reschedules checks whose NextRun is in the past,
// spreading them over a short window to avoid scheduler famine (e.g. after
// a long machine suspend or server downtime).
// If there are fewer than 10 overdue checks, they are left as-is so that the
// caller's immediate checkSchedules pass enqueues them directly.
func (u *CheckScheduleUsecase) RescheduleOverdueChecks() (int, error) {
schedules, err := u.storage.ListEnabledCheckerSchedules()
if err != nil {
return 0, err
}
now := time.Now()
var overdue []*happydns.CheckerSchedule
for _, s := range schedules {
if s.NextRun.Before(now) {
overdue = append(overdue, s)
}
}
if len(overdue) == 0 {
return 0, nil
}
// Small backlog: let the caller enqueue them directly on the next
// checkSchedules pass rather than deferring them into the future.
if len(overdue) < 10 {
return 0, nil
}
// Spread overdue checks over a small window proportional to their count,
// capped at MinimumCheckInterval, to prevent all of them from running at once.
spreadWindow := time.Duration(len(overdue)) * 5 * time.Second
if spreadWindow > MinimumCheckInterval {
spreadWindow = MinimumCheckInterval
}
return u.rescheduleChecks(overdue, func(s *happydns.CheckerSchedule) time.Duration {
return spreadWindow
})
}
// DeleteSchedulesForTarget removes all schedules for a target
func (u *CheckScheduleUsecase) DeleteSchedulesForTarget(targetType happydns.CheckScopeType, targetId happydns.Identifier) error {
schedules, err := u.storage.ListCheckerSchedulesByTarget(targetType, targetId)
@ -284,3 +378,94 @@ func (u *CheckScheduleUsecase) DeleteSchedulesForTarget(targetType happydns.Chec
return nil
}
// DiscoverAndEnsureSchedules creates default enabled schedules for all (plugin, domain)
// pairs that don't yet have an explicit schedule record. This implements the opt-out
// model: checks run automatically unless a schedule with Enabled=false has been saved.
// Non-fatal per-domain errors are collected and returned together.
func (u *CheckScheduleUsecase) DiscoverAndEnsureSchedules() error {
if u.domainLister == nil || u.checkerUsecase == nil {
return nil
}
plugins, err := u.checkerUsecase.ListCheckers()
if err != nil {
return fmt.Errorf("listing check plugins for discovery: %w", err)
}
iter, err := u.domainLister.ListAllDomains()
if err != nil {
return fmt.Errorf("listing domains for schedule discovery: %w", err)
}
defer iter.Close()
var errs []error
for iter.Next() {
domain := iter.Item()
if domain == nil {
continue
}
for checkerName, p := range *plugins {
if !p.Availability().ApplyToDomain {
continue
}
schedules, err := u.ListSchedulesByTarget(happydns.CheckScopeDomain, domain.Id)
if err != nil {
errs = append(errs, fmt.Errorf("listing schedules for domain %s: %w", domain.Id, err))
continue
}
hasSchedule := false
for _, sched := range schedules {
if sched.CheckerName == checkerName {
hasSchedule = true
break
}
}
if !hasSchedule {
if err := u.CreateSchedule(&happydns.CheckerSchedule{
CheckerName: checkerName,
OwnerId: domain.Owner,
TargetType: happydns.CheckScopeDomain,
TargetId: domain.Id,
Enabled: true,
}); err != nil {
errs = append(errs, fmt.Errorf("auto-creating schedule for domain %s / plugin %s: %w",
domain.Id, checkerName, err))
}
}
}
}
return errors.Join(errs...)
}
// PrepareCheckOptions fetches and merges plugin options for a scheduled check execution.
// It combines stored options (global/user/domain/service scopes) with the
// schedule-specific overrides, returning the final merged options.
func (u *CheckScheduleUsecase) PrepareCheckOptions(schedule *happydns.CheckerSchedule) (happydns.CheckerOptions, error) {
if u.checkerUsecase == nil {
return schedule.Options, nil
}
var domainId, serviceId *happydns.Identifier
switch schedule.TargetType {
case happydns.CheckScopeDomain:
domainId = &schedule.TargetId
case happydns.CheckScopeService:
serviceId = &schedule.TargetId
}
baseOptions, err := u.checkerUsecase.GetCheckerOptions(schedule.CheckerName, &schedule.OwnerId, domainId, serviceId)
if err != nil {
// Non-fatal: fall back to schedule-only options and surface as a warning
return schedule.Options, fmt.Errorf("could not fetch plugin options for %s: %w", schedule.CheckerName, err)
}
if baseOptions != nil {
return u.MergeCheckOptions(nil, nil, *baseOptions, schedule.Options), nil
}
return schedule.Options, nil
}

View file

@ -96,3 +96,8 @@ type CheckResultStorage interface {
// LastCheckSchedulerRun retrieves the last time the scheduler ran
LastCheckSchedulerRun() (*time.Time, error)
}
// DomainLister provides access to domain listings for schedule discovery.
type DomainLister interface {
ListAllDomains() (happydns.Iterator[happydns.Domain], error)
}

View file

@ -30,6 +30,9 @@ type SchedulerUsecase interface {
Run()
Close()
TriggerOnDemandCheck(checkerName string, targetType CheckScopeType, targetID Identifier, userID Identifier, options CheckerOptions) (Identifier, error)
GetSchedulerStatus() SchedulerStatus
SetEnabled(enabled bool) error
RescheduleUpcomingChecks() (int, error)
}
// CheckerSchedule defines a recurring check schedule
@ -126,4 +129,12 @@ type CheckerScheduleUsecase interface {
// DeleteSchedulesForTarget removes all schedules for a target
DeleteSchedulesForTarget(targetType CheckScopeType, targetId Identifier) error
// RescheduleUpcomingChecks randomizes next run times for all enabled schedules
// within their respective intervals to spread load evenly.
RescheduleUpcomingChecks() (int, error)
// RescheduleOverdueTests reschedules overdue tests to run soon, spread over a
// short window to avoid scheduler famine after a suspend or server restart.
RescheduleOverdueChecks() (int, error)
}

View file

@ -104,6 +104,9 @@
<NavItem>
<NavLink href="/checkers" active={page && page.url.pathname.startsWith('/checkers')}>Checkers</NavLink>
</NavItem>
<NavItem>
<NavLink href="/scheduler" active={page && page.url.pathname.startsWith('/scheduler')}>Scheduler</NavLink>
</NavItem>
</Nav>
</Collapse>
</Navbar>

View file

@ -0,0 +1,341 @@
<!--
This file is part of the happyDomain (R) project.
Copyright (c) 2022-2026 happyDomain
Authors: Pierre-Olivier Mercier, et al.
This program is offered under a commercial and under the AGPL license.
For commercial licensing, contact us at <contact@happydomain.org>.
For AGPL licensing:
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
-->
<script lang="ts">
import { onMount } from "svelte";
import {
Badge,
Button,
Card,
CardBody,
CardHeader,
Col,
Container,
Icon,
Row,
Spinner,
Table,
} from "@sveltestrap/sveltestrap";
import { toasts } from "$lib/stores/toasts";
import {
getScheduler,
postSchedulerDisable,
postSchedulerEnable,
postSchedulerRescheduleUpcoming,
} from "$lib/api-admin/sdk.gen";
interface CheckerSchedule {
id: string;
checker_name: string;
owner_id: string;
target_type: number;
target_id: string;
interval: number;
enabled: boolean;
last_run?: string;
next_run: string;
}
interface SchedulerStatus {
config_enabled: boolean;
runtime_enabled: boolean;
running: boolean;
worker_count: number;
queue_size: number;
active_count: number;
next_schedules: CheckerSchedule[] | null;
}
let status = $state<SchedulerStatus | null>(null);
let loading = $state(true);
let actionInProgress = $state(false);
let rescheduleInProgress = $state(false);
let error = $state<string | null>(null);
async function fetchStatus() {
loading = true;
error = null;
try {
const { data, error: err } = await getScheduler();
if (err) throw new Error(String(err));
status = data as SchedulerStatus;
} catch (e: any) {
error = e.message ?? "Unknown error";
} finally {
loading = false;
}
}
async function setEnabled(enabled: boolean) {
actionInProgress = true;
const action = enabled ? "enable" : "disable";
try {
const { data, error: err } = await (enabled
? postSchedulerEnable()
: postSchedulerDisable());
if (err) {
toasts.addErrorToast({ message: `Failed to ${action} scheduler: ${err}` });
return;
}
status = data as SchedulerStatus;
toasts.addToast({ message: `Scheduler ${action}d successfully`, color: "success" });
} catch (e: any) {
toasts.addErrorToast({ message: e.message ?? `Failed to ${action} scheduler` });
} finally {
actionInProgress = false;
}
}
async function rescheduleUpcoming() {
rescheduleInProgress = true;
try {
const { data, error: err } = await postSchedulerRescheduleUpcoming();
if (err) {
toasts.addErrorToast({ message: `Failed to reschedule: ${err}` });
return;
}
toasts.addToast({
message: `Rescheduled ${(data as any).rescheduled} schedule(s) successfully`,
color: "success",
});
await fetchStatus();
} catch (e: any) {
toasts.addErrorToast({ message: e.message ?? "Failed to reschedule upcoming checks" });
} finally {
rescheduleInProgress = false;
}
}
function formatDuration(ns: number): string {
const seconds = ns / 1e9;
if (seconds < 60) return `${Math.round(seconds)}s`;
const minutes = seconds / 60;
if (minutes < 60) return `${Math.round(minutes)}m`;
const hours = minutes / 60;
if (hours < 24) return `${Math.round(hours)}h`;
return `${Math.round(hours / 24)}d`;
}
function targetTypeName(t: number): string {
const names: Record<number, string> = {
0: "instance",
1: "user",
2: "domain",
3: "zone",
4: "service",
5: "ondemand",
};
return names[t] ?? "unknown";
}
onMount(fetchStatus);
</script>
<Container class="flex-fill my-5">
<Row class="mb-4">
<Col>
<h1 class="display-5">
<Icon name="clock-history"></Icon>
Test Scheduler
</h1>
<p class="text-muted lead">Monitor and control the background test scheduler</p>
</Col>
</Row>
{#if loading}
<div class="d-flex align-items-center gap-2">
<Spinner size="sm" />
<span>Loading scheduler status...</span>
</div>
{:else if error}
<Card color="danger" body>
<Icon name="exclamation-triangle-fill"></Icon>
Error loading scheduler status: {error}
<Button class="ms-3" size="sm" color="light" onclick={fetchStatus}>Retry</Button>
</Card>
{:else if status}
<!-- Status Card -->
<Card class="mb-4">
<CardHeader>
<div class="d-flex justify-content-between align-items-center">
<span><Icon name="info-circle-fill"></Icon> Scheduler Status</span>
<Button size="sm" color="secondary" outline onclick={fetchStatus}>
<Icon name="arrow-clockwise"></Icon> Refresh
</Button>
</div>
</CardHeader>
<CardBody>
<Row class="g-3 mb-3">
<Col sm={6} md={4}>
<div class="text-muted small">Config Enabled</div>
{#if status.config_enabled}
<Badge color="success">Yes</Badge>
{:else}
<Badge color="danger">No</Badge>
{/if}
</Col>
<Col sm={6} md={4}>
<div class="text-muted small">Runtime Enabled</div>
{#if status.runtime_enabled}
<Badge color="success">Yes</Badge>
{:else}
<Badge color="warning">Disabled</Badge>
{/if}
</Col>
<Col sm={6} md={4}>
<div class="text-muted small">Running</div>
{#if status.running}
<Badge color="success"><Icon name="play-fill"></Icon> Running</Badge>
{:else}
<Badge color="secondary"><Icon name="stop-fill"></Icon> Stopped</Badge>
{/if}
</Col>
<Col sm={6} md={4}>
<div class="text-muted small">Workers</div>
<strong>{status.worker_count}</strong>
</Col>
<Col sm={6} md={4}>
<div class="text-muted small">Queue Size</div>
<strong>{status.queue_size}</strong>
</Col>
<Col sm={6} md={4}>
<div class="text-muted small">Active Executions</div>
<strong>{status.active_count}</strong>
</Col>
</Row>
{#if status.config_enabled}
<div class="d-flex gap-2">
{#if status.runtime_enabled}
<Button
color="warning"
disabled={actionInProgress}
onclick={() => setEnabled(false)}
>
{#if actionInProgress}<Spinner size="sm" />{:else}<Icon
name="pause-fill"
></Icon>{/if}
Disable Scheduler
</Button>
{:else}
<Button
color="success"
disabled={actionInProgress}
onclick={() => setEnabled(true)}
>
{#if actionInProgress}<Spinner size="sm" />{:else}<Icon
name="play-fill"
></Icon>{/if}
Enable Scheduler
</Button>
{/if}
<Button
color="secondary"
outline
disabled={rescheduleInProgress}
onclick={rescheduleUpcoming}
>
{#if rescheduleInProgress}<Spinner size="sm" />{:else}<Icon
name="shuffle"
></Icon>{/if}
Spread Upcoming Checks
</Button>
</div>
{:else}
<p class="text-muted mb-0">
<Icon name="lock-fill"></Icon>
The scheduler is disabled in the server configuration and cannot be enabled at
runtime.
</p>
{/if}
</CardBody>
</Card>
<!-- Upcoming Scheduled Checks -->
<Card>
<CardHeader>
<Icon name="calendar-event-fill"></Icon>
Upcoming Scheduled Checks
{#if status.next_schedules}
<Badge color="secondary" class="ms-2">{status.next_schedules.length}</Badge>
{/if}
</CardHeader>
<CardBody class="p-0">
<div class="table-responsive">
<Table hover class="mb-0">
<thead>
<tr>
<th>Plugin</th>
<th>Target Type</th>
<th>Target ID</th>
<th>Interval</th>
<th>Last Run</th>
<th>Next Run</th>
</tr>
</thead>
<tbody>
{#if !status.next_schedules || status.next_schedules.length === 0}
<tr>
<td colspan="6" class="text-center text-muted py-3">
No scheduled checks
</td>
</tr>
{:else}
{#each status.next_schedules as schedule}
<tr>
<td><strong>{schedule.checker_name}</strong></td>
<td
><Badge color="info"
>{targetTypeName(schedule.target_type)}</Badge
></td
>
<td><code class="small">{schedule.target_id}</code></td>
<td>{formatDuration(schedule.interval)}</td>
<td>
{#if schedule.last_run}
{new Date(schedule.last_run).toLocaleString()}
{:else}
<span class="text-muted">Never</span>
{/if}
</td>
<td>
{#if new Date(schedule.next_run) < new Date()}
<span class="text-danger">
<Icon name="exclamation-circle-fill"></Icon>
{new Date(schedule.next_run).toLocaleString()}
</span>
{:else}
{new Date(schedule.next_run).toLocaleString()}
{/if}
</td>
</tr>
{/each}
{/if}
</tbody>
</Table>
</div>
</CardBody>
</Card>
{/if}
</Container>