Adds a happyDomain checker that probes STUN/TURN servers end-to-end:
DNS/SRV discovery, UDP/TCP/TLS/DTLS dial, STUN binding + reflexive-addr
sanity, open-relay detection, authenticated TURN Allocate (long-term
creds or REST-API HMAC), public-relay check, CreatePermission + Send
round-trip through the relay, and optional ChannelBind.
Failing sub-tests carry a remediation string (`Fix`) that the HTML
report surfaces both as a yellow headline callout and inline next to
each row. The mapping covers the most common coturn misconfigurations
(external-ip, relay-ip, lt-cred-mech, min-port/max-port, certificate
issues, 401 nonce drift, 441/442/486/508 allocation errors).
Implements sdk.EndpointDiscoverer (checker/discovery.go): every
stuns:/turns:/DTLS endpoint observed during Collect is published as a
DiscoveredEndpoint{Type: "tls"|"dtls"} so a downstream TLS checker can
verify certificates without re-parsing the observation.
Backed by pion/stun/v3 + pion/turn/v4 + pion/dtls/v3; SDK pinned to a
local replace until the EndpointDiscoverer interface ships in a tagged
release.
154 lines
5.4 KiB
Go
154 lines
5.4 KiB
Go
package checker
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"time"
|
|
|
|
sdk "git.happydns.org/checker-sdk-go/checker"
|
|
)
|
|
|
|
// stunBindingRule checks that a STUN Binding request succeeds (i.e. yields a
// reflexive address) on every endpoint that could be reached.
type stunBindingRule struct{}

// Name returns the identifier of the rule.
func (r *stunBindingRule) Name() string {
	return "stun_turn.stun_binding"
}

// Description returns a one-sentence, human-readable summary of the rule.
func (r *stunBindingRule) Description() string {
	const summary = "Verifies that the STUN Binding request receives a XOR-MAPPED-ADDRESS reply."
	return summary
}
|
|
|
|
func (r *stunBindingRule) Evaluate(ctx context.Context, obs sdk.ObservationGetter, _ sdk.CheckerOptions) []sdk.CheckState {
|
|
data, errSt := loadData(ctx, obs)
|
|
if errSt != nil {
|
|
return []sdk.CheckState{*errSt}
|
|
}
|
|
var states []sdk.CheckState
|
|
seen := false
|
|
for _, ep := range data.Endpoints {
|
|
if !ep.Dial.OK || !ep.STUNBinding.Attempted {
|
|
continue
|
|
}
|
|
seen = true
|
|
if ep.STUNBinding.OK {
|
|
continue
|
|
}
|
|
states = append(states, sdk.CheckState{
|
|
Status: sdk.StatusCrit,
|
|
Code: "stun_turn.stun_binding.failed",
|
|
Subject: epSubject(ep.Endpoint),
|
|
Message: ep.STUNBinding.Error,
|
|
Meta: map[string]any{
|
|
"fix": "Server did not answer the STUN Binding Request. Check that the STUN service is actually listening on this transport, and that no middlebox is filtering RFC 5389 traffic.",
|
|
},
|
|
})
|
|
}
|
|
if !seen {
|
|
return []sdk.CheckState{skippedState("stun_turn.stun_binding.skipped", "No endpoint completed a dial, STUN binding not evaluated.")}
|
|
}
|
|
if len(states) == 0 {
|
|
return []sdk.CheckState{passState("stun_turn.stun_binding.ok", "STUN Binding succeeded on every reachable endpoint.")}
|
|
}
|
|
return states
|
|
}
|
|
|
|
// stunReflexivePublicRule reports servers whose reflexive address is private
// or loopback, which is typically a TURN server sitting behind NAT without an
// external-ip setting.
type stunReflexivePublicRule struct{}

// Name returns the identifier of the rule.
func (r *stunReflexivePublicRule) Name() string {
	return "stun_turn.reflexive_public"
}

// Description returns a one-sentence, human-readable summary of the rule.
func (r *stunReflexivePublicRule) Description() string {
	const summary = "Flags endpoints that return a private/loopback reflexive address (server unaware of its public IP)."
	return summary
}
|
|
|
|
func (r *stunReflexivePublicRule) Evaluate(ctx context.Context, obs sdk.ObservationGetter, _ sdk.CheckerOptions) []sdk.CheckState {
|
|
data, errSt := loadData(ctx, obs)
|
|
if errSt != nil {
|
|
return []sdk.CheckState{*errSt}
|
|
}
|
|
var states []sdk.CheckState
|
|
seen := false
|
|
for _, ep := range data.Endpoints {
|
|
if !ep.STUNBinding.OK {
|
|
continue
|
|
}
|
|
seen = true
|
|
if !ep.STUNBinding.IsPrivateMapped {
|
|
continue
|
|
}
|
|
states = append(states, sdk.CheckState{
|
|
Status: sdk.StatusCrit,
|
|
Code: "stun_turn.reflexive_public.private",
|
|
Subject: epSubject(ep.Endpoint),
|
|
Message: fmt.Sprintf("server returned a private/loopback IP: %s", ep.STUNBinding.ReflexiveAddr),
|
|
Meta: map[string]any{
|
|
"fix": "Server appears to be behind NAT and unaware of its public IP. Set `external-ip=<public>` (coturn) or the equivalent on your TURN server.",
|
|
},
|
|
})
|
|
}
|
|
if !seen {
|
|
return []sdk.CheckState{skippedState("stun_turn.reflexive_public.skipped", "No successful STUN Binding to evaluate.")}
|
|
}
|
|
if len(states) == 0 {
|
|
return []sdk.CheckState{passState("stun_turn.reflexive_public.ok", "Every reflexive address is public.")}
|
|
}
|
|
return states
|
|
}
|
|
|
|
// stunLatencyRule is the dedicated rule for the warningRTT / criticalRTT
// thresholds that the old Collect used to hard-code.
type stunLatencyRule struct{}

// Name returns the identifier of the rule.
func (r *stunLatencyRule) Name() string {
	return "stun_turn.stun_latency"
}

// Description returns a one-sentence, human-readable summary of the rule.
func (r *stunLatencyRule) Description() string {
	const summary = "Compares the STUN Binding RTT against the configured warning/critical thresholds."
	return summary
}
|
|
|
|
func (r *stunLatencyRule) Evaluate(ctx context.Context, obs sdk.ObservationGetter, opts sdk.CheckerOptions) []sdk.CheckState {
|
|
data, errSt := loadData(ctx, obs)
|
|
if errSt != nil {
|
|
return []sdk.CheckState{*errSt}
|
|
}
|
|
warn := time.Duration(sdk.GetIntOption(opts, "warningRTT", int(data.WarningRTTMs))) * time.Millisecond
|
|
crit := time.Duration(sdk.GetIntOption(opts, "criticalRTT", int(data.CriticalRTT))) * time.Millisecond
|
|
if warn <= 0 {
|
|
warn = 200 * time.Millisecond
|
|
}
|
|
if crit <= 0 {
|
|
crit = 1000 * time.Millisecond
|
|
}
|
|
var states []sdk.CheckState
|
|
seen := false
|
|
for _, ep := range data.Endpoints {
|
|
if !ep.STUNBinding.OK {
|
|
continue
|
|
}
|
|
seen = true
|
|
rtt := time.Duration(ep.STUNBinding.RTTMs) * time.Millisecond
|
|
switch {
|
|
case rtt > crit:
|
|
states = append(states, sdk.CheckState{
|
|
Status: sdk.StatusCrit,
|
|
Code: "stun_turn.stun_latency.critical",
|
|
Subject: epSubject(ep.Endpoint),
|
|
Message: fmt.Sprintf("STUN RTT %dms exceeds critical threshold %dms", ep.STUNBinding.RTTMs, crit.Milliseconds()),
|
|
Meta: map[string]any{"fix": "Server is very slow to respond. Check server load, network path, and consider deploying closer to your users."},
|
|
})
|
|
case rtt > warn:
|
|
states = append(states, sdk.CheckState{
|
|
Status: sdk.StatusWarn,
|
|
Code: "stun_turn.stun_latency.high",
|
|
Subject: epSubject(ep.Endpoint),
|
|
Message: fmt.Sprintf("STUN RTT %dms exceeds warning threshold %dms", ep.STUNBinding.RTTMs, warn.Milliseconds()),
|
|
Meta: map[string]any{"fix": "Latency is high enough to noticeably degrade interactive RTC. Consider a server geographically closer to your users."},
|
|
})
|
|
}
|
|
}
|
|
if !seen {
|
|
return []sdk.CheckState{skippedState("stun_turn.stun_latency.skipped", "No successful STUN Binding to evaluate.")}
|
|
}
|
|
if len(states) == 0 {
|
|
return []sdk.CheckState{passState("stun_turn.stun_latency.ok", "STUN RTT within acceptable thresholds on every endpoint.")}
|
|
}
|
|
return states
|
|
}
|