checker-stun-turn/checker/rules_discovery.go
Pierre-Olivier Mercier 7c7706fe3f Initial commit
Adds a happyDomain checker that probes STUN/TURN servers end-to-end:
DNS/SRV discovery, UDP/TCP/TLS/DTLS dial, STUN binding + reflexive-addr
sanity, open-relay detection, authenticated TURN Allocate (long-term
creds or REST-API HMAC), public-relay check, CreatePermission + Send
round-trip through the relay, and optional ChannelBind.

Failing sub-tests carry a remediation string (`Fix`) that the HTML
report surfaces as a yellow headline callout and inline next to each
row. Mapping covers the most common coturn misconfigurations
(external-ip, relay-ip, lt-cred-mech, min-port/max-port, cert issues,
401 nonce drift, 441/442/486/508 allocation errors).

Implements sdk.EndpointDiscoverer (checker/discovery.go): every
stuns:/turns:/DTLS endpoint observed during Collect is published as a
DiscoveredEndpoint{Type: "tls"|"dtls"} so a downstream TLS checker can
verify certificates without re-parsing the observation.

Backed by pion/stun/v3 + pion/turn/v4 + pion/dtls/v3; SDK pinned to a
local replace until the EndpointDiscoverer interface ships in a tagged
release.
2026-04-26 19:55:05 +07:00

186 lines
6.4 KiB
Go

package checker
import (
"context"
"fmt"
"strings"
sdk "git.happydns.org/checker-sdk-go/checker"
)
// discoveryRule surfaces the result of endpoint discovery, whether the
// endpoints came from parsing a URI or from an SRV lookup. A GlobalError
// recorded by Collect is reported through this rule.
type discoveryRule struct{}

// Name returns the identifier of this rule.
func (r *discoveryRule) Name() string {
	const id = "stun_turn.discovery"
	return id
}

// Description returns a one-sentence summary of what the rule verifies.
func (r *discoveryRule) Description() string {
	const summary = "Verifies that at least one STUN/TURN endpoint could be discovered (explicit URI or SRV lookup)."
	return summary
}
// Evaluate reports the discovery outcome as a single check state:
//   - the state handed back by loadData when it is non-nil;
//   - an error carrying GlobalError when one was recorded;
//   - an error when the endpoint list is empty;
//   - otherwise a pass state that mentions the endpoint count.
//
// The checker options are not consulted.
func (r *discoveryRule) Evaluate(ctx context.Context, obs sdk.ObservationGetter, _ sdk.CheckerOptions) []sdk.CheckState {
	data, loadErr := loadData(ctx, obs)
	if loadErr != nil {
		return []sdk.CheckState{*loadErr}
	}

	found := len(data.Endpoints)
	switch {
	case data.GlobalError != "":
		// A collection-wide failure takes precedence over the endpoint count.
		failure := sdk.CheckState{
			Status:  sdk.StatusError,
			Code:    "stun_turn.discovery.error",
			Message: data.GlobalError,
		}
		return []sdk.CheckState{failure}
	case found == 0:
		empty := sdk.CheckState{
			Status:  sdk.StatusError,
			Code:    "stun_turn.discovery.empty",
			Message: "no endpoints to probe",
		}
		return []sdk.CheckState{empty}
	}

	summary := fmt.Sprintf("%d endpoint(s) discovered", found)
	return []sdk.CheckState{passState("stun_turn.discovery.ok", summary)}
}
// srvStunRule checks that discovery yielded at least one STUN (non-TURN)
// endpoint, whether from SRV records or from a URI. It is only meaningful
// in SRV-discovery mode.
type srvStunRule struct{}

// Name returns the identifier of this rule.
func (r *srvStunRule) Name() string {
	const id = "stun_turn.srv_stun"
	return id
}

// Description returns a one-sentence summary of what the rule verifies.
func (r *srvStunRule) Description() string {
	const summary = "Verifies that at least one STUN endpoint is reachable via SRV (_stun/_stuns) or an explicit URI."
	return summary
}
// Evaluate reports STUN endpoint coverage as a single check state.
//
// A non-nil state from loadData is returned as is, and a recorded
// GlobalError yields a skipped state. Otherwise endpoints are split on
// their IsTURN flag:
//   - at least one non-TURN endpoint: pass, with the STUN count;
//   - only TURN endpoints: informational state;
//   - no endpoint at all: warning.
//
// The checker options are not consulted.
func (r *srvStunRule) Evaluate(ctx context.Context, obs sdk.ObservationGetter, _ sdk.CheckerOptions) []sdk.CheckState {
	data, loadErr := loadData(ctx, obs)
	if loadErr != nil {
		return []sdk.CheckState{*loadErr}
	}
	if data.GlobalError != "" {
		skipped := skippedState("stun_turn.srv_stun.skipped",
			"Discovery failed, SRV coverage could not be evaluated.")
		return []sdk.CheckState{skipped}
	}

	// Classification relies solely on IsTURN: every endpoint that is not
	// flagged as TURN is counted as STUN.
	turn := 0
	for _, ep := range data.Endpoints {
		if ep.Endpoint.IsTURN {
			turn++
		}
	}
	stun := len(data.Endpoints) - turn

	switch {
	case stun > 0:
		summary := fmt.Sprintf("%d STUN endpoint(s) discovered", stun)
		return []sdk.CheckState{passState("stun_turn.srv_stun.ok", summary)}
	case turn > 0:
		return []sdk.CheckState{{
			Status:  sdk.StatusInfo,
			Code:    "stun_turn.srv_stun.none",
			Message: "No STUN-only endpoint discovered (TURN endpoints also expose STUN).",
		}}
	default:
		return []sdk.CheckState{{
			Status:  sdk.StatusWarn,
			Code:    "stun_turn.srv_stun.missing",
			Message: "No STUN endpoint discovered; clients may fail to obtain a reflexive address.",
		}}
	}
}
// srvTurnRule checks that discovery yielded at least one TURN endpoint.
type srvTurnRule struct{}

// Name returns the identifier of this rule.
func (r *srvTurnRule) Name() string {
	const id = "stun_turn.srv_turn"
	return id
}

// Description returns a one-sentence summary of what the rule verifies.
func (r *srvTurnRule) Description() string {
	const summary = "Verifies that at least one TURN endpoint is reachable via SRV (_turn/_turns) or an explicit URI."
	return summary
}
// Evaluate reports TURN endpoint coverage as a single check state.
//
// A non-nil state from loadData is returned as is. The rule is skipped
// when a GlobalError was recorded or when Mode is "stun". Otherwise it
// passes if at least one endpoint has IsTURN set; when none does, the
// result is critical in "turn" mode and a warning in any other mode.
//
// opts is accepted to match the rule signature but is not read here.
func (r *srvTurnRule) Evaluate(ctx context.Context, obs sdk.ObservationGetter, opts sdk.CheckerOptions) []sdk.CheckState {
	data, loadErr := loadData(ctx, obs)
	if loadErr != nil {
		return []sdk.CheckState{*loadErr}
	}

	switch {
	case data.GlobalError != "":
		return []sdk.CheckState{skippedState("stun_turn.srv_turn.skipped",
			"Discovery failed, TURN coverage could not be evaluated.")}
	case data.Mode == "stun":
		return []sdk.CheckState{skippedState("stun_turn.srv_turn.skipped",
			"TURN coverage not evaluated (mode=stun).")}
	}

	relays := 0
	for _, ep := range data.Endpoints {
		if ep.Endpoint.IsTURN {
			relays++
		}
	}
	if relays > 0 {
		summary := fmt.Sprintf("%d TURN endpoint(s) discovered", relays)
		return []sdk.CheckState{passState("stun_turn.srv_turn.ok", summary)}
	}

	// A missing relay is only critical when TURN was explicitly requested.
	status := sdk.StatusWarn
	if data.Mode == "turn" {
		status = sdk.StatusCrit
	}
	return []sdk.CheckState{{
		Status:  status,
		Code:    "stun_turn.srv_turn.missing",
		Message: "No TURN endpoint discovered; clients behind symmetric NAT will have no relay path.",
	}}
}
// dialRule surfaces, endpoint by endpoint, the connection or handshake
// failures recorded during the dial step.
type dialRule struct{}

// Name returns the identifier of this rule.
func (r *dialRule) Name() string {
	const id = "stun_turn.dial"
	return id
}

// Description returns a one-sentence summary of what the rule verifies.
func (r *dialRule) Description() string {
	const summary = "Verifies that every discovered endpoint accepts a connection (TCP/TLS handshake or UDP socket)."
	return summary
}
// Evaluate returns one critical state per endpoint whose dial did not
// succeed, or a single pass state when every endpoint connected.
//
// A non-nil state from loadData is returned as is. The rule is skipped
// when a GlobalError was recorded or when there are no endpoints. Each
// failure state carries the endpoint subject, the recorded dial error as
// its message, and a remediation hint from dialFix under Meta["fix"].
//
// The checker options are not consulted.
func (r *dialRule) Evaluate(ctx context.Context, obs sdk.ObservationGetter, _ sdk.CheckerOptions) []sdk.CheckState {
	data, loadErr := loadData(ctx, obs)
	if loadErr != nil {
		return []sdk.CheckState{*loadErr}
	}

	nothingToCheck := data.GlobalError != "" || len(data.Endpoints) == 0
	if nothingToCheck {
		return []sdk.CheckState{skippedState("stun_turn.dial.skipped", "No endpoint to evaluate.")}
	}

	var failures []sdk.CheckState
	for _, ep := range data.Endpoints {
		if !ep.Dial.OK {
			hint := dialFix(ep.Endpoint, ep.Dial.Error)
			failure := sdk.CheckState{
				Status:  sdk.StatusCrit,
				Code:    "stun_turn.dial.failed",
				Subject: epSubject(ep.Endpoint),
				Message: ep.Dial.Error,
				Meta:    map[string]any{"fix": hint},
			}
			failures = append(failures, failure)
		}
	}

	if len(failures) > 0 {
		return failures
	}
	return []sdk.CheckState{passState("stun_turn.dial.ok", "All discovered endpoints accepted a connection.")}
}
// dialFix maps a dial error message to a remediation hint for ep. The
// wording mirrors what the old Collect emitted for dial failures and is
// kept verbatim so users keep the same remediation guidance.
//
// Matching is a case-insensitive substring search on errMsg, tried in
// this order (first match wins): unresolved hostname, TLS/x509 failure,
// connection refused, timeout/deadline. Timeouts get UDP-specific advice
// when ep.Transport is TransportUDP. Anything else, including an empty
// message, yields a generic hint.
func dialFix(ep Endpoint, errMsg string) string {
	lowered := strings.ToLower(errMsg)
	mentions := func(needles ...string) bool {
		for _, needle := range needles {
			if strings.Contains(lowered, needle) {
				return true
			}
		}
		return false
	}

	if mentions("no such host") {
		return fmt.Sprintf("Hostname `%s` does not resolve. Add the matching A/AAAA record (or fix typos in the URI).", ep.Host)
	}
	if mentions("tls handshake", "x509") {
		return fmt.Sprintf("TLS handshake failed for `%s`. Reissue the certificate covering this hostname (e.g. via Let's Encrypt) and reload the server (coturn: `cert=` and `pkey=`).", ep.Host)
	}
	if mentions("connection refused") {
		return fmt.Sprintf("Nothing is listening on %s/%d. Start the server with the appropriate listening port (coturn: `listening-port=`/`tls-listening-port=`).", ep.Host, ep.Port)
	}
	if mentions("i/o timeout", "deadline") {
		if ep.Transport == TransportUDP {
			return "No reply on UDP. Open the UDP port inbound and verify your network does not block UDP egress."
		}
		return "Connection timed out. A firewall or NAT is likely blocking this port."
	}
	return "Could not establish a connection to the server."
}