Initial commit
Adds a happyDomain checker that probes STUN/TURN servers end-to-end:
DNS/SRV discovery, UDP/TCP/TLS/DTLS dial, STUN binding + reflexive-addr
sanity, open-relay detection, authenticated TURN Allocate (long-term
creds or REST-API HMAC), public-relay check, CreatePermission + Send
round-trip through the relay, and optional ChannelBind.
Failing sub-tests carry a remediation string (`Fix`) that the HTML
report surfaces as a yellow headline callout and inline next to each
row. Mapping covers the most common coturn misconfigurations
(external-ip, relay-ip, lt-cred-mech, min-port/max-port, cert issues,
401 nonce drift, 441/442/486/508 allocation errors).
Implements sdk.EndpointDiscoverer (checker/discovery.go): every
stuns:/turns:/DTLS endpoint observed during Collect is published as a
DiscoveredEndpoint{Type: "tls"|"dtls"} so a downstream TLS checker can
verify certificates without re-parsing the observation.
Backed by pion/stun/v3 + pion/turn/v4 + pion/dtls/v3; SDK pinned to a
local replace until the EndpointDiscoverer interface ships in a tagged
release.
This commit is contained in:
commit
6ad7d3f593
29 changed files with 2794 additions and 0 deletions
186
checker/rules_discovery.go
Normal file
186
checker/rules_discovery.go
Normal file
|
|
@ -0,0 +1,186 @@
|
|||
package checker
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
sdk "git.happydns.org/checker-sdk-go/checker"
|
||||
)
|
||||
|
||||
// discoveryRule is the rule that surfaces the result of endpoint
// discovery (explicit URI parsing or SRV lookup). A GlobalError recorded
// during Collect is reported through this rule.
type discoveryRule struct{}

// Name returns the stable identifier of the rule.
func (r *discoveryRule) Name() string {
	return "stun_turn.discovery"
}

// Description returns the one-sentence, human-readable summary of the rule.
func (r *discoveryRule) Description() string {
	const summary = "Verifies that at least one STUN/TURN endpoint could be discovered (explicit URI or SRV lookup)."
	return summary
}
|
||||
|
||||
func (r *discoveryRule) Evaluate(ctx context.Context, obs sdk.ObservationGetter, _ sdk.CheckerOptions) []sdk.CheckState {
|
||||
data, errSt := loadData(ctx, obs)
|
||||
if errSt != nil {
|
||||
return []sdk.CheckState{*errSt}
|
||||
}
|
||||
if data.GlobalError != "" {
|
||||
return []sdk.CheckState{{
|
||||
Status: sdk.StatusError,
|
||||
Message: data.GlobalError,
|
||||
Code: "stun_turn.discovery.error",
|
||||
}}
|
||||
}
|
||||
if len(data.Endpoints) == 0 {
|
||||
return []sdk.CheckState{{
|
||||
Status: sdk.StatusError,
|
||||
Message: "no endpoints to probe",
|
||||
Code: "stun_turn.discovery.empty",
|
||||
}}
|
||||
}
|
||||
return []sdk.CheckState{passState("stun_turn.discovery.ok",
|
||||
fmt.Sprintf("%d endpoint(s) discovered", len(data.Endpoints)))}
|
||||
}
|
||||
|
||||
// srvStunRule checks STUN coverage: it looks for at least one discovered
// endpoint (from SRV records or an explicit URI) that is not flagged as
// TURN.
type srvStunRule struct{}

// Name returns the stable identifier of the rule.
func (r *srvStunRule) Name() string {
	return "stun_turn.srv_stun"
}

// Description returns the one-sentence, human-readable summary of the rule.
func (r *srvStunRule) Description() string {
	const summary = "Verifies that at least one STUN endpoint is reachable via SRV (_stun/_stuns) or an explicit URI."
	return summary
}
|
||||
|
||||
func (r *srvStunRule) Evaluate(ctx context.Context, obs sdk.ObservationGetter, _ sdk.CheckerOptions) []sdk.CheckState {
|
||||
data, errSt := loadData(ctx, obs)
|
||||
if errSt != nil {
|
||||
return []sdk.CheckState{*errSt}
|
||||
}
|
||||
if data.GlobalError != "" {
|
||||
return []sdk.CheckState{skippedState("stun_turn.srv_stun.skipped",
|
||||
"Discovery failed, SRV coverage could not be evaluated.")}
|
||||
}
|
||||
// Count endpoints whose source indicates STUN SRV (or URI form).
|
||||
var stunCount, turnCount int
|
||||
for _, ep := range data.Endpoints {
|
||||
if ep.Endpoint.IsTURN {
|
||||
turnCount++
|
||||
} else {
|
||||
stunCount++
|
||||
}
|
||||
}
|
||||
if stunCount == 0 && turnCount > 0 {
|
||||
return []sdk.CheckState{{
|
||||
Status: sdk.StatusInfo,
|
||||
Code: "stun_turn.srv_stun.none",
|
||||
Message: "No STUN-only endpoint discovered (TURN endpoints also expose STUN).",
|
||||
}}
|
||||
}
|
||||
if stunCount == 0 {
|
||||
return []sdk.CheckState{{
|
||||
Status: sdk.StatusWarn,
|
||||
Code: "stun_turn.srv_stun.missing",
|
||||
Message: "No STUN endpoint discovered; clients may fail to obtain a reflexive address.",
|
||||
}}
|
||||
}
|
||||
return []sdk.CheckState{passState("stun_turn.srv_stun.ok",
|
||||
fmt.Sprintf("%d STUN endpoint(s) discovered", stunCount))}
|
||||
}
|
||||
|
||||
// srvTurnRule checks TURN coverage: it looks for at least one discovered
// endpoint flagged as TURN.
type srvTurnRule struct{}

// Name returns the stable identifier of the rule.
func (r *srvTurnRule) Name() string {
	return "stun_turn.srv_turn"
}

// Description returns the one-sentence, human-readable summary of the rule.
func (r *srvTurnRule) Description() string {
	const summary = "Verifies that at least one TURN endpoint is reachable via SRV (_turn/_turns) or an explicit URI."
	return summary
}
|
||||
|
||||
func (r *srvTurnRule) Evaluate(ctx context.Context, obs sdk.ObservationGetter, opts sdk.CheckerOptions) []sdk.CheckState {
|
||||
data, errSt := loadData(ctx, obs)
|
||||
if errSt != nil {
|
||||
return []sdk.CheckState{*errSt}
|
||||
}
|
||||
if data.GlobalError != "" {
|
||||
return []sdk.CheckState{skippedState("stun_turn.srv_turn.skipped",
|
||||
"Discovery failed, TURN coverage could not be evaluated.")}
|
||||
}
|
||||
if data.Mode == "stun" {
|
||||
return []sdk.CheckState{skippedState("stun_turn.srv_turn.skipped",
|
||||
"TURN coverage not evaluated (mode=stun).")}
|
||||
}
|
||||
var turnCount int
|
||||
for _, ep := range data.Endpoints {
|
||||
if ep.Endpoint.IsTURN {
|
||||
turnCount++
|
||||
}
|
||||
}
|
||||
if turnCount == 0 {
|
||||
sev := sdk.StatusWarn
|
||||
if data.Mode == "turn" {
|
||||
sev = sdk.StatusCrit
|
||||
}
|
||||
return []sdk.CheckState{{
|
||||
Status: sev,
|
||||
Code: "stun_turn.srv_turn.missing",
|
||||
Message: "No TURN endpoint discovered; clients behind symmetric NAT will have no relay path.",
|
||||
}}
|
||||
}
|
||||
return []sdk.CheckState{passState("stun_turn.srv_turn.ok",
|
||||
fmt.Sprintf("%d TURN endpoint(s) discovered", turnCount))}
|
||||
}
|
||||
|
||||
// dialRule surfaces, endpoint by endpoint, the connection or handshake
// failures recorded for the discovered endpoints.
type dialRule struct{}

// Name returns the stable identifier of the rule.
func (r *dialRule) Name() string {
	return "stun_turn.dial"
}

// Description returns the one-sentence, human-readable summary of the rule.
func (r *dialRule) Description() string {
	const summary = "Verifies that every discovered endpoint accepts a connection (TCP/TLS handshake or UDP socket)."
	return summary
}
|
||||
|
||||
func (r *dialRule) Evaluate(ctx context.Context, obs sdk.ObservationGetter, _ sdk.CheckerOptions) []sdk.CheckState {
|
||||
data, errSt := loadData(ctx, obs)
|
||||
if errSt != nil {
|
||||
return []sdk.CheckState{*errSt}
|
||||
}
|
||||
if data.GlobalError != "" || len(data.Endpoints) == 0 {
|
||||
return []sdk.CheckState{skippedState("stun_turn.dial.skipped", "No endpoint to evaluate.")}
|
||||
}
|
||||
var states []sdk.CheckState
|
||||
for _, ep := range data.Endpoints {
|
||||
if ep.Dial.OK {
|
||||
continue
|
||||
}
|
||||
states = append(states, sdk.CheckState{
|
||||
Status: sdk.StatusCrit,
|
||||
Code: "stun_turn.dial.failed",
|
||||
Subject: epSubject(ep.Endpoint),
|
||||
Message: ep.Dial.Error,
|
||||
Meta: map[string]any{"fix": dialFix(ep.Endpoint, ep.Dial.Error)},
|
||||
})
|
||||
}
|
||||
if len(states) == 0 {
|
||||
return []sdk.CheckState{passState("stun_turn.dial.ok", "All discovered endpoints accepted a connection.")}
|
||||
}
|
||||
return states
|
||||
}
|
||||
|
||||
// dialFix mirrors the fix phrasing the old Collect emitted for dial
|
||||
// failures. Kept verbatim so users keep the same remediation guidance.
|
||||
func dialFix(ep Endpoint, errMsg string) string {
|
||||
msg := strings.ToLower(errMsg)
|
||||
switch {
|
||||
case strings.Contains(msg, "no such host"):
|
||||
return fmt.Sprintf("Hostname `%s` does not resolve. Add the matching A/AAAA record (or fix typos in the URI).", ep.Host)
|
||||
case strings.Contains(msg, "tls handshake"), strings.Contains(msg, "x509"):
|
||||
return fmt.Sprintf("TLS handshake failed for `%s`. Reissue the certificate covering this hostname (e.g. via Let's Encrypt) and reload the server (coturn: `cert=` and `pkey=`).", ep.Host)
|
||||
case strings.Contains(msg, "connection refused"):
|
||||
return fmt.Sprintf("Nothing is listening on %s/%d. Start the server with the appropriate listening port (coturn: `listening-port=`/`tls-listening-port=`).", ep.Host, ep.Port)
|
||||
case strings.Contains(msg, "i/o timeout"), strings.Contains(msg, "deadline"):
|
||||
switch ep.Transport {
|
||||
case TransportUDP:
|
||||
return "No reply on UDP. Open the UDP port inbound and verify your network does not block UDP egress."
|
||||
default:
|
||||
return "Connection timed out. A firewall or NAT is likely blocking this port."
|
||||
}
|
||||
}
|
||||
return "Could not establish a connection to the server."
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue