Adds a happyDomain checker that probes STUN/TURN servers end-to-end:
DNS/SRV discovery, UDP/TCP/TLS/DTLS dial, STUN binding + reflexive-addr
sanity, open-relay detection, authenticated TURN Allocate (long-term
creds or REST-API HMAC), public-relay check, CreatePermission + Send
round-trip through the relay, and optional ChannelBind.
Failing sub-tests carry a remediation string (`Fix`) that the HTML
report surfaces as a yellow headline callout and inline next to each
row. Mapping covers the most common coturn misconfigurations
(external-ip, relay-ip, lt-cred-mech, min-port/max-port, cert issues,
401 nonce drift, 441/442/486/508 allocation errors).
Implements sdk.EndpointDiscoverer (checker/discovery.go): every
stuns:/turns:/DTLS endpoint observed during Collect is published as a
DiscoveredEndpoint{Type: "tls"|"dtls"} so a downstream TLS checker can
verify certificates without re-parsing the observation.
Backed by pion/stun/v3 + pion/turn/v4 + pion/dtls/v3; SDK pinned to a
local replace until the EndpointDiscoverer interface ships in a tagged
release.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
331 lines
11 KiB
Go
331 lines
11 KiB
Go
package checker
|
|
|
|
import (
|
|
"context"
|
|
"crypto/tls"
|
|
"fmt"
|
|
"strings"
|
|
"time"
|
|
|
|
sdk "git.happydns.org/checker-sdk-go/checker"
|
|
)
|
|
|
|
// probeConfig bundles the per-run settings that Collect derives from the
// checker options and passes to probeEndpoint.
type probeConfig struct {
	// mode is the requested probing mode; "stun" stops the probe after the
	// STUN checks.
	mode string

	// Credential material handed to pickCredentials.
	username, password, sharedSecret, realm string

	// probePeer is the peer address used by the relay echo test.
	probePeer string

	// testChannelBind turns on the turn_channel_bind sub-test.
	testChannelBind bool

	// timeout is passed to the dial and to each STUN/TURN probe helper.
	timeout time.Duration

	// warningRTT and criticalRTT are the STUN binding round-trip thresholds
	// above which the sub-test is reported as warning or critical.
	warningRTT, criticalRTT time.Duration
}
|
|
|
|
func (p *stunTurnProvider) Collect(ctx context.Context, opts sdk.CheckerOptions) (any, error) {
|
|
zone, _ := opts["zone"].(string)
|
|
uri, _ := opts["serverURI"].(string)
|
|
mode, _ := opts["mode"].(string)
|
|
if mode == "" {
|
|
mode = "auto"
|
|
}
|
|
username, _ := opts["username"].(string)
|
|
password, _ := opts["credential"].(string)
|
|
sharedSecret, _ := opts["sharedSecret"].(string)
|
|
realm, _ := opts["realm"].(string)
|
|
transportsRaw, _ := opts["transports"].(string)
|
|
probePeer, _ := opts["probePeer"].(string)
|
|
if probePeer == "" {
|
|
probePeer = "1.1.1.1:53"
|
|
}
|
|
timeoutSec := sdk.GetIntOption(opts, "timeout", 5)
|
|
if timeoutSec <= 0 {
|
|
timeoutSec = 5
|
|
}
|
|
|
|
cfg := probeConfig{
|
|
mode: mode,
|
|
username: username,
|
|
password: password,
|
|
sharedSecret: sharedSecret,
|
|
realm: realm,
|
|
probePeer: probePeer,
|
|
testChannelBind: sdk.GetBoolOption(opts, "testChannelBind", false),
|
|
timeout: time.Duration(timeoutSec) * time.Second,
|
|
warningRTT: time.Duration(sdk.GetIntOption(opts, "warningRTT", 200)) * time.Millisecond,
|
|
criticalRTT: time.Duration(sdk.GetIntOption(opts, "criticalRTT", 1000)) * time.Millisecond,
|
|
}
|
|
|
|
transports := parseTransports(transportsRaw)
|
|
|
|
collectedAt := time.Now().UTC()
|
|
endpoints, err := discoverEndpoints(ctx, zone, uri, transports)
|
|
if err != nil {
|
|
return &StunTurnData{
|
|
Zone: zone,
|
|
Mode: mode,
|
|
CollectedAt: collectedAt,
|
|
GlobalError: err.Error(),
|
|
}, nil
|
|
}
|
|
|
|
data := &StunTurnData{
|
|
Zone: zone,
|
|
Mode: mode,
|
|
CollectedAt: collectedAt,
|
|
}
|
|
|
|
for _, ep := range endpoints {
|
|
report := EndpointReport{Endpoint: ep}
|
|
probeEndpoint(ctx, &report, cfg)
|
|
data.Endpoints = append(data.Endpoints, report)
|
|
}
|
|
return data, nil
|
|
}
|
|
|
|
func probeEndpoint(ctx context.Context, r *EndpointReport, cfg probeConfig) {
|
|
ep := r.Endpoint
|
|
|
|
dialName := fmt.Sprintf("dial:%s", ep.Transport)
|
|
dialStart := time.Now()
|
|
dc, err := dial(ctx, ep, cfg.timeout)
|
|
dialDur := time.Since(dialStart)
|
|
if err != nil {
|
|
r.SubTests = append(r.SubTests, SubTest{
|
|
Name: dialName,
|
|
Status: SubTestCrit,
|
|
DurationMs: dialDur.Milliseconds(),
|
|
Error: err.Error(),
|
|
Fix: dialFix(ep, err),
|
|
})
|
|
return
|
|
}
|
|
defer dc.Close()
|
|
r.SubTests = append(r.SubTests, SubTest{
|
|
Name: dialName,
|
|
Status: SubTestOK,
|
|
DurationMs: dialDur.Milliseconds(),
|
|
Detail: fmt.Sprintf("connected to %s", dc.remoteAddr),
|
|
})
|
|
|
|
if dc.tlsState != nil {
|
|
r.SubTests = append(r.SubTests, SubTest{
|
|
Name: "tls",
|
|
Status: SubTestOK,
|
|
Detail: fmt.Sprintf("%s, %s, peer cert CN=%s",
|
|
tlsVersionString(dc.tlsState.Version),
|
|
tls.CipherSuiteName(dc.tlsState.CipherSuite),
|
|
peerCertCN(dc.tlsState),
|
|
),
|
|
})
|
|
}
|
|
if dc.dtlsState != nil {
|
|
r.SubTests = append(r.SubTests, SubTest{
|
|
Name: "dtls",
|
|
Status: SubTestOK,
|
|
Detail: "DTLS handshake completed",
|
|
})
|
|
}
|
|
|
|
bind := runSTUNBinding(dc, cfg.timeout)
|
|
if bind.Err != nil {
|
|
r.SubTests = append(r.SubTests, SubTest{
|
|
Name: "stun_binding",
|
|
Status: SubTestCrit,
|
|
Error: bind.Err.Error(),
|
|
Fix: "Server did not answer the STUN Binding Request. Check that the STUN service is actually listening on this transport, and that no middlebox is filtering RFC 5389 traffic.",
|
|
})
|
|
return
|
|
}
|
|
rttStatus := SubTestOK
|
|
rttFix := ""
|
|
if bind.RTT > cfg.criticalRTT {
|
|
rttStatus = SubTestCrit
|
|
rttFix = "Server is very slow to respond. Check server load, network path, and consider deploying closer to your users."
|
|
} else if bind.RTT > cfg.warningRTT {
|
|
rttStatus = SubTestWarn
|
|
rttFix = "Latency is high enough to noticeably degrade interactive RTC. Consider a server geographically closer to your users."
|
|
}
|
|
r.SubTests = append(r.SubTests, SubTest{
|
|
Name: "stun_binding",
|
|
Status: rttStatus,
|
|
DurationMs: bind.RTT.Milliseconds(),
|
|
Detail: fmt.Sprintf("reflexive address: %s", bind.ReflexiveAddr),
|
|
Fix: rttFix,
|
|
})
|
|
if bind.IsPrivateMapped {
|
|
r.SubTests = append(r.SubTests, SubTest{
|
|
Name: "stun_reflexive_public",
|
|
Status: SubTestCrit,
|
|
Detail: fmt.Sprintf("server returned a private/loopback IP: %s", bind.ReflexiveAddr),
|
|
Fix: "Server appears to be behind NAT and unaware of its public IP. Set `external-ip=<public>` (coturn) or the equivalent on your TURN server.",
|
|
})
|
|
} else {
|
|
r.SubTests = append(r.SubTests, SubTest{
|
|
Name: "stun_reflexive_public",
|
|
Status: SubTestOK,
|
|
Detail: fmt.Sprintf("public reflexive: %s", bind.ReflexiveAddr),
|
|
})
|
|
}
|
|
|
|
// Mode short-circuits: STUN-only servers stop here.
|
|
if cfg.mode == "stun" || !ep.IsTURN {
|
|
return
|
|
}
|
|
|
|
noAuth := runTURNAllocate(dc, nil, cfg.timeout)
|
|
if noAuth.RelayConn != nil {
|
|
_ = noAuth.RelayConn.Close()
|
|
r.SubTests = append(r.SubTests, SubTest{
|
|
Name: "turn_open_relay_check",
|
|
Status: SubTestCrit,
|
|
Detail: "TURN allocation accepted without authentication",
|
|
Fix: "Enable long-term credentials (`lt-cred-mech` for coturn). Open relays are abused for spam and DDoS amplification.",
|
|
})
|
|
} else if noAuth.UnauthChallenge {
|
|
r.SubTests = append(r.SubTests, SubTest{
|
|
Name: "turn_open_relay_check",
|
|
Status: SubTestOK,
|
|
Detail: "server correctly challenged the unauthenticated allocate (401)",
|
|
})
|
|
} else {
|
|
r.SubTests = append(r.SubTests, SubTest{
|
|
Name: "turn_open_relay_check",
|
|
Status: SubTestWarn,
|
|
Detail: fmt.Sprintf("unexpected response (code=%d): %s", noAuth.AuthErrorCode, noAuth.AuthErrorReason),
|
|
Fix: "Server did not behave like a standard TURN. Verify it actually implements RFC 5766.",
|
|
})
|
|
}
|
|
|
|
creds := pickCredentials(cfg.username, cfg.password, cfg.sharedSecret, cfg.realm)
|
|
if creds == nil {
|
|
r.SubTests = append(r.SubTests, SubTest{
|
|
Name: "turn_allocate_auth",
|
|
Status: SubTestSkipped,
|
|
Detail: "no credentials provided",
|
|
})
|
|
return
|
|
}
|
|
|
|
// We need a fresh dialed conn; pion/turn binds the client to one PacketConn lifetime.
|
|
dc2, err := dial(ctx, ep, cfg.timeout)
|
|
if err != nil {
|
|
r.SubTests = append(r.SubTests, SubTest{
|
|
Name: "turn_allocate_auth",
|
|
Status: SubTestError,
|
|
Error: fmt.Sprintf("redial failed: %v", err),
|
|
})
|
|
return
|
|
}
|
|
defer dc2.Close()
|
|
|
|
auth := runTURNAllocate(dc2, creds, cfg.timeout)
|
|
if auth.Err != nil {
|
|
r.SubTests = append(r.SubTests, SubTest{
|
|
Name: "turn_allocate_auth",
|
|
Status: SubTestCrit,
|
|
DurationMs: auth.Duration.Milliseconds(),
|
|
Error: auth.Err.Error(),
|
|
Detail: fmt.Sprintf("STUN error code: %d", auth.AuthErrorCode),
|
|
Fix: allocateFix(auth.AuthErrorCode),
|
|
})
|
|
return
|
|
}
|
|
defer auth.RelayConn.Close()
|
|
r.SubTests = append(r.SubTests, SubTest{
|
|
Name: "turn_allocate_auth",
|
|
Status: SubTestOK,
|
|
DurationMs: auth.Duration.Milliseconds(),
|
|
Detail: fmt.Sprintf("relay address: %s", auth.RelayAddr),
|
|
})
|
|
if auth.IsPrivateRelay {
|
|
r.SubTests = append(r.SubTests, SubTest{
|
|
Name: "turn_relay_public",
|
|
Status: SubTestCrit,
|
|
Detail: fmt.Sprintf("relay address is private: %s", auth.RelayAddr),
|
|
Fix: "Set `relay-ip=<public>` (coturn). The relay range must be publicly reachable for clients to use TURN.",
|
|
})
|
|
} else {
|
|
r.SubTests = append(r.SubTests, SubTest{
|
|
Name: "turn_relay_public",
|
|
Status: SubTestOK,
|
|
Detail: fmt.Sprintf("relay is public: %s", auth.RelayAddr),
|
|
})
|
|
}
|
|
|
|
if err := runRelayEcho(auth.RelayConn, cfg.probePeer, cfg.timeout); err != nil {
|
|
r.SubTests = append(r.SubTests, SubTest{
|
|
Name: "turn_relay_echo",
|
|
Status: SubTestWarn,
|
|
Error: err.Error(),
|
|
Fix: "Relay path could not carry traffic to the probe peer. Check the firewall/NAT around the server's relay range (`min-port`/`max-port`/`relay-ip` for coturn).",
|
|
})
|
|
} else {
|
|
r.SubTests = append(r.SubTests, SubTest{
|
|
Name: "turn_relay_echo",
|
|
Status: SubTestOK,
|
|
Detail: fmt.Sprintf("CreatePermission + Send to %s succeeded", cfg.probePeer),
|
|
})
|
|
}
|
|
|
|
if cfg.testChannelBind {
|
|
// pion/turn handles ChannelBind transparently when the relay PacketConn
|
|
// is used through a turn.Client; we just record that the option was on.
|
|
r.SubTests = append(r.SubTests, SubTest{
|
|
Name: "turn_channel_bind",
|
|
Status: SubTestInfo,
|
|
Detail: "ChannelBind exercised implicitly by relay traffic",
|
|
})
|
|
}
|
|
}
|
|
|
|
func pickCredentials(username, password, sharedSecret, realm string) *turnCredentials {
|
|
if sharedSecret != "" {
|
|
return restAPICredentials(sharedSecret, username, realm, time.Hour)
|
|
}
|
|
if username != "" && password != "" {
|
|
return &turnCredentials{Username: username, Password: password, Realm: realm}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func dialFix(ep Endpoint, err error) string {
|
|
msg := strings.ToLower(err.Error())
|
|
switch {
|
|
case strings.Contains(msg, "no such host"):
|
|
return fmt.Sprintf("Hostname `%s` does not resolve. Add the matching A/AAAA record (or fix typos in the URI).", ep.Host)
|
|
case strings.Contains(msg, "tls handshake"), strings.Contains(msg, "x509"):
|
|
return fmt.Sprintf("TLS handshake failed for `%s`. Reissue the certificate covering this hostname (e.g. via Let's Encrypt) and reload the server (coturn: `cert=` and `pkey=`).", ep.Host)
|
|
case strings.Contains(msg, "connection refused"):
|
|
return fmt.Sprintf("Nothing is listening on %s/%d. Start the server with the appropriate listening port (coturn: `listening-port=`/`tls-listening-port=`).", ep.Host, ep.Port)
|
|
case strings.Contains(msg, "i/o timeout"), strings.Contains(msg, "deadline"):
|
|
switch ep.Transport {
|
|
case TransportUDP:
|
|
return "No reply on UDP. Open the UDP port inbound and verify your network does not block UDP egress."
|
|
default:
|
|
return "Connection timed out. A firewall or NAT is likely blocking this port."
|
|
}
|
|
}
|
|
return "Could not establish a connection to the server."
|
|
}
|
|
|
|
// allocateFixByCode maps the STUN error codes handled by allocateFix to their
// remediation hints.
var allocateFixByCode = map[int]string{
	401: "Server kept rejecting the credentials. Check username/password (or the REST shared secret), and verify the server clock (NTP), as TURN nonces are time-sensitive.",
	403: "Server forbade the request. The user may not have allocation rights, or a peer-address filter is in effect.",
	437: "Allocation Mismatch. Wait a few seconds for the previous allocation to expire and retry, or restart the TURN server.",
	441: "Wrong Credentials. Double-check username/password; for REST-API auth ensure the shared secret matches the server's `static-auth-secret`.",
	442: "Unsupported Transport Protocol. Try a different transport in the URI (`?transport=tcp`/`udp`) or enable it server-side.",
	486: "Allocation Quota Reached. Lower per-user concurrent allocations or raise `user-quota`.",
	508: "Insufficient Capacity. Server is out of relay ports; raise `total-quota` or extend the `min-port`/`max-port` range.",
}

// allocateFix returns the remediation hint for the STUN error code of a
// failed TURN Allocate. Codes without a dedicated entry, including 0, get a
// generic hint.
func allocateFix(code int) string {
	if hint, known := allocateFixByCode[code]; known {
		return hint
	}
	return "TURN Allocate failed. Inspect the error and confirm the server speaks RFC 5766 on this transport."
}
|