Initial commit
Adds a happyDomain checker that probes STUN/TURN servers end-to-end:
DNS/SRV discovery, UDP/TCP/TLS/DTLS dial, STUN binding + reflexive-addr
sanity, open-relay detection, authenticated TURN Allocate (long-term
creds or REST-API HMAC), public-relay check, CreatePermission + Send
round-trip through the relay, and optional ChannelBind.
Failing sub-tests carry a remediation string (`Fix`) that the HTML
report surfaces as a yellow headline callout and inline next to each
row. Mapping covers the most common coturn misconfigurations
(external-ip, relay-ip, lt-cred-mech, min-port/max-port, cert issues,
401 nonce drift, 441/442/486/508 allocation errors).
Implements sdk.EndpointDiscoverer (checker/discovery.go): every
stuns:/turns:/DTLS endpoint observed during Collect is published as a
DiscoveredEndpoint{Type: "tls"|"dtls"} so a downstream TLS checker can
verify certificates without re-parsing the observation.
Backed by pion/stun/v3 + pion/turn/v4 + pion/dtls/v3; SDK pinned to a
local replace until the EndpointDiscoverer interface ships in a tagged
release.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
commit
5826bb1f40
23 changed files with 1906 additions and 0 deletions
331
checker/collect.go
Normal file
331
checker/collect.go
Normal file
|
|
@ -0,0 +1,331 @@
|
|||
package checker
|
||||
|
||||
import (
|
||||
"context"
|
||||
"crypto/tls"
|
||||
"fmt"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
sdk "git.happydns.org/checker-sdk-go/checker"
|
||||
)
|
||||
|
||||
// probeConfig bundles the per-run settings that Collect derives from the
// checker options and passes to probeEndpoint for every discovered endpoint.
type probeConfig struct {
	// mode is the requested probing mode. Collect defaults it to "auto";
	// the value "stun" makes probeEndpoint stop after the STUN checks.
	mode string

	// username, password, sharedSecret and realm are the credential inputs
	// handed to pickCredentials before the authenticated TURN Allocate.
	username, password string
	sharedSecret       string
	realm              string

	// probePeer is the peer address used for the relay round-trip test.
	// Collect defaults it to "1.1.1.1:53".
	probePeer string

	// testChannelBind turns on the "turn_channel_bind" sub-test entry.
	testChannelBind bool

	// timeout is passed to every dial and STUN/TURN exchange.
	timeout time.Duration

	// warningRTT and criticalRTT are the STUN binding round-trip thresholds
	// above which the "stun_binding" sub-test is downgraded to a warning or
	// a critical failure respectively.
	warningRTT, criticalRTT time.Duration
}
|
||||
|
||||
func (p *stunTurnProvider) Collect(ctx context.Context, opts sdk.CheckerOptions) (any, error) {
|
||||
zone, _ := opts["zone"].(string)
|
||||
uri, _ := opts["serverURI"].(string)
|
||||
mode, _ := opts["mode"].(string)
|
||||
if mode == "" {
|
||||
mode = "auto"
|
||||
}
|
||||
username, _ := opts["username"].(string)
|
||||
password, _ := opts["credential"].(string)
|
||||
sharedSecret, _ := opts["sharedSecret"].(string)
|
||||
realm, _ := opts["realm"].(string)
|
||||
transportsRaw, _ := opts["transports"].(string)
|
||||
probePeer, _ := opts["probePeer"].(string)
|
||||
if probePeer == "" {
|
||||
probePeer = "1.1.1.1:53"
|
||||
}
|
||||
timeoutSec := sdk.GetIntOption(opts, "timeout", 5)
|
||||
if timeoutSec <= 0 {
|
||||
timeoutSec = 5
|
||||
}
|
||||
|
||||
cfg := probeConfig{
|
||||
mode: mode,
|
||||
username: username,
|
||||
password: password,
|
||||
sharedSecret: sharedSecret,
|
||||
realm: realm,
|
||||
probePeer: probePeer,
|
||||
testChannelBind: sdk.GetBoolOption(opts, "testChannelBind", false),
|
||||
timeout: time.Duration(timeoutSec) * time.Second,
|
||||
warningRTT: time.Duration(sdk.GetIntOption(opts, "warningRTT", 200)) * time.Millisecond,
|
||||
criticalRTT: time.Duration(sdk.GetIntOption(opts, "criticalRTT", 1000)) * time.Millisecond,
|
||||
}
|
||||
|
||||
transports := parseTransports(transportsRaw)
|
||||
|
||||
collectedAt := time.Now().UTC()
|
||||
endpoints, err := discoverEndpoints(ctx, zone, uri, transports)
|
||||
if err != nil {
|
||||
return &StunTurnData{
|
||||
Zone: zone,
|
||||
Mode: mode,
|
||||
CollectedAt: collectedAt,
|
||||
GlobalError: err.Error(),
|
||||
}, nil
|
||||
}
|
||||
|
||||
data := &StunTurnData{
|
||||
Zone: zone,
|
||||
Mode: mode,
|
||||
CollectedAt: collectedAt,
|
||||
}
|
||||
|
||||
for _, ep := range endpoints {
|
||||
report := EndpointReport{Endpoint: ep}
|
||||
probeEndpoint(ctx, &report, cfg)
|
||||
data.Endpoints = append(data.Endpoints, report)
|
||||
}
|
||||
return data, nil
|
||||
}
|
||||
|
||||
func probeEndpoint(ctx context.Context, r *EndpointReport, cfg probeConfig) {
|
||||
ep := r.Endpoint
|
||||
|
||||
dialName := fmt.Sprintf("dial:%s", ep.Transport)
|
||||
dialStart := time.Now()
|
||||
dc, err := dial(ctx, ep, cfg.timeout)
|
||||
dialDur := time.Since(dialStart)
|
||||
if err != nil {
|
||||
r.SubTests = append(r.SubTests, SubTest{
|
||||
Name: dialName,
|
||||
Status: SubTestCrit,
|
||||
DurationMs: dialDur.Milliseconds(),
|
||||
Error: err.Error(),
|
||||
Fix: dialFix(ep, err),
|
||||
})
|
||||
return
|
||||
}
|
||||
defer dc.Close()
|
||||
r.SubTests = append(r.SubTests, SubTest{
|
||||
Name: dialName,
|
||||
Status: SubTestOK,
|
||||
DurationMs: dialDur.Milliseconds(),
|
||||
Detail: fmt.Sprintf("connected to %s", dc.remoteAddr),
|
||||
})
|
||||
|
||||
if dc.tlsState != nil {
|
||||
r.SubTests = append(r.SubTests, SubTest{
|
||||
Name: "tls",
|
||||
Status: SubTestOK,
|
||||
Detail: fmt.Sprintf("%s, %s, peer cert CN=%s",
|
||||
tlsVersionString(dc.tlsState.Version),
|
||||
tls.CipherSuiteName(dc.tlsState.CipherSuite),
|
||||
peerCertCN(dc.tlsState),
|
||||
),
|
||||
})
|
||||
}
|
||||
if dc.dtlsState != nil {
|
||||
r.SubTests = append(r.SubTests, SubTest{
|
||||
Name: "dtls",
|
||||
Status: SubTestOK,
|
||||
Detail: "DTLS handshake completed",
|
||||
})
|
||||
}
|
||||
|
||||
bind := runSTUNBinding(dc, cfg.timeout)
|
||||
if bind.Err != nil {
|
||||
r.SubTests = append(r.SubTests, SubTest{
|
||||
Name: "stun_binding",
|
||||
Status: SubTestCrit,
|
||||
Error: bind.Err.Error(),
|
||||
Fix: "Server did not answer the STUN Binding Request. Check that the STUN service is actually listening on this transport, and that no middlebox is filtering RFC 5389 traffic.",
|
||||
})
|
||||
return
|
||||
}
|
||||
rttStatus := SubTestOK
|
||||
rttFix := ""
|
||||
if bind.RTT > cfg.criticalRTT {
|
||||
rttStatus = SubTestCrit
|
||||
rttFix = "Server is very slow to respond. Check server load, network path, and consider deploying closer to your users."
|
||||
} else if bind.RTT > cfg.warningRTT {
|
||||
rttStatus = SubTestWarn
|
||||
rttFix = "Latency is high enough to noticeably degrade interactive RTC. Consider a server geographically closer to your users."
|
||||
}
|
||||
r.SubTests = append(r.SubTests, SubTest{
|
||||
Name: "stun_binding",
|
||||
Status: rttStatus,
|
||||
DurationMs: bind.RTT.Milliseconds(),
|
||||
Detail: fmt.Sprintf("reflexive address: %s", bind.ReflexiveAddr),
|
||||
Fix: rttFix,
|
||||
})
|
||||
if bind.IsPrivateMapped {
|
||||
r.SubTests = append(r.SubTests, SubTest{
|
||||
Name: "stun_reflexive_public",
|
||||
Status: SubTestCrit,
|
||||
Detail: fmt.Sprintf("server returned a private/loopback IP: %s", bind.ReflexiveAddr),
|
||||
Fix: "Server appears to be behind NAT and unaware of its public IP. Set `external-ip=<public>` (coturn) or the equivalent on your TURN server.",
|
||||
})
|
||||
} else {
|
||||
r.SubTests = append(r.SubTests, SubTest{
|
||||
Name: "stun_reflexive_public",
|
||||
Status: SubTestOK,
|
||||
Detail: fmt.Sprintf("public reflexive: %s", bind.ReflexiveAddr),
|
||||
})
|
||||
}
|
||||
|
||||
// Mode short-circuits: STUN-only servers stop here.
|
||||
if cfg.mode == "stun" || !ep.IsTURN {
|
||||
return
|
||||
}
|
||||
|
||||
noAuth := runTURNAllocate(dc, nil, cfg.timeout)
|
||||
if noAuth.RelayConn != nil {
|
||||
_ = noAuth.RelayConn.Close()
|
||||
r.SubTests = append(r.SubTests, SubTest{
|
||||
Name: "turn_open_relay_check",
|
||||
Status: SubTestCrit,
|
||||
Detail: "TURN allocation accepted without authentication",
|
||||
Fix: "Enable long-term credentials (`lt-cred-mech` for coturn). Open relays are abused for spam and DDoS amplification.",
|
||||
})
|
||||
} else if noAuth.UnauthChallenge {
|
||||
r.SubTests = append(r.SubTests, SubTest{
|
||||
Name: "turn_open_relay_check",
|
||||
Status: SubTestOK,
|
||||
Detail: "server correctly challenged the unauthenticated allocate (401)",
|
||||
})
|
||||
} else {
|
||||
r.SubTests = append(r.SubTests, SubTest{
|
||||
Name: "turn_open_relay_check",
|
||||
Status: SubTestWarn,
|
||||
Detail: fmt.Sprintf("unexpected response (code=%d): %s", noAuth.AuthErrorCode, noAuth.AuthErrorReason),
|
||||
Fix: "Server did not behave like a standard TURN. Verify it actually implements RFC 5766.",
|
||||
})
|
||||
}
|
||||
|
||||
creds := pickCredentials(cfg.username, cfg.password, cfg.sharedSecret, cfg.realm)
|
||||
if creds == nil {
|
||||
r.SubTests = append(r.SubTests, SubTest{
|
||||
Name: "turn_allocate_auth",
|
||||
Status: SubTestSkipped,
|
||||
Detail: "no credentials provided",
|
||||
})
|
||||
return
|
||||
}
|
||||
|
||||
// We need a fresh dialed conn; pion/turn binds the client to one PacketConn lifetime.
|
||||
dc2, err := dial(ctx, ep, cfg.timeout)
|
||||
if err != nil {
|
||||
r.SubTests = append(r.SubTests, SubTest{
|
||||
Name: "turn_allocate_auth",
|
||||
Status: SubTestError,
|
||||
Error: fmt.Sprintf("redial failed: %v", err),
|
||||
})
|
||||
return
|
||||
}
|
||||
defer dc2.Close()
|
||||
|
||||
auth := runTURNAllocate(dc2, creds, cfg.timeout)
|
||||
if auth.Err != nil {
|
||||
r.SubTests = append(r.SubTests, SubTest{
|
||||
Name: "turn_allocate_auth",
|
||||
Status: SubTestCrit,
|
||||
DurationMs: auth.Duration.Milliseconds(),
|
||||
Error: auth.Err.Error(),
|
||||
Detail: fmt.Sprintf("STUN error code: %d", auth.AuthErrorCode),
|
||||
Fix: allocateFix(auth.AuthErrorCode),
|
||||
})
|
||||
return
|
||||
}
|
||||
defer auth.RelayConn.Close()
|
||||
r.SubTests = append(r.SubTests, SubTest{
|
||||
Name: "turn_allocate_auth",
|
||||
Status: SubTestOK,
|
||||
DurationMs: auth.Duration.Milliseconds(),
|
||||
Detail: fmt.Sprintf("relay address: %s", auth.RelayAddr),
|
||||
})
|
||||
if auth.IsPrivateRelay {
|
||||
r.SubTests = append(r.SubTests, SubTest{
|
||||
Name: "turn_relay_public",
|
||||
Status: SubTestCrit,
|
||||
Detail: fmt.Sprintf("relay address is private: %s", auth.RelayAddr),
|
||||
Fix: "Set `relay-ip=<public>` (coturn). The relay range must be publicly reachable for clients to use TURN.",
|
||||
})
|
||||
} else {
|
||||
r.SubTests = append(r.SubTests, SubTest{
|
||||
Name: "turn_relay_public",
|
||||
Status: SubTestOK,
|
||||
Detail: fmt.Sprintf("relay is public: %s", auth.RelayAddr),
|
||||
})
|
||||
}
|
||||
|
||||
if err := runRelayEcho(auth.RelayConn, cfg.probePeer, cfg.timeout); err != nil {
|
||||
r.SubTests = append(r.SubTests, SubTest{
|
||||
Name: "turn_relay_echo",
|
||||
Status: SubTestWarn,
|
||||
Error: err.Error(),
|
||||
Fix: "Relay path could not carry traffic to the probe peer. Check the firewall/NAT around the server's relay range (`min-port`/`max-port`/`relay-ip` for coturn).",
|
||||
})
|
||||
} else {
|
||||
r.SubTests = append(r.SubTests, SubTest{
|
||||
Name: "turn_relay_echo",
|
||||
Status: SubTestOK,
|
||||
Detail: fmt.Sprintf("CreatePermission + Send to %s succeeded", cfg.probePeer),
|
||||
})
|
||||
}
|
||||
|
||||
if cfg.testChannelBind {
|
||||
// pion/turn handles ChannelBind transparently when the relay PacketConn
|
||||
// is used through a turn.Client; we just record that the option was on.
|
||||
r.SubTests = append(r.SubTests, SubTest{
|
||||
Name: "turn_channel_bind",
|
||||
Status: SubTestInfo,
|
||||
Detail: "ChannelBind exercised implicitly by relay traffic",
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func pickCredentials(username, password, sharedSecret, realm string) *turnCredentials {
|
||||
if sharedSecret != "" {
|
||||
return restAPICredentials(sharedSecret, username, realm, time.Hour)
|
||||
}
|
||||
if username != "" && password != "" {
|
||||
return &turnCredentials{Username: username, Password: password, Realm: realm}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func dialFix(ep Endpoint, err error) string {
|
||||
msg := strings.ToLower(err.Error())
|
||||
switch {
|
||||
case strings.Contains(msg, "no such host"):
|
||||
return fmt.Sprintf("Hostname `%s` does not resolve. Add the matching A/AAAA record (or fix typos in the URI).", ep.Host)
|
||||
case strings.Contains(msg, "tls handshake"), strings.Contains(msg, "x509"):
|
||||
return fmt.Sprintf("TLS handshake failed for `%s`. Reissue the certificate covering this hostname (e.g. via Let's Encrypt) and reload the server (coturn: `cert=` and `pkey=`).", ep.Host)
|
||||
case strings.Contains(msg, "connection refused"):
|
||||
return fmt.Sprintf("Nothing is listening on %s/%d. Start the server with the appropriate listening port (coturn: `listening-port=`/`tls-listening-port=`).", ep.Host, ep.Port)
|
||||
case strings.Contains(msg, "i/o timeout"), strings.Contains(msg, "deadline"):
|
||||
switch ep.Transport {
|
||||
case TransportUDP:
|
||||
return "No reply on UDP. Open the UDP port inbound and verify your network does not block UDP egress."
|
||||
default:
|
||||
return "Connection timed out. A firewall or NAT is likely blocking this port."
|
||||
}
|
||||
}
|
||||
return "Could not establish a connection to the server."
|
||||
}
|
||||
|
||||
// allocateFix maps the STUN/TURN error code returned for a failed Allocate
// request to a remediation hint for the report.
//
// code is the numeric error code (for example 401 or 486). Codes without a
// dedicated entry, including 0, get a generic hint.
func allocateFix(code int) string {
	switch code {
	case 300:
		return "Try Alternate. The server redirected the request to another TURN server; probe the advertised alternate server, or review `alternate-server` (coturn) if the redirect is unintended."
	case 401:
		return "Server kept rejecting the credentials. Check username/password (or the REST shared secret), and verify the server clock (NTP), as TURN nonces are time-sensitive."
	case 403:
		return "Server forbade the request. The user may not have allocation rights, or a peer-address filter is in effect."
	case 437:
		return "Allocation Mismatch. Wait a few seconds for the previous allocation to expire and retry, or restart the TURN server."
	case 438:
		return "Stale Nonce. The server kept rejecting the nonce even after a retry; check the server clock (NTP) and its nonce lifetime (`stale-nonce` for coturn)."
	case 441:
		return "Wrong Credentials. Double-check username/password; for REST-API auth ensure the shared secret matches the server's `static-auth-secret`."
	case 442:
		return "Unsupported Transport Protocol. Try a different transport in the URI (`?transport=tcp`/`udp`) or enable it server-side."
	case 486:
		return "Allocation Quota Reached. Lower per-user concurrent allocations or raise `user-quota`."
	case 500:
		return "Server Error. The TURN server reported a temporary internal failure; check its logs and retry."
	case 508:
		return "Insufficient Capacity. Server is out of relay ports; raise `total-quota` or extend the `min-port`/`max-port` range."
	}
	return "TURN Allocate failed. Inspect the error and confirm the server speaks RFC 5766 on this transport."
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue