package checker import ( "bytes" "context" "crypto/tls" "errors" "fmt" "net" "slices" "strconv" "strings" "sync" "time" "github.com/miekg/dns" sdk "git.happydns.org/checker-sdk-go/checker" ) // Collect runs the full SIP probe against a domain. func (p *sipProvider) Collect(ctx context.Context, opts sdk.CheckerOptions) (any, error) { domain, _ := sdk.GetOption[string](opts, "domain") domain = strings.TrimSuffix(strings.TrimSpace(domain), ".") if domain == "" { return nil, fmt.Errorf("domain is required") } timeoutSecs := sdk.GetFloatOption(opts, "timeout", 5) if timeoutSecs < 1 { timeoutSecs = 5 } perEndpoint := time.Duration(timeoutSecs * float64(time.Second)) probeUDP := sdk.GetBoolOption(opts, "probeUDP", true) probeTCP := sdk.GetBoolOption(opts, "probeTCP", true) probeTLS := sdk.GetBoolOption(opts, "probeTLS", true) data := &SIPData{ Domain: domain, RunAt: time.Now().UTC().Format(time.RFC3339), SRV: SRVLookup{Errors: map[string]string{}}, } resolver := net.DefaultResolver // NAPTR lookup — best-effort, failures become an info issue. if naptr, err := lookupNAPTR(ctx, domain); err != nil { data.SRV.Errors["naptr"] = err.Error() } else { data.NAPTR = naptr } // SRV lookups (per transport). Errors are kept per-prefix; "not // found" is normalised to nil by lookupSRV. type srvSet struct { prefix string want bool dst *[]SRVRecord } sets := []srvSet{ {"_sip._udp.", probeUDP, &data.SRV.UDP}, {"_sip._tcp.", probeTCP, &data.SRV.TCP}, {"_sips._tcp.", probeTLS, &data.SRV.SIPS}, } for _, s := range sets { if !s.want { continue } recs, err := lookupSRV(ctx, resolver, s.prefix, domain) if err != nil { data.SRV.Errors[s.prefix] = err.Error() continue } *s.dst = recs } // Fallback when no SRV at all: synthesize a single target on each // enabled transport against the bare domain. total := len(data.SRV.UDP) + len(data.SRV.TCP) + len(data.SRV.SIPS) if total == 0 { data.SRV.FallbackProbed = true if probeUDP { data.SRV.UDP = []SRVRecord{{Target: domain, Port: 5060}} } if probeTCP { data.SRV.TCP = []SRVRecord{{Target: domain, Port: 5060}} } if probeTLS { data.SRV.SIPS = []SRVRecord{{Target: domain, Port: 5061}} } } type transportJob struct { records []SRVRecord prefix string t Transport } jobs := []transportJob{ {data.SRV.UDP, "_sip._udp.", TransportUDP}, {data.SRV.TCP, "_sip._tcp.", TransportTCP}, {data.SRV.SIPS, "_sips._tcp.", TransportTLS}, } var wg sync.WaitGroup var mu sync.Mutex for _, job := range jobs { wg.Add(1) go func(j transportJob) { defer wg.Done() resolveAllInto(ctx, resolver, j.records) eps := probeSet(ctx, j.prefix, j.t, j.records, perEndpoint) mu.Lock() data.Endpoints = append(data.Endpoints, eps...) mu.Unlock() }(job) } wg.Wait() computeCoverage(data) data.Issues = deriveIssues(data, probeUDP, probeTCP, probeTLS) return data, nil } // ─── DNS ────────────────────────────────────────────────────────────── func lookupSRV(ctx context.Context, r *net.Resolver, prefix, domain string) ([]SRVRecord, error) { name := prefix + dns.Fqdn(domain) _, records, err := r.LookupSRV(ctx, "", "", name) if err != nil { var dnsErr *net.DNSError if errors.As(err, &dnsErr) && dnsErr.IsNotFound { return nil, nil } return nil, err } // RFC 2782 null-target: single "." record with port 0 means // "service explicitly unavailable". if len(records) == 1 && (records[0].Target == "." || records[0].Target == "") && records[0].Port == 0 { return nil, nil } out := make([]SRVRecord, 0, len(records)) for _, r := range records { out = append(out, SRVRecord{ Target: strings.TrimSuffix(r.Target, "."), Port: r.Port, Priority: r.Priority, Weight: r.Weight, }) } return out, nil } func lookupNAPTR(ctx context.Context, domain string) ([]NAPTRRecord, error) { cfg, err := dns.ClientConfigFromFile("/etc/resolv.conf") if err != nil || cfg == nil || len(cfg.Servers) == 0 { cfg = &dns.ClientConfig{Servers: []string{"1.1.1.1", "8.8.8.8"}, Port: "53"} } m := new(dns.Msg) m.SetQuestion(dns.Fqdn(domain), dns.TypeNAPTR) m.RecursionDesired = true c := new(dns.Client) c.Timeout = 3 * time.Second var lastErr error for _, srv := range cfg.Servers { addr := net.JoinHostPort(srv, cfg.Port) in, _, err := c.ExchangeContext(ctx, m, addr) if err != nil { lastErr = err continue } if in.Rcode == dns.RcodeNameError { return nil, nil } if in.Rcode != dns.RcodeSuccess { lastErr = fmt.Errorf("rcode %s", dns.RcodeToString[in.Rcode]) continue } var out []NAPTRRecord for _, rr := range in.Answer { n, ok := rr.(*dns.NAPTR) if !ok { continue } if !strings.HasPrefix(strings.ToUpper(n.Service), "SIP+") && !strings.HasPrefix(strings.ToUpper(n.Service), "SIPS+") { continue } out = append(out, NAPTRRecord{ Service: n.Service, Regexp: n.Regexp, Replacement: strings.TrimSuffix(n.Replacement, "."), Flags: n.Flags, Order: n.Order, Preference: n.Preference, }) } return out, nil } return nil, lastErr } func resolveAllInto(ctx context.Context, r *net.Resolver, records []SRVRecord) { for i := range records { ips, err := r.LookupIPAddr(ctx, records[i].Target) if err != nil { continue } for _, ip := range ips { if v4 := ip.IP.To4(); v4 != nil { records[i].IPv4 = append(records[i].IPv4, v4.String()) } else { records[i].IPv6 = append(records[i].IPv6, ip.IP.String()) } } } } // ─── Probing ────────────────────────────────────────────────────────── func probeSet(ctx context.Context, prefix string, t Transport, records []SRVRecord, timeout time.Duration) []EndpointProbe { var eps []EndpointProbe for _, rec := range records { addrs := allAddrs(rec) if len(addrs) == 0 { eps = append(eps, EndpointProbe{ Transport: t, SRVPrefix: prefix, Target: rec.Target, Port: rec.Port, Error: "no A/AAAA records for target", }) continue } for _, a := range addrs { eps = append(eps, probeEndpoint(ctx, t, prefix, rec, a, timeout)) } } return eps } type probeAddr struct { ip string isV6 bool } func allAddrs(r SRVRecord) []probeAddr { out := make([]probeAddr, 0, len(r.IPv4)+len(r.IPv6)) for _, ip := range r.IPv4 { out = append(out, probeAddr{ip: ip, isV6: false}) } for _, ip := range r.IPv6 { out = append(out, probeAddr{ip: ip, isV6: true}) } return out } func probeEndpoint(ctx context.Context, t Transport, prefix string, rec SRVRecord, a probeAddr, timeout time.Duration) (ep EndpointProbe) { start := time.Now() addrPort := net.JoinHostPort(a.ip, strconv.Itoa(int(rec.Port))) ep = EndpointProbe{ Transport: t, SRVPrefix: prefix, Target: rec.Target, Port: rec.Port, Address: addrPort, IsIPv6: a.isV6, } defer func() { ep.ElapsedMS = time.Since(start).Milliseconds() }() ua := "happyDomain-checker-sip/" + Version switch t { case TransportUDP: probeUDP(ctx, &ep, rec.Target, ua, timeout) case TransportTCP: probeTCP(ctx, &ep, rec.Target, ua, timeout) case TransportTLS: probeTLSConn(ctx, &ep, rec.Target, ua, timeout) } return } func probeUDP(ctx context.Context, ep *EndpointProbe, target, ua string, timeout time.Duration) { d := net.Dialer{Timeout: timeout} conn, err := d.DialContext(ctx, "udp", ep.Address) if err != nil { ep.ReachableErr = err.Error() ep.Error = "udp dial: " + err.Error() return } defer conn.Close() ep.Reachable = true _ = conn.SetDeadline(time.Now().Add(timeout)) req := buildOptionsRequest(target, ep.Port, TransportUDP, localAddrFor(conn), ua) sent := time.Now() if _, err := conn.Write([]byte(req)); err != nil { ep.Error = "udp write: " + err.Error() return } ep.OptionsSent = true buf := make([]byte, 8192) n, err := conn.Read(buf) if err != nil { ep.Error = "no udp response: " + err.Error() return } resp, err := parseSIPResponse(bytes.NewReader(buf[:n])) if err != nil { ep.Error = "bad response: " + err.Error() return } applyResponse(ep, resp, sent) } func probeTCP(ctx context.Context, ep *EndpointProbe, target, ua string, timeout time.Duration) { d := net.Dialer{Timeout: timeout} conn, err := d.DialContext(ctx, "tcp", ep.Address) if err != nil { ep.ReachableErr = err.Error() ep.Error = "tcp dial: " + err.Error() return } defer conn.Close() ep.Reachable = true _ = conn.SetDeadline(time.Now().Add(timeout)) req := buildOptionsRequest(target, ep.Port, TransportTCP, localAddrFor(conn), ua) sent := time.Now() if _, err := conn.Write([]byte(req)); err != nil { ep.Error = "tcp write: " + err.Error() return } ep.OptionsSent = true resp, err := parseSIPResponse(conn) if err != nil { ep.Error = "no tcp response: " + err.Error() return } applyResponse(ep, resp, sent) } func probeTLSConn(ctx context.Context, ep *EndpointProbe, target, ua string, timeout time.Duration) { d := net.Dialer{Timeout: timeout} raw, err := d.DialContext(ctx, "tcp", ep.Address) if err != nil { ep.ReachableErr = err.Error() ep.Error = "tcp dial: " + err.Error() return } // We deliberately skip cert verification — checker-tls is the // source of truth for TLS posture. We just want to reach SIP over // TLS. cfg := &tls.Config{ InsecureSkipVerify: true, //nolint:gosec ServerName: target, } conn := tls.Client(raw, cfg) if err := conn.HandshakeContext(ctx); err != nil { _ = raw.Close() ep.Error = "tls handshake: " + err.Error() return } defer conn.Close() ep.Reachable = true state := conn.ConnectionState() ep.TLSVersion = tls.VersionName(state.Version) ep.TLSCipher = tls.CipherSuiteName(state.CipherSuite) _ = conn.SetDeadline(time.Now().Add(timeout)) req := buildOptionsRequest(target, ep.Port, TransportTLS, localAddrFor(conn), ua) sent := time.Now() if _, err := conn.Write([]byte(req)); err != nil { ep.Error = "tls write: " + err.Error() return } ep.OptionsSent = true resp, err := parseSIPResponse(conn) if err != nil { ep.Error = "no tls response: " + err.Error() return } applyResponse(ep, resp, sent) } func applyResponse(ep *EndpointProbe, resp *sipResponse, sent time.Time) { ep.OptionsRawCode = resp.StatusCode ep.OptionsStatus = fmt.Sprintf("%d %s", resp.StatusCode, strings.TrimSpace(resp.StatusPhrase)) ep.OptionsRTTMs = time.Since(sent).Milliseconds() ep.ServerHeader = resp.Server ep.UserAgent = resp.UserAgent ep.AllowMethods = resp.Allow ep.ContactURI = resp.Contact } // ─── Coverage + issues ──────────────────────────────────────────────── func computeCoverage(data *SIPData) { for _, ep := range data.Endpoints { if ep.Reachable { if ep.IsIPv6 { data.Coverage.HasIPv6 = true } else { data.Coverage.HasIPv4 = true } } if !ep.OK() { continue } switch ep.Transport { case TransportUDP: data.Coverage.WorkingUDP = true case TransportTCP: data.Coverage.WorkingTCP = true case TransportTLS: data.Coverage.WorkingTLS = true } } data.Coverage.AnyWorking = data.Coverage.WorkingUDP || data.Coverage.WorkingTCP || data.Coverage.WorkingTLS } func deriveIssues(data *SIPData, wantUDP, wantTCP, wantTLS bool) []Issue { var out []Issue totalSRV := len(data.SRV.UDP) + len(data.SRV.TCP) + len(data.SRV.SIPS) if totalSRV == 0 && data.SRV.FallbackProbed { out = append(out, Issue{ Code: CodeNoSRV, Severity: SeverityCrit, Message: "No SIP SRV records published for " + data.Domain + ".", Fix: "Publish `_sip._tcp." + data.Domain + ". SRV 10 10 5060 sip." + data.Domain + ".` (and `_sips._tcp` on 5061 for TLS).", }) } // "Only UDP" — the most common real-world failure for modern trunks. if len(data.SRV.UDP) > 0 && len(data.SRV.TCP) == 0 && len(data.SRV.SIPS) == 0 && !data.SRV.FallbackProbed { out = append(out, Issue{ Code: CodeOnlyUDP, Severity: SeverityWarn, Message: "Only _sip._udp is published; modern SIP trunks (Twilio, OVH, Orange…) prefer TCP/TLS.", Fix: "Also publish `_sip._tcp." + data.Domain + ".` and ideally `_sips._tcp." + data.Domain + ".`.", }) } // No TLS at all when TCP exists. if wantTLS && len(data.SRV.SIPS) == 0 && (len(data.SRV.UDP) > 0 || len(data.SRV.TCP) > 0) && !data.SRV.FallbackProbed { out = append(out, Issue{ Code: CodeNoTLS, Severity: SeverityInfo, Message: "No _sips._tcp SRV record — SIP signalling runs in the clear.", Fix: "Publish `_sips._tcp." + data.Domain + ".` on port 5061 and terminate TLS on the server.", }) } // Per-prefix DNS errors. for prefix, msg := range data.SRV.Errors { if prefix == "naptr" { out = append(out, Issue{ Code: CodeNAPTRServfail, Severity: SeverityInfo, Message: "NAPTR lookup for " + data.Domain + " failed: " + msg, Fix: "This is optional. If you meant to expose a NAPTR, verify your authoritative resolver answers AUTH/NXDOMAIN cleanly.", }) continue } out = append(out, Issue{ Code: CodeSRVServfail, Severity: SeverityWarn, Message: "SRV lookup for `" + prefix + data.Domain + "` failed: " + msg, Fix: "Check zone serial and authoritative NS for this name.", }) } // Fallback-probed notice. if data.SRV.FallbackProbed { out = append(out, Issue{ Code: CodeFallbackProbed, Severity: SeverityInfo, Message: "No SIP SRV records: probing fell back to " + data.Domain + ":5060 / :5061.", Fix: "Publish the SRV records expected by SIP clients and trunks.", }) } // Per-endpoint findings. for _, ep := range data.Endpoints { switch { case !ep.Reachable && ep.ReachableErr == "" && ep.Error == "no A/AAAA records for target": out = append(out, Issue{ Code: CodeSRVTargetUnresolved, Severity: SeverityCrit, Message: "SRV target `" + ep.Target + "` has no A/AAAA.", Fix: "Add A/AAAA records for `" + ep.Target + "` or change the SRV target.", Endpoint: ep.Target, }) case !ep.Reachable: code := CodeTCPUnreachable msg := "TCP port " + strconv.Itoa(int(ep.Port)) + " is closed or filtered on " + ep.Address + "." fix := "Verify the SIP server is running and the firewall/NAT forwards port " + strconv.Itoa(int(ep.Port)) + "." switch ep.Transport { case TransportUDP: code = CodeUDPUnreachable msg = "UDP port " + strconv.Itoa(int(ep.Port)) + " refused on " + ep.Address + "." fix = "Verify the SIP server listens on UDP " + strconv.Itoa(int(ep.Port)) + " and that no stateless firewall drops the reply." case TransportTLS: if ep.Error != "" && strings.HasPrefix(ep.Error, "tls handshake") { code = CodeTLSHandshake msg = "TLS handshake failed on " + ep.Address + ": " + strings.TrimPrefix(ep.Error, "tls handshake: ") fix = "Present a valid certificate (chain + SAN including `" + ep.Target + "`) and accept TLS 1.2+." } } out = append(out, Issue{ Code: code, Severity: SeverityCrit, Message: msg, Fix: fix, Endpoint: ep.Address, }) case ep.Reachable && !ep.OptionsSent: out = append(out, Issue{ Code: CodeOptionsNoAnswer, Severity: SeverityCrit, Message: ep.Address + " accepted the connection but the probe could not send an OPTIONS: " + ep.Error, Fix: "Investigate the server's SIP listener.", Endpoint: ep.Address, }) case ep.OptionsSent && ep.OptionsRawCode == 0: out = append(out, Issue{ Code: CodeOptionsNoAnswer, Severity: SeverityCrit, Message: ep.Address + " is reachable but silent on SIP OPTIONS.", Fix: "Enable unauthenticated OPTIONS (`handle_options = yes` in Kamailio, `allowguest = yes` in Asterisk/FreeSWITCH) or add the probe source to the ACL.", Endpoint: ep.Address, }) case ep.OptionsRawCode >= 300: out = append(out, Issue{ Code: CodeOptionsNon2xx, Severity: SeverityWarn, Message: ep.Address + " answered " + ep.OptionsStatus + " to OPTIONS.", Fix: "Check SIP routing / ACL. Some stacks reject unauthenticated OPTIONS with 403/404.", Endpoint: ep.Address, }) case ep.OK() && len(ep.AllowMethods) > 0 && !slices.Contains(ep.AllowMethods, "INVITE"): out = append(out, Issue{ Code: CodeOptionsNoInvite, Severity: SeverityWarn, Message: ep.Address + " answered 2xx but does not advertise INVITE in Allow.", Fix: "Verify the dialplan / endpoint is allowed to place calls.", Endpoint: ep.Address, }) case ep.OK() && len(ep.AllowMethods) == 0: out = append(out, Issue{ Code: CodeOptionsNoAllow, Severity: SeverityInfo, Message: ep.Address + " answered 2xx but did not advertise an Allow header.", Fix: "Configure the SIP stack to include Allow (benign but helps callers discover capabilities).", Endpoint: ep.Address, }) } } // Nothing reachable at all. if len(data.Endpoints) > 0 && !data.Coverage.AnyWorking { out = append(out, Issue{ Code: CodeAllDown, Severity: SeverityCrit, Message: "No SIP endpoint answered OPTIONS on any transport.", Fix: "Verify the SIP server is running and reachable on the published SRV ports.", }) } // IPv6 coverage. if data.Coverage.HasIPv4 && !data.Coverage.HasIPv6 { out = append(out, Issue{ Code: CodeNoIPv6, Severity: SeverityInfo, Message: "No IPv6 endpoint reachable.", Fix: "Publish AAAA records for the SRV targets.", }) } return out }