checker-caa/checker/ccadb.go

203 lines
4.9 KiB
Go

package checker
import (
"bytes"
_ "embed"
"encoding/csv"
"fmt"
"io"
"sort"
"strings"
"sync"
)
//go:generate wget -O AllCAAIdentifiersReport.csv https://ccadb.my.salesforce-sites.com/ccadb/AllCAAIdentifiersReportCSVV2

// ccadbCSV holds the raw bytes of AllCAAIdentifiersReport.csv, embedded into
// the binary at build time. The go:generate directive above refreshes the
// file with wget; loadCCADB parses these bytes on first use.
//
//go:embed AllCAAIdentifiersReport.csv
var ccadbCSV []byte
// ccadbIndex is the in-memory representation of AllCAAIdentifiersReport.csv.
// Two indexes are maintained because CCADB rows sometimes have an empty
// Subject Key Identifier column (very rare; a handful of legacy entries)
// and we want to still resolve those via Subject DN.
//
// Both maps hold the lowercased CAA identifier domains produced by
// splitCAADomains as their values.
type ccadbIndex struct {
	// bySKI is keyed by the Subject Key Identifier column, trimmed and
	// upper-cased.
	bySKI map[string][]string
	// byDN is keyed by the Subject column after normalizeDN.
	byDN map[string][]string
}
// Lazily initialized, process-wide CCADB state. Within this file these are
// written only by the ccadbOnce.Do callback in loadCCADB.
var (
	// ccadbOnce guards the single parse of the embedded CSV.
	ccadbOnce sync.Once
	// ccadb is the parsed index; it stays nil if parsing failed.
	ccadb *ccadbIndex
	// ccadbErr is the error returned by that parse, if any.
	ccadbErr error
)
// loadCCADB returns the process-wide index built from the embedded CSV.
// The CSV is parsed on the first call only; every later call returns the
// cached index and error. A non-nil error means the embedded data is
// unusable, i.e. the binary itself is broken.
func loadCCADB() (*ccadbIndex, error) {
	parseEmbedded := func() {
		src := bytes.NewReader(ccadbCSV)
		ccadb, ccadbErr = parseCCADB(src)
	}
	ccadbOnce.Do(parseEmbedded)
	return ccadb, ccadbErr
}
// parseCCADB builds a ccadbIndex from CCADB CSV data read from r. It is
// kept separate from loadCCADB so tests can feed alternate CSV inputs.
//
// The first record is treated as the header and must contain the columns
// "Subject", "Subject Key Identifier (Hex)" and "Recognized CAA Domains"
// (surrounding whitespace ignored). Data rows that are too short to hold
// all three columns, or that list no CAA domains, are skipped silently.
//
// An error is returned when the header cannot be read, when a required
// column is missing, or when a data row fails to parse as CSV.
func parseCCADB(r io.Reader) (*ccadbIndex, error) {
	cr := csv.NewReader(r)
	// Some rows carry a trailing empty field, so no fixed width is enforced.
	cr.FieldsPerRecord = -1

	header, err := cr.Read()
	if err != nil {
		return nil, fmt.Errorf("read header: %w", err)
	}

	// Resolve the three required columns by title. If a title is repeated,
	// the last occurrence wins.
	subjectCol, skiCol, domainsCol := -1, -1, -1
	wanted := map[string]*int{
		"Subject":                      &subjectCol,
		"Subject Key Identifier (Hex)": &skiCol,
		"Recognized CAA Domains":       &domainsCol,
	}
	for pos, title := range header {
		if col, ok := wanted[strings.TrimSpace(title)]; ok {
			*col = pos
		}
	}
	if min(subjectCol, skiCol, domainsCol) < 0 {
		return nil, fmt.Errorf("unexpected CCADB header: %v", header)
	}

	// A usable row must be long enough to reach the right-most required column.
	lastCol := max(subjectCol, skiCol, domainsCol)

	index := &ccadbIndex{
		bySKI: make(map[string][]string),
		byDN:  make(map[string][]string),
	}
	for {
		rec, readErr := cr.Read()
		if readErr != nil {
			if readErr == io.EOF {
				return index, nil
			}
			return nil, fmt.Errorf("read row: %w", readErr)
		}
		if len(rec) <= lastCol {
			continue
		}

		caa := splitCAADomains(rec[domainsCol])
		if len(caa) == 0 {
			continue
		}

		// Index under both keys when available; rows with an empty SKI
		// remain reachable through their subject DN.
		keyID := strings.ToUpper(strings.TrimSpace(rec[skiCol]))
		if keyID != "" {
			index.bySKI[keyID] = mergeDomains(index.bySKI[keyID], caa)
		}
		subject := normalizeDN(rec[subjectCol])
		if subject != "" {
			index.byDN[subject] = mergeDomains(index.byDN[subject], caa)
		}
	}
}
// Lookup resolves an observed issuer to its CAA identifier domains.
//
// aki is matched (trimmed, case-insensitively) against the indexed Subject
// Key Identifiers first; dn is consulted only when aki is empty or yields
// no domains, and is compared in normalizeDN form. The boolean reports
// whether any domains were found. If the embedded CCADB data could not be
// loaded, Lookup reports no match.
//
// The returned slice is a fresh copy; callers may retain or mutate it.
func Lookup(aki, dn string) ([]string, bool) {
	index, err := loadCCADB()
	if err != nil || index == nil {
		return nil, false
	}

	var hit []string
	if aki != "" {
		hit = index.bySKI[strings.ToUpper(strings.TrimSpace(aki))]
	}
	// Fall back to the subject DN for rows that were indexed without an SKI.
	if len(hit) == 0 && dn != "" {
		hit = index.byDN[normalizeDN(dn)]
	}
	if len(hit) == 0 {
		return nil, false
	}

	domains := make([]string, len(hit))
	copy(domains, hit)
	return domains, true
}
// splitCAADomains breaks a comma-separated list of CAA identifier domains
// into its entries. Each entry is lowercased (CAA identifiers are
// case-insensitive) and trimmed; entries that end up empty are dropped.
// The result is nil when no entry remains.
func splitCAADomains(raw string) []string {
	var domains []string
	rest, more := raw, true
	for more {
		// Peel off one comma-delimited piece per iteration; the final piece
		// is whatever follows the last comma (possibly empty).
		var piece string
		piece, rest, more = strings.Cut(rest, ",")
		if name := strings.TrimSpace(strings.ToLower(piece)); name != "" {
			domains = append(domains, name)
		}
	}
	return domains
}
// mergeDomains appends the entries of add to existing, skipping any value
// already present in existing or seen earlier in add. CCADB occasionally
// lists the same CA twice (cross-signs, re-issues), and a single row may
// repeat a domain; neither should bloat the lookup result.
//
// Order is preserved: existing entries first, then new ones in the order
// they first appear in add. When existing is empty the result is a freshly
// allocated slice that does not alias add (nil if nothing was added);
// otherwise the result is built by appending to existing and may share its
// backing array.
func mergeDomains(existing, add []string) []string {
	seen := make(map[string]struct{}, len(existing)+len(add))
	for _, d := range existing {
		seen[d] = struct{}{}
	}

	merged := existing
	if len(merged) == 0 {
		// Start from nil so the result never aliases a caller-owned slice.
		merged = nil
	}
	for _, d := range add {
		if _, dup := seen[d]; dup {
			continue
		}
		seen[d] = struct{}{}
		merged = append(merged, d)
	}
	return merged
}
// normalizeDN canonicalizes a subject DN so Go's comma-joined form
// compares equal to CCADB's semicolon-joined form for the same RDNs.
//
// The DN is split with splitRDNs; each segment is trimmed, its attribute
// type (the text before the first '=') is trimmed and upper-cased, and its
// value is trimmed. Segments that are empty after trimming are discarded,
// so stray or trailing separators ("CN=a; ") do not change the result.
// The remaining segments are sorted and joined with ','. An input with no
// usable segments yields "".
//
// Intentionally permissive: escaping differences are ignored; AKI is
// the common path anyway.
func normalizeDN(dn string) string {
	if dn == "" {
		return ""
	}
	segments := splitRDNs(dn)
	fields := make([]string, 0, len(segments))
	for _, f := range segments {
		f = strings.TrimSpace(f)
		if f == "" {
			// An empty segment carries no RDN; keeping it would inject a
			// leading comma into the canonical form and break matching.
			continue
		}
		// Only rewrite segments that actually have an attribute type.
		if typ, val, ok := strings.Cut(f, "="); ok && typ != "" {
			f = strings.ToUpper(strings.TrimSpace(typ)) + "=" + strings.TrimSpace(val)
		}
		fields = append(fields, f)
	}
	// Sorting makes the result independent of RDN order in the input.
	sort.Strings(fields)
	return strings.Join(fields, ",")
}
// splitRDNs cuts a DN string into segments at every ',' or ';' that is not
// preceded by a backslash. Most RDN values in CCADB do not contain escaped
// separators, but a handful (paths in OU values) do.
//
// Segments are returned verbatim: backslashes and the bytes they escape are
// kept, and no trimming is done. An empty segment before or between
// separators is kept, whereas an empty segment after the final separator is
// dropped. The result is nil for an empty input.
func splitRDNs(dn string) []string {
	var parts []string
	begin := 0 // start of the segment currently being scanned
	for pos := 0; pos < len(dn); pos++ {
		switch dn[pos] {
		case '\\':
			// Step over the escaped byte so it is never read as a separator.
			pos++
		case ',', ';':
			parts = append(parts, dn[begin:pos])
			begin = pos + 1
		}
	}
	if begin < len(dn) {
		parts = append(parts, dn[begin:])
	}
	return parts
}