package checker

import (
	"bytes"
	_ "embed"
	"encoding/csv"
	"fmt"
	"io"
	"sort"
	"strings"
	"sync"
)

//go:generate wget -O AllCAAIdentifiersReport.csv https://ccadb.my.salesforce-sites.com/ccadb/AllCAAIdentifiersReportCSVV2

// ccadbCSV holds the raw bytes of AllCAAIdentifiersReport.csv, embedded
// into the binary at build time. Refresh the file with `go generate`.
//
//go:embed AllCAAIdentifiersReport.csv
var ccadbCSV []byte

// ccadbIndex is the in-memory representation of AllCAAIdentifiersReport.csv.
// Two indexes are maintained because CCADB rows sometimes have an empty
// Subject Key Identifier column (very rare; a handful of legacy entries)
// and we still want to resolve those via Subject DN.
type ccadbIndex struct {
	// bySKI maps an uppercase, trimmed hex Subject Key Identifier to the
	// CAA identifier domains listed for that CA.
	bySKI map[string][]string
	// byDN maps a normalizeDN-canonicalized Subject DN to the CAA
	// identifier domains listed for that CA.
	byDN map[string][]string
}

// Lazily initialized parse state shared by all loadCCADB callers.
var (
	ccadbOnce sync.Once   // ensures the embedded CSV is parsed at most once
	ccadb     *ccadbIndex // parsed index; written only inside ccadbOnce.Do
	ccadbErr  error       // parse error, if any; written only inside ccadbOnce.Do
)

// loadCCADB parses the embedded CSV into the two lookup indexes on first
// call. Subsequent calls do no parsing and return the cached result,
// including a cached error if the first parse failed. The CSV is shipped
// with the binary, so parse failures indicate a bug or a corrupted build,
// not a runtime condition; tests assert the parse succeeds for the
// checked-in file.
func loadCCADB() (*ccadbIndex, error) {
	ccadbOnce.Do(func() {
		ccadb, ccadbErr = parseCCADB(bytes.NewReader(ccadbCSV))
	})
	return ccadb, ccadbErr
}

// parseCCADB is exposed for testing with alternate CSV inputs.
func parseCCADB(r io.Reader) (*ccadbIndex, error) { reader := csv.NewReader(r) reader.FieldsPerRecord = -1 // some rows carry a trailing empty field header, err := reader.Read() if err != nil { return nil, fmt.Errorf("read header: %w", err) } idxSubject := -1 idxSKI := -1 idxDomains := -1 for i, h := range header { switch strings.TrimSpace(h) { case "Subject": idxSubject = i case "Subject Key Identifier (Hex)": idxSKI = i case "Recognized CAA Domains": idxDomains = i } } if idxSubject < 0 || idxSKI < 0 || idxDomains < 0 { return nil, fmt.Errorf("unexpected CCADB header: %v", header) } idx := &ccadbIndex{ bySKI: map[string][]string{}, byDN: map[string][]string{}, } for { row, err := reader.Read() if err == io.EOF { break } if err != nil { return nil, fmt.Errorf("read row: %w", err) } domains := splitCAADomains(row[idxDomains]) if len(domains) == 0 { continue } if ski := strings.ToUpper(strings.TrimSpace(row[idxSKI])); ski != "" { idx.bySKI[ski] = mergeDomains(idx.bySKI[ski], domains) } if dn := normalizeDN(row[idxSubject]); dn != "" { idx.byDN[dn] = mergeDomains(idx.byDN[dn], domains) } } return idx, nil } // Lookup resolves an observed certificate issuer to the CAA identifier // domains the issuing CA publishes in its CPS. aki is the uppercase hex // Authority Key Identifier of the leaf (i.e. the issuer's SKI); dn is // the RFC 2253 subject DN of the issuer (leaf.Issuer.String() in Go). // // AKI takes precedence because CCADB keys by it. DN is a fallback for // the rare rows where the SKI column is empty. // // Returns ok=false when neither key resolves. The returned slice is a // fresh copy; callers may retain or mutate it. 
func Lookup(aki, dn string) ([]string, bool) { idx, err := loadCCADB() if err != nil || idx == nil { return nil, false } if aki != "" { if d, ok := idx.bySKI[strings.ToUpper(strings.TrimSpace(aki))]; ok && len(d) > 0 { return append([]string(nil), d...), true } } if dn != "" { if d, ok := idx.byDN[normalizeDN(dn)]; ok && len(d) > 0 { return append([]string(nil), d...), true } } return nil, false } // splitCAADomains splits CCADB's "Recognized CAA Domains" cell, which // can hold a comma-separated list (e.g. DigiCert rows list ~20 // domains). Whitespace is trimmed, empties are dropped, and the result // is lowercased because CAA identifiers are case-insensitive. func splitCAADomains(raw string) []string { var out []string for _, d := range strings.Split(raw, ",") { d = strings.TrimSpace(strings.ToLower(d)) if d != "" { out = append(out, d) } } return out } // mergeDomains appends new entries to an existing slice, de-duplicating. // CCADB occasionally lists the same CA twice (cross-signs, re-issues); // we don't want that to bloat the lookup result. func mergeDomains(existing, add []string) []string { if len(existing) == 0 { return append([]string(nil), add...) } seen := map[string]bool{} for _, d := range existing { seen[d] = true } for _, d := range add { if !seen[d] { existing = append(existing, d) seen[d] = true } } return existing } // normalizeDN produces a canonical key from a subject DN so that DNs // produced by Go's pkix.Name.String (comma-joined) compare equal to // DNs produced by CCADB (semicolon-joined) when their RDN sets match. // // Rules: // - split on ',' or ';'; // - trim each RDN; // - uppercase the RDN type (left of '=') because RFC 4514 types are // case-insensitive; values are left as-is; // - sort the RDNs alphabetically so reordering does not break // comparison. // // This is intentionally permissive; escaping differences between // implementations are ignored. 
// Good enough for CCADB fallbacks, and
// the common path is the AKI lookup anyway.
//
// RDNs that are empty after trimming (for example what is left after a
// trailing "; " or between doubled separators) are dropped, and
// whitespace between the attribute type and '=' is removed, so neither
// changes the resulting key. An RDN without '=' or starting with '=' is
// kept as-is apart from trimming. An empty or separator-only input
// yields "".
func normalizeDN(dn string) string {
	if dn == "" {
		return ""
	}
	parts := splitRDNs(dn)
	rdns := parts[:0] // filter in place; parts is not used afterwards
	for _, p := range parts {
		p = strings.TrimSpace(p)
		if p == "" {
			continue // separator noise, not a real RDN
		}
		if eq := strings.IndexByte(p, '='); eq > 0 {
			attrType := strings.ToUpper(strings.TrimSpace(p[:eq]))
			p = attrType + "=" + strings.TrimSpace(p[eq+1:])
		}
		rdns = append(rdns, p)
	}
	sort.Strings(rdns)
	return strings.Join(rdns, ",")
}

// splitRDNs splits a DN string on either ',' or ';', respecting
// backslash escapes. Most RDN values in CCADB do not contain escaped
// separators, but a handful (paths in OU values) do.
//
// A backslash and the byte that follows it are copied through verbatim,
// so an escaped separator stays inside its segment. Segments are
// returned untrimmed; empty segments between separators are kept, but a
// trailing empty segment is not emitted.
func splitRDNs(dn string) []string {
	var (
		out     []string
		cur     strings.Builder
		escaped bool // previous byte was an unescaped backslash
	)
	for i := 0; i < len(dn); i++ {
		c := dn[i]
		switch {
		case escaped:
			cur.WriteByte(c)
			escaped = false
		case c == '\\':
			cur.WriteByte(c)
			escaped = true
		case c == ',' || c == ';':
			out = append(out, cur.String())
			cur.Reset()
		default:
			cur.WriteByte(c)
		}
	}
	if cur.Len() > 0 {
		out = append(out, cur.String())
	}
	return out
}