checker: split monolithic rule into per-concern rules

Replace the single matrix_federation rule with individual rules for
federation status, well-known delegation, SRV records, connection
reachability, TLS checks, and homeserver version, so the UI surfaces a
clear checklist. Drop the incorrect well-known/server_name equality
check: m.server points at the delegated federation endpoint, which is
intentionally distinct from server_name.
This commit is contained in:
nemunaire 2026-04-26 00:40:59 +07:00
commit e4b6481d32
8 changed files with 346 additions and 60 deletions

View file

@ -50,9 +50,7 @@ func Definition() *sdk.CheckerDefinition {
},
},
},
Rules: []sdk.CheckRule{
Rule(),
},
Rules: Rules(),
Interval: &sdk.CheckIntervalSpec{
Min: 5 * time.Minute,
Max: 7 * 24 * time.Hour,

View file

@ -3,79 +3,62 @@ package checker
import (
"context"
"fmt"
"strings"
sdk "git.happydns.org/checker-sdk-go/checker"
)
// Rule returns a new matrix federation check rule.
// Rules returns every CheckRule exposed by the Matrix checker.
//
// The checker is split into one rule per concern (overall federation
// status, well-known delegation, SRV records, endpoint reachability, TLS
// checks and homeserver version) so the UI can present a checklist instead
// of a single monolithic pass/fail line.
func Rules() []sdk.CheckRule {
	rules := make([]sdk.CheckRule, 0, 6)
	rules = append(rules,
		&federationOKRule{},
		&wellKnownRule{},
		&srvRecordsRule{},
		&connectionReachableRule{},
		&tlsChecksRule{},
		&versionRule{},
	)
	return rules
}
// Rule returns the aggregate federation rule.
//
// Deprecated: prefer Rules() which exposes every concern individually. Kept
// for backward compatibility with callers that embed a single rule.
func Rule() sdk.CheckRule {
return &matrixRule{}
return &federationOKRule{}
}
type matrixRule struct{}
func (r *matrixRule) Name() string {
return "matrix_federation"
}
func (r *matrixRule) Description() string {
return "Checks whether Matrix federation is working correctly"
}
func (r *matrixRule) ValidateOptions(opts sdk.CheckerOptions) error {
return nil
}
func (r *matrixRule) Evaluate(ctx context.Context, obs sdk.ObservationGetter, opts sdk.CheckerOptions) []sdk.CheckState {
// loadMatrixData fetches the Matrix observation. On error returns a
// CheckState the caller should emit to short-circuit its rule.
func loadMatrixData(ctx context.Context, obs sdk.ObservationGetter) (*MatrixFederationData, *sdk.CheckState) {
var data MatrixFederationData
if err := obs.Get(ctx, ObservationKeyMatrix, &data); err != nil {
return []sdk.CheckState{{
return nil, &sdk.CheckState{
Status: sdk.StatusError,
Message: fmt.Sprintf("Failed to get Matrix federation data: %v", err),
Code: "matrix_federation_error",
}}
}
domain, _ := opts["serviceDomain"].(string)
domain = strings.TrimSuffix(domain, ".")
if data.FederationOK {
version := strings.TrimSpace(data.Version.Name + " " + data.Version.Version)
return []sdk.CheckState{{
Status: sdk.StatusOK,
Message: fmt.Sprintf("Running %s", version),
Code: "matrix_federation_ok",
Meta: map[string]any{
"version": version,
},
}}
}
var statusLine string
if data.DNSResult.SRVError != nil && data.WellKnownResult.Result != "" {
statusLine = fmt.Sprintf("%s OR %s", data.DNSResult.SRVError.Message, data.WellKnownResult.Result)
} else if len(data.ConnectionErrors) > 0 {
var msg strings.Builder
for srv, cerr := range data.ConnectionErrors {
if msg.Len() > 0 {
msg.WriteString("; ")
}
msg.WriteString(srv)
msg.WriteString(": ")
msg.WriteString(cerr.Message)
Code: "matrix.observation_error",
}
statusLine = fmt.Sprintf("Connection errors: %s", msg.String())
} else if data.WellKnownResult.Server != domain {
statusLine = fmt.Sprintf("Bad homeserver_name: got %s, expected %s", data.WellKnownResult.Server, domain)
} else {
statusLine = fmt.Sprintf("Federation broken. Check https://federationtester.matrix.org/#%s", domain)
}
return []sdk.CheckState{{
Status: sdk.StatusCrit,
Message: statusLine,
Code: "matrix_federation_fail",
}}
return &data, nil
}
// passState builds a CheckState with status sdk.StatusOK carrying the
// given code and message.
func passState(code, message string) sdk.CheckState {
	var st sdk.CheckState
	st.Status = sdk.StatusOK
	st.Code = code
	st.Message = message
	return st
}
// infoState builds a CheckState with status sdk.StatusInfo carrying the
// given code and message.
func infoState(code, message string) sdk.CheckState {
	var st sdk.CheckState
	st.Status = sdk.StatusInfo
	st.Code = code
	st.Message = message
	return st
}
// warnState builds a CheckState with status sdk.StatusWarn carrying the
// given code and message.
func warnState(code, message string) sdk.CheckState {
	var st sdk.CheckState
	st.Status = sdk.StatusWarn
	st.Code = code
	st.Message = message
	return st
}
// critState builds a CheckState with status sdk.StatusCrit carrying the
// given code and message.
func critState(code, message string) sdk.CheckState {
	var st sdk.CheckState
	st.Status = sdk.StatusCrit
	st.Code = code
	st.Message = message
	return st
}
// unknownState builds a CheckState with status sdk.StatusUnknown carrying
// the given code and message.
func unknownState(code, message string) sdk.CheckState {
	var st sdk.CheckState
	st.Status = sdk.StatusUnknown
	st.Code = code
	st.Message = message
	return st
}

View file

@ -0,0 +1,40 @@
package checker
import (
"context"
"fmt"
sdk "git.happydns.org/checker-sdk-go/checker"
)
// connectionReachableRule reports on the connection attempts recorded in
// the Matrix observation: it passes when at least one connection report
// exists and no connection error was recorded, and otherwise surfaces each
// recorded connection error individually.
type connectionReachableRule struct{}

// Name returns the rule identifier.
func (r *connectionReachableRule) Name() string {
	return "matrix.connection_reachable"
}

// Description returns the human-readable summary of the rule.
func (r *connectionReachableRule) Description() string {
	return "Checks that every discovered federation endpoint accepts an inbound connection."
}

// Evaluate loads the Matrix observation and returns:
//   - the loader's error state when the observation cannot be fetched;
//   - a single info state when neither connection errors nor connection
//     reports are present;
//   - a single OK state when there are reports and no errors;
//   - otherwise one critical state per entry of ConnectionErrors, with
//     Subject set to that entry's key.
//
// The checker options are not used by this rule.
func (r *connectionReachableRule) Evaluate(ctx context.Context, obs sdk.ObservationGetter, _ sdk.CheckerOptions) []sdk.CheckState {
	data, errSt := loadMatrixData(ctx, obs)
	if errSt != nil {
		return []sdk.CheckState{*errSt}
	}

	failures, reports := len(data.ConnectionErrors), len(data.ConnectionReports)
	switch {
	case failures == 0 && reports == 0:
		return []sdk.CheckState{
			infoState("matrix.connection_reachable.unknown", "No endpoint was probed by the federation tester."),
		}
	case failures == 0:
		return []sdk.CheckState{
			passState("matrix.connection_reachable.ok", fmt.Sprintf("All %d endpoint(s) accepted the connection.", reports)),
		}
	}

	// NOTE(review): states are appended in range order over
	// ConnectionErrors; if that is a map (it looks like one), the order of
	// the returned states is unspecified.
	states := make([]sdk.CheckState, 0, failures)
	for endpoint, connErr := range data.ConnectionErrors {
		failure := critState("matrix.connection_reachable.fail", connErr.Message)
		failure.Subject = endpoint
		states = append(states, failure)
	}
	return states
}

View file

@ -0,0 +1,61 @@
package checker
import (
	"context"
	"fmt"
	"sort"
	"strings"

	sdk "git.happydns.org/checker-sdk-go/checker"
)
// federationOKRule reflects the overall FederationOK flag reported by the
// Matrix Federation Tester. Other rules isolate specific concerns; this
// rule is the global verdict so callers get a single-line answer to
// "does this homeserver federate?".
type federationOKRule struct{}

// Name returns the rule identifier.
func (r *federationOKRule) Name() string { return "matrix.federation_ok" }

// Description returns the human-readable summary of the rule.
func (r *federationOKRule) Description() string {
	return "Reports the overall federation status returned by the Matrix Federation Tester."
}

// Evaluate loads the Matrix observation and returns exactly one state:
//   - the loader's error state when the observation cannot be fetched;
//   - an OK state when FederationOK is set, mentioning the homeserver
//     name/version (also stored in Meta["version"]) when one is available;
//   - otherwise a critical state whose message is, in priority order, the
//     SRV error combined with the well-known result, the list of
//     connection errors, or a generic pointer to the public federation
//     tester for the "serviceDomain" option.
func (r *federationOKRule) Evaluate(ctx context.Context, obs sdk.ObservationGetter, opts sdk.CheckerOptions) []sdk.CheckState {
	data, errSt := loadMatrixData(ctx, obs)
	if errSt != nil {
		return []sdk.CheckState{*errSt}
	}

	if data.FederationOK {
		version := strings.TrimSpace(data.Version.Name + " " + data.Version.Version)
		st := passState("matrix.federation_ok.ok", "Matrix federation is working.")
		if version != "" {
			st.Message = fmt.Sprintf("Matrix federation is working (running %s).", version)
			st.Meta = map[string]any{"version": version}
		}
		return []sdk.CheckState{st}
	}

	var statusLine string
	switch {
	case data.DNSResult.SRVError != nil && data.WellKnownResult.Result != "":
		statusLine = fmt.Sprintf("%s OR %s", data.DNSResult.SRVError.Message, data.WellKnownResult.Result)
	case len(data.ConnectionErrors) > 0:
		// Map iteration order is unspecified; sort the endpoints so the
		// same observation always yields the same message instead of a
		// randomly reordered one on every evaluation.
		servers := make([]string, 0, len(data.ConnectionErrors))
		for srv := range data.ConnectionErrors {
			servers = append(servers, srv)
		}
		sort.Strings(servers)

		parts := make([]string, 0, len(servers))
		for _, srv := range servers {
			parts = append(parts, srv+": "+data.ConnectionErrors[srv].Message)
		}
		statusLine = fmt.Sprintf("Connection errors: %s", strings.Join(parts, "; "))
	default:
		// The domain is only needed for the fallback link. A missing or
		// non-string option deliberately degrades to an empty fragment.
		domain, _ := opts["serviceDomain"].(string)
		domain = strings.TrimSuffix(domain, ".")
		statusLine = fmt.Sprintf("Federation broken. Check https://federationtester.matrix.org/#%s", domain)
	}

	return []sdk.CheckState{critState("matrix.federation_ok.fail", statusLine)}
}

48
checker/rules_srv.go Normal file
View file

@ -0,0 +1,48 @@
package checker
import (
"context"
"fmt"
sdk "git.happydns.org/checker-sdk-go/checker"
)
// srvRecordsRule evaluates the SRV part of the DNS result stored in the
// Matrix observation (_matrix-fed._tcp / _matrix._tcp): did the lookup
// fail, was it skipped, and did it return any record?
type srvRecordsRule struct{}

// Name returns the rule identifier.
func (r *srvRecordsRule) Name() string {
	return "matrix.srv_records"
}

// Description returns the human-readable summary of the rule.
func (r *srvRecordsRule) Description() string {
	return "Checks that the Matrix SRV lookup (_matrix-fed._tcp / _matrix._tcp) succeeded or was legitimately skipped."
}

// Evaluate loads the Matrix observation and returns exactly one state.
// Conditions are tested in this order:
//   - observation unavailable: the loader's error state;
//   - SRVError set: critical, quoting the error message;
//   - SRVSkipped set: unknown, mentioning the CNAME when one is recorded;
//   - no SRV record: info;
//   - otherwise: OK with the number of records.
//
// The checker options are not used by this rule.
func (r *srvRecordsRule) Evaluate(ctx context.Context, obs sdk.ObservationGetter, _ sdk.CheckerOptions) []sdk.CheckState {
	data, errSt := loadMatrixData(ctx, obs)
	if errSt != nil {
		return []sdk.CheckState{*errSt}
	}

	lookup := data.DNSResult
	switch {
	case lookup.SRVError != nil:
		return []sdk.CheckState{
			critState("matrix.srv_records.error", fmt.Sprintf("SRV lookup error: %s", lookup.SRVError.Message)),
		}

	case lookup.SRVSkipped:
		var text string
		if lookup.SRVCName == "" {
			text = "SRV lookup skipped by the federation tester."
		} else {
			text = fmt.Sprintf("SRV lookup skipped (CNAME: %s).", lookup.SRVCName)
		}
		return []sdk.CheckState{unknownState("matrix.srv_records.skipped", text)}

	case len(lookup.SRVRecords) == 0:
		return []sdk.CheckState{
			infoState("matrix.srv_records.absent", "No Matrix SRV records published (federation may still work via well-known)."),
		}
	}

	count := len(lookup.SRVRecords)
	return []sdk.CheckState{
		passState("matrix.srv_records.ok", fmt.Sprintf("%d SRV record(s) published.", count)),
	}
}

73
checker/rules_tls.go Normal file
View file

@ -0,0 +1,73 @@
package checker
import (
"context"
"fmt"
"strings"
sdk "git.happydns.org/checker-sdk-go/checker"
)
// tlsChecksRule turns the per-endpoint check flags and error strings found
// in the observation's connection reports into check states. One
// CheckState is emitted per connection report, with Subject set to the
// report's key so the UI can pin the outcome on the exact address.
type tlsChecksRule struct{}

// Name returns the rule identifier.
func (r *tlsChecksRule) Name() string {
	return "matrix.tls_checks"
}

// Description returns the human-readable summary of the rule.
func (r *tlsChecksRule) Description() string {
	return "Reviews the TLS posture on every reachable federation endpoint (certificate chain, hostname match, Ed25519 key, …)."
}

// Evaluate loads the Matrix observation and returns:
//   - the loader's error state when the observation cannot be fetched;
//   - a single info state when there is no connection report;
//   - otherwise one state per connection report: OK when no individual
//     flag failed, no non-empty error string was reported and AllChecksOK
//     is set; critical in every other case, listing the collected
//     problems when there are any.
//
// The checker options are not used by this rule.
func (r *tlsChecksRule) Evaluate(ctx context.Context, obs sdk.ObservationGetter, _ sdk.CheckerOptions) []sdk.CheckState {
	data, errSt := loadMatrixData(ctx, obs)
	if errSt != nil {
		return []sdk.CheckState{*errSt}
	}

	if len(data.ConnectionReports) == 0 {
		return []sdk.CheckState{
			infoState("matrix.tls_checks.skipped", "No endpoint reached: TLS posture could not be assessed."),
		}
	}

	states := make([]sdk.CheckState, 0, len(data.ConnectionReports))
	for endpoint, report := range data.ConnectionReports {
		checks := report.Checks

		// Collect one human-readable issue per failed flag, in a fixed order.
		var issues []string
		if !checks.MatchingServerName {
			issues = append(issues, "server name does not match certificate")
		}
		if !checks.FutureValidUntilTS {
			issues = append(issues, "certificate expired or near expiry")
		}
		if !checks.ValidCertificates {
			issues = append(issues, "certificate chain is invalid")
		}
		if !checks.HasEd25519Key {
			issues = append(issues, "no Ed25519 signing key advertised")
		}
		if !checks.AllEd25519ChecksOK {
			issues = append(issues, "Ed25519 key verification failed")
		}

		// Append the errors carried by the report, ignoring blank entries.
		for _, reported := range report.Errors {
			if reported == "" {
				continue
			}
			issues = append(issues, reported)
		}

		var st sdk.CheckState
		switch {
		case len(issues) > 0:
			st = critState("matrix.tls_checks.fail", fmt.Sprintf("TLS checks failed: %s.", strings.Join(issues, "; ")))
		case !checks.AllChecksOK:
			// The aggregate flag is false although nothing specific was
			// collected: still a failure, just without details.
			st = critState("matrix.tls_checks.fail", "TLS checks failed.")
		default:
			st = passState("matrix.tls_checks.ok", "All TLS checks passed.")
		}
		st.Subject = endpoint
		states = append(states, st)
	}
	return states
}

40
checker/rules_version.go Normal file
View file

@ -0,0 +1,40 @@
package checker
import (
"context"
"fmt"
"strings"
sdk "git.happydns.org/checker-sdk-go/checker"
)
// versionRule reports the homeserver name and version recorded in the
// Matrix observation, or the error recorded while fetching them. Per its
// description the probe targets /_matrix/federation/v1/version, so a
// failure here hints at a federation-path problem even when the rest of
// the federation handshake looks healthy.
type versionRule struct{}

// Name returns the rule identifier.
func (r *versionRule) Name() string {
	return "matrix.version"
}

// Description returns the human-readable summary of the rule.
func (r *versionRule) Description() string {
	return "Checks that the homeserver responds to /_matrix/federation/v1/version and reports its name and version."
}

// Evaluate loads the Matrix observation and returns exactly one state:
//   - the loader's error state when the observation cannot be fetched;
//   - a warning when the version probe recorded an error;
//   - an info state when both name and version are blank;
//   - otherwise an OK state with the "name version" string in the message
//     and in Meta["version"].
//
// The checker options are not used by this rule.
func (r *versionRule) Evaluate(ctx context.Context, obs sdk.ObservationGetter, _ sdk.CheckerOptions) []sdk.CheckState {
	data, errSt := loadMatrixData(ctx, obs)
	if errSt != nil {
		return []sdk.CheckState{*errSt}
	}

	if probeErr := data.Version.Error; probeErr != "" {
		return []sdk.CheckState{
			warnState("matrix.version.error", fmt.Sprintf("Homeserver /version probe failed: %s", probeErr)),
		}
	}

	// Join name and version with a space, then trim so that a missing half
	// does not leave a dangling separator.
	label := strings.TrimSpace(strings.Join([]string{data.Version.Name, data.Version.Version}, " "))
	if label == "" {
		return []sdk.CheckState{
			infoState("matrix.version.unknown", "Homeserver did not return a version string."),
		}
	}

	ok := passState("matrix.version.ok", fmt.Sprintf("Homeserver running %s.", label))
	ok.Meta = map[string]any{"version": label}
	return []sdk.CheckState{ok}
}

View file

@ -0,0 +1,43 @@
package checker
import (
"context"
"fmt"
"strings"
sdk "git.happydns.org/checker-sdk-go/checker"
)
// wellKnownRule checks the /.well-known/matrix/server delegation: was a
// delegation published, and did the federation tester resolve it to a
// delegated server?
//
// The delegated server is deliberately NOT compared with the service
// domain: m.server names the federation endpoint, which is intentionally
// allowed to differ from server_name.
type wellKnownRule struct{}

// Name returns the rule identifier.
func (r *wellKnownRule) Name() string { return "matrix.well_known" }

// Description returns the human-readable summary of the rule.
func (r *wellKnownRule) Description() string {
	return "Checks that /.well-known/matrix/server (if published) is valid and resolves to a delegated federation endpoint."
}

// Evaluate loads the Matrix observation and returns exactly one state:
//   - the loader's error state when the observation cannot be fetched;
//   - an info state (matrix.well_known.absent) when no delegation is
//     published, whether the tester left the result empty or explicitly
//     reported that no .well-known was found;
//   - a critical state when the tester reported any other result string
//     without a delegated server;
//   - an OK state naming the delegated server otherwise.
//
// The checker options are not used by this rule.
func (r *wellKnownRule) Evaluate(ctx context.Context, obs sdk.ObservationGetter, _ sdk.CheckerOptions) []sdk.CheckState {
	data, errSt := loadMatrixData(ctx, obs)
	if errSt != nil {
		return []sdk.CheckState{*errSt}
	}

	wk := data.WellKnownResult

	// Nothing published: the host may rely on SRV only. Mark informational.
	if wk.Server == "" && wk.Result == "" {
		return []sdk.CheckState{infoState("matrix.well_known.absent", "No /.well-known/matrix/server delegation published (federation may still work via SRV).")}
	}

	// No delegated server, but the tester attached a result string.
	if wk.Server == "" && wk.Result != "" {
		if strings.Contains(strings.ToLower(wk.Result), "no .well-known") {
			// Same condition as above, reported explicitly by the tester:
			// keep the same status so one code never maps to two severities.
			return []sdk.CheckState{infoState("matrix.well_known.absent", "No /.well-known/matrix/server delegation found (federation may still work via SRV).")}
		}
		return []sdk.CheckState{critState("matrix.well_known.error", fmt.Sprintf("Well-known delegation error: %s", wk.Result))}
	}

	return []sdk.CheckState{passState("matrix.well_known.ok", fmt.Sprintf("Well-known delegation resolves to %s.", wk.Server))}
}