From edde9f885d1e50de03c94bb55b596a638291601c Mon Sep 17 00:00:00 2001 From: Pierre-Olivier Mercier Date: Sat, 29 Oct 2022 13:36:37 +0200 Subject: [PATCH] repochecker/grammalecte: New plugin to check french grammar --- .drone.yml | 3 + Dockerfile-repochecker | 20 ++- repochecker/grammalecte/errors.go | 86 +++++++++++ repochecker/grammalecte/flags.go | 36 +++++ repochecker/grammalecte/grammalecte.go | 202 +++++++++++++++++++++++++ repochecker/grammalecte/main.go | 89 +++++++++++ 6 files changed, 431 insertions(+), 5 deletions(-) create mode 100644 repochecker/grammalecte/errors.go create mode 100644 repochecker/grammalecte/flags.go create mode 100644 repochecker/grammalecte/grammalecte.go create mode 100644 repochecker/grammalecte/main.go diff --git a/.drone.yml b/.drone.yml index 20749cd3..d844ec3e 100644 --- a/.drone.yml +++ b/.drone.yml @@ -23,6 +23,7 @@ steps: - go get -v -d srs.epita.fr/fic-server/dashboard - go get -v -d srs.epita.fr/fic-server/repochecker - go get -v -d srs.epita.fr/fic-server/repochecker/epita + - go get -v -d srs.epita.fr/fic-server/repochecker/grammalecte - go get -v -d srs.epita.fr/fic-server/qa - mkdir deploy @@ -41,6 +42,7 @@ steps: - go vet -v -buildvcs=false srs.epita.fr/fic-server/dashboard - go vet -v -buildvcs=false srs.epita.fr/fic-server/repochecker - go vet -v -buildvcs=false srs.epita.fr/fic-server/repochecker/epita + - go vet -v -buildvcs=false srs.epita.fr/fic-server/repochecker/grammalecte - go vet -v -buildvcs=false srs.epita.fr/fic-server/qa - go vet -v -buildvcs=false srs.epita.fr/fic-server/settings @@ -98,6 +100,7 @@ steps: - apk --no-cache add build-base - go build -buildvcs=false --tags checkupdate -v -o deploy/repochecker-${DRONE_STAGE_OS}-${DRONE_STAGE_ARCH} srs.epita.fr/fic-server/repochecker - go build -buildvcs=false -buildmode=plugin -v -o deploy/repochecker-epita-rules-${DRONE_STAGE_OS}-${DRONE_STAGE_ARCH}.so srs.epita.fr/fic-server/repochecker/epita + - go build -buildvcs=false -buildmode=plugin -v -o 
deploy/repochecker-grammalecte-rules-${DRONE_STAGE_OS}-${DRONE_STAGE_ARCH}.so srs.epita.fr/fic-server/repochecker/grammalecte - grep "const version" repochecker/update.go | sed -r 's/^.*=\s*(\S.*)$/\1/' > deploy/repochecker.version environment: CGO_ENABLED: 0 diff --git a/Dockerfile-repochecker b/Dockerfile-repochecker index f6a9a036..6eecd6f0 100644 --- a/Dockerfile-repochecker +++ b/Dockerfile-repochecker @@ -4,23 +4,33 @@ RUN apk add --no-cache git WORKDIR /go/src/srs.epita.fr/fic-server/ +RUN apk add --no-cache build-base + COPY go.mod go.sum ./ COPY settings settings/ COPY libfic ./libfic/ COPY admin ./admin/ COPY repochecker ./repochecker/ -RUN apk add --no-cache build-base && \ - go get -d -v ./repochecker && \ +RUN go get -d -v ./repochecker && \ go build -v -o repochecker/repochecker ./repochecker && \ - go build -v -buildmode=plugin -o repochecker/epita-rules.so ./repochecker/epita + go build -v -buildmode=plugin -o repochecker/epita-rules.so ./repochecker/epita && \ + go build -v -buildmode=plugin -o repochecker/grammalecte-rules.so ./repochecker/grammalecte +ENV GRAMMALECTE_VERSION 2.1.1 + +ADD https://grammalecte.net/grammalecte/zip/Grammalecte-fr-v$GRAMMALECTE_VERSION.zip /srv/grammalecte.zip + +RUN mkdir /srv/grammalecte && cd /srv/grammalecte && unzip /srv/grammalecte.zip && sed -i 's/if sys.version_info.major < (3, 7):/if False:/' /srv/grammalecte/grammalecte-server.py + FROM alpine:3.16 -RUN apk add --no-cache git +ENTRYPOINT ["/usr/bin/repochecker", "--rules-plugins=/usr/lib/epita-rules.so", "--rules-plugins=/usr/lib/grammalecte-rules.so"] -ENTRYPOINT ["/usr/bin/repochecker", "--checks-plugins=/usr/lib/epita-rules.so"] +RUN apk add --no-cache git python3 +COPY --from=gobuild /srv/grammalecte /srv/grammalecte COPY --from=gobuild /go/src/srs.epita.fr/fic-server/repochecker/repochecker /usr/bin/repochecker COPY --from=gobuild /go/src/srs.epita.fr/fic-server/repochecker/epita-rules.so /usr/lib/epita-rules.so +COPY --from=gobuild 
/go/src/srs.epita.fr/fic-server/repochecker/grammalecte-rules.so /usr/lib/grammalecte-rules.so
diff --git a/repochecker/grammalecte/errors.go b/repochecker/grammalecte/errors.go
new file mode 100644
index 00000000..25eae350
--- /dev/null
+++ b/repochecker/grammalecte/errors.go
@@ -0,0 +1,99 @@
+package main
+
+import (
+	"fmt"
+	"strings"
+)
+
+// LOG_PREFIX_LEN is not referenced by any file added in this patch; it is
+// kept as is.
+const LOG_PREFIX_LEN = 20
+
+// SpellingError describes a word reported by Grammalecte's spell checker.
+// Start and End are the offsets of the word in Source, as returned by the
+// server.
+type SpellingError struct {
+	Prefix      string
+	Source      string
+	NSource     int
+	Start       int
+	End         int
+	Type        string
+	Value       string
+	Suggestions []string
+}
+
+// Error renders the reported word, the quoted source text, a caret underline
+// and, when there are some, the suggestions.
+func (e SpellingError) Error() string {
+	suggestions := ""
+	if len(e.Suggestions) > 0 {
+		suggestions = "\nSuggestions : " + strings.Join(e.Suggestions, ", ")
+	}
+
+	return fmt.Sprintf(
+		"%sspelling error %s %s\n%q\n%s%s",
+		e.Prefix,
+		e.Type,
+		e.Value,
+		e.Source,
+		underline(1, e.Start, e.End), // 1: the opening quote written by %q
+		suggestions,
+	)
+}
+
+// GrammarError describes a problem reported by Grammalecte's grammar checker.
+// Start and End are the offsets of the faulty span in Source, as returned by
+// the server.
+type GrammarError struct {
+	Prefix      string
+	Source      string
+	NSource     int
+	Start       int
+	End         int
+	RuleId      string
+	Type        string
+	Message     string
+	Suggestions []string
+	URL         string
+}
+
+// Error renders the checker message and rule identifier, the quoted source
+// text, a caret underline and, when there are some, the suggestions.
+func (e GrammarError) Error() string {
+	sornot := ""
+	if len(e.Suggestions) > 1 {
+		sornot = "s"
+	}
+
+	suggestions := ""
+	if len(e.Suggestions) > 0 {
+		suggestions = "\nSuggestion" + sornot + " : " + strings.Join(e.Suggestions, ", ")
+	}
+
+	return fmt.Sprintf(
+		"%s%s (%s)\n%q\n%s%s",
+		e.Prefix,
+		e.Message,
+		e.RuleId,
+		e.Source,
+		underline(1, e.Start, e.End), // 1: the opening quote written by %q
+		suggestions,
+	)
+}
+
+// underline returns prefix+start spaces followed by end-start carets.
+// Negative counts are treated as zero.
+func underline(prefix, start, end int) string {
+	pad := prefix + start
+	if pad < 0 {
+		pad = 0
+	}
+
+	width := end - start
+	if width < 0 {
+		width = 0
+	}
+
+	return strings.Repeat(" ", pad) + strings.Repeat("^", width)
+}
diff --git a/repochecker/grammalecte/flags.go b/repochecker/grammalecte/flags.go
new file mode 100644
index 00000000..49333b66
--- /dev/null
+++ b/repochecker/grammalecte/flags.go
@@ -0,0 +1,41 @@
+package main
+
+import (
+	"srs.epita.fr/fic-server/libfic"
+)
+
+// GrammalecteCheckKeyFlag checks the label of flag and, when it is not empty,
+// its help text. raw is not used.
+func GrammalecteCheckKeyFlag(flag *fic.FlagKey, raw string) (errs []error) {
+	label, _, _, _ := flag.AnalyzeFlagLabel()
+	errs = append(errs, grammalecte("label ", label, &CommonOpts)...)
+
+	if len(flag.Help) > 0 {
+		errs = append(errs, grammalecte("help ", flag.Help, &CommonOpts)...)
+	}
+
+	return
+}
+
+// GrammalecteCheckFlagChoice checks the label of choice.
+func GrammalecteCheckFlagChoice(choice *fic.FlagChoice) (errs []error) {
+	errs = append(errs, grammalecte("label ", choice.Label, &CommonOpts)...)
+
+	return
+}
+
+// GrammalecteCheckHint checks the title of hint when it is not empty.
+func GrammalecteCheckHint(hint *fic.EHint) (errs []error) {
+	if len(hint.Title) > 0 {
+		errs = append(errs, grammalecte("title ", hint.Title, &CommonOpts)...)
+	}
+
+	return
+}
+
+// GrammalecteCheckMDText checks str, with no prefix on the reported errors.
+func GrammalecteCheckMDText(str string) (errs []error) {
+	errs = append(errs, grammalecte("", str, &CommonOpts)...)
+
+	return
+}
diff --git a/repochecker/grammalecte/grammalecte.go b/repochecker/grammalecte/grammalecte.go
new file mode 100644
index 00000000..54afa7e7
--- /dev/null
+++ b/repochecker/grammalecte/grammalecte.go
@@ -0,0 +1,230 @@
+package main
+
+import (
+	"encoding/json"
+	"log"
+	"net/http"
+	"net/url"
+	"regexp"
+	"strings"
+	"time"
+)
+
+// GrammalecteOptions lists the checker options sent, JSON encoded, in the
+// "options" field of a /gc_text/fr request.
+type GrammalecteOptions struct {
+	Typographie              bool `json:"basic"`
+	SignesTypographiques     bool `json:"typo"`
+	ApostropheTypographiques bool `json:"apos"`
+	EcritureEpicene          bool `json:"eepi"`
+	EspaceSurnumeraires      bool `json:"tab"`
+	EspaceInsecables         bool `json:"nbsp"`
+	Majuscules               bool `json:"maj"`
+	MajusculesPourMinisteres bool `json:"minis"`
+	Virgules                 bool `json:"virg"`
+	PonctuationFinale        bool `json:"poncfin"`
+	TraitsDUnionEtSoudures   bool `json:"tu"`
+	Nombres                  bool `json:"num"`
+	UnitesDeMesure           bool `json:"unit"`
+	NormesFrancaises         bool `json:"nf"`
+	LigaturesTypographiques  bool `json:"liga"`
+	ApostropheManquate       bool `json:"mapos"`
+	Chimie                   bool `json:"chim"`
+	ErreurDeNumerisation     bool `json:"ocr"`
+	NomsEtAdjectifs          bool `json:"gramm"`
+	FauxAmis                 bool `json:"conf"`
+	Locutions                bool `json:"loc"`
+	Accords                  bool `json:"gn"`
+	Verbes                   bool `json:"verbs"`
+	Conjugaisons             bool `json:"conj"`
+	Infinitif                bool `json:"infi"`
+	Imperatif                bool `json:"imp"`
+	Interrogatif             bool `json:"inte"`
+	ParticipesPasses         bool `json:"ppas"`
+	Verbose                  bool `json:"vmode"`
+	Style                    bool `json:"style"`
+	Populaire                bool `json:"bs"`
+	Pleonasme                bool `json:"pleo"`
+	ElisionEuphonie          bool `json:"eleu"`
+	AdvNegation              bool `json:"neg"`
+	RepetitionParag          bool `json:"redon1"`
+	RepetitionPhrase         bool `json:"redon2"`
+	Divers                   bool `json:"misc"`
+	MotsComposes             bool `json:"mc"`
+	Dates                    bool `json:"date"`
+	Debug                    bool `json:"debug"`
+	IdRule                   bool `json:"idrule"`
+}
+
+// GrammalecteGrammarError is one entry of lGrammarErrors in a response.
+type GrammalecteGrammarError struct {
+	Start       int      `json:"nStart"`
+	End         int      `json:"nEnd"`
+	LineId      string   `json:"sLineId"`
+	RuleId      string   `json:"sRuleId"`
+	Type        string   `json:"sType"`
+	Colors      []int    `json:"aColor"`
+	Message     string   `json:"sMessage"`
+	Suggestions []string `json:"aSuggestions"`
+	URL         string   `json:"url"`
+}
+
+// GrammalecteSpellingError is one entry of lSpellingErrors in a response.
+type GrammalecteSpellingError struct {
+	I     int    `json:"i"`
+	Type  string `json:"sType"`
+	Value string `json:"sValue"`
+	Start int    `json:"nStart"`
+	End   int    `json:"nEnd"`
+}
+
+// GrammalecteData holds the errors found in one paragraph of the text.
+type GrammalecteData struct {
+	Paragraph      int                        `json:"iparagraph"`
+	Text           string                     `json:"sText"`
+	GrammarErrors  []GrammalecteGrammarError  `json:"lGrammarErrors"`
+	SpellingErrors []GrammalecteSpellingError `json:"lSpellingErrors"`
+}
+
+// GrammalecteResponse is the JSON document returned by /gc_text/fr.
+type GrammalecteResponse struct {
+	Program string            `json:"program"`
+	Version string            `json:"version"`
+	Lang    string            `json:"lang"`
+	Error   string            `json:"error,omitempty"`
+	Data    []GrammalecteData `json:"data"`
+}
+
+// GrammalecteSuggestions is the JSON document returned by /suggest/fr.
+type GrammalecteSuggestions struct {
+	Suggestions []string `json:"suggestions"`
+}
+
+// httpClient is shared by all requests to the Grammalecte server. Its timeout
+// keeps a stalled server from blocking the checks forever.
+var httpClient = &http.Client{Timeout: time.Minute}
+
+// suggest asks the Grammalecte server for spelling suggestions for term.
+func suggest(term string) (suggestions *GrammalecteSuggestions, err error) {
+	form := url.Values{}
+	form.Add("token", term)
+
+	resp, err := httpClient.PostForm(GRAMMALECTE_LOCAL_URL+"/suggest/fr", form)
+	if err != nil {
+		return nil, err
+	}
+	defer resp.Body.Close()
+
+	if err = json.NewDecoder(resp.Body).Decode(&suggestions); err != nil {
+		return nil, err
+	}
+
+	return suggestions, nil
+}
+
+var (
+	mdimg = regexp.MustCompile(`!\[([^\]]+)\]\([^)]+\)`)
+)
+
+// isAllowedWord reports whether word is listed in ALLOWED_WORDS.
+func isAllowedWord(word string) bool {
+	for _, w := range ALLOWED_WORDS {
+		if w == word {
+			return true
+		}
+	}
+
+	return false
+}
+
+// grammalecte submits text to the Grammalecte server and returns one error
+// per spelling or grammar problem found. name becomes the Prefix of every
+// returned error; options, when not nil, is sent along with the text.
+// Failures to reach the server or to decode its answer are only logged.
+func grammalecte(name string, text string, options *GrammalecteOptions) (errs []error) {
+	// Replace Markdown images by their alternative text
+	text = mdimg.ReplaceAllString(text, "Image : ${1}")
+
+	form := url.Values{}
+	form.Add("text", text)
+	form.Add("tf", "on")
+
+	if options != nil {
+		d, err := json.Marshal(options)
+		if err != nil {
+			log.Println("Unable to marshall GrammalecteOptions:", err.Error())
+		} else {
+			form.Add("options", string(d))
+		}
+	}
+
+	resp, err := httpClient.PostForm(GRAMMALECTE_LOCAL_URL+"/gc_text/fr", form)
+	if err != nil {
+		log.Println("Unable to contact grammalecte server:", err.Error())
+		return
+	}
+	defer resp.Body.Close()
+
+	var gresponse GrammalecteResponse
+	if err = json.NewDecoder(resp.Body).Decode(&gresponse); err != nil {
+		log.Println("Unable to analyse grammalecte response: ", err.Error())
+		return
+	}
+
+	if len(gresponse.Error) > 0 {
+		log.Println("Grammalecte report an error: ", gresponse.Error)
+	}
+
+	for _, data := range gresponse.Data {
+		for _, serror := range data.SpellingErrors {
+			if isAllowedWord(serror.Value) {
+				continue
+			}
+
+			// Suggestions are optional: when they cannot be fetched, the
+			// spelling error is still reported, without any.
+			var proposals []string
+			if suggestions, err := suggest(serror.Value); err != nil {
+				log.Println("Unable to get suggestions from grammalecte server:", err.Error())
+			} else if suggestions != nil {
+				proposals = suggestions.Suggestions
+			}
+
+			errs = append(errs, SpellingError{
+				Prefix:      name,
+				Source:      data.Text,
+				NSource:     data.Paragraph,
+				Start:       serror.Start,
+				End:         serror.End,
+				Type:        serror.Type,
+				Value:       serror.Value,
+				Suggestions: proposals,
+			})
+		}
+
+		// Paragraphs starting with '>' are exempted from poncfin_règle1.
+		// HasPrefix, unlike data.Text[0], is safe on an empty paragraph.
+		quoted := strings.HasPrefix(data.Text, ">")
+
+		for _, gerror := range data.GrammarErrors {
+			if quoted && gerror.RuleId == "poncfin_règle1" {
+				continue
+			}
+
+			errs = append(errs, GrammarError{
+				Prefix:      name,
+				Source:      data.Text,
+				NSource:     data.Paragraph,
+				Start:       gerror.Start,
+				End:         gerror.End,
+				RuleId:      gerror.RuleId,
+				Type:        gerror.Type,
+				Message:     gerror.Message,
+				Suggestions: gerror.Suggestions,
+				URL:         gerror.URL,
+			})
+		}
+	}
+
+	return
+}
diff --git a/repochecker/grammalecte/main.go b/repochecker/grammalecte/main.go
new file mode 100644
index 00000000..081c43bc
--- /dev/null
+++ b/repochecker/grammalecte/main.go
@@ -0,0 +1,130 @@
+package main
+
+import (
+	"fmt"
+	"log"
+	"net"
+	"net/url"
+	"os"
+	"os/exec"
+	"time"
+
+	"srs.epita.fr/fic-server/admin/sync"
+)
+
+// GRAMMALECTE_LOCAL_URL is the base URL used to reach the Grammalecte server.
+const GRAMMALECTE_LOCAL_URL = "http://127.0.0.1:8080"
+
+// grammalecteStartTimeout bounds the wait for the server to accept
+// connections after it has been started.
+const grammalecteStartTimeout = 30 * time.Second
+
+// ALLOWED_WORDS lists the words that are never reported as spelling errors.
+var ALLOWED_WORDS = []string{
+	"phishing",
+	"Phishing",
+	"keylogger",
+	"Keylogger",
+	"flag",
+	"ANSSI",
+	"DDOS",
+	"Peer-to-Peer",
+}
+
+// CommonOpts is the set of Grammalecte options used by every check hook.
+var CommonOpts = GrammalecteOptions{
+	Typographie:              true,
+	SignesTypographiques:     true,
+	ApostropheTypographiques: true,
+	EcritureEpicene:          true,
+	EspaceSurnumeraires:      true,
+	Majuscules:               true,
+	Virgules:                 true,
+	PonctuationFinale:        true,
+	TraitsDUnionEtSoudures:   true,
+	Nombres:                  true,
+	NormesFrancaises:         true,
+	LigaturesTypographiques:  true,
+	ApostropheManquate:       true,
+	Chimie:                   true,
+	NomsEtAdjectifs:          true,
+	FauxAmis:                 true,
+	Locutions:                true,
+	Accords:                  true,
+	Verbes:                   true,
+	Conjugaisons:             true,
+	Infinitif:                true,
+	Imperatif:                true,
+	Interrogatif:             true,
+	ParticipesPasses:         true,
+	Style:                    true,
+	Populaire:                true,
+	Pleonasme:                true,
+	ElisionEuphonie:          true,
+	AdvNegation:              true,
+	RepetitionParag:          true,
+	RepetitionPhrase:         true,
+	Divers:                   true,
+	MotsComposes:             true,
+	Dates:                    true,
+	IdRule:                   true,
+}
+
+// waitForServer polls addr until a TCP connection succeeds or timeout
+// expires, in which case the last connection error is returned wrapped.
+func waitForServer(addr string, timeout time.Duration) error {
+	deadline := time.Now().Add(timeout)
+
+	for {
+		conn, err := net.DialTimeout("tcp", addr, 500*time.Millisecond)
+		if err == nil {
+			conn.Close()
+			return nil
+		}
+
+		if time.Now().After(deadline) {
+			return fmt.Errorf("grammalecte server unreachable on %s: %w", addr, err)
+		}
+
+		time.Sleep(200 * time.Millisecond)
+	}
+}
+
+// runGrammalecteServer starts grammalecte-server.py with python3, then waits
+// until the server accepts connections. The script is taken from the working
+// directory when it exists there, from /srv/grammalecte otherwise.
+func runGrammalecteServer() error {
+	u, err := url.Parse(GRAMMALECTE_LOCAL_URL)
+	if err != nil {
+		return err
+	}
+
+	path := "grammalecte-server.py"
+	if _, err := os.Stat(path); os.IsNotExist(err) {
+		path = "/srv/grammalecte/grammalecte-server.py"
+	}
+
+	cmd := exec.Command("python3", path)
+	if err := cmd.Start(); err != nil {
+		return err
+	}
+
+	log.Println("Waiting for grammalecte server to be ready...")
+
+	// A fixed sleep either wastes time or is too short on a slow machine;
+	// poll the listening port instead.
+	return waitForServer(u.Host, grammalecteStartTimeout)
+}
+
+// RegisterChecksHooks starts the Grammalecte server and registers the
+// grammar checks on h. The process exits when the server cannot be started.
+func RegisterChecksHooks(h *sync.CheckHooks) {
+	if err := runGrammalecteServer(); err != nil {
+		log.Fatal("Unable to start grammalecte-server: ", err)
+	}
+
+	h.RegisterFlagKeyHook(GrammalecteCheckKeyFlag)
+	h.RegisterFlagChoiceHook(GrammalecteCheckFlagChoice)
+	h.RegisterHintHook(GrammalecteCheckHint)
+	h.RegisterMDTextHook(GrammalecteCheckMDText)
+}