repochecker/grammalecte: New plugin to check French grammar

This commit is contained in:
nemunaire 2022-10-29 13:36:37 +02:00
parent 721908ee18
commit edde9f885d
6 changed files with 431 additions and 5 deletions

View File

@ -23,6 +23,7 @@ steps:
- go get -v -d srs.epita.fr/fic-server/dashboard
- go get -v -d srs.epita.fr/fic-server/repochecker
- go get -v -d srs.epita.fr/fic-server/repochecker/epita
- go get -v -d srs.epita.fr/fic-server/repochecker/grammalecte
- go get -v -d srs.epita.fr/fic-server/qa
- mkdir deploy
@ -41,6 +42,7 @@ steps:
- go vet -v -buildvcs=false srs.epita.fr/fic-server/dashboard
- go vet -v -buildvcs=false srs.epita.fr/fic-server/repochecker
- go vet -v -buildvcs=false srs.epita.fr/fic-server/repochecker/epita
- go vet -v -buildvcs=false srs.epita.fr/fic-server/repochecker/grammalecte
- go vet -v -buildvcs=false srs.epita.fr/fic-server/qa
- go vet -v -buildvcs=false srs.epita.fr/fic-server/settings
@ -98,6 +100,7 @@ steps:
- apk --no-cache add build-base
- go build -buildvcs=false --tags checkupdate -v -o deploy/repochecker-${DRONE_STAGE_OS}-${DRONE_STAGE_ARCH} srs.epita.fr/fic-server/repochecker
- go build -buildvcs=false -buildmode=plugin -v -o deploy/repochecker-epita-rules-${DRONE_STAGE_OS}-${DRONE_STAGE_ARCH}.so srs.epita.fr/fic-server/repochecker/epita
- go build -buildvcs=false -buildmode=plugin -v -o deploy/repochecker-grammalecte-rules-${DRONE_STAGE_OS}-${DRONE_STAGE_ARCH}.so srs.epita.fr/fic-server/repochecker/grammalecte
- grep "const version" repochecker/update.go | sed -r 's/^.*=\s*(\S.*)$/\1/' > deploy/repochecker.version
environment:
CGO_ENABLED: 0

View File

@ -4,23 +4,33 @@ RUN apk add --no-cache git
WORKDIR /go/src/srs.epita.fr/fic-server/
RUN apk add --no-cache build-base
COPY go.mod go.sum ./
COPY settings settings/
COPY libfic ./libfic/
COPY admin ./admin/
COPY repochecker ./repochecker/
RUN apk add --no-cache build-base && \
go get -d -v ./repochecker && \
RUN go get -d -v ./repochecker && \
go build -v -o repochecker/repochecker ./repochecker && \
go build -v -buildmode=plugin -o repochecker/epita-rules.so ./repochecker/epita
go build -v -buildmode=plugin -o repochecker/epita-rules.so ./repochecker/epita && \
go build -v -buildmode=plugin -o repochecker/grammalecte-rules.so ./repochecker/grammalecte
ENV GRAMMALECTE_VERSION 2.1.1
ADD https://grammalecte.net/grammalecte/zip/Grammalecte-fr-v$GRAMMALECTE_VERSION.zip /srv/grammalecte.zip
RUN mkdir /srv/grammalecte && cd /srv/grammalecte && unzip /srv/grammalecte.zip && sed -i 's/if sys.version_info.major < (3, 7):/if False:/' /srv/grammalecte/grammalecte-server.py
FROM alpine:3.16
RUN apk add --no-cache git
ENTRYPOINT ["/usr/bin/repochecker", "--rules-plugins=/usr/lib/epita-rules.so", "--rules-plugins=/usr/lib/grammalecte-rules.so"]
ENTRYPOINT ["/usr/bin/repochecker", "--checks-plugins=/usr/lib/epita-rules.so"]
RUN apk add --no-cache git python3
COPY --from=gobuild /srv/grammalecte /srv/grammalecte
COPY --from=gobuild /go/src/srs.epita.fr/fic-server/repochecker/repochecker /usr/bin/repochecker
COPY --from=gobuild /go/src/srs.epita.fr/fic-server/repochecker/epita-rules.so /usr/lib/epita-rules.so
COPY --from=gobuild /go/src/srs.epita.fr/fic-server/repochecker/grammalecte-rules.so /usr/lib/grammalecte-rules.so

View File

@ -0,0 +1,86 @@
package main
import (
"bytes"
"fmt"
"strings"
)
// LOG_PREFIX_LEN is not referenced by any code visible in this package.
// NOTE(review): presumably a column width for log prefixes — confirm or remove.
const LOG_PREFIX_LEN = 20
// SpellingError describes one misspelled word reported by Grammalecte.
// It implements the error interface through its Error method.
type SpellingError struct {
	// Prefix is prepended verbatim to the message; Source is the paragraph
	// text in which the word was found.
	Prefix, Source string
	// NSource is the paragraph index; Start and End delimit the word inside
	// Source and drive the caret line printed by Error.
	NSource, Start, End int
	// Type and Value are the error kind and the offending word.
	Type, Value string
	// Suggestions holds replacement candidates; it may be empty.
	Suggestions []string
}
// Error renders the spelling error on several lines: a headline with the
// prefix, error type and word, the quoted source paragraph, a caret line
// pointing at the word, and finally the suggestions when there are any.
func (e SpellingError) Error() string {
	var hint string
	if len(e.Suggestions) != 0 {
		hint = "\nSuggestions : " + strings.Join(e.Suggestions, ", ")
	}

	// The extra column accounts for the opening quote that %q emits before
	// the source text.
	marker := underline(1, e.Start, e.End)

	return fmt.Sprintf("%sspelling error %s %s\n%q\n%s%s", e.Prefix, e.Type, e.Value, e.Source, marker, hint)
}
// GrammarError describes one grammar rule violation reported by Grammalecte.
// It implements the error interface through its Error method.
type GrammarError struct {
	// Prefix is prepended verbatim to the message; Source is the paragraph
	// text in which the violation was found.
	Prefix, Source string
	// NSource is the paragraph index; Start and End delimit the faulty span
	// inside Source and drive the caret line printed by Error.
	NSource, Start, End int
	// RuleId identifies the rule, Type is its category and Message is the
	// human-readable explanation.
	RuleId, Type, Message string
	// Suggestions holds replacement candidates; it may be empty.
	Suggestions []string
	// URL is carried along from the Grammalecte response; Error does not
	// print it.
	URL string
}
// Error renders the grammar error on several lines: the prefix and message
// followed by the rule identifier in parentheses, the quoted source
// paragraph, a caret line pointing at the faulty span, and finally the
// suggestions (labelled in singular or plural form) when there are any.
func (e GrammarError) Error() string {
	var hint string
	if n := len(e.Suggestions); n > 0 {
		plural := ""
		if n > 1 {
			plural = "s"
		}
		hint = "\nSuggestion" + plural + " : " + strings.Join(e.Suggestions, ", ")
	}

	// The extra column accounts for the opening quote that %q emits before
	// the source text.
	marker := underline(1, e.Start, e.End)

	return fmt.Sprintf("%s%s (%s)\n%q\n%s%s", e.Prefix, e.Message, e.RuleId, e.Source, marker, hint)
}
// underline builds a marker line made of prefix+start spaces followed by
// end-start carets. A non-positive count contributes no characters.
func underline(prefix, start, end int) string {
	var line bytes.Buffer
	for pad := prefix + start; pad > 0; pad-- {
		line.WriteByte(' ')
	}
	for width := end - start; width > 0; width-- {
		line.WriteByte('^')
	}
	return line.String()
}

View File

@ -0,0 +1,36 @@
package main
import (
"srs.epita.fr/fic-server/libfic"
)
// GrammalecteCheckKeyFlag runs the grammar checker on the label of a flag
// and, when the flag has a non-empty help text, on that help text as well.
// The label is the first value returned by flag.AnalyzeFlagLabel. The raw
// argument is not used.
func GrammalecteCheckKeyFlag(flag *fic.FlagKey, raw string) (errs []error) {
	label, _, _, _ := flag.AnalyzeFlagLabel()

	errs = grammalecte("label ", label, &CommonOpts)
	if flag.Help != "" {
		errs = append(errs, grammalecte("help ", flag.Help, &CommonOpts)...)
	}
	return errs
}
// GrammalecteCheckFlagChoice runs the grammar checker on the label of a
// flag choice and returns every error found.
func GrammalecteCheckFlagChoice(choice *fic.FlagChoice) (errs []error) {
	return grammalecte("label ", choice.Label, &CommonOpts)
}
// GrammalecteCheckHint runs the grammar checker on the title of a hint.
// Hints without a title are skipped and yield no error.
func GrammalecteCheckHint(hint *fic.EHint) (errs []error) {
	if hint.Title == "" {
		return nil
	}
	return grammalecte("title ", hint.Title, &CommonOpts)
}
// GrammalecteCheckMDText runs the grammar checker on a Markdown text, with
// no message prefix, and returns every error found.
func GrammalecteCheckMDText(str string) (errs []error) {
	return grammalecte("", str, &CommonOpts)
}

View File

@ -0,0 +1,202 @@
package main
import (
"encoding/json"
"log"
"net/http"
"net/url"
"regexp"
"strings"
)
// GrammalecteOptions lists the boolean switches that grammalecte() marshals
// to JSON and sends as the "options" form field of a /gc_text/fr request.
// Each field is serialized under the key given in its json tag.
type GrammalecteOptions struct {
Typographie bool `json:"basic"`
SignesTypographiques bool `json:"typo"`
ApostropheTypographiques bool `json:"apos"`
EcritureEpicene bool `json:"eepi"`
// NOTE(review): Grammalecte appears to use "esp" for surplus spaces and
// "tab" for surplus tabulations — confirm this tag matches the intent.
EspaceSurnumeraires bool `json:"tab"`
EspaceInsecables bool `json:"nbsp"`
Majuscules bool `json:"maj"`
MajusculesPourMinisteres bool `json:"minis"`
Virgules bool `json:"virg"`
PonctuationFinale bool `json:"poncfin"`
TraitsDUnionEtSoudures bool `json:"tu"`
Nombres bool `json:"num"`
UnitesDeMesure bool `json:"unit"`
NormesFrancaises bool `json:"nf"`
LigaturesTypographiques bool `json:"liga"`
// NOTE(review): the field name looks like a typo for "ApostropheManquante";
// it is referenced under this spelling by CommonOpts, so renaming it must
// be done in both places.
ApostropheManquate bool `json:"mapos"`
Chimie bool `json:"chim"`
ErreurDeNumerisation bool `json:"ocr"`
NomsEtAdjectifs bool `json:"gramm"`
FauxAmis bool `json:"conf"`
Locutions bool `json:"loc"`
Accords bool `json:"gn"`
Verbes bool `json:"verbs"`
Conjugaisons bool `json:"conj"`
Infinitif bool `json:"infi"`
Imperatif bool `json:"imp"`
Interrogatif bool `json:"inte"`
ParticipesPasses bool `json:"ppas"`
Verbose bool `json:"vmode"`
Style bool `json:"style"`
Populaire bool `json:"bs"`
Pleonasme bool `json:"pleo"`
ElisionEuphonie bool `json:"eleu"`
AdvNegation bool `json:"neg"`
RepetitionParag bool `json:"redon1"`
RepetitionPhrase bool `json:"redon2"`
Divers bool `json:"misc"`
MotsComposes bool `json:"mc"`
Dates bool `json:"date"`
Debug bool `json:"debug"`
IdRule bool `json:"idrule"`
}
// GrammalecteGrammarError is one element of the "lGrammarErrors" array
// decoded from a Grammalecte response. grammalecte() copies Start, End,
// RuleId, Type, Message, Suggestions and URL into a GrammarError; LineId and
// Colors are decoded but not used by the visible code.
type GrammalecteGrammarError struct {
Start int `json:"nStart"`
End int `json:"nEnd"`
LineId string `json:"sLineId"`
RuleId string `json:"sRuleId"`
Type string `json:"sType"`
Colors []int `json:"aColor"`
Message string `json:"sMessage"`
Suggestions []string `json:"aSuggestions"`
URL string `json:"url"`
}
// GrammalecteSpellingError is one element of the "lSpellingErrors" array
// decoded from a Grammalecte response. grammalecte() compares Value against
// ALLOWED_WORDS and passes it to suggest(); Start, End, Type and Value are
// copied into a SpellingError. I is decoded but not used by the visible code.
type GrammalecteSpellingError struct {
I int `json:"i"`
Type string `json:"sType"`
Value string `json:"sValue"`
Start int `json:"nStart"`
End int `json:"nEnd"`
}
// GrammalecteData is one element of the "data" array of a Grammalecte
// response: a paragraph together with the grammar and spelling errors found
// in it. grammalecte() copies Text and Paragraph into the Source and NSource
// fields of every error it reports for that paragraph.
type GrammalecteData struct {
Paragraph int `json:"iparagraph"`
Text string `json:"sText"`
GrammarErrors []GrammalecteGrammarError `json:"lGrammarErrors"`
SpellingErrors []GrammalecteSpellingError `json:"lSpellingErrors"`
}
// GrammalecteResponse is the top-level JSON document that grammalecte()
// decodes from the /gc_text/fr endpoint. A non-empty Error is logged by
// grammalecte(); Data holds the per-paragraph results.
type GrammalecteResponse struct {
Program string `json:"program"`
Version string `json:"version"`
Lang string `json:"lang"`
Error string `json:"error,omitempty"`
Data []GrammalecteData `json:"data"`
}
// GrammalecteSuggestions is the JSON document that suggest() decodes from
// the /suggest/fr endpoint.
type GrammalecteSuggestions struct {
Suggestions []string `json:"suggestions"`
}
// suggest asks the Grammalecte server for spelling suggestions for term by
// posting it as the "token" form field to the /suggest/fr endpoint.
//
// On success the returned pointer is never nil, even when the server answers
// with a JSON null or omits the suggestions list; in that case the
// Suggestions slice is simply empty. On failure (request error or
// undecodable body) it returns nil and the error.
func suggest(term string) (suggestions *GrammalecteSuggestions, err error) {
	form := url.Values{}
	form.Add("token", term)

	resp, err := http.PostForm(GRAMMALECTE_LOCAL_URL+"/suggest/fr", form)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	// Decode into a value rather than through the nil result pointer: a
	// literal JSON null would otherwise leave the pointer nil with no error,
	// which callers would then dereference.
	var decoded GrammalecteSuggestions
	if err := json.NewDecoder(resp.Body).Decode(&decoded); err != nil {
		return nil, err
	}
	return &decoded, nil
}
// mdimg matches a Markdown image with a non-empty alternative text,
// ![alt](target), and captures the alternative text as group 1.
var mdimg = regexp.MustCompile(`!\[([^\]]+)\]\([^)]+\)`)
// isAllowedWord reports whether word appears verbatim (the comparison is
// case-sensitive) in ALLOWED_WORDS.
func isAllowedWord(word string) bool {
	for _, w := range ALLOWED_WORDS {
		if w == word {
			return true
		}
	}
	return false
}

// grammalecte submits text to the Grammalecte server (/gc_text/fr endpoint)
// and converts every reported problem into a SpellingError or GrammarError
// whose Prefix is name.
//
// Markdown images are first replaced by "Image : <alt text>". When options
// is non-nil it is sent JSON-encoded as the "options" form field.
//
// Transport and decoding failures are logged and produce no error in the
// result: the function then returns whatever it has collected so far
// (nothing). Spelling errors on words listed in ALLOWED_WORDS are ignored,
// and so is the "poncfin_règle1" grammar rule on paragraphs starting with
// '>'.
func grammalecte(name string, text string, options *GrammalecteOptions) (errs []error) {
	// Remove Markdown elements
	text = mdimg.ReplaceAllString(text, "Image : ${1}")

	form := url.Values{}
	form.Add("text", text)
	form.Add("tf", "on")
	if options != nil {
		d, err := json.Marshal(options)
		if err != nil {
			log.Println("Unable to marshall GrammalecteOptions:", err.Error())
		} else {
			form.Add("options", string(d))
		}
	}

	resp, err := http.Post(GRAMMALECTE_LOCAL_URL+"/gc_text/fr", "application/x-www-form-urlencoded", strings.NewReader(form.Encode()))
	if err != nil {
		log.Println("Unable to contact grammalecte server:", err.Error())
		return
	}
	defer resp.Body.Close()

	var gresponse GrammalecteResponse
	if err = json.NewDecoder(resp.Body).Decode(&gresponse); err != nil {
		log.Println("Unable to analyse grammalecte response: ", err.Error())
		return
	}

	if len(gresponse.Error) > 0 {
		log.Println("Grammalecte report an error: ", gresponse.Error)
	}

	for _, data := range gresponse.Data {
		for _, serror := range data.SpellingErrors {
			if isAllowedWord(serror.Value) {
				continue
			}

			// Suggestions are a nicety: when they cannot be fetched, still
			// report the spelling error instead of dereferencing the nil
			// result of a failed call.
			var proposals []string
			if s, err := suggest(serror.Value); err != nil {
				log.Println("Unable to get suggestions from grammalecte server:", err.Error())
			} else if s != nil {
				proposals = s.Suggestions
			}

			errs = append(errs, SpellingError{
				Prefix:      name,
				Source:      data.Text,
				NSource:     data.Paragraph,
				Start:       serror.Start,
				End:         serror.End,
				Type:        serror.Type,
				Value:       serror.Value,
				Suggestions: proposals,
			})
		}

		for _, gerror := range data.GrammarErrors {
			// HasPrefix instead of indexing byte 0, so an empty paragraph
			// cannot trigger an out-of-range panic.
			if strings.HasPrefix(data.Text, ">") && gerror.RuleId == "poncfin_règle1" {
				continue
			}

			errs = append(errs, GrammarError{
				Prefix:      name,
				Source:      data.Text,
				NSource:     data.Paragraph,
				Start:       gerror.Start,
				End:         gerror.End,
				RuleId:      gerror.RuleId,
				Type:        gerror.Type,
				Message:     gerror.Message,
				Suggestions: gerror.Suggestions,
				URL:         gerror.URL,
			})
		}
	}

	return errs
}

View File

@ -0,0 +1,89 @@
package main
import (
"log"
"os"
"os/exec"
"time"
"srs.epita.fr/fic-server/admin/sync"
)
// GRAMMALECTE_LOCAL_URL is the base URL (scheme, host and port) that
// suggest() and grammalecte() prepend to the Grammalecte endpoints they call.
// NOTE(review): presumably the default listen address of
// grammalecte-server.py — confirm.
const GRAMMALECTE_LOCAL_URL = "http://127.0.0.1:8080"
// ALLOWED_WORDS lists the words for which spelling errors reported by
// Grammalecte are ignored. Matching is exact and case-sensitive, which is
// why some entries appear in two capitalizations.
var ALLOWED_WORDS = []string{
	"phishing", "Phishing",
	"keylogger", "Keylogger",
	"flag",
	"ANSSI", "DDOS",
	"Peer-to-Peer",
}
// CommonOpts is the option set passed to grammalecte() by every check hook
// of this plugin. All options are enabled except EspaceInsecables,
// MajusculesPourMinisteres, UnitesDeMesure, ErreurDeNumerisation, Verbose and
// Debug, which are left at their zero value (false).
var CommonOpts = GrammalecteOptions{
Typographie: true,
SignesTypographiques: true,
ApostropheTypographiques: true,
EcritureEpicene: true,
EspaceSurnumeraires: true,
Majuscules: true,
Virgules: true,
PonctuationFinale: true,
TraitsDUnionEtSoudures: true,
Nombres: true,
NormesFrancaises: true,
LigaturesTypographiques: true,
ApostropheManquate: true,
Chimie: true,
NomsEtAdjectifs: true,
FauxAmis: true,
Locutions: true,
Accords: true,
Verbes: true,
Conjugaisons: true,
Infinitif: true,
Imperatif: true,
Interrogatif: true,
ParticipesPasses: true,
Style: true,
Populaire: true,
Pleonasme: true,
ElisionEuphonie: true,
AdvNegation: true,
RepetitionParag: true,
RepetitionPhrase: true,
Divers: true,
MotsComposes: true,
Dates: true,
IdRule: true,
}
// runGrammalecteServer launches grammalecte-server.py with python3 as a
// background child process, then pauses for two seconds to give it time to
// start.
//
// The script is looked up in the current working directory first; if it does
// not exist there, /srv/grammalecte/grammalecte-server.py is used.
//
// An error is returned only when the process cannot be started. Readiness is
// not verified; if the server exits (for instance because it crashed at
// startup), this is reported in the log.
func runGrammalecteServer() error {
	path := "grammalecte-server.py"
	if _, err := os.Stat(path); os.IsNotExist(err) {
		path = "/srv/grammalecte/grammalecte-server.py"
	}

	cmd := exec.Command("python3", path)
	if err := cmd.Start(); err != nil {
		return err
	}

	// A started command must be waited on to release its resources. Doing
	// it in the background also surfaces a server that dies, which would
	// otherwise only show up as failed HTTP requests later on. The goroutine
	// ends when the child process exits.
	go func() {
		if err := cmd.Wait(); err != nil {
			log.Println("grammalecte server exited:", err)
		} else {
			log.Println("grammalecte server exited")
		}
	}()

	log.Println("Waiting for grammalecte server to be ready...")
	time.Sleep(2000 * time.Millisecond)
	return nil
}
// RegisterChecksHooks starts the Grammalecte server and registers the
// grammar checks of this plugin on h: flag keys, flag choices, hints and
// Markdown texts.
//
// The whole program is terminated (log.Fatalln) when the server cannot be
// started; in that case no hook is registered.
// NOTE(review): presumably this is the symbol repochecker looks up when it
// loads the plugin — confirm against the loader.
func RegisterChecksHooks(h *sync.CheckHooks) {
	if err := runGrammalecteServer(); err != nil {
		// Fatalln rather than Fatal: Fatal inserts no space between a string
		// operand and the error, gluing the two parts of the message.
		log.Fatalln("Unable to start grammalecte-server:", err)
	}

	h.RegisterFlagKeyHook(GrammalecteCheckKeyFlag)
	h.RegisterFlagChoiceHook(GrammalecteCheckFlagChoice)
	h.RegisterHintHook(GrammalecteCheckHint)
	h.RegisterMDTextHook(GrammalecteCheckMDText)
}