server/repochecker/grammalecte/grammalecte.go

218 lines
6.4 KiB
Go

package main
import (
"encoding/json"
"fmt"
"log"
"net/http"
"net/url"
"regexp"
"strings"
"go.uber.org/multierr"
"srs.epita.fr/fic-server/admin/sync"
lib "srs.epita.fr/fic-server/repochecker/grammalecte/lib"
)
// GrammalecteOptions is the set of boolean switches that can be sent to the
// Grammalecte server along with a text to check.
//
// The struct is JSON-encoded as a single object whose keys are the option
// codes given in the field tags; every field is always emitted (there is no
// omitempty), so a zero value explicitly sends false for each option.
type GrammalecteOptions struct {
	Typographie              bool `json:"basic"`
	SignesTypographiques     bool `json:"typo"`
	ApostropheTypographiques bool `json:"apos"`
	EcritureEpicene          bool `json:"eepi"`
	EspaceSurnumeraires      bool `json:"tab"`
	EspaceInsecables         bool `json:"nbsp"`
	Majuscules               bool `json:"maj"`
	MajusculesPourMinisteres bool `json:"minis"`
	Virgules                 bool `json:"virg"`
	PonctuationFinale        bool `json:"poncfin"`
	TraitsDUnionEtSoudures   bool `json:"tu"`
	Nombres                  bool `json:"num"`
	UnitesDeMesure           bool `json:"unit"`
	NormesFrancaises         bool `json:"nf"`
	LigaturesTypographiques  bool `json:"liga"`
	ApostropheManquate       bool `json:"mapos"`
	Chimie                   bool `json:"chim"`
	ErreurDeNumerisation     bool `json:"ocr"`
	NomsEtAdjectifs          bool `json:"gramm"`
	FauxAmis                 bool `json:"conf"`
	Locutions                bool `json:"loc"`
	Accords                  bool `json:"gn"`
	Verbes                   bool `json:"verbs"`
	Conjugaisons             bool `json:"conj"`
	Infinitif                bool `json:"infi"`
	Imperatif                bool `json:"imp"`
	Interrogatif             bool `json:"inte"`
	ParticipesPasses         bool `json:"ppas"`
	Verbose                  bool `json:"vmode"`
	Style                    bool `json:"style"`
	Populaire                bool `json:"bs"`
	Pleonasme                bool `json:"pleo"`
	ElisionEuphonie          bool `json:"eleu"`
	AdvNegation              bool `json:"neg"`
	RepetitionParag          bool `json:"redon1"`
	RepetitionPhrase         bool `json:"redon2"`
	Divers                   bool `json:"misc"`
	MotsComposes             bool `json:"mc"`
	Dates                    bool `json:"date"`
	Debug                    bool `json:"debug"`
	IdRule                   bool `json:"idrule"`
}
// GrammalecteGrammarError is one entry of the "lGrammarErrors" array found in
// a Grammalecte answer. Field names follow the JSON keys given in the tags.
//
// Start and End delimit the reported passage, RuleId identifies the rule
// that fired (it is the value matched against check exceptions), and
// Suggestions holds the replacements proposed by the server.
type GrammalecteGrammarError struct {
	Start       int      `json:"nStart"`
	End         int      `json:"nEnd"`
	LineId      string   `json:"sLineId"`
	RuleId      string   `json:"sRuleId"`
	Type        string   `json:"sType"`
	Colors      []int    `json:"aColor"`
	Message     string   `json:"sMessage"`
	Suggestions []string `json:"aSuggestions"`
	URL         string   `json:"url"`
}
// GrammalecteSpellingError is one entry of the "lSpellingErrors" array found
// in a Grammalecte answer.
//
// Value is the flagged text (the string compared with the allowed words and
// the ":spelling:" exceptions); Start and End delimit it in the source text.
type GrammalecteSpellingError struct {
	I     int    `json:"i"`
	Type  string `json:"sType"`
	Value string `json:"sValue"`
	Start int    `json:"nStart"`
	End   int    `json:"nEnd"`
}
// GrammalecteData is one element of the "data" array of a Grammalecte answer:
// a piece of analysed text, its paragraph index, and the grammar and spelling
// errors reported for it.
type GrammalecteData struct {
	Paragraph      int                        `json:"iparagraph"`
	Text           string                     `json:"sText"`
	GrammarErrors  []GrammalecteGrammarError  `json:"lGrammarErrors"`
	SpellingErrors []GrammalecteSpellingError `json:"lSpellingErrors"`
}
// GrammalecteResponse is the top-level JSON document decoded from the answer
// of the Grammalecte text-checking endpoint.
//
// Error, when non-empty, carries an error message reported by the server;
// Data holds the per-text results.
type GrammalecteResponse struct {
	Program string            `json:"program"`
	Version string            `json:"version"`
	Lang    string            `json:"lang"`
	Error   string            `json:"error,omitempty"`
	Data    []GrammalecteData `json:"data"`
}
// GrammalecteSuggestions is the JSON document decoded from the answer of the
// Grammalecte suggestion endpoint: the list of proposed replacements.
type GrammalecteSuggestions struct {
	Suggestions []string `json:"suggestions"`
}
// suggest asks the Grammalecte server for replacement suggestions for term.
//
// The term is posted as the "token" form field to the /suggest/fr endpoint
// and the JSON answer is decoded into a GrammalecteSuggestions.
//
// On success the returned pointer is never nil, even when the server answers
// with a JSON null or with an object lacking the "suggestions" key (the
// Suggestions slice is then empty). On failure (transport error, non-200
// HTTP status, or undecodable body) the pointer is nil and err says which
// step failed.
func suggest(term string) (suggestions *GrammalecteSuggestions, err error) {
	form := url.Values{}
	form.Add("token", term)

	resp, err := http.Post(GRAMMALECTE_LOCAL_URL+"/suggest/fr", "application/x-www-form-urlencoded", strings.NewReader(form.Encode()))
	if err != nil {
		return nil, fmt.Errorf("requesting suggestions for %q: %w", term, err)
	}
	defer resp.Body.Close()

	// An error page is not JSON; report the status rather than a cryptic
	// decoding failure.
	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("requesting suggestions for %q: unexpected HTTP status %s", term, resp.Status)
	}

	// Decode into a value, not into the nil named result: decoding a JSON
	// null into a pointer leaves it nil, which would hand (nil, nil) to the
	// caller.
	var decoded GrammalecteSuggestions
	if err = json.NewDecoder(resp.Body).Decode(&decoded); err != nil {
		return nil, fmt.Errorf("decoding suggestions for %q: %w", term, err)
	}

	return &decoded, nil
}
// mdimg matches a Markdown image, e.g. ![alt text](target), and captures its
// non-empty alternative text as the first submatch.
var mdimg = regexp.MustCompile(`!\[([^\]]+)\]\([^)]+\)`)
// grammalecte submits text to the local Grammalecte server and converts the
// spelling and grammar findings it reports into errors.
//
// Parameters:
//   - name is stored as the Prefix of every reported error.
//   - text is the content to check. Markdown images are first replaced by
//     "Image : <alt text>" so that only their alternative text is analysed.
//   - paragraph selects the exception key honoured for grammar rules, in
//     addition to ":*:<rule>" which always applies: 0 uses the paragraph
//     number reported by Grammalecte (":<n>:<rule>"), a negative value uses
//     ":<rule>", and a positive value uses ":<paragraph>:<rule>".
//   - exceptions lists the findings to ignore.
//   - options, when non-nil, is JSON-encoded and sent as the "options" form
//     field.
//
// The result aggregates one lib.SpellingError or lib.GrammarError per
// remaining finding, and is nil when there is none. The check is
// best-effort: a failure to reach the server or to decode its answer is only
// logged and nil is returned.
func grammalecte(name string, text string, paragraph int, exceptions *sync.CheckExceptions, options *GrammalecteOptions) (errs error) {
	// Remove Markdown elements
	text = mdimg.ReplaceAllString(text, "Image : ${1}")

	form := url.Values{}
	form.Add("text", text)
	form.Add("tf", "on")

	if options != nil {
		d, err := json.Marshal(options)
		if err != nil {
			log.Println("Unable to marshall GrammalecteOptions:", err.Error())
		} else {
			form.Add("options", string(d))
		}
	}

	resp, err := http.Post(GRAMMALECTE_LOCAL_URL+"/gc_text/fr", "application/x-www-form-urlencoded", strings.NewReader(form.Encode()))
	if err != nil {
		log.Println("Unable to contact grammalecte server:", err.Error())
		return nil
	}
	defer resp.Body.Close()

	var gresponse GrammalecteResponse
	if err = json.NewDecoder(resp.Body).Decode(&gresponse); err != nil {
		log.Println("Unable to analyse grammalecte response: ", err.Error())
		return nil
	}

	if len(gresponse.Error) > 0 {
		log.Println("Grammalecte report an error: ", gresponse.Error)
	}

	for _, data := range gresponse.Data {
		for _, serror := range data.SpellingErrors {
			// CVE identifiers are never spelling mistakes.
			if strings.HasPrefix(serror.Value, "CVE-20") {
				continue
			}

			allowed := false
			for _, w := range ALLOWED_WORDS {
				if w == serror.Value {
					allowed = true
					break
				}
			}
			if allowed || exceptions.HasException(":spelling:"+serror.Value) {
				continue
			}

			// Suggestions are optional: if the lookup fails, still report
			// the spelling error, just without proposals. suggest returns a
			// nil pointer on error, so it must not be dereferenced blindly.
			var proposals []string
			if found, serr := suggest(serror.Value); serr != nil {
				log.Println("Unable to retrieve suggestions from grammalecte:", serr.Error())
			} else if found != nil {
				proposals = found.Suggestions
			}

			errs = multierr.Append(errs, lib.SpellingError{
				Prefix:      name,
				Source:      data.Text,
				NSource:     data.Paragraph,
				Start:       serror.Start,
				End:         serror.End,
				Type:        serror.Type,
				Value:       serror.Value,
				Suggestions: proposals,
			})
		}

		for _, gerror := range data.GrammarErrors {
			// Besides the wildcard key, exactly one scoped key applies,
			// chosen by the paragraph argument.
			var scopedKey string
			switch {
			case paragraph == 0:
				scopedKey = fmt.Sprintf(":%d:%s", data.Paragraph, gerror.RuleId)
			case paragraph < 0:
				scopedKey = fmt.Sprintf(":%s", gerror.RuleId)
			default:
				scopedKey = fmt.Sprintf(":%d:%s", paragraph, gerror.RuleId)
			}
			if exceptions.HasException(fmt.Sprintf(":*:%s", gerror.RuleId)) || exceptions.HasException(scopedKey) {
				continue
			}

			gerr := lib.GrammarError{
				Prefix:      name,
				Source:      data.Text,
				NSource:     data.Paragraph,
				Start:       gerror.Start,
				End:         gerror.End,
				RuleId:      gerror.RuleId,
				Type:        gerror.Type,
				Message:     gerror.Message,
				Suggestions: gerror.Suggestions,
				URL:         gerror.URL,
			}

			// Compound-word findings can also be silenced through a spelling
			// exception on the flagged passage.
			if gerr.RuleId == "mc_mot_composé" && exceptions.HasException(fmt.Sprintf(":spelling:%s", gerr.GetPassage())) {
				continue
			}

			errs = multierr.Append(errs, gerr)
		}
	}

	return errs
}