repochecker: Fix parsing of numstat (using -z option)

Also improve binary file detection and allow binary files under 1 MB
This commit is contained in:
nemunaire 2021-11-13 20:47:40 +01:00
parent 9fe1374a77
commit 23c43ad667
1 changed files with 51 additions and 7 deletions

View File

@ -11,38 +11,80 @@ import (
"os/exec"
"path"
"path/filepath"
"strconv"
"strings"
"srs.epita.fr/fic-server/admin/sync"
"srs.epita.fr/fic-server/libfic"
)
var skipFileChecks = false
// Command-line tunables; main binds each of them to a flag.
var (
// ignoreBinaryFileUnder is the size threshold used by searchBinaryInGit:
// a binary file whose size (parsed from `git ls-tree -l` output) is below
// this value is skipped unless the same path was already recorded.
// Bound to -skip-binary-files-under. Presumably expressed in bytes — confirm.
ignoreBinaryFileUnder = 1000000
// skipFileChecks is bound to -skipfiledigests ("Don't perform DIGESTS
// checks on file to speed up the checks").
skipFileChecks = false
// strictBinaryFile is bound to -strict-binary-file. When false, the first
// sighting of a binary file that is not under the threshold is only
// recorded, and it is reported once the path is seen again; when true it
// is reported right away.
// NOTE(review): inferred from the visible part of searchBinaryInGit — confirm.
strictBinaryFile = false
)
func searchBinaryInGit(edir string) (ret []string) {
// Check if git exists and if we are in a git repo
err := exec.Command("git", "-C", edir, "remote").Run()
if err == nil {
cmd := exec.Command("git", "-C", edir, "log", "--all", "--numstat")
cmd := exec.Command("git", "-C", edir, "log", "--all", "--numstat", "--no-renames", "-z")
var out bytes.Buffer
cmd.Stdout = &out
err := cmd.Run()
if err == nil {
scanner := bufio.NewScanner(&out)
alreadySeen := map[string]string{}
commit := ""
scanner := bufio.NewScanner(&out)
// Split on \n and \0 (-z option)
scanner.Split(func(data []byte, atEOF bool) (advance int, token []byte, err error) {
for i := 0; i < len(data); i++ {
if data[i] == '\n' || data[i] == '\000' {
return i + 1, data[:i], nil
}
}
if !atEOF {
return 0, nil, nil
}
return 0, data, bufio.ErrFinalToken
})
for scanner.Scan() {
if strings.HasPrefix(scanner.Text(), "commit ") {
commit = strings.TrimPrefix(scanner.Text(), "commit ")
} else if strings.HasPrefix(scanner.Text(), "-\t-\t") {
fname := strings.TrimPrefix(scanner.Text(), "-\t-\t")
if fname == "heading.jpg" {
continue
cmdfile := exec.Command("git", "-C", edir, "ls-tree", "-r", "-l", commit, fname)
var outfile bytes.Buffer
cmdfile.Stdout = &outfile
err = cmdfile.Run()
var fsize int = -1024
if err == nil {
fields := strings.Fields(outfile.String())
if len(fields) < 4 {
// This should be a file deletion
if _, ok := alreadySeen[fname]; !ok {
alreadySeen[fname] = fmt.Sprintf("%s (commit %s) deleted", fname, commit[:7])
}
continue
} else if fsize, err = strconv.Atoi(fields[3]); err == nil && fsize < ignoreBinaryFileUnder {
if _, ok := alreadySeen[fname]; !ok {
continue
}
} else if _, ok := alreadySeen[fname]; !ok && !strictBinaryFile {
alreadySeen[fname] = fmt.Sprintf("%s (commit %s) (size %d kB)", fname, commit[:7], fsize/1024)
continue
}
}
ret = append(ret, fmt.Sprintf("%s (%s)", fname, commit[:7]))
if as, ok := alreadySeen[fname]; ok && as != "" {
ret = append(ret, as)
alreadySeen[fname] = ""
}
ret = append(ret, fmt.Sprintf("%s (commit %s) (size %d kB)", fname, commit[:7], fsize/1024))
}
}
}
@ -113,6 +155,8 @@ func main() {
flag.BoolVar(&fic.StrongDigest, "strongdigest", fic.StrongDigest, "Are BLAKE2b digests required or is SHA-1 good enough?")
flag.BoolVar(&skipFileChecks, "skipfiledigests", skipFileChecks, "Don't perform DIGESTS checks on file to speed up the checks")
flag.BoolVar(&sync.LogMissingResolution, "skipresolution", sync.LogMissingResolution, "Don't fail if resolution.mp4 is absent")
flag.BoolVar(&strictBinaryFile, "strict-binary-file", strictBinaryFile, "In Git-LFS check, don't warn files")
flag.IntVar(&ignoreBinaryFileUnder, "skip-binary-files-under", ignoreBinaryFileUnder, "In Git-LFS check, don't warn files under this size")
flag.Parse()
log.SetPrefix("[repochecker] ")