repochecker: Fix parsing of numstat (using -z option)
Also improve binary file detection and allow binary files smaller than 1 MB
This commit is contained in:
parent
9fe1374a77
commit
23c43ad667
|
@ -11,38 +11,80 @@ import (
|
|||
"os/exec"
|
||||
"path"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"srs.epita.fr/fic-server/admin/sync"
|
||||
"srs.epita.fr/fic-server/libfic"
|
||||
)
|
||||
|
||||
var skipFileChecks = false
|
||||
var (
|
||||
ignoreBinaryFileUnder = 1000000
|
||||
skipFileChecks = false
|
||||
strictBinaryFile = false
|
||||
)
|
||||
|
||||
func searchBinaryInGit(edir string) (ret []string) {
|
||||
// Check if git exists and if we are in a git repo
|
||||
err := exec.Command("git", "-C", edir, "remote").Run()
|
||||
|
||||
if err == nil {
|
||||
cmd := exec.Command("git", "-C", edir, "log", "--all", "--numstat")
|
||||
cmd := exec.Command("git", "-C", edir, "log", "--all", "--numstat", "--no-renames", "-z")
|
||||
var out bytes.Buffer
|
||||
cmd.Stdout = &out
|
||||
err := cmd.Run()
|
||||
|
||||
if err == nil {
|
||||
scanner := bufio.NewScanner(&out)
|
||||
alreadySeen := map[string]string{}
|
||||
commit := ""
|
||||
|
||||
scanner := bufio.NewScanner(&out)
|
||||
// Split on \n and \0 (-z option)
|
||||
scanner.Split(func(data []byte, atEOF bool) (advance int, token []byte, err error) {
|
||||
for i := 0; i < len(data); i++ {
|
||||
if data[i] == '\n' || data[i] == '\000' {
|
||||
return i + 1, data[:i], nil
|
||||
}
|
||||
}
|
||||
if !atEOF {
|
||||
return 0, nil, nil
|
||||
}
|
||||
return 0, data, bufio.ErrFinalToken
|
||||
})
|
||||
|
||||
for scanner.Scan() {
|
||||
if strings.HasPrefix(scanner.Text(), "commit ") {
|
||||
commit = strings.TrimPrefix(scanner.Text(), "commit ")
|
||||
} else if strings.HasPrefix(scanner.Text(), "-\t-\t") {
|
||||
fname := strings.TrimPrefix(scanner.Text(), "-\t-\t")
|
||||
|
||||
if fname == "heading.jpg" {
|
||||
continue
|
||||
cmdfile := exec.Command("git", "-C", edir, "ls-tree", "-r", "-l", commit, fname)
|
||||
var outfile bytes.Buffer
|
||||
cmdfile.Stdout = &outfile
|
||||
err = cmdfile.Run()
|
||||
var fsize int = -1024
|
||||
if err == nil {
|
||||
fields := strings.Fields(outfile.String())
|
||||
if len(fields) < 4 {
|
||||
// This should be a file deletion
|
||||
if _, ok := alreadySeen[fname]; !ok {
|
||||
alreadySeen[fname] = fmt.Sprintf("%s (commit %s) deleted", fname, commit[:7])
|
||||
}
|
||||
continue
|
||||
} else if fsize, err = strconv.Atoi(fields[3]); err == nil && fsize < ignoreBinaryFileUnder {
|
||||
if _, ok := alreadySeen[fname]; !ok {
|
||||
continue
|
||||
}
|
||||
} else if _, ok := alreadySeen[fname]; !ok && !strictBinaryFile {
|
||||
alreadySeen[fname] = fmt.Sprintf("%s (commit %s) (size %d kB)", fname, commit[:7], fsize/1024)
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
ret = append(ret, fmt.Sprintf("%s (%s)", fname, commit[:7]))
|
||||
if as, ok := alreadySeen[fname]; ok && as != "" {
|
||||
ret = append(ret, as)
|
||||
alreadySeen[fname] = ""
|
||||
}
|
||||
ret = append(ret, fmt.Sprintf("%s (commit %s) (size %d kB)", fname, commit[:7], fsize/1024))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -113,6 +155,8 @@ func main() {
|
|||
flag.BoolVar(&fic.StrongDigest, "strongdigest", fic.StrongDigest, "Are BLAKE2b digests required or is SHA-1 good enough?")
|
||||
flag.BoolVar(&skipFileChecks, "skipfiledigests", skipFileChecks, "Don't perform DIGESTS checks on file to speed up the checks")
|
||||
flag.BoolVar(&sync.LogMissingResolution, "skipresolution", sync.LogMissingResolution, "Don't fail if resolution.mp4 is absent")
|
||||
flag.BoolVar(&strictBinaryFile, "strict-binary-file", strictBinaryFile, "In Git-LFS check, don't warn files")
|
||||
flag.IntVar(&ignoreBinaryFileUnder, "skip-binary-files-under", ignoreBinaryFileUnder, "In Git-LFS check, don't warn files under this size")
|
||||
flag.Parse()
|
||||
|
||||
log.SetPrefix("[repochecker] ")
|
||||
|
|
Loading…
Reference in New Issue