From 931d0cf854afc06a4d3d3dc3277d2f383efd3423 Mon Sep 17 00:00:00 2001 From: zeripath Date: Sat, 20 Nov 2021 13:50:00 +0000 Subject: [PATCH] Enable show more files in diff for git <2.31 (#17733) Unfortunately due to a misread on my behalf I missed that git diff only learned --skip-to in version 2.31.0. Thus this functionality was not working on older versions of git. This PR adds a handler that simply allows for us to skip reading the diffs until we find the correct file to skip to. Fix #17731 Signed-off-by: Andrew Thornton --- modules/repofiles/temp_repo.go | 2 +- services/gitdiff/csv_test.go | 2 +- services/gitdiff/gitdiff.go | 88 +++++++++++--- services/gitdiff/gitdiff_test.go | 193 +++++++++++++++++++++++++++++-- 4 files changed, 253 insertions(+), 32 deletions(-) diff --git a/modules/repofiles/temp_repo.go b/modules/repofiles/temp_repo.go index 820f5f2542..700ce92ebf 100644 --- a/modules/repofiles/temp_repo.go +++ b/modules/repofiles/temp_repo.go @@ -302,7 +302,7 @@ func (t *TemporaryUploadRepository) DiffIndex() (*gitdiff.Diff, error) { if err := git.NewCommand("diff-index", "--src-prefix=\\a/", "--dst-prefix=\\b/", "--cached", "-p", "HEAD"). RunInDirTimeoutEnvFullPipelineFunc(nil, 30*time.Second, t.basePath, stdoutWriter, stderr, nil, func(ctx context.Context, cancel context.CancelFunc) error { _ = stdoutWriter.Close() - diff, finalErr = gitdiff.ParsePatch(setting.Git.MaxGitDiffLines, setting.Git.MaxGitDiffLineCharacters, setting.Git.MaxGitDiffFiles, stdoutReader) + diff, finalErr = gitdiff.ParsePatch(setting.Git.MaxGitDiffLines, setting.Git.MaxGitDiffLineCharacters, setting.Git.MaxGitDiffFiles, stdoutReader, "") if finalErr != nil { log.Error("ParsePatch: %v", finalErr) cancel() diff --git a/services/gitdiff/csv_test.go b/services/gitdiff/csv_test.go index b4f725e558..90d24d1c80 100644 --- a/services/gitdiff/csv_test.go +++ b/services/gitdiff/csv_test.go @@ -188,7 +188,7 @@ c,d,e`, } for n, c := range cases { - diff, err := ParsePatch(setting.Git.MaxGitDiffLines, setting.Git.MaxGitDiffLineCharacters, setting.Git.MaxGitDiffFiles, strings.NewReader(c.diff)) + diff, err := ParsePatch(setting.Git.MaxGitDiffLines, setting.Git.MaxGitDiffLineCharacters, setting.Git.MaxGitDiffFiles, strings.NewReader(c.diff), "") if err != nil { t.Errorf("ParsePatch failed: %s", err) } diff --git a/services/gitdiff/gitdiff.go b/services/gitdiff/gitdiff.go index 33e66e89ec..c6d11ca89e 100644 --- a/services/gitdiff/gitdiff.go +++ b/services/gitdiff/gitdiff.go @@ -694,9 +694,11 @@ func (diff *Diff) LoadComments(issue *models.Issue, currentUser *models.User) er const cmdDiffHead = "diff --git " // ParsePatch builds a Diff object from a io.Reader and some parameters. -func ParsePatch(maxLines, maxLineCharacters, maxFiles int, reader io.Reader) (*Diff, error) { +func ParsePatch(maxLines, maxLineCharacters, maxFiles int, reader io.Reader, skipToFile string) (*Diff, error) { var curFile *DiffFile + skipping := skipToFile != "" + diff := &Diff{Files: make([]*DiffFile, 0)} sb := strings.Builder{} @@ -721,24 +723,36 @@ parsingLoop: // 1. A patch file always begins with `diff --git ` + `a/path b/path` (possibly quoted) // if it does not we have bad input! if !strings.HasPrefix(line, cmdDiffHead) { - return diff, fmt.Errorf("Invalid first file line: %s", line) + return diff, fmt.Errorf("invalid first file line: %s", line) } - // TODO: Handle skipping first n files if len(diff.Files) >= maxFiles { - lastFile := createDiffFile(diff, line) diff.End = lastFile.Name diff.IsIncomplete = true _, err := io.Copy(io.Discard, reader) if err != nil { // By the definition of io.Copy this never returns io.EOF - return diff, fmt.Errorf("Copy: %v", err) + return diff, fmt.Errorf("error during io.Copy: %w", err) } break parsingLoop } curFile = createDiffFile(diff, line) + if skipping { + if curFile.Name != skipToFile { + line, err = skipToNextDiffHead(input) + if err != nil { + if err == io.EOF { + return diff, nil + } + return diff, err + } + continue + } + skipping = false + } + diff.Files = append(diff.Files, curFile) // 2. It is followed by one or more extended header lines: @@ -892,7 +906,7 @@ parsingLoop: lineBytes, isFragment, err = input.ReadLine() if err != nil { // Now by the definition of ReadLine this cannot be io.EOF - return diff, fmt.Errorf("Unable to ReadLine: %v", err) + return diff, fmt.Errorf("unable to ReadLine: %w", err) } _, _ = sb.Write(lineBytes) } @@ -955,6 +969,36 @@ parsingLoop: return diff, nil } +func skipToNextDiffHead(input *bufio.Reader) (line string, err error) { + // need to skip until the next cmdDiffHead + isFragment, wasFragment := false, false + var lineBytes []byte + for { + lineBytes, isFragment, err = input.ReadLine() + if err != nil { + return + } + if wasFragment { + wasFragment = isFragment + continue + } + if bytes.HasPrefix(lineBytes, []byte(cmdDiffHead)) { + break + } + wasFragment = isFragment + } + line = string(lineBytes) + if isFragment { + var tail string + tail, err = input.ReadString('\n') + if err != nil { + return + } + line += tail + } + return +} + func parseHunks(curFile *DiffFile, maxLines, maxLineCharacters int, input *bufio.Reader) (lineBytes []byte, isFragment bool, err error) { sb := strings.Builder{} @@ -974,7 +1018,7 @@ func parseHunks(curFile *DiffFile, maxLines, maxLineCharacters int, input *bufio _, isFragment, err = input.ReadLine() if err != nil { // Now by the definition of ReadLine this cannot be io.EOF - err = fmt.Errorf("Unable to ReadLine: %v", err) + err = fmt.Errorf("unable to ReadLine: %w", err) return } } @@ -984,7 +1028,7 @@ func parseHunks(curFile *DiffFile, maxLines, maxLineCharacters int, input *bufio if err == io.EOF { return } - err = fmt.Errorf("Unable to ReadLine: %v", err) + err = fmt.Errorf("unable to ReadLine: %w", err) return } if lineBytes[0] == 'd' { @@ -1006,7 +1050,7 @@ func parseHunks(curFile *DiffFile, maxLines, maxLineCharacters int, input *bufio lineBytes, isFragment, err = input.ReadLine() if err != nil { // Now by the definition of ReadLine this cannot be io.EOF - err = fmt.Errorf("Unable to ReadLine: %v", err) + err = fmt.Errorf("unable to ReadLine: %w", err) return } _, _ = sb.Write(lineBytes) @@ -1037,7 +1081,7 @@ func parseHunks(curFile *DiffFile, maxLines, maxLineCharacters int, input *bufio } // This is used only to indicate that the current file does not have a terminal newline if !bytes.Equal(lineBytes, []byte("\\ No newline at end of file")) { - err = fmt.Errorf("Unexpected line in hunk: %s", string(lineBytes)) + err = fmt.Errorf("unexpected line in hunk: %s", string(lineBytes)) return } // Technically this should be the end the file! @@ -1106,7 +1150,7 @@ func parseHunks(curFile *DiffFile, maxLines, maxLineCharacters int, input *bufio curSection.Lines = append(curSection.Lines, diffLine) default: // This is unexpected - err = fmt.Errorf("Unexpected line in hunk: %s", string(lineBytes)) + err = fmt.Errorf("unexpected line in hunk: %s", string(lineBytes)) return } @@ -1118,7 +1162,7 @@ func parseHunks(curFile *DiffFile, maxLines, maxLineCharacters int, input *bufio lineBytes, isFragment, err = input.ReadLine() if err != nil { // Now by the definition of ReadLine this cannot be io.EOF - err = fmt.Errorf("Unable to ReadLine: %v", err) + err = fmt.Errorf("unable to ReadLine: %w", err) return } } @@ -1279,8 +1323,14 @@ func GetDiffRangeWithWhitespaceBehavior(gitRepo *git.Repository, beforeCommitID, diffArgs = append(diffArgs, afterCommitID) beforeCommitID = actualBeforeCommitID } - if skipTo != "" { + + // In git 2.31, git diff learned --skip-to which we can use to shortcut skip to file + // so if we are using at least this version of git we don't have to tell ParsePatch to do + // the skipping for us + parsePatchSkipToFile := skipTo + if skipTo != "" && git.CheckGitVersionAtLeast("2.31") == nil { diffArgs = append(diffArgs, "--skip-to="+skipTo) + parsePatchSkipToFile = "" } cmd := exec.CommandContext(ctx, git.GitExecutable, diffArgs...) @@ -1289,19 +1339,19 @@ func GetDiffRangeWithWhitespaceBehavior(gitRepo *git.Repository, beforeCommitID, stdout, err := cmd.StdoutPipe() if err != nil { - return nil, fmt.Errorf("StdoutPipe: %v", err) + return nil, fmt.Errorf("error creating StdoutPipe: %w", err) } if err = cmd.Start(); err != nil { - return nil, fmt.Errorf("Start: %v", err) + return nil, fmt.Errorf("error during Start: %w", err) } pid := process.GetManager().Add(fmt.Sprintf("GetDiffRange [repo_path: %s]", repoPath), cancel) defer process.GetManager().Remove(pid) - diff, err := ParsePatch(maxLines, maxLineCharacters, maxFiles, stdout) + diff, err := ParsePatch(maxLines, maxLineCharacters, maxFiles, stdout, parsePatchSkipToFile) if err != nil { - return nil, fmt.Errorf("ParsePatch: %v", err) + return nil, fmt.Errorf("unable to ParsePatch: %w", err) } diff.Start = skipTo @@ -1383,7 +1433,7 @@ func GetDiffRangeWithWhitespaceBehavior(gitRepo *git.Repository, beforeCommitID, } if err = cmd.Wait(); err != nil { - return nil, fmt.Errorf("Wait: %v", err) + return nil, fmt.Errorf("error during cmd.Wait: %w", err) } separator := "..." @@ -1418,7 +1468,7 @@ func GetDiffCommitWithWhitespaceBehavior(gitRepo *git.Repository, commitID, skip // CommentAsDiff returns c.Patch as *Diff func CommentAsDiff(c *models.Comment) (*Diff, error) { diff, err := ParsePatch(setting.Git.MaxGitDiffLines, - setting.Git.MaxGitDiffLineCharacters, setting.Git.MaxGitDiffFiles, strings.NewReader(c.Patch)) + setting.Git.MaxGitDiffLineCharacters, setting.Git.MaxGitDiffFiles, strings.NewReader(c.Patch), "") if err != nil { log.Error("Unable to parse patch: %v", err) return nil, err diff --git a/services/gitdiff/gitdiff_test.go b/services/gitdiff/gitdiff_test.go index c6c6f3b0e3..76a8b4e7ca 100644 --- a/services/gitdiff/gitdiff_test.go +++ b/services/gitdiff/gitdiff_test.go @@ -104,6 +104,177 @@ func TestDiffToHTML(t *testing.T) { }, DiffLineAdd)) } +func TestParsePatch_skipTo(t *testing.T) { + type testcase struct { + name string + gitdiff string + wantErr bool + addition int + deletion int + oldFilename string + filename string + skipTo string + } + tests := []testcase{ + { + name: "readme.md2readme.md", + gitdiff: `diff --git "a/A \\ B" "b/A \\ B" +--- "a/A \\ B" ++++ "b/A \\ B" +@@ -1,3 +1,6 @@ + # gitea-github-migrator ++ ++ Build Status +- Latest Release + Docker Pulls ++ cut off ++ cut off +diff --git "\\a/README.md" "\\b/README.md" +--- "\\a/README.md" ++++ "\\b/README.md" +@@ -1,3 +1,6 @@ + # gitea-github-migrator ++ ++ Build Status +- Latest Release + Docker Pulls ++ cut off ++ cut off +`, + addition: 4, + deletion: 1, + filename: "README.md", + oldFilename: "README.md", + skipTo: "README.md", + }, + { + name: "A \\ B", + gitdiff: `diff --git "a/A \\ B" "b/A \\ B" +--- "a/A \\ B" ++++ "b/A \\ B" +@@ -1,3 +1,6 @@ + # gitea-github-migrator ++ ++ Build Status +- Latest Release + Docker Pulls ++ cut off ++ cut off`, + addition: 4, + deletion: 1, + filename: "A \\ B", + oldFilename: "A \\ B", + skipTo: "A \\ B", + }, + { + name: "A \\ B", + gitdiff: `diff --git "\\a/README.md" "\\b/README.md" +--- "\\a/README.md" ++++ "\\b/README.md" +@@ -1,3 +1,6 @@ + # gitea-github-migrator ++ ++ Build Status +- Latest Release + Docker Pulls ++ cut off ++ cut off +diff --git "a/A \\ B" "b/A \\ B" +--- "a/A \\ B" ++++ "b/A \\ B" +@@ -1,3 +1,6 @@ + # gitea-github-migrator ++ ++ Build Status +- Latest Release + Docker Pulls ++ cut off ++ cut off`, + addition: 4, + deletion: 1, + filename: "A \\ B", + oldFilename: "A \\ B", + skipTo: "A \\ B", + }, + { + name: "readme.md2readme.md", + gitdiff: `diff --git "a/A \\ B" "b/A \\ B" +--- "a/A \\ B" ++++ "b/A \\ B" +@@ -1,3 +1,6 @@ + # gitea-github-migrator ++ ++ Build Status +- Latest Release + Docker Pulls ++ cut off ++ cut off +diff --git "a/A \\ B" "b/A \\ B" +--- "a/A \\ B" ++++ "b/A \\ B" +@@ -1,3 +1,6 @@ + # gitea-github-migrator ++ ++ Build Status +- Latest Release + Docker Pulls ++ cut off ++ cut off +diff --git "\\a/README.md" "\\b/README.md" +--- "\\a/README.md" ++++ "\\b/README.md" +@@ -1,3 +1,6 @@ + # gitea-github-migrator ++ ++ Build Status +- Latest Release + Docker Pulls ++ cut off ++ cut off +`, + addition: 4, + deletion: 1, + filename: "README.md", + oldFilename: "README.md", + skipTo: "README.md", + }, + } + for _, testcase := range tests { + t.Run(testcase.name, func(t *testing.T) { + got, err := ParsePatch(setting.Git.MaxGitDiffLines, setting.Git.MaxGitDiffLineCharacters, setting.Git.MaxGitDiffFiles, strings.NewReader(testcase.gitdiff), testcase.skipTo) + if (err != nil) != testcase.wantErr { + t.Errorf("ParsePatch(%q) error = %v, wantErr %v", testcase.name, err, testcase.wantErr) + return + } + + gotMarshaled, _ := json.MarshalIndent(got, "", " ") + if got.NumFiles != 1 { + t.Errorf("ParsePath(%q) did not receive 1 file:\n%s", testcase.name, string(gotMarshaled)) + return + } + if got.TotalAddition != testcase.addition { + t.Errorf("ParsePath(%q) does not have correct totalAddition %d, wanted %d", testcase.name, got.TotalAddition, testcase.addition) + } + if got.TotalDeletion != testcase.deletion { + t.Errorf("ParsePath(%q) did not have correct totalDeletion %d, wanted %d", testcase.name, got.TotalDeletion, testcase.deletion) + } + file := got.Files[0] + if file.Addition != testcase.addition { + t.Errorf("ParsePath(%q) does not have correct file addition %d, wanted %d", testcase.name, file.Addition, testcase.addition) + } + if file.Deletion != testcase.deletion { + t.Errorf("ParsePath(%q) did not have correct file deletion %d, wanted %d", testcase.name, file.Deletion, testcase.deletion) + } + if file.OldName != testcase.oldFilename { + t.Errorf("ParsePath(%q) did not have correct OldName %q, wanted %q", testcase.name, file.OldName, testcase.oldFilename) + } + if file.Name != testcase.filename { + t.Errorf("ParsePath(%q) did not have correct Name %q, wanted %q", testcase.name, file.Name, testcase.filename) + } + }) + } +} + func TestParsePatch_singlefile(t *testing.T) { type testcase struct { name string @@ -295,7 +466,7 @@ index 6961180..9ba1a00 100644 for _, testcase := range tests { t.Run(testcase.name, func(t *testing.T) { - got, err := ParsePatch(setting.Git.MaxGitDiffLines, setting.Git.MaxGitDiffLineCharacters, setting.Git.MaxGitDiffFiles, strings.NewReader(testcase.gitdiff)) + got, err := ParsePatch(setting.Git.MaxGitDiffLines, setting.Git.MaxGitDiffLineCharacters, setting.Git.MaxGitDiffFiles, strings.NewReader(testcase.gitdiff), "") if (err != nil) != testcase.wantErr { t.Errorf("ParsePatch(%q) error = %v, wantErr %v", testcase.name, err, testcase.wantErr) return @@ -344,21 +515,21 @@ index 0000000..6bb8f39 diffBuilder.WriteString("+line" + strconv.Itoa(i) + "\n") } diff = diffBuilder.String() - result, err := ParsePatch(20, setting.Git.MaxGitDiffLineCharacters, setting.Git.MaxGitDiffFiles, strings.NewReader(diff)) + result, err := ParsePatch(20, setting.Git.MaxGitDiffLineCharacters, setting.Git.MaxGitDiffFiles, strings.NewReader(diff), "") if err != nil { t.Errorf("There should not be an error: %v", err) } if !result.Files[0].IsIncomplete { t.Errorf("Files should be incomplete! %v", result.Files[0]) } - result, err = ParsePatch(40, setting.Git.MaxGitDiffLineCharacters, setting.Git.MaxGitDiffFiles, strings.NewReader(diff)) + result, err = ParsePatch(40, setting.Git.MaxGitDiffLineCharacters, setting.Git.MaxGitDiffFiles, strings.NewReader(diff), "") if err != nil { t.Errorf("There should not be an error: %v", err) } if result.Files[0].IsIncomplete { t.Errorf("Files should not be incomplete! %v", result.Files[0]) } - result, err = ParsePatch(40, 5, setting.Git.MaxGitDiffFiles, strings.NewReader(diff)) + result, err = ParsePatch(40, 5, setting.Git.MaxGitDiffFiles, strings.NewReader(diff), "") if err != nil { t.Errorf("There should not be an error: %v", err) } @@ -389,14 +560,14 @@ index 0000000..6bb8f39 diffBuilder.WriteString("+line" + strconv.Itoa(35) + "\n") diff = diffBuilder.String() - result, err = ParsePatch(20, 4096, setting.Git.MaxGitDiffFiles, strings.NewReader(diff)) + result, err = ParsePatch(20, 4096, setting.Git.MaxGitDiffFiles, strings.NewReader(diff), "") if err != nil { t.Errorf("There should not be an error: %v", err) } if !result.Files[0].IsIncomplete { t.Errorf("Files should be incomplete! %v", result.Files[0]) } - result, err = ParsePatch(40, 4096, setting.Git.MaxGitDiffFiles, strings.NewReader(diff)) + result, err = ParsePatch(40, 4096, setting.Git.MaxGitDiffFiles, strings.NewReader(diff), "") if err != nil { t.Errorf("There should not be an error: %v", err) } @@ -415,7 +586,7 @@ index 0000000..6bb8f39 Docker Pulls + cut off + cut off` - result, err = ParsePatch(setting.Git.MaxGitDiffLines, setting.Git.MaxGitDiffLineCharacters, setting.Git.MaxGitDiffFiles, strings.NewReader(diff)) + result, err = ParsePatch(setting.Git.MaxGitDiffLines, setting.Git.MaxGitDiffLineCharacters, setting.Git.MaxGitDiffFiles, strings.NewReader(diff), "") if err != nil { t.Errorf("ParsePatch failed: %s", err) } @@ -432,7 +603,7 @@ index 0000000..6bb8f39 Docker Pulls + cut off + cut off` - result, err = ParsePatch(setting.Git.MaxGitDiffLines, setting.Git.MaxGitDiffLineCharacters, setting.Git.MaxGitDiffFiles, strings.NewReader(diff2)) + result, err = ParsePatch(setting.Git.MaxGitDiffLines, setting.Git.MaxGitDiffLineCharacters, setting.Git.MaxGitDiffFiles, strings.NewReader(diff2), "") if err != nil { t.Errorf("ParsePatch failed: %s", err) } @@ -449,7 +620,7 @@ index 0000000..6bb8f39 Docker Pulls + cut off + cut off` - result, err = ParsePatch(setting.Git.MaxGitDiffLines, setting.Git.MaxGitDiffLineCharacters, setting.Git.MaxGitDiffFiles, strings.NewReader(diff2a)) + result, err = ParsePatch(setting.Git.MaxGitDiffLines, setting.Git.MaxGitDiffLineCharacters, setting.Git.MaxGitDiffFiles, strings.NewReader(diff2a), "") if err != nil { t.Errorf("ParsePatch failed: %s", err) } @@ -466,7 +637,7 @@ index 0000000..6bb8f39 Docker Pulls + cut off + cut off` - result, err = ParsePatch(setting.Git.MaxGitDiffLines, setting.Git.MaxGitDiffLineCharacters, setting.Git.MaxGitDiffFiles, strings.NewReader(diff3)) + result, err = ParsePatch(setting.Git.MaxGitDiffLines, setting.Git.MaxGitDiffLineCharacters, setting.Git.MaxGitDiffFiles, strings.NewReader(diff3), "") if err != nil { t.Errorf("ParsePatch failed: %s", err) } @@ -557,6 +728,6 @@ func TestNoCrashes(t *testing.T) { } for _, testcase := range tests { // It shouldn't crash, so don't care about the output. - ParsePatch(setting.Git.MaxGitDiffLines, setting.Git.MaxGitDiffLineCharacters, setting.Git.MaxGitDiffFiles, strings.NewReader(testcase.gitdiff)) + ParsePatch(setting.Git.MaxGitDiffLines, setting.Git.MaxGitDiffLineCharacters, setting.Git.MaxGitDiffFiles, strings.NewReader(testcase.gitdiff), "") } }