From e6b3be460840f1f982d5358198466e7d6f509d21 Mon Sep 17 00:00:00 2001 From: zeripath Date: Sun, 4 Sep 2022 11:47:56 +0100 Subject: [PATCH] Add more checks in migration code (#21011) When migrating add several more important sanity checks: * SHAs must be SHAs * Refs must be valid Refs * URLs must be reasonable Signed-off-by: Andrew Thornton Signed-off-by: Andrew Thornton Co-authored-by: techknowlogick --- models/activities/action.go | 2 +- modules/git/ref.go | 28 +- modules/git/repo_commit_nogogit.go | 2 +- modules/git/sha1.go | 7 +- modules/migration/pullrequest.go | 9 +- modules/migration/release.go | 9 +- modules/migration/repo.go | 2 +- modules/validation/binding.go | 34 +-- routers/api/v1/repo/commits.go | 3 +- routers/api/v1/repo/notes.go | 3 +- services/migrations/codebase.go | 22 +- services/migrations/common.go | 82 ++++++ services/migrations/dump.go | 328 +++++++++++++-------- services/migrations/gitbucket.go | 17 ++ services/migrations/gitea_downloader.go | 43 ++- services/migrations/gitea_uploader.go | 263 ++++++++++++----- services/migrations/gitea_uploader_test.go | 7 +- services/migrations/github.go | 76 +++-- services/migrations/gitlab.go | 24 ++ services/migrations/gogs.go | 14 + services/migrations/migrate.go | 16 +- services/migrations/onedev.go | 17 ++ services/migrations/restore.go | 1 + services/release/release.go | 7 +- 24 files changed, 714 insertions(+), 302 deletions(-) create mode 100644 services/migrations/common.go diff --git a/models/activities/action.go b/models/activities/action.go index 22419e20db..31ecdedd5b 100644 --- a/models/activities/action.go +++ b/models/activities/action.go @@ -267,7 +267,7 @@ func (a *Action) GetRefLink() string { return a.GetRepoLink() + "/src/branch/" + util.PathEscapeSegments(strings.TrimPrefix(a.RefName, git.BranchPrefix)) case strings.HasPrefix(a.RefName, git.TagPrefix): return a.GetRepoLink() + "/src/tag/" + util.PathEscapeSegments(strings.TrimPrefix(a.RefName, git.TagPrefix)) - case len(a.RefName) == 40 && git.SHAPattern.MatchString(a.RefName): + case len(a.RefName) == 40 && git.IsValidSHAPattern(a.RefName): return a.GetRepoLink() + "/src/commit/" + a.RefName default: // FIXME: we will just assume it's a branch - this was the old way - at some point we may want to enforce that there is always a ref here. diff --git a/modules/git/ref.go b/modules/git/ref.go index 9fd071ce58..2f459148a2 100644 --- a/modules/git/ref.go +++ b/modules/git/ref.go @@ -4,7 +4,10 @@ package git -import "strings" +import ( + "regexp" + "strings" +) const ( // RemotePrefix is the base directory of the remotes information of git. @@ -15,6 +18,29 @@ const ( pullLen = len(PullPrefix) ) +// refNamePatternInvalid is regular expression with unallowed characters in git reference name +// They cannot have ASCII control characters (i.e. bytes whose values are lower than \040, or \177 DEL), space, tilde ~, caret ^, or colon : anywhere. +// They cannot have question-mark ?, asterisk *, or open bracket [ anywhere +var refNamePatternInvalid = regexp.MustCompile( + `[\000-\037\177 \\~^:?*[]|` + // No absolutely invalid characters + `(?:^[/.])|` + // Not HasPrefix("/") or "." + `(?:/\.)|` + // no "/." + `(?:\.lock$)|(?:\.lock/)|` + // No ".lock/"" or ".lock" at the end + `(?:\.\.)|` + // no ".." anywhere + `(?://)|` + // no "//" anywhere + `(?:@{)|` + // no "@{" + `(?:[/.]$)|` + // no terminal '/' or '.' + `(?:^@$)`) // Not "@" + +// IsValidRefPattern ensures that the provided string could be a valid reference +func IsValidRefPattern(name string) bool { + return !refNamePatternInvalid.MatchString(name) +} + +func SanitizeRefPattern(name string) string { + return refNamePatternInvalid.ReplaceAllString(name, "_") +} + // Reference represents a Git ref. type Reference struct { Name string diff --git a/modules/git/repo_commit_nogogit.go b/modules/git/repo_commit_nogogit.go index e528af0ffb..efea307b37 100644 --- a/modules/git/repo_commit_nogogit.go +++ b/modules/git/repo_commit_nogogit.go @@ -138,7 +138,7 @@ func (repo *Repository) getCommitFromBatchReader(rd *bufio.Reader, id SHA1) (*Co // ConvertToSHA1 returns a Hash object from a potential ID string func (repo *Repository) ConvertToSHA1(commitID string) (SHA1, error) { - if len(commitID) == 40 && SHAPattern.MatchString(commitID) { + if len(commitID) == 40 && IsValidSHAPattern(commitID) { sha1, err := NewIDFromString(commitID) if err == nil { return sha1, nil diff --git a/modules/git/sha1.go b/modules/git/sha1.go index 2da74733df..15f282c6e4 100644 --- a/modules/git/sha1.go +++ b/modules/git/sha1.go @@ -19,7 +19,12 @@ const EmptySHA = "0000000000000000000000000000000000000000" const EmptyTreeSHA = "4b825dc642cb6eb9a060e54bf8d69288fbee4904" // SHAPattern can be used to determine if a string is an valid sha -var SHAPattern = regexp.MustCompile(`^[0-9a-f]{4,40}$`) +var shaPattern = regexp.MustCompile(`^[0-9a-f]{4,40}$`) + +// IsValidSHAPattern will check if the provided string matches the SHA Pattern +func IsValidSHAPattern(sha string) bool { + return shaPattern.MatchString(sha) +} // MustID always creates a new SHA1 from a [20]byte array with no validation of input. func MustID(b []byte) SHA1 { diff --git a/modules/migration/pullrequest.go b/modules/migration/pullrequest.go index eaa0dd45e2..dd520cd63f 100644 --- a/modules/migration/pullrequest.go +++ b/modules/migration/pullrequest.go @@ -26,7 +26,7 @@ type PullRequest struct { Updated time.Time Closed *time.Time Labels []*Label - PatchURL string `yaml:"patch_url"` + PatchURL string `yaml:"patch_url"` // SECURITY: This must be safe to download directly from Merged bool MergedTime *time.Time `yaml:"merged_time"` MergeCommitSHA string `yaml:"merge_commit_sha"` @@ -37,6 +37,7 @@ type PullRequest struct { Reactions []*Reaction ForeignIndex int64 Context DownloaderContext `yaml:"-"` + EnsuredSafe bool `yaml:"ensured_safe"` } func (p *PullRequest) GetLocalIndex() int64 { return p.Number } @@ -55,9 +56,9 @@ func (p PullRequest) GetGitRefName() string { // PullRequestBranch represents a pull request branch type PullRequestBranch struct { - CloneURL string `yaml:"clone_url"` - Ref string - SHA string + CloneURL string `yaml:"clone_url"` // SECURITY: This must be safe to download from + Ref string // SECURITY: this must be a git.IsValidRefPattern + SHA string // SECURITY: this must be a git.IsValidSHAPattern RepoName string `yaml:"repo_name"` OwnerName string `yaml:"owner_name"` } diff --git a/modules/migration/release.go b/modules/migration/release.go index cbdf01a3ed..923b3817b0 100644 --- a/modules/migration/release.go +++ b/modules/migration/release.go @@ -18,15 +18,16 @@ type ReleaseAsset struct { DownloadCount *int `yaml:"download_count"` Created time.Time Updated time.Time - DownloadURL *string `yaml:"download_url"` + + DownloadURL *string `yaml:"download_url"` // SECURITY: It is the responsibility of downloader to make sure this is safe // if DownloadURL is nil, the function should be invoked - DownloadFunc func() (io.ReadCloser, error) `yaml:"-"` + DownloadFunc func() (io.ReadCloser, error) `yaml:"-"` // SECURITY: It is the responsibility of downloader to make sure this is safe } // Release represents a release type Release struct { - TagName string `yaml:"tag_name"` - TargetCommitish string `yaml:"target_commitish"` + TagName string `yaml:"tag_name"` // SECURITY: This must pass git.IsValidRefPattern + TargetCommitish string `yaml:"target_commitish"` // SECURITY: This must pass git.IsValidRefPattern Name string Body string Draft bool diff --git a/modules/migration/repo.go b/modules/migration/repo.go index d0d62de8da..75622595d9 100644 --- a/modules/migration/repo.go +++ b/modules/migration/repo.go @@ -12,7 +12,7 @@ type Repository struct { IsPrivate bool `yaml:"is_private"` IsMirror bool `yaml:"is_mirror"` Description string - CloneURL string `yaml:"clone_url"` + CloneURL string `yaml:"clone_url"` // SECURITY: This must be checked to ensure that is safe to be used OriginalURL string `yaml:"original_url"` DefaultBranch string } diff --git a/modules/validation/binding.go b/modules/validation/binding.go index 7baa36ccc5..f08f632426 100644 --- a/modules/validation/binding.go +++ b/modules/validation/binding.go @@ -9,6 +9,8 @@ import ( "regexp" "strings" + "code.gitea.io/gitea/modules/git" + "gitea.com/go-chi/binding" "github.com/gobwas/glob" ) @@ -24,30 +26,6 @@ const ( ErrRegexPattern = "RegexPattern" ) -// GitRefNamePatternInvalid is regular expression with unallowed characters in git reference name -// They cannot have ASCII control characters (i.e. bytes whose values are lower than \040, or \177 DEL), space, tilde ~, caret ^, or colon : anywhere. -// They cannot have question-mark ?, asterisk *, or open bracket [ anywhere -var GitRefNamePatternInvalid = regexp.MustCompile(`[\000-\037\177 \\~^:?*[]+`) - -// CheckGitRefAdditionalRulesValid check name is valid on additional rules -func CheckGitRefAdditionalRulesValid(name string) bool { - // Additional rules as described at https://www.kernel.org/pub/software/scm/git/docs/git-check-ref-format.html - if strings.HasPrefix(name, "/") || strings.HasSuffix(name, "/") || - strings.HasSuffix(name, ".") || strings.Contains(name, "..") || - strings.Contains(name, "//") || strings.Contains(name, "@{") || - name == "@" { - return false - } - parts := strings.Split(name, "/") - for _, part := range parts { - if strings.HasSuffix(part, ".lock") || strings.HasPrefix(part, ".") { - return false - } - } - - return true -} - // AddBindingRules adds additional binding rules func AddBindingRules() { addGitRefNameBindingRule() @@ -67,16 +45,10 @@ func addGitRefNameBindingRule() { IsValid: func(errs binding.Errors, name string, val interface{}) (bool, binding.Errors) { str := fmt.Sprintf("%v", val) - if GitRefNamePatternInvalid.MatchString(str) { + if !git.IsValidRefPattern(str) { errs.Add([]string{name}, ErrGitRefName, "GitRefName") return false, errs } - - if !CheckGitRefAdditionalRulesValid(str) { - errs.Add([]string{name}, ErrGitRefName, "GitRefName") - return false, errs - } - return true, errs }, }) diff --git a/routers/api/v1/repo/commits.go b/routers/api/v1/repo/commits.go index b196ce9774..12c329c203 100644 --- a/routers/api/v1/repo/commits.go +++ b/routers/api/v1/repo/commits.go @@ -17,7 +17,6 @@ import ( "code.gitea.io/gitea/modules/git" "code.gitea.io/gitea/modules/setting" api "code.gitea.io/gitea/modules/structs" - "code.gitea.io/gitea/modules/validation" "code.gitea.io/gitea/routers/api/v1/utils" ) @@ -53,7 +52,7 @@ func GetSingleCommit(ctx *context.APIContext) { // "$ref": "#/responses/notFound" sha := ctx.Params(":sha") - if (validation.GitRefNamePatternInvalid.MatchString(sha) || !validation.CheckGitRefAdditionalRulesValid(sha)) && !git.SHAPattern.MatchString(sha) { + if !git.IsValidRefPattern(sha) { ctx.Error(http.StatusUnprocessableEntity, "no valid ref or sha", fmt.Sprintf("no valid ref or sha: %s", sha)) return } diff --git a/routers/api/v1/repo/notes.go b/routers/api/v1/repo/notes.go index bd8e27e40b..67f097a424 100644 --- a/routers/api/v1/repo/notes.go +++ b/routers/api/v1/repo/notes.go @@ -12,7 +12,6 @@ import ( "code.gitea.io/gitea/modules/convert" "code.gitea.io/gitea/modules/git" api "code.gitea.io/gitea/modules/structs" - "code.gitea.io/gitea/modules/validation" ) // GetNote Get a note corresponding to a single commit from a repository @@ -47,7 +46,7 @@ func GetNote(ctx *context.APIContext) { // "$ref": "#/responses/notFound" sha := ctx.Params(":sha") - if (validation.GitRefNamePatternInvalid.MatchString(sha) || !validation.CheckGitRefAdditionalRulesValid(sha)) && !git.SHAPattern.MatchString(sha) { + if !git.IsValidRefPattern(sha) { ctx.Error(http.StatusUnprocessableEntity, "no valid ref or sha", fmt.Sprintf("no valid ref or sha: %s", sha)) return } diff --git a/services/migrations/codebase.go b/services/migrations/codebase.go index bb74c0a49d..edeb276773 100644 --- a/services/migrations/codebase.go +++ b/services/migrations/codebase.go @@ -107,9 +107,24 @@ func NewCodebaseDownloader(ctx context.Context, projectURL *url.URL, project, re commitMap: make(map[string]string), } + log.Trace("Create Codebase downloader. BaseURL: %s Project: %s RepoName: %s", baseURL, project, repoName) return downloader } +// String implements Stringer +func (d *CodebaseDownloader) String() string { + return fmt.Sprintf("migration from codebase server %s %s/%s", d.baseURL, d.project, d.repoName) +} + +// ColorFormat provides a basic color format for a GogsDownloader +func (d *CodebaseDownloader) ColorFormat(s fmt.State) { + if d == nil { + log.ColorFprintf(s, "") + return + } + log.ColorFprintf(s, "migration from codebase server %s %s/%s", d.baseURL, d.project, d.repoName) +} + // FormatCloneURL add authentication into remote URLs func (d *CodebaseDownloader) FormatCloneURL(opts base.MigrateOptions, remoteAddr string) (string, error) { return opts.CloneAddr, nil @@ -451,8 +466,8 @@ func (d *CodebaseDownloader) GetPullRequests(page, perPage int) ([]*base.PullReq Value int64 `xml:",chardata"` Type string `xml:"type,attr"` } `xml:"id"` - SourceRef string `xml:"source-ref"` - TargetRef string `xml:"target-ref"` + SourceRef string `xml:"source-ref"` // NOTE: from the documentation these are actually just branches NOT full refs + TargetRef string `xml:"target-ref"` // NOTE: from the documentation these are actually just branches NOT full refs Subject string `xml:"subject"` Status string `xml:"status"` UserID struct { @@ -564,6 +579,9 @@ func (d *CodebaseDownloader) GetPullRequests(page, perPage int) ([]*base.PullReq Comments: comments[1:], }, }) + + // SECURITY: Ensure that the PR is safe + _ = CheckAndEnsureSafePR(pullRequests[len(pullRequests)-1], d.baseURL.String(), d) } return pullRequests, true, nil diff --git a/services/migrations/common.go b/services/migrations/common.go new file mode 100644 index 0000000000..305ae89b2d --- /dev/null +++ b/services/migrations/common.go @@ -0,0 +1,82 @@ +// Copyright 2022 The Gitea Authors. All rights reserved. +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file. + +package migrations + +import ( + "fmt" + "strings" + + admin_model "code.gitea.io/gitea/models/admin" + "code.gitea.io/gitea/modules/git" + "code.gitea.io/gitea/modules/log" + base "code.gitea.io/gitea/modules/migration" +) + +// WarnAndNotice will log the provided message and send a repository notice +func WarnAndNotice(fmtStr string, args ...interface{}) { + log.Warn(fmtStr, args...) + if err := admin_model.CreateRepositoryNotice(fmt.Sprintf(fmtStr, args...)); err != nil { + log.Error("create repository notice failed: ", err) + } +} + +func hasBaseURL(toCheck, baseURL string) bool { + if len(baseURL) > 0 && baseURL[len(baseURL)-1] != '/' { + baseURL += "/" + } + return strings.HasPrefix(toCheck, baseURL) +} + +// CheckAndEnsureSafePR will check that a given PR is safe to download +func CheckAndEnsureSafePR(pr *base.PullRequest, commonCloneBaseURL string, g base.Downloader) bool { + valid := true + // SECURITY: the patchURL must be checked to have the same baseURL as the current to prevent open redirect + if pr.PatchURL != "" && !hasBaseURL(pr.PatchURL, commonCloneBaseURL) { + // TODO: Should we check that this url has the expected format for a patch url? + WarnAndNotice("PR #%d in %s has invalid PatchURL: %s baseURL: %s", pr.Number, g, pr.PatchURL, commonCloneBaseURL) + pr.PatchURL = "" + valid = false + } + + // SECURITY: the headCloneURL must be checked to have the same baseURL as the current to prevent open redirect + if pr.Head.CloneURL != "" && !hasBaseURL(pr.Head.CloneURL, commonCloneBaseURL) { + // TODO: Should we check that this url has the expected format for a patch url? + WarnAndNotice("PR #%d in %s has invalid HeadCloneURL: %s baseURL: %s", pr.Number, g, pr.Head.CloneURL, commonCloneBaseURL) + pr.Head.CloneURL = "" + valid = false + } + + // SECURITY: SHAs Must be a SHA + if pr.MergeCommitSHA != "" && !git.IsValidSHAPattern(pr.MergeCommitSHA) { + WarnAndNotice("PR #%d in %s has invalid MergeCommitSHA: %s", pr.Number, g, pr.MergeCommitSHA) + pr.MergeCommitSHA = "" + } + if pr.Head.SHA != "" && !git.IsValidSHAPattern(pr.Head.SHA) { + WarnAndNotice("PR #%d in %s has invalid HeadSHA: %s", pr.Number, g, pr.Head.SHA) + pr.Head.SHA = "" + valid = false + } + if pr.Base.SHA != "" && !git.IsValidSHAPattern(pr.Base.SHA) { + WarnAndNotice("PR #%d in %s has invalid BaseSHA: %s", pr.Number, g, pr.Base.SHA) + pr.Base.SHA = "" + valid = false + } + + // SECURITY: Refs must be valid refs or SHAs + if pr.Head.Ref != "" && !git.IsValidRefPattern(pr.Head.Ref) { + WarnAndNotice("PR #%d in %s has invalid HeadRef: %s", pr.Number, g, pr.Head.Ref) + pr.Head.Ref = "" + valid = false + } + if pr.Base.Ref != "" && !git.IsValidRefPattern(pr.Base.Ref) { + WarnAndNotice("PR #%d in %s has invalid BaseRef: %s", pr.Number, g, pr.Base.Ref) + pr.Base.Ref = "" + valid = false + } + + pr.EnsuredSafe = true + + return valid +} diff --git a/services/migrations/dump.go b/services/migrations/dump.go index f040c6c663..2dc35b9d9f 100644 --- a/services/migrations/dump.go +++ b/services/migrations/dump.go @@ -12,7 +12,6 @@ import ( "net/http" "net/url" "os" - "path" "path/filepath" "strconv" "strings" @@ -26,6 +25,8 @@ import ( "code.gitea.io/gitea/modules/setting" "code.gitea.io/gitea/modules/structs" + "github.com/google/uuid" + "gopkg.in/yaml.v2" ) @@ -47,7 +48,7 @@ type RepositoryDumper struct { reviewFiles map[int64]*os.File gitRepo *git.Repository - prHeadCache map[string]struct{} + prHeadCache map[string]string } // NewRepositoryDumper creates an gitea Uploader @@ -62,7 +63,7 @@ func NewRepositoryDumper(ctx context.Context, baseDir, repoOwner, repoName strin baseDir: baseDir, repoOwner: repoOwner, repoName: repoName, - prHeadCache: make(map[string]struct{}), + prHeadCache: make(map[string]string), commentFiles: make(map[int64]*os.File), reviewFiles: make(map[int64]*os.File), }, nil @@ -296,8 +297,10 @@ func (g *RepositoryDumper) CreateReleases(releases ...*base.Release) error { } for _, asset := range release.Assets { attachLocalPath := filepath.Join(attachDir, asset.Name) - // download attachment + // SECURITY: We cannot check the DownloadURL and DownloadFunc are safe here + // ... we must assume that they are safe and simply download the attachment + // download attachment err := func(attachPath string) error { var rc io.ReadCloser var err error @@ -317,7 +320,7 @@ func (g *RepositoryDumper) CreateReleases(releases ...*base.Release) error { fw, err := os.Create(attachPath) if err != nil { - return fmt.Errorf("Create: %v", err) + return fmt.Errorf("create: %w", err) } defer fw.Close() @@ -385,22 +388,7 @@ func (g *RepositoryDumper) createItems(dir string, itemFiles map[int64]*os.File, } for number, items := range itemsMap { - var err error - itemFile := itemFiles[number] - if itemFile == nil { - itemFile, err = os.Create(filepath.Join(dir, fmt.Sprintf("%d.yml", number))) - if err != nil { - return err - } - itemFiles[number] = itemFile - } - - bs, err := yaml.Marshal(items) - if err != nil { - return err - } - - if _, err := itemFile.Write(bs); err != nil { + if err := g.encodeItems(number, items, dir, itemFiles); err != nil { return err } } @@ -408,6 +396,23 @@ func (g *RepositoryDumper) createItems(dir string, itemFiles map[int64]*os.File, return nil } +func (g *RepositoryDumper) encodeItems(number int64, items []interface{}, dir string, itemFiles map[int64]*os.File) error { + itemFile := itemFiles[number] + if itemFile == nil { + var err error + itemFile, err = os.Create(filepath.Join(dir, fmt.Sprintf("%d.yml", number))) + if err != nil { + return err + } + itemFiles[number] = itemFile + } + + encoder := yaml.NewEncoder(itemFile) + defer encoder.Close() + + return encoder.Encode(items) +} + // CreateComments creates comments of issues func (g *RepositoryDumper) CreateComments(comments ...*base.Comment) error { commentsMap := make(map[int64][]interface{}, len(comments)) @@ -418,102 +423,175 @@ func (g *RepositoryDumper) CreateComments(comments ...*base.Comment) error { return g.createItems(g.commentDir(), g.commentFiles, commentsMap) } -// CreatePullRequests creates pull requests -func (g *RepositoryDumper) CreatePullRequests(prs ...*base.PullRequest) error { - for _, pr := range prs { - // download patch file - err := func() error { - u, err := g.setURLToken(pr.PatchURL) - if err != nil { - return err - } - resp, err := http.Get(u) - if err != nil { - return err - } - defer resp.Body.Close() - pullDir := filepath.Join(g.gitPath(), "pulls") - if err = os.MkdirAll(pullDir, os.ModePerm); err != nil { - return err - } - fPath := filepath.Join(pullDir, fmt.Sprintf("%d.patch", pr.Number)) - f, err := os.Create(fPath) - if err != nil { - return err - } - defer f.Close() - if _, err = io.Copy(f, resp.Body); err != nil { - return err - } - pr.PatchURL = "git/pulls/" + fmt.Sprintf("%d.patch", pr.Number) - - return nil - }() - if err != nil { - return err - } - - // set head information - pullHead := filepath.Join(g.gitPath(), "refs", "pull", fmt.Sprintf("%d", pr.Number)) - if err := os.MkdirAll(pullHead, os.ModePerm); err != nil { - return err - } - p, err := os.Create(filepath.Join(pullHead, "head")) - if err != nil { - return err - } - _, err = p.WriteString(pr.Head.SHA) - p.Close() - if err != nil { - return err - } - - if pr.IsForkPullRequest() && pr.State != "closed" { - if pr.Head.OwnerName != "" { - remote := pr.Head.OwnerName - _, ok := g.prHeadCache[remote] - if !ok { - // git remote add - // TODO: how to handle private CloneURL? - err := g.gitRepo.AddRemote(remote, pr.Head.CloneURL, true) - if err != nil { - log.Error("AddRemote failed: %s", err) - } else { - g.prHeadCache[remote] = struct{}{} - ok = true - } - } - - if ok { - _, _, err = git.NewCommand(g.ctx, "fetch", remote, pr.Head.Ref).RunStdString(&git.RunOpts{Dir: g.gitPath()}) - if err != nil { - log.Error("Fetch branch from %s failed: %v", pr.Head.CloneURL, err) - } else { - // a new branch name with will be created to as new head branch - ref := path.Join(pr.Head.OwnerName, pr.Head.Ref) - headBranch := filepath.Join(g.gitPath(), "refs", "heads", ref) - if err := os.MkdirAll(filepath.Dir(headBranch), os.ModePerm); err != nil { - return err - } - b, err := os.Create(headBranch) - if err != nil { - return err - } - _, err = b.WriteString(pr.Head.SHA) - b.Close() - if err != nil { - return err - } - pr.Head.Ref = ref - } - } - } - } - // whatever it's a forked repo PR, we have to change head info as the same as the base info - pr.Head.OwnerName = pr.Base.OwnerName - pr.Head.RepoName = pr.Base.RepoName +func (g *RepositoryDumper) handlePullRequest(pr *base.PullRequest) error { + // SECURITY: this pr must have been ensured safe + if !pr.EnsuredSafe { + log.Error("PR #%d in %s/%s has not been checked for safety ... We will ignore this.", pr.Number, g.repoOwner, g.repoName) + return fmt.Errorf("unsafe PR #%d", pr.Number) } + // First we download the patch file + err := func() error { + // if the patchURL is empty there is nothing to download + if pr.PatchURL == "" { + return nil + } + + // SECURITY: We will assume that the pr.PatchURL has been checked + // pr.PatchURL maybe a local file - but note EnsureSafe should be asserting that this safe + u, err := g.setURLToken(pr.PatchURL) + if err != nil { + return err + } + + // SECURITY: We will assume that the pr.PatchURL has been checked + // pr.PatchURL maybe a local file - but note EnsureSafe should be asserting that this safe + resp, err := http.Get(u) // TODO: This probably needs to use the downloader as there may be rate limiting issues here + if err != nil { + return err + } + defer resp.Body.Close() + pullDir := filepath.Join(g.gitPath(), "pulls") + if err = os.MkdirAll(pullDir, os.ModePerm); err != nil { + return err + } + fPath := filepath.Join(pullDir, fmt.Sprintf("%d.patch", pr.Number)) + f, err := os.Create(fPath) + if err != nil { + return err + } + defer f.Close() + + // TODO: Should there be limits on the size of this file? + if _, err = io.Copy(f, resp.Body); err != nil { + return err + } + pr.PatchURL = "git/pulls/" + fmt.Sprintf("%d.patch", pr.Number) + + return nil + }() + if err != nil { + log.Error("PR #%d in %s/%s unable to download patch: %v", pr.Number, g.repoOwner, g.repoName, err) + return err + } + + isFork := pr.IsForkPullRequest() + + // Even if it's a forked repo PR, we have to change head info as the same as the base info + oldHeadOwnerName := pr.Head.OwnerName + pr.Head.OwnerName, pr.Head.RepoName = pr.Base.OwnerName, pr.Base.RepoName + + if !isFork || pr.State == "closed" { + return nil + } + + // OK we want to fetch the current head as a branch from its CloneURL + + // 1. Is there a head clone URL available? + // 2. Is there a head ref available? + if pr.Head.CloneURL == "" || pr.Head.Ref == "" { + // Set head information if pr.Head.SHA is available + if pr.Head.SHA != "" { + _, _, err = git.NewCommand(g.ctx, "update-ref", "--no-deref", pr.GetGitRefName(), pr.Head.SHA).RunStdString(&git.RunOpts{Dir: g.gitPath()}) + if err != nil { + log.Error("PR #%d in %s/%s unable to update-ref for pr HEAD: %v", pr.Number, g.repoOwner, g.repoName, err) + } + } + return nil + } + + // 3. We need to create a remote for this clone url + // ... maybe we already have a name for this remote + remote, ok := g.prHeadCache[pr.Head.CloneURL+":"] + if !ok { + // ... let's try ownername as a reasonable name + remote = oldHeadOwnerName + if !git.IsValidRefPattern(remote) { + // ... let's try something less nice + remote = "head-pr-" + strconv.FormatInt(pr.Number, 10) + } + // ... now add the remote + err := g.gitRepo.AddRemote(remote, pr.Head.CloneURL, true) + if err != nil { + log.Error("PR #%d in %s/%s AddRemote[%s] failed: %v", pr.Number, g.repoOwner, g.repoName, remote, err) + } else { + g.prHeadCache[pr.Head.CloneURL+":"] = remote + ok = true + } + } + if !ok { + // Set head information if pr.Head.SHA is available + if pr.Head.SHA != "" { + _, _, err = git.NewCommand(g.ctx, "update-ref", "--no-deref", pr.GetGitRefName(), pr.Head.SHA).RunStdString(&git.RunOpts{Dir: g.gitPath()}) + if err != nil { + log.Error("PR #%d in %s/%s unable to update-ref for pr HEAD: %v", pr.Number, g.repoOwner, g.repoName, err) + } + } + + return nil + } + + // 4. Check if we already have this ref? + localRef, ok := g.prHeadCache[pr.Head.CloneURL+":"+pr.Head.Ref] + if !ok { + // ... We would normally name this migrated branch as / but we need to ensure that is safe + localRef = git.SanitizeRefPattern(oldHeadOwnerName + "/" + pr.Head.Ref) + + // ... Now we must assert that this does not exist + if g.gitRepo.IsBranchExist(localRef) { + localRef = "head-pr-" + strconv.FormatInt(pr.Number, 10) + "/" + localRef + i := 0 + for g.gitRepo.IsBranchExist(localRef) { + if i > 5 { + // ... We tried, we really tried but this is just a seriously unfriendly repo + return fmt.Errorf("unable to create unique local reference from %s", pr.Head.Ref) + } + // OK just try some uuids! + localRef = git.SanitizeRefPattern("head-pr-" + strconv.FormatInt(pr.Number, 10) + uuid.New().String()) + i++ + } + } + + fetchArg := pr.Head.Ref + ":" + git.BranchPrefix + localRef + if strings.HasPrefix(fetchArg, "-") { + fetchArg = git.BranchPrefix + fetchArg + } + + _, _, err = git.NewCommand(g.ctx, "fetch", "--no-tags", "--", remote, fetchArg).RunStdString(&git.RunOpts{Dir: g.gitPath()}) + if err != nil { + log.Error("Fetch branch from %s failed: %v", pr.Head.CloneURL, err) + // We need to continue here so that the Head.Ref is reset and we attempt to set the gitref for the PR + // (This last step will likely fail but we should try to do as much as we can.) + } else { + // Cache the localRef as the Head.Ref - if we've failed we can always try again. + g.prHeadCache[pr.Head.CloneURL+":"+pr.Head.Ref] = localRef + } + } + + // Set the pr.Head.Ref to the localRef + pr.Head.Ref = localRef + + // 5. Now if pr.Head.SHA == "" we should recover this to the head of this branch + if pr.Head.SHA == "" { + headSha, err := g.gitRepo.GetBranchCommitID(localRef) + if err != nil { + log.Error("unable to get head SHA of local head for PR #%d from %s in %s/%s. Error: %v", pr.Number, pr.Head.Ref, g.repoOwner, g.repoName, err) + return nil + } + pr.Head.SHA = headSha + } + if pr.Head.SHA != "" { + _, _, err = git.NewCommand(g.ctx, "update-ref", "--no-deref", pr.GetGitRefName(), pr.Head.SHA).RunStdString(&git.RunOpts{Dir: g.gitPath()}) + if err != nil { + log.Error("unable to set %s as the local head for PR #%d from %s in %s/%s. Error: %v", pr.Head.SHA, pr.Number, pr.Head.Ref, g.repoOwner, g.repoName, err) + } + } + + return nil +} + +// CreatePullRequests creates pull requests +func (g *RepositoryDumper) CreatePullRequests(prs ...*base.PullRequest) error { var err error if g.pullrequestFile == nil { if err := os.MkdirAll(g.baseDir, os.ModePerm); err != nil { @@ -525,16 +603,22 @@ func (g *RepositoryDumper) CreatePullRequests(prs ...*base.PullRequest) error { } } - bs, err := yaml.Marshal(prs) - if err != nil { - return err - } + encoder := yaml.NewEncoder(g.pullrequestFile) + defer encoder.Close() - if _, err := g.pullrequestFile.Write(bs); err != nil { - return err + count := 0 + for i := 0; i < len(prs); i++ { + pr := prs[i] + if err := g.handlePullRequest(pr); err != nil { + log.Error("PR #%d in %s/%s failed - skipping", pr.Number, g.repoOwner, g.repoName, err) + continue + } + prs[count] = pr + count++ } + prs = prs[:count] - return nil + return encoder.Encode(prs) } // CreateReviews create pull request reviews diff --git a/services/migrations/gitbucket.go b/services/migrations/gitbucket.go index 92b6cac738..21d8c672dd 100644 --- a/services/migrations/gitbucket.go +++ b/services/migrations/gitbucket.go @@ -6,9 +6,11 @@ package migrations import ( "context" + "fmt" "net/url" "strings" + "code.gitea.io/gitea/modules/log" base "code.gitea.io/gitea/modules/migration" "code.gitea.io/gitea/modules/structs" ) @@ -37,6 +39,7 @@ func (f *GitBucketDownloaderFactory) New(ctx context.Context, opts base.MigrateO oldOwner := fields[1] oldName := strings.TrimSuffix(fields[2], ".git") + log.Trace("Create GitBucket downloader. BaseURL: %s RepoOwner: %s RepoName: %s", baseURL, oldOwner, oldName) return NewGitBucketDownloader(ctx, baseURL, opts.AuthUsername, opts.AuthPassword, opts.AuthToken, oldOwner, oldName), nil } @@ -51,6 +54,20 @@ type GitBucketDownloader struct { *GithubDownloaderV3 } +// String implements Stringer +func (g *GitBucketDownloader) String() string { + return fmt.Sprintf("migration from gitbucket server %s %s/%s", g.baseURL, g.repoOwner, g.repoName) +} + +// ColorFormat provides a basic color format for a GitBucketDownloader +func (g *GitBucketDownloader) ColorFormat(s fmt.State) { + if g == nil { + log.ColorFprintf(s, "") + return + } + log.ColorFprintf(s, "migration from gitbucket server %s %s/%s", g.baseURL, g.repoOwner, g.repoName) +} + // NewGitBucketDownloader creates a GitBucket downloader func NewGitBucketDownloader(ctx context.Context, baseURL, userName, password, token, repoOwner, repoName string) *GitBucketDownloader { githubDownloader := NewGithubDownloaderV3(ctx, baseURL, userName, password, token, repoOwner, repoName) diff --git a/services/migrations/gitea_downloader.go b/services/migrations/gitea_downloader.go index f05ee0cb7d..c52f302691 100644 --- a/services/migrations/gitea_downloader.go +++ b/services/migrations/gitea_downloader.go @@ -14,7 +14,6 @@ import ( "strings" "time" - admin_model "code.gitea.io/gitea/models/admin" "code.gitea.io/gitea/modules/log" base "code.gitea.io/gitea/modules/migration" "code.gitea.io/gitea/modules/structs" @@ -71,6 +70,7 @@ type GiteaDownloader struct { base.NullDownloader ctx context.Context client *gitea_sdk.Client + baseURL string repoOwner string repoName string pagination bool @@ -117,6 +117,7 @@ func NewGiteaDownloader(ctx context.Context, baseURL, repoPath, username, passwo return &GiteaDownloader{ ctx: ctx, client: giteaClient, + baseURL: baseURL, repoOwner: path[0], repoName: path[1], pagination: paginationSupport, @@ -129,6 +130,20 @@ func (g *GiteaDownloader) SetContext(ctx context.Context) { g.ctx = ctx } +// String implements Stringer +func (g *GiteaDownloader) String() string { + return fmt.Sprintf("migration from gitea server %s %s/%s", g.baseURL, g.repoOwner, g.repoName) +} + +// ColorFormat provides a basic color format for a GiteaDownloader +func (g *GiteaDownloader) ColorFormat(s fmt.State) { + if g == nil { + log.ColorFprintf(s, "") + return + } + log.ColorFprintf(s, "migration from gitea server %s %s/%s", g.baseURL, g.repoOwner, g.repoName) +} + // GetRepoInfo returns a repository information func (g *GiteaDownloader) GetRepoInfo() (*base.Repository, error) { if g == nil { @@ -284,6 +299,12 @@ func (g *GiteaDownloader) convertGiteaRelease(rel *gitea_sdk.Release) *base.Rele if err != nil { return nil, err } + + if !hasBaseURL(asset.DownloadURL, g.baseURL) { + WarnAndNotice("Unexpected AssetURL for assetID[%d] in %s: %s", asset.ID, g, asset.DownloadURL) + return io.NopCloser(strings.NewReader(asset.DownloadURL)), nil + } + // FIXME: for a private download? req, err := http.NewRequest("GET", asset.DownloadURL, nil) if err != nil { @@ -403,11 +424,7 @@ func (g *GiteaDownloader) GetIssues(page, perPage int) ([]*base.Issue, bool, err reactions, err := g.getIssueReactions(issue.Index) if err != nil { - log.Warn("Unable to load reactions during migrating issue #%d to %s/%s. Error: %v", issue.Index, g.repoOwner, g.repoName, err) - if err2 := admin_model.CreateRepositoryNotice( - fmt.Sprintf("Unable to load reactions during migrating issue #%d to %s/%s. Error: %v", issue.Index, g.repoOwner, g.repoName, err)); err2 != nil { - log.Error("create repository notice failed: ", err2) - } + WarnAndNotice("Unable to load reactions during migrating issue #%d in %s. Error: %v", issue.Index, g, err) } var assignees []string @@ -465,11 +482,7 @@ func (g *GiteaDownloader) GetComments(commentable base.Commentable) ([]*base.Com for _, comment := range comments { reactions, err := g.getCommentReactions(comment.ID) if err != nil { - log.Warn("Unable to load comment reactions during migrating issue #%d for comment %d to %s/%s. Error: %v", commentable.GetForeignIndex(), comment.ID, g.repoOwner, g.repoName, err) - if err2 := admin_model.CreateRepositoryNotice( - fmt.Sprintf("Unable to load reactions during migrating issue #%d for comment %d to %s/%s. Error: %v", commentable.GetForeignIndex(), comment.ID, g.repoOwner, g.repoName, err)); err2 != nil { - log.Error("create repository notice failed: ", err2) - } + WarnAndNotice("Unable to load comment reactions during migrating issue #%d for comment %d in %s. Error: %v", commentable.GetForeignIndex(), comment.ID, g, err) } allComments = append(allComments, &base.Comment{ @@ -544,11 +557,7 @@ func (g *GiteaDownloader) GetPullRequests(page, perPage int) ([]*base.PullReques reactions, err := g.getIssueReactions(pr.Index) if err != nil { - log.Warn("Unable to load reactions during migrating pull #%d to %s/%s. Error: %v", pr.Index, g.repoOwner, g.repoName, err) - if err2 := admin_model.CreateRepositoryNotice( - fmt.Sprintf("Unable to load reactions during migrating pull #%d to %s/%s. Error: %v", pr.Index, g.repoOwner, g.repoName, err)); err2 != nil { - log.Error("create repository notice failed: ", err2) - } + WarnAndNotice("Unable to load reactions during migrating pull #%d in %s. Error: %v", pr.Index, g, err) } var assignees []string @@ -605,6 +614,8 @@ func (g *GiteaDownloader) GetPullRequests(page, perPage int) ([]*base.PullReques }, ForeignIndex: pr.Index, }) + // SECURITY: Ensure that the PR is safe + _ = CheckAndEnsureSafePR(allPRs[len(allPRs)-1], g.baseURL, g) } isEnd := len(prs) < perPage diff --git a/services/migrations/gitea_uploader.go b/services/migrations/gitea_uploader.go index 79b22bef78..5bf77e6332 100644 --- a/services/migrations/gitea_uploader.go +++ b/services/migrations/gitea_uploader.go @@ -10,6 +10,7 @@ import ( "fmt" "io" "os" + "path" "path/filepath" "strconv" "strings" @@ -32,7 +33,7 @@ import ( "code.gitea.io/gitea/modules/uri" "code.gitea.io/gitea/services/pull" - gouuid "github.com/google/uuid" + "github.com/google/uuid" ) var _ base.Uploader = &GiteaLocalUploader{} @@ -48,7 +49,7 @@ type GiteaLocalUploader struct { milestones map[string]int64 issues map[int64]*issues_model.Issue gitRepo *git.Repository - prHeadCache map[string]struct{} + prHeadCache map[string]string sameApp bool userMap map[int64]int64 // external user id mapping to user id prCache map[int64]*issues_model.PullRequest @@ -65,7 +66,7 @@ func NewGiteaLocalUploader(ctx context.Context, doer *user_model.User, repoOwner labels: make(map[string]*issues_model.Label), milestones: make(map[string]int64), issues: make(map[int64]*issues_model.Issue), - prHeadCache: make(map[string]struct{}), + prHeadCache: make(map[string]string), userMap: make(map[int64]int64), prCache: make(map[int64]*issues_model.PullRequest), } @@ -125,7 +126,7 @@ func (g *GiteaLocalUploader) CreateRepo(repo *base.Repository, opts base.Migrate Mirror: repo.IsMirror, LFS: opts.LFS, LFSEndpoint: opts.LFSEndpoint, - CloneAddr: repo.CloneURL, + CloneAddr: repo.CloneURL, // SECURITY: we will assume that this has already been checked Private: repo.IsPrivate, Wiki: opts.Wiki, Releases: opts.Releases, // if didn't get releases, then sync them from tags @@ -150,13 +151,15 @@ func (g *GiteaLocalUploader) Close() { // CreateTopics creates topics func (g *GiteaLocalUploader) CreateTopics(topics ...string) error { - // ignore topics to long for the db + // Ignore topics too long for the db c := 0 - for i := range topics { - if len(topics[i]) <= 50 { - topics[c] = topics[i] - c++ + for _, topic := range topics { + if len(topic) > 50 { + continue } + + topics[c] = topic + c++ } topics = topics[:c] return repo_model.SaveTopics(g.repo.ID, topics...) @@ -217,11 +220,17 @@ func (g *GiteaLocalUploader) CreateMilestones(milestones ...*base.Milestone) err func (g *GiteaLocalUploader) CreateLabels(labels ...*base.Label) error { lbs := make([]*issues_model.Label, 0, len(labels)) for _, label := range labels { + // We must validate color here: + if !issues_model.LabelColorPattern.MatchString("#" + label.Color) { + log.Warn("Invalid label color: #%s for label: %s in migration to %s/%s", label.Color, label.Name, g.repoOwner, g.repoName) + label.Color = "ffffff" + } + lbs = append(lbs, &issues_model.Label{ RepoID: g.repo.ID, Name: label.Name, Description: label.Description, - Color: fmt.Sprintf("#%s", label.Color), + Color: "#" + label.Color, }) } @@ -247,6 +256,16 @@ func (g *GiteaLocalUploader) CreateReleases(releases ...*base.Release) error { } } + // SECURITY: The TagName must be a valid git ref + if release.TagName != "" && !git.IsValidRefPattern(release.TagName) { + release.TagName = "" + } + + // SECURITY: The TargetCommitish must be a valid git ref + if release.TargetCommitish != "" && !git.IsValidRefPattern(release.TargetCommitish) { + release.TargetCommitish = "" + } + rel := repo_model.Release{ RepoID: g.repo.ID, TagName: release.TagName, @@ -288,14 +307,15 @@ func (g *GiteaLocalUploader) CreateReleases(releases ...*base.Release) error { } } attach := repo_model.Attachment{ - UUID: gouuid.New().String(), + UUID: uuid.New().String(), Name: asset.Name, DownloadCount: int64(*asset.DownloadCount), Size: int64(*asset.Size), CreatedUnix: timeutil.TimeStamp(asset.Created.Unix()), } - // download attachment + // SECURITY: We cannot check the DownloadURL and DownloadFunc are safe here + // ... we must assume that they are safe and simply download the attachment err := func() error { // asset.DownloadURL maybe a local file var rc io.ReadCloser @@ -365,6 +385,12 @@ func (g *GiteaLocalUploader) CreateIssues(issues ...*base.Issue) error { } } + // SECURITY: issue.Ref needs to be a valid reference + if !git.IsValidRefPattern(issue.Ref) { + log.Warn("Invalid issue.Ref[%s] in issue #%d in %s/%s", issue.Ref, issue.Number, g.repoOwner, g.repoName) + issue.Ref = "" + } + is := issues_model.Issue{ RepoID: g.repo.ID, Repo: g.repo, @@ -496,102 +522,151 @@ func (g *GiteaLocalUploader) CreatePullRequests(prs ...*base.PullRequest) error } func (g *GiteaLocalUploader) updateGitForPullRequest(pr *base.PullRequest) (head string, err error) { - // download patch file + // SECURITY: this pr must have been must have been ensured safe + if !pr.EnsuredSafe { + log.Error("PR #%d in %s/%s has not been checked for safety.", pr.Number, g.repoOwner, g.repoName) + return "", fmt.Errorf("the PR[%d] was not checked for safety", pr.Number) + } + + // Anonymous function to download the patch file (allows us to use defer) err = func() error { + // if the patchURL is empty there is nothing to download if pr.PatchURL == "" { return nil } - // pr.PatchURL maybe a local file - ret, err := uri.Open(pr.PatchURL) + + // SECURITY: We will assume that the pr.PatchURL has been checked + // pr.PatchURL maybe a local file - but note EnsureSafe should be asserting that this safe + ret, err := uri.Open(pr.PatchURL) // TODO: This probably needs to use the downloader as there may be rate limiting issues here if err != nil { return err } defer ret.Close() + pullDir := filepath.Join(g.repo.RepoPath(), "pulls") if err = os.MkdirAll(pullDir, os.ModePerm); err != nil { return err } + f, err := os.Create(filepath.Join(pullDir, fmt.Sprintf("%d.patch", pr.Number))) if err != nil { return err } defer f.Close() + + // TODO: Should there be limits on the size of this file? _, err = io.Copy(f, ret) + return err }() if err != nil { return "", err } - // set head information - pullHead := filepath.Join(g.repo.RepoPath(), "refs", "pull", fmt.Sprintf("%d", pr.Number)) - if err := os.MkdirAll(pullHead, os.ModePerm); err != nil { - return "", err - } - p, err := os.Create(filepath.Join(pullHead, "head")) - if err != nil { - return "", err - } - _, err = p.WriteString(pr.Head.SHA) - p.Close() - if err != nil { - return "", err - } - head = "unknown repository" if pr.IsForkPullRequest() && pr.State != "closed" { - if pr.Head.OwnerName != "" { - remote := pr.Head.OwnerName - _, ok := g.prHeadCache[remote] - if !ok { - // git remote add - err := g.gitRepo.AddRemote(remote, pr.Head.CloneURL, true) - if err != nil { - log.Error("AddRemote failed: %s", err) - } else { - g.prHeadCache[remote] = struct{}{} - ok = true + // OK we want to fetch the current head as a branch from its CloneURL + + // 1. Is there a head clone URL available? + // 2. Is there a head ref available? + if pr.Head.CloneURL == "" || pr.Head.Ref == "" { + return head, nil + } + + // 3. We need to create a remote for this clone url + // ... maybe we already have a name for this remote + remote, ok := g.prHeadCache[pr.Head.CloneURL+":"] + if !ok { + // ... let's try ownername as a reasonable name + remote = pr.Head.OwnerName + if !git.IsValidRefPattern(remote) { + // ... let's try something less nice + remote = "head-pr-" + strconv.FormatInt(pr.Number, 10) + } + // ... now add the remote + err := g.gitRepo.AddRemote(remote, pr.Head.CloneURL, true) + if err != nil { + log.Error("PR #%d in %s/%s AddRemote[%s] failed: %v", pr.Number, g.repoOwner, g.repoName, remote, err) + } else { + g.prHeadCache[pr.Head.CloneURL+":"] = remote + ok = true + } + } + if !ok { + return head, nil + } + + // 4. Check if we already have this ref? + localRef, ok := g.prHeadCache[pr.Head.CloneURL+":"+pr.Head.Ref] + if !ok { + // ... We would normally name this migrated branch as / but we need to ensure that is safe + localRef = git.SanitizeRefPattern(pr.Head.OwnerName + "/" + pr.Head.Ref) + + // ... Now we must assert that this does not exist + if g.gitRepo.IsBranchExist(localRef) { + localRef = "head-pr-" + strconv.FormatInt(pr.Number, 10) + "/" + localRef + i := 0 + for g.gitRepo.IsBranchExist(localRef) { + if i > 5 { + // ... We tried, we really tried but this is just a seriously unfriendly repo + return head, nil + } + // OK just try some uuids! + localRef = git.SanitizeRefPattern("head-pr-" + strconv.FormatInt(pr.Number, 10) + uuid.New().String()) + i++ } } - if ok { - _, _, err = git.NewCommand(g.ctx, "fetch", "--no-tags", "--", remote, pr.Head.Ref).RunStdString(&git.RunOpts{Dir: g.repo.RepoPath()}) - if err != nil { - log.Error("Fetch branch from %s failed: %v", pr.Head.CloneURL, err) - } else { - headBranch := filepath.Join(g.repo.RepoPath(), "refs", "heads", pr.Head.OwnerName, pr.Head.Ref) - if err := os.MkdirAll(filepath.Dir(headBranch), os.ModePerm); err != nil { - return "", err - } - b, err := os.Create(headBranch) - if err != nil { - return "", err - } - _, err = b.WriteString(pr.Head.SHA) - b.Close() - if err != nil { - return "", err - } - head = pr.Head.OwnerName + "/" + pr.Head.Ref - } + fetchArg := pr.Head.Ref + ":" + git.BranchPrefix + localRef + if strings.HasPrefix(fetchArg, "-") { + fetchArg = git.BranchPrefix + fetchArg } + + _, _, err = git.NewCommand(g.ctx, "fetch", "--no-tags", "--", remote, fetchArg).RunStdString(&git.RunOpts{Dir: g.repo.RepoPath()}) + if err != nil { + log.Error("Fetch branch from %s failed: %v", pr.Head.CloneURL, err) + return head, nil + } + g.prHeadCache[pr.Head.CloneURL+":"+pr.Head.Ref] = localRef + head = localRef } - } else { + + // 5. Now if pr.Head.SHA == "" we should recover this to the head of this branch + if pr.Head.SHA == "" { + headSha, err := g.gitRepo.GetBranchCommitID(localRef) + if err != nil { + log.Error("unable to get head SHA of local head for PR #%d from %s in %s/%s. Error: %v", pr.Number, pr.Head.Ref, g.repoOwner, g.repoName, err) + return head, nil + } + pr.Head.SHA = headSha + } + + _, _, err = git.NewCommand(g.ctx, "update-ref", "--no-deref", pr.GetGitRefName(), pr.Head.SHA).RunStdString(&git.RunOpts{Dir: g.repo.RepoPath()}) + if err != nil { + return "", err + } + + return head, nil + } + + if pr.Head.Ref != "" { head = pr.Head.Ref - // Ensure the closed PR SHA still points to an existing ref + } + + // Ensure the closed PR SHA still points to an existing ref + if pr.Head.SHA == "" { + // The SHA is empty + log.Warn("Empty reference, no pull head for PR #%d in %s/%s", pr.Number, g.repoOwner, g.repoName) + } else { _, _, err = git.NewCommand(g.ctx, "rev-list", "--quiet", "-1", pr.Head.SHA).RunStdString(&git.RunOpts{Dir: g.repo.RepoPath()}) if err != nil { - if pr.Head.SHA != "" { - // Git update-ref remove bad references with a relative path - log.Warn("Deprecated local head, removing : %v", pr.Head.SHA) - err = g.gitRepo.RemoveReference(pr.GetGitRefName()) - } else { - // The SHA is empty, remove the head file - log.Warn("Empty reference, removing : %v", pullHead) - err = os.Remove(filepath.Join(pullHead, "head")) - } + // Git update-ref remove bad references with a relative path + log.Warn("Deprecated local head %s for PR #%d in %s/%s, removing %s", pr.Head.SHA, pr.Number, g.repoOwner, g.repoName, pr.GetGitRefName()) + } else { + // set head information + _, _, err = git.NewCommand(g.ctx, "update-ref", "--no-deref", pr.GetGitRefName(), pr.Head.SHA).RunStdString(&git.RunOpts{Dir: g.repo.RepoPath()}) if err != nil { - log.Error("Cannot remove local head ref, %v", err) + log.Error("unable to set %s as the local head for PR #%d from %s in %s/%s. Error: %v", pr.Head.SHA, pr.Number, pr.Head.Ref, g.repoOwner, g.repoName, err) } } } @@ -615,6 +690,20 @@ func (g *GiteaLocalUploader) newPullRequest(pr *base.PullRequest) (*issues_model return nil, fmt.Errorf("updateGitForPullRequest: %w", err) } + // Now we may need to fix the mergebase + if pr.Base.SHA == "" { + if pr.Base.Ref != "" && pr.Head.SHA != "" { + // A PR against a tag base does not make sense - therefore pr.Base.Ref must be a branch + // TODO: should we be checking for the refs/heads/ prefix on the pr.Base.Ref? (i.e. are these actually branches or refs) + pr.Base.SHA, _, err = g.gitRepo.GetMergeBase("", git.BranchPrefix+pr.Base.Ref, pr.Head.SHA) + if err != nil { + log.Error("Cannot determine the merge base for PR #%d in %s/%s. Error: %v", pr.Number, g.repoOwner, g.repoName, err) + } + } else { + log.Error("Cannot determine the merge base for PR #%d in %s/%s. Not enough information", pr.Number, g.repoOwner, g.repoName) + } + } + if pr.Created.IsZero() { if pr.Closed != nil { pr.Created = *pr.Closed @@ -728,6 +817,8 @@ func (g *GiteaLocalUploader) CreateReviews(reviews ...*base.Review) error { return err } + cms = append(cms, &cm) + // get pr pr, ok := g.prCache[issue.ID] if !ok { @@ -738,6 +829,17 @@ func (g *GiteaLocalUploader) CreateReviews(reviews ...*base.Review) error { } g.prCache[issue.ID] = pr } + if pr.MergeBase == "" { + // No mergebase -> no basis for any patches + log.Warn("PR #%d in %s/%s: does not have a merge base, all review comments will be ignored", pr.Index, g.repoOwner, g.repoName) + continue + } + + headCommitID, err := g.gitRepo.GetRefCommitID(pr.GetGitRefName()) + if err != nil { + log.Warn("PR #%d GetRefCommitID[%s] in %s/%s: %v, all review comments will be ignored", pr.Index, pr.GetGitRefName(), g.repoOwner, g.repoName, err) + continue + } for _, comment := range review.Comments { line := comment.Line @@ -746,11 +848,9 @@ func (g *GiteaLocalUploader) CreateReviews(reviews ...*base.Review) error { } else { _, _, line, _ = git.ParseDiffHunkString(comment.DiffHunk) } - headCommitID, err := g.gitRepo.GetRefCommitID(pr.GetGitRefName()) - if err != nil { - log.Warn("GetRefCommitID[%s]: %v, the review comment will be ignored", pr.GetGitRefName(), err) - continue - } + + // SECURITY: The TreePath must be cleaned! + comment.TreePath = path.Clean("/" + comment.TreePath)[1:] var patch string reader, writer := io.Pipe() @@ -775,6 +875,11 @@ func (g *GiteaLocalUploader) CreateReviews(reviews ...*base.Review) error { comment.UpdatedAt = comment.CreatedAt } + if !git.IsValidSHAPattern(comment.CommitID) { + log.Warn("Invalid comment CommitID[%s] on comment[%d] in PR #%d of %s/%s replaced with %s", comment.CommitID, pr.Index, g.repoOwner, g.repoName, headCommitID) + comment.CommitID = headCommitID + } + c := issues_model.Comment{ Type: issues_model.CommentTypeCode, IssueID: issue.ID, @@ -793,8 +898,6 @@ func (g *GiteaLocalUploader) CreateReviews(reviews ...*base.Review) error { cm.Comments = append(cm.Comments, &c) } - - cms = append(cms, &cm) } return issues_model.InsertReviews(cms) diff --git a/services/migrations/gitea_uploader_test.go b/services/migrations/gitea_uploader_test.go index 715dbf7a5c..af6230decb 100644 --- a/services/migrations/gitea_uploader_test.go +++ b/services/migrations/gitea_uploader_test.go @@ -390,7 +390,7 @@ func TestGiteaUploadUpdateGitForPullRequest(t *testing.T) { }, }, assertContent: func(t *testing.T, content string) { - assert.Contains(t, content, "AddRemote failed") + assert.Contains(t, content, "AddRemote") }, }, { @@ -439,7 +439,7 @@ func TestGiteaUploadUpdateGitForPullRequest(t *testing.T) { }, }, assertContent: func(t *testing.T, content string) { - assert.Contains(t, content, "Empty reference, removing") + assert.Contains(t, content, "Empty reference") assert.NotContains(t, content, "Cannot remove local head") }, }, @@ -467,7 +467,6 @@ func TestGiteaUploadUpdateGitForPullRequest(t *testing.T) { }, assertContent: func(t *testing.T, content string) { assert.Contains(t, content, "Deprecated local head") - assert.Contains(t, content, "Cannot remove local head") }, }, { @@ -504,6 +503,8 @@ func TestGiteaUploadUpdateGitForPullRequest(t *testing.T) { logger.SetLogger("buffer", "buffer", "{}") defer logger.DelLogger("buffer") + testCase.pr.EnsuredSafe = true + head, err := uploader.updateGitForPullRequest(&testCase.pr) assert.NoError(t, err) assert.EqualValues(t, testCase.head, head) diff --git a/services/migrations/github.go b/services/migrations/github.go index 5f5b430fa9..0ffdbb042a 100644 --- a/services/migrations/github.go +++ b/services/migrations/github.go @@ -51,7 +51,7 @@ func (f *GithubDownloaderV3Factory) New(ctx context.Context, opts base.MigrateOp oldOwner := fields[1] oldName := strings.TrimSuffix(fields[2], ".git") - log.Trace("Create github downloader: %s/%s", oldOwner, oldName) + log.Trace("Create github downloader BaseURL: %s %s/%s", baseURL, oldOwner, oldName) return NewGithubDownloaderV3(ctx, baseURL, opts.AuthUsername, opts.AuthPassword, opts.AuthToken, oldOwner, oldName), nil } @@ -67,6 +67,7 @@ type GithubDownloaderV3 struct { base.NullDownloader ctx context.Context clients []*github.Client + baseURL string repoOwner string repoName string userName string @@ -81,6 +82,7 @@ type GithubDownloaderV3 struct { func NewGithubDownloaderV3(ctx context.Context, baseURL, userName, password, token, repoOwner, repoName string) *GithubDownloaderV3 { downloader := GithubDownloaderV3{ userName: userName, + baseURL: baseURL, password: password, ctx: ctx, repoOwner: repoOwner, @@ -118,6 +120,20 @@ func NewGithubDownloaderV3(ctx context.Context, baseURL, userName, password, tok return &downloader } +// String implements Stringer +func (g *GithubDownloaderV3) String() string { + return fmt.Sprintf("migration from github server %s %s/%s", g.baseURL, g.repoOwner, g.repoName) +} + +// ColorFormat provides a basic color format for a GithubDownloader +func (g *GithubDownloaderV3) ColorFormat(s fmt.State) { + if g == nil { + log.ColorFprintf(s, "") + return + } + log.ColorFprintf(s, "migration from github server %s %s/%s", g.baseURL, g.repoOwner, g.repoName) +} + func (g *GithubDownloaderV3) addClient(client *http.Client, baseURL string) { githubClient := github.NewClient(client) if baseURL != "https://github.com" { @@ -322,33 +338,44 @@ func (g *GithubDownloaderV3) convertGithubRelease(rel *github.RepositoryRelease) Updated: asset.UpdatedAt.Time, DownloadFunc: func() (io.ReadCloser, error) { g.waitAndPickClient() - asset, redirectURL, err := g.getClient().Repositories.DownloadReleaseAsset(g.ctx, g.repoOwner, g.repoName, assetID, nil) + readCloser, redirectURL, err := g.getClient().Repositories.DownloadReleaseAsset(g.ctx, g.repoOwner, g.repoName, assetID, nil) if err != nil { return nil, err } if err := g.RefreshRate(); err != nil { log.Error("g.getClient().RateLimits: %s", err) } - if asset == nil { - if redirectURL != "" { - g.waitAndPickClient() - req, err := http.NewRequestWithContext(g.ctx, "GET", redirectURL, nil) - if err != nil { - return nil, err - } - resp, err := httpClient.Do(req) - err1 := g.RefreshRate() - if err1 != nil { - log.Error("g.getClient().RateLimits: %s", err1) - } - if err != nil { - return nil, err - } - return resp.Body, nil - } - return nil, fmt.Errorf("No release asset found for %d", assetID) + + if readCloser != nil { + return readCloser, nil } - return asset, nil + + if redirectURL == "" { + return nil, fmt.Errorf("no release asset found for %d", assetID) + } + + // Prevent open redirect + if !hasBaseURL(redirectURL, g.baseURL) && + !hasBaseURL(redirectURL, "https://objects.githubusercontent.com/") { + WarnAndNotice("Unexpected AssetURL for assetID[%d] in %s: %s", asset.GetID(), g, redirectURL) + + return io.NopCloser(strings.NewReader(redirectURL)), nil + } + + g.waitAndPickClient() + req, err := http.NewRequestWithContext(g.ctx, "GET", redirectURL, nil) + if err != nil { + return nil, err + } + resp, err := httpClient.Do(req) + err1 := g.RefreshRate() + if err1 != nil { + log.Error("g.RefreshRate(): %s", err1) + } + if err != nil { + return nil, err + } + return resp.Body, nil }, }) } @@ -697,7 +724,7 @@ func (g *GithubDownloaderV3) GetPullRequests(page, perPage int) ([]*base.PullReq SHA: pr.GetHead().GetSHA(), OwnerName: pr.GetHead().GetUser().GetLogin(), RepoName: pr.GetHead().GetRepo().GetName(), - CloneURL: pr.GetHead().GetRepo().GetCloneURL(), + CloneURL: pr.GetHead().GetRepo().GetCloneURL(), // see below for SECURITY related issues here }, Base: base.PullRequestBranch{ Ref: pr.GetBase().GetRef(), @@ -705,10 +732,13 @@ func (g *GithubDownloaderV3) GetPullRequests(page, perPage int) ([]*base.PullReq RepoName: pr.GetBase().GetRepo().GetName(), OwnerName: pr.GetBase().GetUser().GetLogin(), }, - PatchURL: pr.GetPatchURL(), + PatchURL: pr.GetPatchURL(), // see below for SECURITY related issues here Reactions: reactions, ForeignIndex: int64(*pr.Number), }) + + // SECURITY: Ensure that the PR is safe + _ = CheckAndEnsureSafePR(allPRs[len(allPRs)-1], g.baseURL, g) } return allPRs, len(prs) < perPage, nil diff --git a/services/migrations/gitlab.go b/services/migrations/gitlab.go index 1919ad4dc6..95bec59e83 100644 --- a/services/migrations/gitlab.go +++ b/services/migrations/gitlab.go @@ -63,6 +63,7 @@ type GitlabDownloader struct { base.NullDownloader ctx context.Context client *gitlab.Client + baseURL string repoID int repoName string issueCount int64 @@ -125,12 +126,27 @@ func NewGitlabDownloader(ctx context.Context, baseURL, repoPath, username, passw return &GitlabDownloader{ ctx: ctx, client: gitlabClient, + baseURL: baseURL, repoID: gr.ID, repoName: gr.Name, maxPerPage: 100, }, nil } +// String implements Stringer +func (g *GitlabDownloader) String() string { + return fmt.Sprintf("migration from gitlab server %s [%d]/%s", g.baseURL, g.repoID, g.repoName) +} + +// ColorFormat provides a basic color format for a GitlabDownloader +func (g *GitlabDownloader) ColorFormat(s fmt.State) { + if g == nil { + log.ColorFprintf(s, "") + return + } + log.ColorFprintf(s, "migration from gitlab server %s [%d]/%s", g.baseURL, g.repoID, g.repoName) +} + // SetContext set context func (g *GitlabDownloader) SetContext(ctx context.Context) { g.ctx = ctx @@ -308,6 +324,11 @@ func (g *GitlabDownloader) convertGitlabRelease(rel *gitlab.Release) *base.Relea return nil, err } + if !hasBaseURL(link.URL, g.baseURL) { + WarnAndNotice("Unexpected AssetURL for assetID[%d] in %s: %s", asset.ID, g, link.URL) + return io.NopCloser(strings.NewReader(link.URL)), nil + } + req, err := http.NewRequest("GET", link.URL, nil) if err != nil { return nil, err @@ -612,6 +633,9 @@ func (g *GitlabDownloader) GetPullRequests(page, perPage int) ([]*base.PullReque ForeignIndex: int64(pr.IID), Context: gitlabIssueContext{IsMergeRequest: true}, }) + + // SECURITY: Ensure that the PR is safe + _ = CheckAndEnsureSafePR(allPRs[len(allPRs)-1], g.baseURL, g) } return allPRs, len(prs) < perPage, nil diff --git a/services/migrations/gogs.go b/services/migrations/gogs.go index a28033218e..46cc3ca416 100644 --- a/services/migrations/gogs.go +++ b/services/migrations/gogs.go @@ -73,6 +73,20 @@ type GogsDownloader struct { transport http.RoundTripper } +// String implements Stringer +func (g *GogsDownloader) String() string { + return fmt.Sprintf("migration from gogs server %s %s/%s", g.baseURL, g.repoOwner, g.repoName) +} + +// ColorFormat provides a basic color format for a GogsDownloader +func (g *GogsDownloader) ColorFormat(s fmt.State) { + if g == nil { + log.ColorFprintf(s, "") + return + } + log.ColorFprintf(s, "migration from gogs server %s %s/%s", g.baseURL, g.repoOwner, g.repoName) +} + // SetContext set context func (g *GogsDownloader) SetContext(ctx context.Context) { g.ctx = ctx diff --git a/services/migrations/migrate.go b/services/migrations/migrate.go index a5715072c5..040f0aebb1 100644 --- a/services/migrations/migrate.go +++ b/services/migrations/migrate.go @@ -198,19 +198,25 @@ func migrateRepository(doer *user_model.User, downloader base.Downloader, upload return err } - // If the downloader is not a RepositoryRestorer then we need to recheck the CloneURL + // SECURITY: If the downloader is not a RepositoryRestorer then we need to recheck the CloneURL if _, ok := downloader.(*RepositoryRestorer); !ok { // Now the clone URL can be rewritten by the downloader so we must recheck if err := IsMigrateURLAllowed(repo.CloneURL, doer); err != nil { return err } - // And so can the original URL too so again we must recheck - if repo.OriginalURL != "" { - if err := IsMigrateURLAllowed(repo.OriginalURL, doer); err != nil { - return err + // SECURITY: Ensure that we haven't been redirected from an external to a local filesystem + // Now we know all of these must parse + cloneAddrURL, _ := url.Parse(opts.CloneAddr) + cloneURL, _ := url.Parse(repo.CloneURL) + + if cloneURL.Scheme == "file" || cloneURL.Scheme == "" { + if cloneAddrURL.Scheme != "file" && cloneAddrURL.Scheme != "" { + return fmt.Errorf("repo info has changed from external to local filesystem") } } + + // We don't actually need to check the OriginalURL as it isn't used anywhere } log.Trace("migrating git data from %s", repo.CloneURL) diff --git a/services/migrations/onedev.go b/services/migrations/onedev.go index a46ba35f72..8cc826c3b4 100644 --- a/services/migrations/onedev.go +++ b/services/migrations/onedev.go @@ -110,6 +110,20 @@ func NewOneDevDownloader(ctx context.Context, baseURL *url.URL, username, passwo return downloader } +// String implements Stringer +func (d *OneDevDownloader) String() string { + return fmt.Sprintf("migration from oneDev server %s [%d]/%s", d.baseURL, d.repoID, d.repoName) +} + +// ColorFormat provides a basic color format for a OneDevDownloader +func (d *OneDevDownloader) ColorFormat(s fmt.State) { + if d == nil { + log.ColorFprintf(s, "") + return + } + log.ColorFprintf(s, "migration from oneDev server %s [%d]/%s", d.baseURL, d.repoID, d.repoName) +} + func (d *OneDevDownloader) callAPI(endpoint string, parameter map[string]string, result interface{}) error { u, err := d.baseURL.Parse(endpoint) if err != nil { @@ -542,6 +556,9 @@ func (d *OneDevDownloader) GetPullRequests(page, perPage int) ([]*base.PullReque ForeignIndex: pr.ID, Context: onedevIssueContext{IsPullRequest: true}, }) + + // SECURITY: Ensure that the PR is safe + _ = CheckAndEnsureSafePR(pullRequests[len(pullRequests)-1], d.baseURL.String(), d) } return pullRequests, len(pullRequests) == 0, nil diff --git a/services/migrations/restore.go b/services/migrations/restore.go index 8c9654a7e3..c3fbcbb25f 100644 --- a/services/migrations/restore.go +++ b/services/migrations/restore.go @@ -243,6 +243,7 @@ func (r *RepositoryRestorer) GetPullRequests(page, perPage int) ([]*base.PullReq } for _, pr := range pulls { pr.PatchURL = "file://" + filepath.Join(r.baseDir, pr.PatchURL) + CheckAndEnsureSafePR(pr, "", r) } return pulls, true, nil } diff --git a/services/release/release.go b/services/release/release.go index e2e8c13699..ae610b0e3c 100644 --- a/services/release/release.go +++ b/services/release/release.go @@ -37,6 +37,9 @@ func createTag(gitRepo *git.Repository, rel *repo_model.Release, msg string) (bo if err != nil { return false, fmt.Errorf("GetProtectedTags: %v", err) } + + // Trim '--' prefix to prevent command line argument vulnerability. + rel.TagName = strings.TrimPrefix(rel.TagName, "--") isAllowed, err := git_model.IsUserAllowedToControlTag(protectedTags, rel.TagName, rel.PublisherID) if err != nil { return false, err @@ -52,8 +55,6 @@ func createTag(gitRepo *git.Repository, rel *repo_model.Release, msg string) (bo return false, fmt.Errorf("createTag::GetCommit[%v]: %v", rel.Target, err) } - // Trim '--' prefix to prevent command line argument vulnerability. - rel.TagName = strings.TrimPrefix(rel.TagName, "--") if len(msg) > 0 { if err = gitRepo.CreateAnnotatedTag(rel.TagName, msg, commit.ID.String()); err != nil { if strings.Contains(err.Error(), "is not a valid tag name") { @@ -308,7 +309,7 @@ func DeleteReleaseByID(ctx context.Context, id int64, doer *user_model.User, del } } - if stdout, _, err := git.NewCommand(ctx, "tag", "-d", rel.TagName). + if stdout, _, err := git.NewCommand(ctx, "tag", "-d", "--", rel.TagName). SetDescription(fmt.Sprintf("DeleteReleaseByID (git tag -d): %d", rel.ID)). RunStdString(&git.RunOpts{Dir: repo.RepoPath()}); err != nil && !strings.Contains(err.Error(), "not found") { log.Error("DeleteReleaseByID (git tag -d): %d in %v Failed:\nStdout: %s\nError: %v", rel.ID, repo, stdout, err)