From 6e2a59e4ceb89c4e369a5ff1cac95c31f7e7ecd6 Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Tue, 2 Jul 2019 04:15:14 +0200 Subject: [PATCH] Use commit graph files for listing pages (#7314) * Experimental support for git commit graph files and bloom filter index Signed-off-by: Filip Navara * Force vendor of commitgraph Signed-off-by: Filip Navara * Remove bloom filter experiment and debug prints * Remove old code for building commit graphs * Remove unused function * Remove mmap usage * gofmt * sort vendor/modules.txt * Add copyright header and log commit-graph error --- modules/git/commit_info.go | 38 ++- modules/git/notes.go | 12 +- modules/git/repo_commitgraph.go | 35 +++ .../format/commitgraph/commitgraph.go | 35 +++ .../plumbing/format/commitgraph/doc.go | 103 +++++++ .../plumbing/format/commitgraph/encoder.go | 190 +++++++++++++ .../plumbing/format/commitgraph/file.go | 259 ++++++++++++++++++ .../plumbing/format/commitgraph/memory.go | 72 +++++ .../plumbing/object/commitgraph/commitnode.go | 98 +++++++ .../object/commitgraph/commitnode_graph.go | 131 +++++++++ .../object/commitgraph/commitnode_object.go | 90 ++++++ .../commitgraph/commitnode_walker_ctime.go | 105 +++++++ .../plumbing/object/commitgraph/doc.go | 7 + vendor/modules.txt | 4 +- 14 files changed, 1166 insertions(+), 13 deletions(-) create mode 100644 modules/git/repo_commitgraph.go create mode 100644 vendor/gopkg.in/src-d/go-git.v4/plumbing/format/commitgraph/commitgraph.go create mode 100644 vendor/gopkg.in/src-d/go-git.v4/plumbing/format/commitgraph/doc.go create mode 100644 vendor/gopkg.in/src-d/go-git.v4/plumbing/format/commitgraph/encoder.go create mode 100644 vendor/gopkg.in/src-d/go-git.v4/plumbing/format/commitgraph/file.go create mode 100644 vendor/gopkg.in/src-d/go-git.v4/plumbing/format/commitgraph/memory.go create mode 100644 vendor/gopkg.in/src-d/go-git.v4/plumbing/object/commitgraph/commitnode.go create mode 100644 
vendor/gopkg.in/src-d/go-git.v4/plumbing/object/commitgraph/commitnode_graph.go create mode 100644 vendor/gopkg.in/src-d/go-git.v4/plumbing/object/commitgraph/commitnode_object.go create mode 100644 vendor/gopkg.in/src-d/go-git.v4/plumbing/object/commitgraph/commitnode_walker_ctime.go create mode 100644 vendor/gopkg.in/src-d/go-git.v4/plumbing/object/commitgraph/doc.go diff --git a/modules/git/commit_info.go b/modules/git/commit_info.go index 9270878c7f..8417226f8b 100644 --- a/modules/git/commit_info.go +++ b/modules/git/commit_info.go @@ -8,6 +8,7 @@ import ( "github.com/emirpasic/gods/trees/binaryheap" "gopkg.in/src-d/go-git.v4/plumbing" "gopkg.in/src-d/go-git.v4/plumbing/object" + cgobject "gopkg.in/src-d/go-git.v4/plumbing/object/commitgraph" ) // GetCommitsInfo gets information of all commits that are corresponding to these entries @@ -19,7 +20,12 @@ func (tes Entries) GetCommitsInfo(commit *Commit, treePath string, cache LastCom entryPaths[i+1] = entry.Name() } - c, err := commit.repo.gogitRepo.CommitObject(plumbing.Hash(commit.ID)) + commitNodeIndex, commitGraphFile := commit.repo.CommitNodeIndex() + if commitGraphFile != nil { + defer commitGraphFile.Close() + } + + c, err := commitNodeIndex.Get(plumbing.Hash(commit.ID)) if err != nil { return nil, nil, err } @@ -69,14 +75,14 @@ func (tes Entries) GetCommitsInfo(commit *Commit, treePath string, cache LastCom } type commitAndPaths struct { - commit *object.Commit + commit cgobject.CommitNode // Paths that are still on the branch represented by commit paths []string // Set of hashes for the paths hashes map[string]plumbing.Hash } -func getCommitTree(c *object.Commit, treePath string) (*object.Tree, error) { +func getCommitTree(c cgobject.CommitNode, treePath string) (*object.Tree, error) { tree, err := c.Tree() if err != nil { return nil, err @@ -93,7 +99,7 @@ func getCommitTree(c *object.Commit, treePath string) (*object.Tree, error) { return tree, nil } -func getFileHashes(c *object.Commit, treePath 
string, paths []string) (map[string]plumbing.Hash, error) { +func getFileHashes(c cgobject.CommitNode, treePath string, paths []string) (map[string]plumbing.Hash, error) { tree, err := getCommitTree(c, treePath) if err == object.ErrDirectoryNotFound { // The whole tree didn't exist, so return empty map @@ -118,16 +124,16 @@ func getFileHashes(c *object.Commit, treePath string, paths []string) (map[strin return hashes, nil } -func getLastCommitForPaths(c *object.Commit, treePath string, paths []string) (map[string]*object.Commit, error) { +func getLastCommitForPaths(c cgobject.CommitNode, treePath string, paths []string) (map[string]*object.Commit, error) { // We do a tree traversal with nodes sorted by commit time heap := binaryheap.NewWith(func(a, b interface{}) int { - if a.(*commitAndPaths).commit.Committer.When.Before(b.(*commitAndPaths).commit.Committer.When) { + if a.(*commitAndPaths).commit.CommitTime().Before(b.(*commitAndPaths).commit.CommitTime()) { return 1 } return -1 }) - result := make(map[string]*object.Commit) + resultNodes := make(map[string]cgobject.CommitNode) initialHashes, err := getFileHashes(c, treePath, paths) if err != nil { return nil, err @@ -145,9 +151,9 @@ func getLastCommitForPaths(c *object.Commit, treePath string, paths []string) (m // Load the parent commits for the one we are currently examining numParents := current.commit.NumParents() - var parents []*object.Commit + var parents []cgobject.CommitNode for i := 0; i < numParents; i++ { - parent, err := current.commit.Parent(i) + parent, err := current.commit.ParentNode(i) if err != nil { break } @@ -174,7 +180,7 @@ func getLastCommitForPaths(c *object.Commit, treePath string, paths []string) (m for i, path := range current.paths { // The results could already contain some newer change for the same path, // so don't override that and bail out on the file early. 
- if result[path] == nil { + if resultNodes[path] == nil { if pathUnchanged[i] { // The path existed with the same hash in at least one parent so it could // not have been changed in this commit directly. @@ -188,7 +194,7 @@ func getLastCommitForPaths(c *object.Commit, treePath string, paths []string) (m // - We are looking at a merge commit and the hash of the file doesn't // match any of the hashes being merged. This is more common for directories, // but it can also happen if a file is changed through conflict resolution. - result[path] = current.commit + resultNodes[path] = current.commit } } } @@ -222,5 +228,15 @@ func getLastCommitForPaths(c *object.Commit, treePath string, paths []string) (m } } + // Post-processing + result := make(map[string]*object.Commit) + for path, commitNode := range resultNodes { + var err error + result[path], err = commitNode.Commit() + if err != nil { + return nil, err + } + } + return result, nil } diff --git a/modules/git/notes.go b/modules/git/notes.go index 7aa5d89a79..a62c558787 100644 --- a/modules/git/notes.go +++ b/modules/git/notes.go @@ -50,7 +50,17 @@ func GetNote(repo *Repository, commitID string, note *Note) error { return err } - lastCommits, err := getLastCommitForPaths(commit, "", []string{commitID}) + commitNodeIndex, commitGraphFile := repo.CommitNodeIndex() + if commitGraphFile != nil { + defer commitGraphFile.Close() + } + + commitNode, err := commitNodeIndex.Get(commit.Hash) + if err != nil { + return nil + } + + lastCommits, err := getLastCommitForPaths(commitNode, "", []string{commitID}) if err != nil { return err } diff --git a/modules/git/repo_commitgraph.go b/modules/git/repo_commitgraph.go new file mode 100644 index 0000000000..52263852dc --- /dev/null +++ b/modules/git/repo_commitgraph.go @@ -0,0 +1,35 @@ +// Copyright 2019 The Gitea Authors. +// All rights reserved. +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file. 
+ +package git + +import ( + "os" + "path" + + gitealog "code.gitea.io/gitea/modules/log" + "gopkg.in/src-d/go-git.v4/plumbing/format/commitgraph" + cgobject "gopkg.in/src-d/go-git.v4/plumbing/object/commitgraph" +) + +// CommitNodeIndex returns the index for walking commit graph +func (r *Repository) CommitNodeIndex() (cgobject.CommitNodeIndex, *os.File) { + indexPath := path.Join(r.Path, "objects", "info", "commit-graph") + + file, err := os.Open(indexPath) + if err == nil { + var index commitgraph.Index + index, err = commitgraph.OpenFileIndex(file) + if err == nil { + return cgobject.NewGraphCommitNodeIndex(index, r.gogitRepo.Storer), file + } + } + + if !os.IsNotExist(err) { + gitealog.Warn("Unable to read commit-graph for %s: %v", r.Path, err) + } + + return cgobject.NewObjectCommitNodeIndex(r.gogitRepo.Storer), nil +} diff --git a/vendor/gopkg.in/src-d/go-git.v4/plumbing/format/commitgraph/commitgraph.go b/vendor/gopkg.in/src-d/go-git.v4/plumbing/format/commitgraph/commitgraph.go new file mode 100644 index 0000000000..e43cd8978a --- /dev/null +++ b/vendor/gopkg.in/src-d/go-git.v4/plumbing/format/commitgraph/commitgraph.go @@ -0,0 +1,35 @@ +package commitgraph + +import ( + "time" + + "gopkg.in/src-d/go-git.v4/plumbing" +) + +// CommitData is a reduced representation of Commit as presented in the commit graph +// file. It is merely useful as an optimization for walking the commit graphs. +type CommitData struct { + // TreeHash is the hash of the root tree of the commit. + TreeHash plumbing.Hash + // ParentIndexes are the indexes of the parent commits of the commit. + ParentIndexes []int + // ParentHashes are the hashes of the parent commits of the commit. + ParentHashes []plumbing.Hash + // Generation number is the pre-computed generation in the commit graph + // or zero if not available + Generation int + // When is the timestamp of the commit. 
+ When time.Time +} + +// Index represents a representation of commit graph that allows indexed +// access to the nodes using commit object hash +type Index interface { + // GetIndexByHash gets the index in the commit graph from commit hash, if available + GetIndexByHash(h plumbing.Hash) (int, error) + // GetNodeByIndex gets the commit node from the commit graph using index + // obtained from child node, if available + GetCommitDataByIndex(i int) (*CommitData, error) + // Hashes returns all the hashes that are available in the index + Hashes() []plumbing.Hash +} diff --git a/vendor/gopkg.in/src-d/go-git.v4/plumbing/format/commitgraph/doc.go b/vendor/gopkg.in/src-d/go-git.v4/plumbing/format/commitgraph/doc.go new file mode 100644 index 0000000000..41cd8b1e31 --- /dev/null +++ b/vendor/gopkg.in/src-d/go-git.v4/plumbing/format/commitgraph/doc.go @@ -0,0 +1,103 @@ +// Package commitgraph implements encoding and decoding of commit-graph files. +// +// Git commit graph format +// ======================= +// +// The Git commit graph stores a list of commit OIDs and some associated +// metadata, including: +// +// - The generation number of the commit. Commits with no parents have +// generation number 1; commits with parents have generation number +// one more than the maximum generation number of its parents. We +// reserve zero as special, and can be used to mark a generation +// number invalid or as "not computed". +// +// - The root tree OID. +// +// - The commit date. +// +// - The parents of the commit, stored using positional references within +// the graph file. +// +// These positional references are stored as unsigned 32-bit integers +// corresponding to the array position within the list of commit OIDs. Due +// to some special constants we use to track parents, we can store at most +// (1 << 30) + (1 << 29) + (1 << 28) - 1 (around 1.8 billion) commits. 
+// +// == Commit graph files have the following format: +// +// In order to allow extensions that add extra data to the graph, we organize +// the body into "chunks" and provide a binary lookup table at the beginning +// of the body. The header includes certain values, such as number of chunks +// and hash type. +// +// All 4-byte numbers are in network order. +// +// HEADER: +// +// 4-byte signature: +// The signature is: {'C', 'G', 'P', 'H'} +// +// 1-byte version number: +// Currently, the only valid version is 1. +// +// 1-byte Hash Version (1 = SHA-1) +// We infer the hash length (H) from this value. +// +// 1-byte number (C) of "chunks" +// +// 1-byte (reserved for later use) +// Current clients should ignore this value. +// +// CHUNK LOOKUP: +// +// (C + 1) * 12 bytes listing the table of contents for the chunks: +// First 4 bytes describe the chunk id. Value 0 is a terminating label. +// Other 8 bytes provide the byte-offset in current file for chunk to +// start. (Chunks are ordered contiguously in the file, so you can infer +// the length using the next chunk position if necessary.) Each chunk +// ID appears at most once. +// +// The remaining data in the body is described one chunk at a time, and +// these chunks may be given in any order. Chunks are required unless +// otherwise specified. +// +// CHUNK DATA: +// +// OID Fanout (ID: {'O', 'I', 'D', 'F'}) (256 * 4 bytes) +// The ith entry, F[i], stores the number of OIDs with first +// byte at most i. Thus F[255] stores the total +// number of commits (N). +// +// OID Lookup (ID: {'O', 'I', 'D', 'L'}) (N * H bytes) +// The OIDs for all commits in the graph, sorted in ascending order. +// +// Commit Data (ID: {'C', 'D', 'A', 'T' }) (N * (H + 16) bytes) +// * The first H bytes are for the OID of the root tree. +// * The next 8 bytes are for the positions of the first two parents +// of the ith commit. Stores value 0x70000000 if no parent in that +// position. 
If there are more than two parents, the second value +// has its most-significant bit on and the other bits store an array +// position into the Extra Edge List chunk. +// * The next 8 bytes store the generation number of the commit and +// the commit time in seconds since EPOCH. The generation number +// uses the higher 30 bits of the first 4 bytes, while the commit +// time uses the 32 bits of the second 4 bytes, along with the lowest +// 2 bits of the lowest byte, storing the 33rd and 34th bit of the +// commit time. +// +// Extra Edge List (ID: {'E', 'D', 'G', 'E'}) [Optional] +// This list of 4-byte values store the second through nth parents for +// all octopus merges. The second parent value in the commit data stores +// an array position within this list along with the most-significant bit +// on. Starting at that array position, iterate through this list of commit +// positions for the parents until reaching a value with the most-significant +// bit on. The other bits correspond to the position of the last parent. +// +// TRAILER: +// +// H-byte HASH-checksum of all of the above. +// +// Source: +// https://raw.githubusercontent.com/git/git/master/Documentation/technical/commit-graph-format.txt +package commitgraph diff --git a/vendor/gopkg.in/src-d/go-git.v4/plumbing/format/commitgraph/encoder.go b/vendor/gopkg.in/src-d/go-git.v4/plumbing/format/commitgraph/encoder.go new file mode 100644 index 0000000000..a06871cb7c --- /dev/null +++ b/vendor/gopkg.in/src-d/go-git.v4/plumbing/format/commitgraph/encoder.go @@ -0,0 +1,190 @@ +package commitgraph + +import ( + "crypto/sha1" + "hash" + "io" + + "gopkg.in/src-d/go-git.v4/plumbing" + "gopkg.in/src-d/go-git.v4/utils/binary" +) + +// Encoder writes MemoryIndex structs to an output stream. +type Encoder struct { + io.Writer + hash hash.Hash +} + +// NewEncoder returns a new stream encoder that writes to w. 
+func NewEncoder(w io.Writer) *Encoder { + h := sha1.New() + mw := io.MultiWriter(w, h) + return &Encoder{mw, h} +} + +// Encode writes an index into the commit-graph file +func (e *Encoder) Encode(idx Index) error { + var err error + + // Get all the hashes in the input index + hashes := idx.Hashes() + + // Sort the inout and prepare helper structures we'll need for encoding + hashToIndex, fanout, extraEdgesCount := e.prepare(idx, hashes) + + chunkSignatures := [][]byte{oidFanoutSignature, oidLookupSignature, commitDataSignature} + chunkSizes := []uint64{4 * 256, uint64(len(hashes)) * 20, uint64(len(hashes)) * 36} + if extraEdgesCount > 0 { + chunkSignatures = append(chunkSignatures, extraEdgeListSignature) + chunkSizes = append(chunkSizes, uint64(extraEdgesCount)*4) + } + + if err = e.encodeFileHeader(len(chunkSignatures)); err != nil { + return err + } + if err = e.encodeChunkHeaders(chunkSignatures, chunkSizes); err != nil { + return err + } + if err = e.encodeFanout(fanout); err != nil { + return err + } + if err = e.encodeOidLookup(hashes); err != nil { + return err + } + if extraEdges, err := e.encodeCommitData(hashes, hashToIndex, idx); err == nil { + if err = e.encodeExtraEdges(extraEdges); err != nil { + return err + } + } + if err != nil { + return err + } + return e.encodeChecksum() +} + +func (e *Encoder) prepare(idx Index, hashes []plumbing.Hash) (hashToIndex map[plumbing.Hash]uint32, fanout []uint32, extraEdgesCount uint32) { + // Sort the hashes and build our index + plumbing.HashesSort(hashes) + hashToIndex = make(map[plumbing.Hash]uint32) + fanout = make([]uint32, 256) + for i, hash := range hashes { + hashToIndex[hash] = uint32(i) + fanout[hash[0]]++ + } + + // Convert the fanout to cumulative values + for i := 1; i <= 0xff; i++ { + fanout[i] += fanout[i-1] + } + + // Find out if we will need extra edge table + for i := 0; i < len(hashes); i++ { + v, _ := idx.GetCommitDataByIndex(i) + if len(v.ParentHashes) > 2 { + extraEdgesCount += 
uint32(len(v.ParentHashes) - 1) + break + } + } + + return +} + +func (e *Encoder) encodeFileHeader(chunkCount int) (err error) { + if _, err = e.Write(commitFileSignature); err == nil { + _, err = e.Write([]byte{1, 1, byte(chunkCount), 0}) + } + return +} + +func (e *Encoder) encodeChunkHeaders(chunkSignatures [][]byte, chunkSizes []uint64) (err error) { + // 8 bytes of file header, 12 bytes for each chunk header and 12 byte for terminator + offset := uint64(8 + len(chunkSignatures)*12 + 12) + for i, signature := range chunkSignatures { + if _, err = e.Write(signature); err == nil { + err = binary.WriteUint64(e, offset) + } + if err != nil { + return + } + offset += chunkSizes[i] + } + if _, err = e.Write(lastSignature); err == nil { + err = binary.WriteUint64(e, offset) + } + return +} + +func (e *Encoder) encodeFanout(fanout []uint32) (err error) { + for i := 0; i <= 0xff; i++ { + if err = binary.WriteUint32(e, fanout[i]); err != nil { + return + } + } + return +} + +func (e *Encoder) encodeOidLookup(hashes []plumbing.Hash) (err error) { + for _, hash := range hashes { + if _, err = e.Write(hash[:]); err != nil { + return err + } + } + return +} + +func (e *Encoder) encodeCommitData(hashes []plumbing.Hash, hashToIndex map[plumbing.Hash]uint32, idx Index) (extraEdges []uint32, err error) { + for _, hash := range hashes { + origIndex, _ := idx.GetIndexByHash(hash) + commitData, _ := idx.GetCommitDataByIndex(origIndex) + if _, err = e.Write(commitData.TreeHash[:]); err != nil { + return + } + + var parent1, parent2 uint32 + if len(commitData.ParentHashes) == 0 { + parent1 = parentNone + parent2 = parentNone + } else if len(commitData.ParentHashes) == 1 { + parent1 = hashToIndex[commitData.ParentHashes[0]] + parent2 = parentNone + } else if len(commitData.ParentHashes) == 2 { + parent1 = hashToIndex[commitData.ParentHashes[0]] + parent2 = hashToIndex[commitData.ParentHashes[1]] + } else if len(commitData.ParentHashes) > 2 { + parent1 = 
hashToIndex[commitData.ParentHashes[0]] + parent2 = uint32(len(extraEdges)) | parentOctopusUsed + for _, parentHash := range commitData.ParentHashes[1:] { + extraEdges = append(extraEdges, hashToIndex[parentHash]) + } + extraEdges[len(extraEdges)-1] |= parentLast + } + + if err = binary.WriteUint32(e, parent1); err == nil { + err = binary.WriteUint32(e, parent2) + } + if err != nil { + return + } + + unixTime := uint64(commitData.When.Unix()) + unixTime |= uint64(commitData.Generation) << 34 + if err = binary.WriteUint64(e, unixTime); err != nil { + return + } + } + return +} + +func (e *Encoder) encodeExtraEdges(extraEdges []uint32) (err error) { + for _, parent := range extraEdges { + if err = binary.WriteUint32(e, parent); err != nil { + return + } + } + return +} + +func (e *Encoder) encodeChecksum() error { + _, err := e.Write(e.hash.Sum(nil)[:20]) + return err +} diff --git a/vendor/gopkg.in/src-d/go-git.v4/plumbing/format/commitgraph/file.go b/vendor/gopkg.in/src-d/go-git.v4/plumbing/format/commitgraph/file.go new file mode 100644 index 0000000000..175d279333 --- /dev/null +++ b/vendor/gopkg.in/src-d/go-git.v4/plumbing/format/commitgraph/file.go @@ -0,0 +1,259 @@ +package commitgraph + +import ( + "bytes" + encbin "encoding/binary" + "errors" + "io" + "time" + + "gopkg.in/src-d/go-git.v4/plumbing" + "gopkg.in/src-d/go-git.v4/utils/binary" +) + +var ( + // ErrUnsupportedVersion is returned by OpenFileIndex when the commit graph + // file version is not supported. + ErrUnsupportedVersion = errors.New("Unsupported version") + // ErrUnsupportedHash is returned by OpenFileIndex when the commit graph + // hash function is not supported. Currently only SHA-1 is defined and + // supported + ErrUnsupportedHash = errors.New("Unsupported hash algorithm") + // ErrMalformedCommitGraphFile is returned by OpenFileIndex when the commit + // graph file is corrupted. 
+ ErrMalformedCommitGraphFile = errors.New("Malformed commit graph file") + + commitFileSignature = []byte{'C', 'G', 'P', 'H'} + oidFanoutSignature = []byte{'O', 'I', 'D', 'F'} + oidLookupSignature = []byte{'O', 'I', 'D', 'L'} + commitDataSignature = []byte{'C', 'D', 'A', 'T'} + extraEdgeListSignature = []byte{'E', 'D', 'G', 'E'} + lastSignature = []byte{0, 0, 0, 0} + + parentNone = uint32(0x70000000) + parentOctopusUsed = uint32(0x80000000) + parentOctopusMask = uint32(0x7fffffff) + parentLast = uint32(0x80000000) +) + +type fileIndex struct { + reader io.ReaderAt + fanout [256]int + oidFanoutOffset int64 + oidLookupOffset int64 + commitDataOffset int64 + extraEdgeListOffset int64 +} + +// OpenFileIndex opens a serialized commit graph file in the format described at +// https://github.com/git/git/blob/master/Documentation/technical/commit-graph-format.txt +func OpenFileIndex(reader io.ReaderAt) (Index, error) { + fi := &fileIndex{reader: reader} + + if err := fi.verifyFileHeader(); err != nil { + return nil, err + } + if err := fi.readChunkHeaders(); err != nil { + return nil, err + } + if err := fi.readFanout(); err != nil { + return nil, err + } + + return fi, nil +} + +func (fi *fileIndex) verifyFileHeader() error { + // Verify file signature + var signature = make([]byte, 4) + if _, err := fi.reader.ReadAt(signature, 0); err != nil { + return err + } + if !bytes.Equal(signature, commitFileSignature) { + return ErrMalformedCommitGraphFile + } + + // Read and verify the file header + var header = make([]byte, 4) + if _, err := fi.reader.ReadAt(header, 4); err != nil { + return err + } + if header[0] != 1 { + return ErrUnsupportedVersion + } + if header[1] != 1 { + return ErrUnsupportedHash + } + + return nil +} + +func (fi *fileIndex) readChunkHeaders() error { + var chunkID = make([]byte, 4) + for i := 0; ; i++ { + chunkHeader := io.NewSectionReader(fi.reader, 8+(int64(i)*12), 12) + if _, err := io.ReadAtLeast(chunkHeader, chunkID, 4); err != nil { + return err 
+ } + chunkOffset, err := binary.ReadUint64(chunkHeader) + if err != nil { + return err + } + + if bytes.Equal(chunkID, oidFanoutSignature) { + fi.oidFanoutOffset = int64(chunkOffset) + } else if bytes.Equal(chunkID, oidLookupSignature) { + fi.oidLookupOffset = int64(chunkOffset) + } else if bytes.Equal(chunkID, commitDataSignature) { + fi.commitDataOffset = int64(chunkOffset) + } else if bytes.Equal(chunkID, extraEdgeListSignature) { + fi.extraEdgeListOffset = int64(chunkOffset) + } else if bytes.Equal(chunkID, lastSignature) { + break + } + } + + if fi.oidFanoutOffset <= 0 || fi.oidLookupOffset <= 0 || fi.commitDataOffset <= 0 { + return ErrMalformedCommitGraphFile + } + + return nil +} + +func (fi *fileIndex) readFanout() error { + fanoutReader := io.NewSectionReader(fi.reader, fi.oidFanoutOffset, 256*4) + for i := 0; i < 256; i++ { + fanoutValue, err := binary.ReadUint32(fanoutReader) + if err != nil { + return err + } + if fanoutValue > 0x7fffffff { + return ErrMalformedCommitGraphFile + } + fi.fanout[i] = int(fanoutValue) + } + return nil +} + +func (fi *fileIndex) GetIndexByHash(h plumbing.Hash) (int, error) { + var oid plumbing.Hash + + // Find the hash in the oid lookup table + var low int + if h[0] == 0 { + low = 0 + } else { + low = fi.fanout[h[0]-1] + } + high := fi.fanout[h[0]] + for low < high { + mid := (low + high) >> 1 + offset := fi.oidLookupOffset + int64(mid)*20 + if _, err := fi.reader.ReadAt(oid[:], offset); err != nil { + return 0, err + } + cmp := bytes.Compare(h[:], oid[:]) + if cmp < 0 { + high = mid + } else if cmp == 0 { + return mid, nil + } else { + low = mid + 1 + } + } + + return 0, plumbing.ErrObjectNotFound +} + +func (fi *fileIndex) GetCommitDataByIndex(idx int) (*CommitData, error) { + if idx >= fi.fanout[0xff] { + return nil, plumbing.ErrObjectNotFound + } + + offset := fi.commitDataOffset + int64(idx)*36 + commitDataReader := io.NewSectionReader(fi.reader, offset, 36) + + treeHash, err := binary.ReadHash(commitDataReader) + if 
err != nil { + return nil, err + } + parent1, err := binary.ReadUint32(commitDataReader) + if err != nil { + return nil, err + } + parent2, err := binary.ReadUint32(commitDataReader) + if err != nil { + return nil, err + } + genAndTime, err := binary.ReadUint64(commitDataReader) + if err != nil { + return nil, err + } + + var parentIndexes []int + if parent2&parentOctopusUsed == parentOctopusUsed { + // Octopus merge + parentIndexes = []int{int(parent1 & parentOctopusMask)} + offset := fi.extraEdgeListOffset + 4*int64(parent2&parentOctopusMask) + buf := make([]byte, 4) + for { + _, err := fi.reader.ReadAt(buf, offset) + if err != nil { + return nil, err + } + + parent := encbin.BigEndian.Uint32(buf) + offset += 4 + parentIndexes = append(parentIndexes, int(parent&parentOctopusMask)) + if parent&parentLast == parentLast { + break + } + } + } else if parent2 != parentNone { + parentIndexes = []int{int(parent1 & parentOctopusMask), int(parent2 & parentOctopusMask)} + } else if parent1 != parentNone { + parentIndexes = []int{int(parent1 & parentOctopusMask)} + } + + parentHashes, err := fi.getHashesFromIndexes(parentIndexes) + if err != nil { + return nil, err + } + + return &CommitData{ + TreeHash: treeHash, + ParentIndexes: parentIndexes, + ParentHashes: parentHashes, + Generation: int(genAndTime >> 34), + When: time.Unix(int64(genAndTime&0x3FFFFFFFF), 0), + }, nil +} + +func (fi *fileIndex) getHashesFromIndexes(indexes []int) ([]plumbing.Hash, error) { + hashes := make([]plumbing.Hash, len(indexes)) + + for i, idx := range indexes { + if idx >= fi.fanout[0xff] { + return nil, ErrMalformedCommitGraphFile + } + + offset := fi.oidLookupOffset + int64(idx)*20 + if _, err := fi.reader.ReadAt(hashes[i][:], offset); err != nil { + return nil, err + } + } + + return hashes, nil +} + +// Hashes returns all the hashes that are available in the index +func (fi *fileIndex) Hashes() []plumbing.Hash { + hashes := make([]plumbing.Hash, fi.fanout[0xff]) + for i := 0; i < 
int(fi.fanout[0xff]); i++ { + offset := fi.oidLookupOffset + int64(i)*20 + if n, err := fi.reader.ReadAt(hashes[i][:], offset); err != nil || n < 20 { + return nil + } + } + return hashes +} diff --git a/vendor/gopkg.in/src-d/go-git.v4/plumbing/format/commitgraph/memory.go b/vendor/gopkg.in/src-d/go-git.v4/plumbing/format/commitgraph/memory.go new file mode 100644 index 0000000000..a4a96e9612 --- /dev/null +++ b/vendor/gopkg.in/src-d/go-git.v4/plumbing/format/commitgraph/memory.go @@ -0,0 +1,72 @@ +package commitgraph + +import ( + "gopkg.in/src-d/go-git.v4/plumbing" +) + +// MemoryIndex provides a way to build the commit-graph in memory +// for later encoding to file. +type MemoryIndex struct { + commitData []*CommitData + indexMap map[plumbing.Hash]int +} + +// NewMemoryIndex creates in-memory commit graph representation +func NewMemoryIndex() *MemoryIndex { + return &MemoryIndex{ + indexMap: make(map[plumbing.Hash]int), + } +} + +// GetIndexByHash gets the index in the commit graph from commit hash, if available +func (mi *MemoryIndex) GetIndexByHash(h plumbing.Hash) (int, error) { + i, ok := mi.indexMap[h] + if ok { + return i, nil + } + + return 0, plumbing.ErrObjectNotFound +} + +// GetCommitDataByIndex gets the commit node from the commit graph using index +// obtained from child node, if available +func (mi *MemoryIndex) GetCommitDataByIndex(i int) (*CommitData, error) { + if int(i) >= len(mi.commitData) { + return nil, plumbing.ErrObjectNotFound + } + + commitData := mi.commitData[i] + + // Map parent hashes to parent indexes + if commitData.ParentIndexes == nil { + parentIndexes := make([]int, len(commitData.ParentHashes)) + for i, parentHash := range commitData.ParentHashes { + var err error + if parentIndexes[i], err = mi.GetIndexByHash(parentHash); err != nil { + return nil, err + } + } + commitData.ParentIndexes = parentIndexes + } + + return commitData, nil +} + +// Hashes returns all the hashes that are available in the index +func (mi *MemoryIndex) 
Hashes() []plumbing.Hash { + hashes := make([]plumbing.Hash, 0, len(mi.indexMap)) + for k := range mi.indexMap { + hashes = append(hashes, k) + } + return hashes +} + +// Add adds new node to the memory index +func (mi *MemoryIndex) Add(hash plumbing.Hash, commitData *CommitData) { + // The parent indexes are calculated lazily in GetNodeByIndex + // which allows adding nodes out of order as long as all parents + // are eventually resolved + commitData.ParentIndexes = nil + mi.indexMap[hash] = len(mi.commitData) + mi.commitData = append(mi.commitData, commitData) +} diff --git a/vendor/gopkg.in/src-d/go-git.v4/plumbing/object/commitgraph/commitnode.go b/vendor/gopkg.in/src-d/go-git.v4/plumbing/object/commitgraph/commitnode.go new file mode 100644 index 0000000000..e218d3210b --- /dev/null +++ b/vendor/gopkg.in/src-d/go-git.v4/plumbing/object/commitgraph/commitnode.go @@ -0,0 +1,98 @@ +package commitgraph + +import ( + "io" + "time" + + "gopkg.in/src-d/go-git.v4/plumbing" + "gopkg.in/src-d/go-git.v4/plumbing/object" + "gopkg.in/src-d/go-git.v4/plumbing/storer" +) + +// CommitNode is generic interface encapsulating a lightweight commit object retrieved +// from CommitNodeIndex +type CommitNode interface { + // ID returns the Commit object id referenced by the commit graph node. + ID() plumbing.Hash + // Tree returns the Tree referenced by the commit graph node. + Tree() (*object.Tree, error) + // CommitTime returns the Commiter.When time of the Commit referenced by the commit graph node. + CommitTime() time.Time + // NumParents returns the number of parents in a commit. + NumParents() int + // ParentNodes return a CommitNodeIter for parents of specified node. + ParentNodes() CommitNodeIter + // ParentNode returns the ith parent of a commit. + ParentNode(i int) (CommitNode, error) + // ParentHashes returns hashes of the parent commits for a specified node + ParentHashes() []plumbing.Hash + // Generation returns the generation of the commit for reachability analysis. 
+ // Objects with newer generation are not reachable from objects of older generation. + Generation() uint64 + // Commit returns the full commit object from the node + Commit() (*object.Commit, error) +} + +// CommitNodeIndex is generic interface encapsulating an index of CommitNode objects +type CommitNodeIndex interface { + // Get returns a commit node from a commit hash + Get(hash plumbing.Hash) (CommitNode, error) +} + +// CommitNodeIter is a generic closable interface for iterating over commit nodes. +type CommitNodeIter interface { + Next() (CommitNode, error) + ForEach(func(CommitNode) error) error + Close() +} + +// parentCommitNodeIter provides an iterator for parent commits from associated CommitNodeIndex. +type parentCommitNodeIter struct { + node CommitNode + i int +} + +func newParentgraphCommitNodeIter(node CommitNode) CommitNodeIter { + return &parentCommitNodeIter{node, 0} +} + +// Next moves the iterator to the next commit and returns a pointer to it. If +// there are no more commits, it returns io.EOF. +func (iter *parentCommitNodeIter) Next() (CommitNode, error) { + obj, err := iter.node.ParentNode(iter.i) + if err == object.ErrParentNotFound { + return nil, io.EOF + } + if err == nil { + iter.i++ + } + + return obj, err +} + +// ForEach call the cb function for each commit contained on this iter until +// an error appends or the end of the iter is reached. If ErrStop is sent +// the iteration is stopped but no error is returned. The iterator is closed. 
+func (iter *parentCommitNodeIter) ForEach(cb func(CommitNode) error) error { + for { + obj, err := iter.Next() + if err != nil { + if err == io.EOF { + return nil + } + + return err + } + + if err := cb(obj); err != nil { + if err == storer.ErrStop { + return nil + } + + return err + } + } +} + +func (iter *parentCommitNodeIter) Close() { +} diff --git a/vendor/gopkg.in/src-d/go-git.v4/plumbing/object/commitgraph/commitnode_graph.go b/vendor/gopkg.in/src-d/go-git.v4/plumbing/object/commitgraph/commitnode_graph.go new file mode 100644 index 0000000000..bd54e18886 --- /dev/null +++ b/vendor/gopkg.in/src-d/go-git.v4/plumbing/object/commitgraph/commitnode_graph.go @@ -0,0 +1,131 @@ +package commitgraph + +import ( + "fmt" + "time" + + "gopkg.in/src-d/go-git.v4/plumbing" + "gopkg.in/src-d/go-git.v4/plumbing/format/commitgraph" + "gopkg.in/src-d/go-git.v4/plumbing/object" + "gopkg.in/src-d/go-git.v4/plumbing/storer" +) + +// graphCommitNode is a reduced representation of Commit as presented in the commit +// graph file (commitgraph.CommitData). It is merely useful as an optimization for walking +// the commit graphs. +// +// graphCommitNode implements the CommitNode interface. +type graphCommitNode struct { + // Hash for the Commit object + hash plumbing.Hash + // Index of the node in the commit graph file + index int + + commitData *commitgraph.CommitData + gci *graphCommitNodeIndex +} + +// graphCommitNodeIndex is an index that can load CommitNode objects from both the commit +// graph files and the object store.
+// +// graphCommitNodeIndex implements the CommitNodeIndex interface. +type graphCommitNodeIndex struct { + commitGraph commitgraph.Index + s storer.EncodedObjectStorer +} + +// NewGraphCommitNodeIndex returns a CommitNodeIndex implementation that uses commit-graph +// files as backing storage and falls back to object storage when necessary. +func NewGraphCommitNodeIndex(commitGraph commitgraph.Index, s storer.EncodedObjectStorer) CommitNodeIndex { + return &graphCommitNodeIndex{commitGraph, s} +} + +func (gci *graphCommitNodeIndex) Get(hash plumbing.Hash) (CommitNode, error) { + // Check the commit graph first (parentIndex and parent describe hash itself, not one of its parents) + parentIndex, err := gci.commitGraph.GetIndexByHash(hash) + if err == nil { + parent, err := gci.commitGraph.GetCommitDataByIndex(parentIndex) + if err != nil { + return nil, err + } + + return &graphCommitNode{ + hash: hash, + index: parentIndex, + commitData: parent, + gci: gci, + }, nil + } + + // Fall back to loading the full commit object from the object store + commit, err := object.GetCommit(gci.s, hash) + if err != nil { + return nil, err + } + + return &objectCommitNode{ + nodeIndex: gci, + commit: commit, + }, nil +} + +func (c *graphCommitNode) ID() plumbing.Hash { + return c.hash +} + +func (c *graphCommitNode) Tree() (*object.Tree, error) { + return object.GetTree(c.gci.s, c.commitData.TreeHash) +} + +func (c *graphCommitNode) CommitTime() time.Time { + return c.commitData.When +} + +func (c *graphCommitNode) NumParents() int { + return len(c.commitData.ParentIndexes) +} + +func (c *graphCommitNode) ParentNodes() CommitNodeIter { + return newParentgraphCommitNodeIter(c) +} + +func (c *graphCommitNode) ParentNode(i int) (CommitNode, error) { + if i < 0 || i >= len(c.commitData.ParentIndexes) { + return nil, object.ErrParentNotFound + } + + parent, err := c.gci.commitGraph.GetCommitDataByIndex(c.commitData.ParentIndexes[i]) + if err != nil { + return nil, err + } + + return &graphCommitNode{ + hash: c.commitData.ParentHashes[i], + index: c.commitData.ParentIndexes[i], +
commitData: parent, + gci: c.gci, + }, nil +} + +func (c *graphCommitNode) ParentHashes() []plumbing.Hash { + return c.commitData.ParentHashes +} + +func (c *graphCommitNode) Generation() uint64 { + // If the commit-graph file was generated with an older Git version that + // set the generation to zero for every commit, the generation assumption + // is still valid. It is just less useful. + return uint64(c.commitData.Generation) +} + +func (c *graphCommitNode) Commit() (*object.Commit, error) { + return object.GetCommit(c.gci.s, c.hash) +} + +func (c *graphCommitNode) String() string { + return fmt.Sprintf( + "%s %s\nDate: %s", + plumbing.CommitObject, c.ID(), + c.CommitTime().Format(object.DateFormat), + ) +} diff --git a/vendor/gopkg.in/src-d/go-git.v4/plumbing/object/commitgraph/commitnode_object.go b/vendor/gopkg.in/src-d/go-git.v4/plumbing/object/commitgraph/commitnode_object.go new file mode 100644 index 0000000000..2779a54bc7 --- /dev/null +++ b/vendor/gopkg.in/src-d/go-git.v4/plumbing/object/commitgraph/commitnode_object.go @@ -0,0 +1,90 @@ +package commitgraph + +import ( + "math" + "time" + + "gopkg.in/src-d/go-git.v4/plumbing" + "gopkg.in/src-d/go-git.v4/plumbing/object" + "gopkg.in/src-d/go-git.v4/plumbing/storer" +) + +// objectCommitNode is a representation of Commit as presented in the Git object format. +// +// objectCommitNode implements the CommitNode interface.
+type objectCommitNode struct { + nodeIndex CommitNodeIndex + commit *object.Commit +} + +// NewObjectCommitNodeIndex returns a CommitNodeIndex implementation that uses +// only object storage to load the nodes. +func NewObjectCommitNodeIndex(s storer.EncodedObjectStorer) CommitNodeIndex { + return &objectCommitNodeIndex{s} +} + +func (oci *objectCommitNodeIndex) Get(hash plumbing.Hash) (CommitNode, error) { + commit, err := object.GetCommit(oci.s, hash) + if err != nil { + return nil, err + } + + return &objectCommitNode{ + nodeIndex: oci, + commit: commit, + }, nil +} + +// objectCommitNodeIndex is an index that can load CommitNode objects only from the +// object store. +// +// objectCommitNodeIndex implements the CommitNodeIndex interface. +type objectCommitNodeIndex struct { + s storer.EncodedObjectStorer +} + +func (c *objectCommitNode) CommitTime() time.Time { + return c.commit.Committer.When +} + +func (c *objectCommitNode) ID() plumbing.Hash { + return c.commit.ID() +} + +func (c *objectCommitNode) Tree() (*object.Tree, error) { + return c.commit.Tree() +} + +func (c *objectCommitNode) NumParents() int { + return c.commit.NumParents() +} + +func (c *objectCommitNode) ParentNodes() CommitNodeIter { + return newParentgraphCommitNodeIter(c) +} + +func (c *objectCommitNode) ParentNode(i int) (CommitNode, error) { + if i < 0 || i >= len(c.commit.ParentHashes) { + return nil, object.ErrParentNotFound + } + + // Note: It's necessary to go through CommitNodeIndex here to ensure + // that if the commit-graph file covers only part of the history we + // start using it when that part is reached. + return c.nodeIndex.Get(c.commit.ParentHashes[i]) +} + +func (c *objectCommitNode) ParentHashes() []plumbing.Hash { + return c.commit.ParentHashes +} + +func (c *objectCommitNode) Generation() uint64 { + // Commit nodes representing objects outside of the commit graph can never + // be reached by objects from the commit-graph, thus we return the highest + // possible value.
+ return math.MaxUint64 +} + +func (c *objectCommitNode) Commit() (*object.Commit, error) { + return c.commit, nil +} diff --git a/vendor/gopkg.in/src-d/go-git.v4/plumbing/object/commitgraph/commitnode_walker_ctime.go b/vendor/gopkg.in/src-d/go-git.v4/plumbing/object/commitgraph/commitnode_walker_ctime.go new file mode 100644 index 0000000000..f6a1b6a4ef --- /dev/null +++ b/vendor/gopkg.in/src-d/go-git.v4/plumbing/object/commitgraph/commitnode_walker_ctime.go @@ -0,0 +1,105 @@ +package commitgraph + +import ( + "io" + + "github.com/emirpasic/gods/trees/binaryheap" + + "gopkg.in/src-d/go-git.v4/plumbing" + "gopkg.in/src-d/go-git.v4/plumbing/storer" +) + +type commitNodeIteratorByCTime struct { + heap *binaryheap.Heap + seenExternal map[plumbing.Hash]bool + seen map[plumbing.Hash]bool +} + +// NewCommitNodeIterCTime returns a CommitNodeIter that walks the commit history, +// starting at the given commit and visiting its parents while preserving committer time order. +// This appears to be the closest order to `git log`. +// Each commit will be visited only once. When the iterator is driven through +// ForEach, an error returned by the callback stops the walk and is returned +// to the caller. Other errors might be returned if the history +// cannot be traversed (e.g. missing objects). Commits listed in ignore or set +// in seenExternal are skipped, and so are the parents reachable only through them.
+func NewCommitNodeIterCTime( + c CommitNode, + seenExternal map[plumbing.Hash]bool, + ignore []plumbing.Hash, +) CommitNodeIter { + seen := make(map[plumbing.Hash]bool) + for _, h := range ignore { + seen[h] = true + } + + heap := binaryheap.NewWith(func(a, b interface{}) int { + if a.(CommitNode).CommitTime().Before(b.(CommitNode).CommitTime()) { + return 1 + } + return -1 + }) + + heap.Push(c) + + return &commitNodeIteratorByCTime{ + heap: heap, + seenExternal: seenExternal, + seen: seen, + } +} + +func (w *commitNodeIteratorByCTime) Next() (CommitNode, error) { + var c CommitNode + for { + cIn, ok := w.heap.Pop() + if !ok { + return nil, io.EOF + } + c = cIn.(CommitNode) + cID := c.ID() + + if w.seen[cID] || w.seenExternal[cID] { + continue + } + + w.seen[cID] = true + + for i, h := range c.ParentHashes() { + if w.seen[h] || w.seenExternal[h] { + continue + } + pc, err := c.ParentNode(i) + if err != nil { + return nil, err + } + w.heap.Push(pc) + } + + return c, nil + } +} + +func (w *commitNodeIteratorByCTime) ForEach(cb func(CommitNode) error) error { + for { + c, err := w.Next() + if err == io.EOF { + break + } + if err != nil { + return err + } + + err = cb(c) + if err == storer.ErrStop { + break + } + if err != nil { + return err + } + } + + return nil +} + +func (w *commitNodeIteratorByCTime) Close() {} diff --git a/vendor/gopkg.in/src-d/go-git.v4/plumbing/object/commitgraph/doc.go b/vendor/gopkg.in/src-d/go-git.v4/plumbing/object/commitgraph/doc.go new file mode 100644 index 0000000000..0a55ad5b01 --- /dev/null +++ b/vendor/gopkg.in/src-d/go-git.v4/plumbing/object/commitgraph/doc.go @@ -0,0 +1,7 @@ +// Package commitgraph provides an interface for efficient traversal over the Git +// commit graph, either through the regular object storage or, optionally, with +// the index stored in a commit-graph file (Git 2.18+). +// +// The API and functionality of this package are considered EXPERIMENTAL and are +// not considered stable or production ready.
+package commitgraph diff --git a/vendor/modules.txt b/vendor/modules.txt index fe21d938d3..70dff4c9ef 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -432,7 +432,9 @@ gopkg.in/src-d/go-git.v4/config gopkg.in/src-d/go-git.v4/plumbing gopkg.in/src-d/go-git.v4/plumbing/cache gopkg.in/src-d/go-git.v4/plumbing/filemode +gopkg.in/src-d/go-git.v4/plumbing/format/commitgraph gopkg.in/src-d/go-git.v4/plumbing/object +gopkg.in/src-d/go-git.v4/plumbing/object/commitgraph gopkg.in/src-d/go-git.v4/storage/filesystem gopkg.in/src-d/go-git.v4/internal/revision gopkg.in/src-d/go-git.v4/plumbing/format/gitignore @@ -455,8 +457,8 @@ gopkg.in/src-d/go-git.v4/utils/merkletrie/index gopkg.in/src-d/go-git.v4/utils/merkletrie/noder gopkg.in/src-d/go-git.v4/internal/url gopkg.in/src-d/go-git.v4/plumbing/format/config -gopkg.in/src-d/go-git.v4/plumbing/format/diff gopkg.in/src-d/go-git.v4/utils/binary +gopkg.in/src-d/go-git.v4/plumbing/format/diff gopkg.in/src-d/go-git.v4/plumbing/format/idxfile gopkg.in/src-d/go-git.v4/plumbing/format/objfile gopkg.in/src-d/go-git.v4/storage/filesystem/dotgit