package packfile

import (
	"sort"
	"sync"

	"gopkg.in/src-d/go-git.v4/plumbing"
	"gopkg.in/src-d/go-git.v4/plumbing/storer"
)

const (
	// maxDepth bounds how many deltas may be chained on top of each other
	// (deltas based on deltas). 50 is the default value used in JGit.
	maxDepth = int64(50)
)

// applyDelta is the set of object types for which walk tries to create
// new deltas; targets of any other type are skipped.
var applyDelta = map[plumbing.ObjectType]bool{
	plumbing.BlobObject: true,
	plumbing.TreeObject: true,
}

// deltaSelector decides, for a list of hashes, which objects are packed
// whole and which are packed as deltas. Objects are read from storer.
type deltaSelector struct {
	storer storer.EncodedObjectStorer
}

// newDeltaSelector returns a deltaSelector that reads objects from s.
func newDeltaSelector(s storer.EncodedObjectStorer) *deltaSelector {
	return &deltaSelector{s}
}

// ObjectsToPack creates a list of ObjectToPack from the hashes
// provided, creating deltas if it's suitable, using a specific
// internal logic. `packWindow` specifies the size of the sliding
// window used to compare objects for delta compression; 0 turns off
// delta compression entirely.
//
// With a non-zero window the objects are sorted by type and size, split
// into one group per object type, and each group is walked in its own
// goroutine. If several groups fail, only the first recorded error is
// returned.
func (dw *deltaSelector) ObjectsToPack(
	hashes []plumbing.Hash,
	packWindow uint,
) ([]*ObjectToPack, error) {
	otp, err := dw.objectsToPack(hashes, packWindow)
	if err != nil {
		return nil, err
	}

	if packWindow == 0 {
		return otp, nil
	}

	dw.sort(otp)

	// otp is sorted by type, so objects of the same type are contiguous:
	// an object either extends the last group or starts a new one.
	var objectGroups [][]*ObjectToPack
	for _, obj := range otp {
		if n := len(objectGroups); n > 0 && objectGroups[n-1][0].Type() == obj.Type() {
			objectGroups[n-1] = append(objectGroups[n-1], obj)
			continue
		}

		objectGroups = append(objectGroups, []*ObjectToPack{obj})
	}

	var wg sync.WaitGroup
	var once sync.Once
	for _, objs := range objectGroups {
		objs := objs // per-iteration copy for the goroutine below
		wg.Add(1)
		go func() {
			defer wg.Done()

			if walkErr := dw.walk(objs, packWindow); walkErr != nil {
				// Keep only the first error; wg.Wait orders this write
				// before the read of err below.
				once.Do(func() {
					err = walkErr
				})
			}
		}()
	}
	wg.Wait()

	if err != nil {
		return nil, err
	}

	return otp, nil
}

// objectsToPack loads every hash from the storer and wraps it in an
// ObjectToPack. With packWindow == 0 the plain encoded objects are
// returned as they are. Otherwise objects are fetched through
// encodedDeltaObject and the resulting delta chains are fixed or broken
// by fixAndBreakChains.
func (dw *deltaSelector) objectsToPack(
	hashes []plumbing.Hash,
	packWindow uint,
) ([]*ObjectToPack, error) {
	var objectsToPack []*ObjectToPack
	for _, h := range hashes {
		var o plumbing.EncodedObject
		var err error
		if packWindow == 0 {
			o, err = dw.encodedObject(h)
		} else {
			o, err = dw.encodedDeltaObject(h)
		}

		if err != nil {
			return nil, err
		}

		otp := newObjectToPack(o)
		if _, ok := o.(plumbing.DeltaObject); ok {
			// Drop the Original of a reused delta; restoreOriginal
			// fetches it again on demand.
			otp.CleanOriginal()
		}

		objectsToPack = append(objectsToPack, otp)
	}

	if packWindow == 0 {
		return objectsToPack, nil
	}

	if err := dw.fixAndBreakChains(objectsToPack); err != nil {
		return nil, err
	}

	return objectsToPack, nil
}

// encodedDeltaObject fetches h through the storer's DeltaObject method
// when the storer implements storer.DeltaObjectStorer, and falls back to
// encodedObject otherwise.
func (dw *deltaSelector) encodedDeltaObject(h plumbing.Hash) (plumbing.EncodedObject, error) {
	edos, ok := dw.storer.(storer.DeltaObjectStorer)
	if !ok {
		return dw.encodedObject(h)
	}

	return edos.DeltaObject(plumbing.AnyObject, h)
}

// encodedObject fetches h from the storer, accepting any object type.
func (dw *deltaSelector) encodedObject(h plumbing.Hash) (plumbing.EncodedObject, error) {
	return dw.storer.EncodedObject(plumbing.AnyObject, h)
}

// fixAndBreakChains indexes objectsToPack by hash and runs
// fixAndBreakChainsOne on every entry, stopping at the first error.
func (dw *deltaSelector) fixAndBreakChains(objectsToPack []*ObjectToPack) error {
	m := make(map[plumbing.Hash]*ObjectToPack, len(objectsToPack))
	for _, otp := range objectsToPack {
		m[otp.Hash()] = otp
	}

	for _, otp := range objectsToPack {
		if err := dw.fixAndBreakChainsOne(m, otp); err != nil {
			return err
		}
	}

	return nil
}

// fixAndBreakChainsOne links a reused delta to its base when that base is
// among the objects being packed, and undeltifies it otherwise. The base
// is processed first (recursively), so chains are resolved from the base
// outwards. Objects that are not deltas, or that already have a Base, are
// left untouched.
func (dw *deltaSelector) fixAndBreakChainsOne(objectsToPack map[plumbing.Hash]*ObjectToPack, otp *ObjectToPack) error {
	if !otp.Object.Type().IsDelta() {
		return nil
	}

	// Initial ObjectToPack instances might have a delta assigned to Object
	// but no actual base initially. Once Base is assigned to a delta, it means
	// we already fixed it.
	if otp.Base != nil {
		return nil
	}

	do, ok := otp.Object.(plumbing.DeltaObject)
	if !ok {
		// if this is not a DeltaObject, then we cannot retrieve its base,
		// so we have to break the delta chain here.
		return dw.undeltify(otp)
	}

	base, ok := objectsToPack[do.BaseHash()]
	if !ok {
		// The base of the delta is not in our list of objects to pack, so
		// we break the chain.
		return dw.undeltify(otp)
	}

	if err := dw.fixAndBreakChainsOne(objectsToPack, base); err != nil {
		return err
	}

	otp.SetDelta(base, otp.Object)
	return nil
}

// restoreOriginal makes sure otp.Original is set for an object whose
// Object is a delta, fetching the full object from the storer by hash.
// It does nothing when Original is already present or Object is not a
// delta.
func (dw *deltaSelector) restoreOriginal(otp *ObjectToPack) error {
	if otp.Original != nil {
		return nil
	}

	if !otp.Object.Type().IsDelta() {
		return nil
	}

	obj, err := dw.encodedObject(otp.Hash())
	if err != nil {
		return err
	}

	otp.SetOriginal(obj)

	return nil
}

// undeltify undeltifies an *ObjectToPack by retrieving the original object from
// the storer and resetting it: Object is replaced by Original and Depth is
// set back to 0.
func (dw *deltaSelector) undeltify(otp *ObjectToPack) error {
	if err := dw.restoreOriginal(otp); err != nil {
		return err
	}

	otp.Object = otp.Original
	otp.Depth = 0
	return nil
}

// sort orders objectsToPack in place using byTypeAndSize.
func (dw *deltaSelector) sort(objectsToPack []*ObjectToPack) {
	sort.Sort(byTypeAndSize(objectsToPack))
}

// walk goes through objectsToPack in order and, for every non-delta
// object of a type listed in applyDelta, tries the preceding objects
// inside the window as delta bases (nearest first). For target i the
// candidate bases are the indexes j with 0 < i-j < packWindow.
func (dw *deltaSelector) walk(
	objectsToPack []*ObjectToPack,
	packWindow uint,
) error {
	window := int(packWindow)
	indexMap := make(map[plumbing.Hash]*deltaIndex)
	for i := 0; i < len(objectsToPack); i++ {
		// Clean up the index map and reconstructed delta objects for anything
		// outside our pack window, to save memory. The object at i-window
		// was last usable as a base for target i-1, so it is released as
		// soon as i reaches window; using ">=" here (rather than ">")
		// makes sure the very first object is released as well.
		if i >= window {
			obj := objectsToPack[i-window]

			delete(indexMap, obj.Hash())

			if obj.IsDelta() {
				obj.SaveOriginalMetadata()
				obj.CleanOriginal()
			}
		}

		target := objectsToPack[i]

		// If we already have a delta, we don't try to find a new one for this
		// object. This happens when a delta is set to be reused from an existing
		// packfile.
		if target.IsDelta() {
			continue
		}

		// We only want to create deltas from specific types.
		if !applyDelta[target.Type()] {
			continue
		}

		for j := i - 1; j >= 0 && i-j < window; j-- {
			base := objectsToPack[j]
			// Objects must use only the same type as their delta base.
			// Since objectsToPack is sorted by type and size, once we find
			// a different type, we know we won't find more of them.
			if base.Type() != target.Type() {
				break
			}

			if err := dw.tryToDeltify(indexMap, base, target); err != nil {
				return err
			}
		}
	}

	return nil
}

// tryToDeltify computes a delta of target against base and installs it on
// target when it is smaller than the limit given by deltaSizeLimit. Pairs
// that fail the cheap size heuristics are skipped without computing a
// delta. indexMap caches one deltaIndex per base hash so it can be reused
// across targets.
func (dw *deltaSelector) tryToDeltify(indexMap map[plumbing.Hash]*deltaIndex, base, target *ObjectToPack) error {
	// Original object might not be present if we're reusing a delta, so we
	// ensure it is restored.
	if err := dw.restoreOriginal(target); err != nil {
		return err
	}

	if err := dw.restoreOriginal(base); err != nil {
		return err
	}

	// If the sizes are radically different, this is a bad pairing.
	if target.Size() < base.Size()>>4 {
		return nil
	}

	msz := dw.deltaSizeLimit(
		target.Object.Size(),
		base.Depth,
		target.Depth,
		target.IsDelta(),
	)

	// Nearly impossible to fit useful delta.
	if msz <= 8 {
		return nil
	}

	// If we have to insert a lot to make this work, find another.
	if base.Size()-target.Size() > msz {
		return nil
	}

	// Fetch the cached index for this base, creating it on first use.
	baseHash := base.Hash()
	idx, ok := indexMap[baseHash]
	if !ok {
		idx = new(deltaIndex)
		indexMap[baseHash] = idx
	}

	// Now we can generate the delta using originals
	delta, err := getDelta(idx, base.Original, target.Original)
	if err != nil {
		return err
	}

	// if delta better than target
	if delta.Size() < msz {
		target.SetDelta(base, delta)
	}

	return nil
}

// deltaSizeLimit returns the size a new delta must stay under to be worth
// using. targetSize is the size of the target's current representation,
// baseDepth and targetDepth are the delta-chain depths of the candidate
// base and of the target, and targetDelta tells whether the target is
// already stored as a delta.
func (dw *deltaSelector) deltaSizeLimit(targetSize int64, baseDepth int,
	targetDepth int, targetDelta bool) int64 {
	if !targetDelta {
		// Any delta should be no more than 50% of the original size
		// (for text files deflate of whole form should shrink 50%).
		n := targetSize >> 1

		// Evenly distribute delta size limits over allowed depth.
		// If the base is non-delta (depth = 0), delta <= 50% of original;
		// the deeper the base already is, the smaller the allowance, down
		// to 0 when the base is at maxDepth.
		return n * (maxDepth - int64(baseDepth)) / maxDepth
	}

	// With a delta base chosen any new delta must be "better".
	// Retain the distribution described above.
	d := int64(targetDepth)
	n := targetSize

	// If target depth is bigger than maxDepth, this delta is not suitable to be used.
	// Returning early also keeps the divisor below strictly positive.
	if d >= maxDepth {
		return 0
	}

	// The limit is the current size scaled by the ratio of remaining depth
	// budgets: a shallow candidate base combined with a deep current chain
	// allows a larger delta, while a deep candidate base combined with a
	// shallow current chain requires a proportionally smaller one.
	return n * (maxDepth - int64(baseDepth)) / (maxDepth - d)
}

// byTypeAndSize implements sort.Interface, ordering objects by type in
// descending order and, within the same type, by size in descending
// order.
type byTypeAndSize []*ObjectToPack

// Len returns the number of objects.
func (a byTypeAndSize) Len() int { return len(a) }

// Swap exchanges the objects at positions i and j.
func (a byTypeAndSize) Swap(i, j int) { a[i], a[j] = a[j], a[i] }

// Less reports whether the object at i sorts before the one at j: a
// greater type goes first, then a greater size.
func (a byTypeAndSize) Less(i, j int) bool {
	if a[i].Type() < a[j].Type() {
		return false
	}

	if a[i].Type() > a[j].Type() {
		return true
	}

	return a[i].Size() > a[j].Size()
}