// Copyright (c) 2014 Couchbase, Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package search import ( "fmt" "reflect" "sort" "github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/size" ) var reflectStaticSizeDocumentMatch int var reflectStaticSizeSearchContext int var reflectStaticSizeLocation int func init() { var dm DocumentMatch reflectStaticSizeDocumentMatch = int(reflect.TypeOf(dm).Size()) var sc SearchContext reflectStaticSizeSearchContext = int(reflect.TypeOf(sc).Size()) var l Location reflectStaticSizeLocation = int(reflect.TypeOf(l).Size()) } type ArrayPositions []uint64 func (ap ArrayPositions) Equals(other ArrayPositions) bool { if len(ap) != len(other) { return false } for i := range ap { if ap[i] != other[i] { return false } } return true } func (ap ArrayPositions) Compare(other ArrayPositions) int { for i, p := range ap { if i >= len(other) { return 1 } if p < other[i] { return -1 } if p > other[i] { return 1 } } if len(ap) < len(other) { return -1 } return 0 } type Location struct { // Pos is the position of the term within the field, starting at 1 Pos uint64 `json:"pos"` // Start and End are the byte offsets of the term in the field Start uint64 `json:"start"` End uint64 `json:"end"` // ArrayPositions contains the positions of the term within any elements. ArrayPositions ArrayPositions `json:"array_positions"` } func (l *Location) Size() int { return reflectStaticSizeLocation + size.SizeOfPtr + len(l.ArrayPositions)*size.SizeOfUint64 } type Locations []*Location func (p Locations) Len() int { return len(p) } func (p Locations) Swap(i, j int) { p[i], p[j] = p[j], p[i] } func (p Locations) Less(i, j int) bool { c := p[i].ArrayPositions.Compare(p[j].ArrayPositions) if c < 0 { return true } if c > 0 { return false } return p[i].Pos < p[j].Pos } func (p Locations) Dedupe() Locations { // destructive! if len(p) <= 1 { return p } sort.Sort(p) slow := 0 for _, pfast := range p { pslow := p[slow] if pslow.Pos == pfast.Pos && pslow.Start == pfast.Start && pslow.End == pfast.End && pslow.ArrayPositions.Equals(pfast.ArrayPositions) { continue // duplicate, so only move fast ahead } slow++ p[slow] = pfast } return p[:slow+1] } type TermLocationMap map[string]Locations func (t TermLocationMap) AddLocation(term string, location *Location) { t[term] = append(t[term], location) } type FieldTermLocationMap map[string]TermLocationMap type FieldTermLocation struct { Field string Term string Location Location } type FieldFragmentMap map[string][]string type DocumentMatch struct { Index string `json:"index,omitempty"` ID string `json:"id"` IndexInternalID index.IndexInternalID `json:"-"` Score float64 `json:"score"` Expl *Explanation `json:"explanation,omitempty"` Locations FieldTermLocationMap `json:"locations,omitempty"` Fragments FieldFragmentMap `json:"fragments,omitempty"` Sort []string `json:"sort,omitempty"` // Fields contains the values for document fields listed in // SearchRequest.Fields. Text fields are returned as strings, numeric // fields as float64s and date fields as time.RFC3339 formatted strings. Fields map[string]interface{} `json:"fields,omitempty"` // used to maintain natural index order HitNumber uint64 `json:"-"` // used to temporarily hold field term location information during // search processing in an efficient, recycle-friendly manner, to // be later incorporated into the Locations map when search // results are completed FieldTermLocations []FieldTermLocation `json:"-"` } func (dm *DocumentMatch) AddFieldValue(name string, value interface{}) { if dm.Fields == nil { dm.Fields = make(map[string]interface{}) } existingVal, ok := dm.Fields[name] if !ok { dm.Fields[name] = value return } valSlice, ok := existingVal.([]interface{}) if ok { // already a slice, append to it valSlice = append(valSlice, value) } else { // create a slice valSlice = []interface{}{existingVal, value} } dm.Fields[name] = valSlice } // Reset allows an already allocated DocumentMatch to be reused func (dm *DocumentMatch) Reset() *DocumentMatch { // remember the []byte used for the IndexInternalID indexInternalID := dm.IndexInternalID // remember the []interface{} used for sort sort := dm.Sort // remember the FieldTermLocations backing array ftls := dm.FieldTermLocations for i := range ftls { // recycle the ArrayPositions of each location ftls[i].Location.ArrayPositions = ftls[i].Location.ArrayPositions[:0] } // idiom to copy over from empty DocumentMatch (0 allocations) *dm = DocumentMatch{} // reuse the []byte already allocated (and reset len to 0) dm.IndexInternalID = indexInternalID[:0] // reuse the []interface{} already allocated (and reset len to 0) dm.Sort = sort[:0] // reuse the FieldTermLocations already allocated (and reset len to 0) dm.FieldTermLocations = ftls[:0] return dm } func (dm *DocumentMatch) Size() int { sizeInBytes := reflectStaticSizeDocumentMatch + size.SizeOfPtr + len(dm.Index) + len(dm.ID) + len(dm.IndexInternalID) if dm.Expl != nil { sizeInBytes += dm.Expl.Size() } for k, v := range dm.Locations { sizeInBytes += size.SizeOfString + len(k) for k1, v1 := range v { sizeInBytes += size.SizeOfString + len(k1) + size.SizeOfSlice for _, entry := range v1 { sizeInBytes += entry.Size() } } } for k, v := range dm.Fragments { sizeInBytes += size.SizeOfString + len(k) + size.SizeOfSlice for _, entry := range v { sizeInBytes += size.SizeOfString + len(entry) } } for _, entry := range dm.Sort { sizeInBytes += size.SizeOfString + len(entry) } for k, _ := range dm.Fields { sizeInBytes += size.SizeOfString + len(k) + size.SizeOfPtr } return sizeInBytes } // Complete performs final preparation & transformation of the // DocumentMatch at the end of search processing, also allowing the // caller to provide an optional preallocated locations slice func (dm *DocumentMatch) Complete(prealloc []Location) []Location { // transform the FieldTermLocations slice into the Locations map nlocs := len(dm.FieldTermLocations) if nlocs > 0 { if cap(prealloc) < nlocs { prealloc = make([]Location, nlocs) } prealloc = prealloc[:nlocs] var lastField string var tlm TermLocationMap var needsDedupe bool for i, ftl := range dm.FieldTermLocations { if lastField != ftl.Field { lastField = ftl.Field if dm.Locations == nil { dm.Locations = make(FieldTermLocationMap) } tlm = dm.Locations[ftl.Field] if tlm == nil { tlm = make(TermLocationMap) dm.Locations[ftl.Field] = tlm } } loc := &prealloc[i] *loc = ftl.Location if len(loc.ArrayPositions) > 0 { // copy loc.ArrayPositions = append(ArrayPositions(nil), loc.ArrayPositions...) } locs := tlm[ftl.Term] // if the loc is before or at the last location, then there // might be duplicates that need to be deduplicated if !needsDedupe && len(locs) > 0 { last := locs[len(locs)-1] cmp := loc.ArrayPositions.Compare(last.ArrayPositions) if cmp < 0 || (cmp == 0 && loc.Pos <= last.Pos) { needsDedupe = true } } tlm[ftl.Term] = append(locs, loc) dm.FieldTermLocations[i] = FieldTermLocation{ // recycle Location: Location{ ArrayPositions: ftl.Location.ArrayPositions[:0], }, } } if needsDedupe { for _, tlm := range dm.Locations { for term, locs := range tlm { tlm[term] = locs.Dedupe() } } } } dm.FieldTermLocations = dm.FieldTermLocations[:0] // recycle return prealloc } func (dm *DocumentMatch) String() string { return fmt.Sprintf("[%s-%f]", string(dm.IndexInternalID), dm.Score) } type DocumentMatchCollection []*DocumentMatch func (c DocumentMatchCollection) Len() int { return len(c) } func (c DocumentMatchCollection) Swap(i, j int) { c[i], c[j] = c[j], c[i] } func (c DocumentMatchCollection) Less(i, j int) bool { return c[i].Score > c[j].Score } type Searcher interface { Next(ctx *SearchContext) (*DocumentMatch, error) Advance(ctx *SearchContext, ID index.IndexInternalID) (*DocumentMatch, error) Close() error Weight() float64 SetQueryNorm(float64) Count() uint64 Min() int Size() int DocumentMatchPoolSize() int } type SearcherOptions struct { Explain bool IncludeTermVectors bool Score string } // SearchContext represents the context around a single search type SearchContext struct { DocumentMatchPool *DocumentMatchPool Collector Collector IndexReader index.IndexReader } func (sc *SearchContext) Size() int { sizeInBytes := reflectStaticSizeSearchContext + size.SizeOfPtr + reflectStaticSizeDocumentMatchPool + size.SizeOfPtr if sc.DocumentMatchPool != nil { for _, entry := range sc.DocumentMatchPool.avail { if entry != nil { sizeInBytes += entry.Size() } } } return sizeInBytes }