// Copyright (c) 2014 Couchbase, Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package search import ( "bytes" "encoding/json" "fmt" "math" "sort" "strings" "github.com/blevesearch/bleve/geo" "github.com/blevesearch/bleve/numeric" ) var HighTerm = strings.Repeat(string([]byte{0xff}), 10) var LowTerm = string([]byte{0x00}) type SearchSort interface { UpdateVisitor(field string, term []byte) Value(a *DocumentMatch) string Descending() bool RequiresDocID() bool RequiresScoring() bool RequiresFields() []string Reverse() Copy() SearchSort } func ParseSearchSortObj(input map[string]interface{}) (SearchSort, error) { descending, ok := input["desc"].(bool) by, ok := input["by"].(string) if !ok { return nil, fmt.Errorf("search sort must specify by") } switch by { case "id": return &SortDocID{ Desc: descending, }, nil case "score": return &SortScore{ Desc: descending, }, nil case "geo_distance": field, ok := input["field"].(string) if !ok { return nil, fmt.Errorf("search sort mode geo_distance must specify field") } lon, lat, foundLocation := geo.ExtractGeoPoint(input["location"]) if !foundLocation { return nil, fmt.Errorf("unable to parse geo_distance location") } rvd := &SortGeoDistance{ Field: field, Desc: descending, Lon: lon, Lat: lat, unitMult: 1.0, } if distUnit, ok := input["unit"].(string); ok { var err error rvd.unitMult, err = geo.ParseDistanceUnit(distUnit) if err != nil { return nil, err } rvd.Unit = distUnit } return rvd, nil case "field": field, ok := input["field"].(string) if !ok { return nil, fmt.Errorf("search sort mode field must specify field") } rv := &SortField{ Field: field, Desc: descending, } typ, ok := input["type"].(string) if ok { switch typ { case "auto": rv.Type = SortFieldAuto case "string": rv.Type = SortFieldAsString case "number": rv.Type = SortFieldAsNumber case "date": rv.Type = SortFieldAsDate default: return nil, fmt.Errorf("unknown sort field type: %s", typ) } } mode, ok := input["mode"].(string) if ok { switch mode { case "default": rv.Mode = SortFieldDefault case "min": rv.Mode = SortFieldMin case "max": rv.Mode = SortFieldMax default: return nil, fmt.Errorf("unknown sort field mode: %s", mode) } } missing, ok := input["missing"].(string) if ok { switch missing { case "first": rv.Missing = SortFieldMissingFirst case "last": rv.Missing = SortFieldMissingLast default: return nil, fmt.Errorf("unknown sort field missing: %s", missing) } } return rv, nil } return nil, fmt.Errorf("unknown search sort by: %s", by) } func ParseSearchSortString(input string) SearchSort { descending := false if strings.HasPrefix(input, "-") { descending = true input = input[1:] } else if strings.HasPrefix(input, "+") { input = input[1:] } if input == "_id" { return &SortDocID{ Desc: descending, } } else if input == "_score" { return &SortScore{ Desc: descending, } } return &SortField{ Field: input, Desc: descending, } } func ParseSearchSortJSON(input json.RawMessage) (SearchSort, error) { // first try to parse it as string var sortString string err := json.Unmarshal(input, &sortString) if err != nil { var sortObj map[string]interface{} err = json.Unmarshal(input, &sortObj) if err != nil { return nil, err } return ParseSearchSortObj(sortObj) } return ParseSearchSortString(sortString), nil } func ParseSortOrderStrings(in []string) SortOrder { rv := make(SortOrder, 0, len(in)) for _, i := range in { ss := ParseSearchSortString(i) rv = append(rv, ss) } return rv } func ParseSortOrderJSON(in []json.RawMessage) (SortOrder, error) { rv := make(SortOrder, 0, len(in)) for _, i := range in { ss, err := ParseSearchSortJSON(i) if err != nil { return nil, err } rv = append(rv, ss) } return rv, nil } type SortOrder []SearchSort func (so SortOrder) Value(doc *DocumentMatch) { for _, soi := range so { doc.Sort = append(doc.Sort, soi.Value(doc)) } } func (so SortOrder) UpdateVisitor(field string, term []byte) { for _, soi := range so { soi.UpdateVisitor(field, term) } } func (so SortOrder) Copy() SortOrder { rv := make(SortOrder, len(so)) for i, soi := range so { rv[i] = soi.Copy() } return rv } // Compare will compare two document matches using the specified sort order // if both are numbers, we avoid converting back to term func (so SortOrder) Compare(cachedScoring, cachedDesc []bool, i, j *DocumentMatch) int { // compare the documents on all search sorts until a differences is found for x := range so { c := 0 if cachedScoring[x] { if i.Score < j.Score { c = -1 } else if i.Score > j.Score { c = 1 } } else { iVal := i.Sort[x] jVal := j.Sort[x] c = strings.Compare(iVal, jVal) } if c == 0 { continue } if cachedDesc[x] { c = -c } return c } // if they are the same at this point, impose order based on index natural sort order if i.HitNumber == j.HitNumber { return 0 } else if i.HitNumber > j.HitNumber { return 1 } return -1 } func (so SortOrder) RequiresScore() bool { for _, soi := range so { if soi.RequiresScoring() { return true } } return false } func (so SortOrder) RequiresDocID() bool { for _, soi := range so { if soi.RequiresDocID() { return true } } return false } func (so SortOrder) RequiredFields() []string { var rv []string for _, soi := range so { rv = append(rv, soi.RequiresFields()...) } return rv } func (so SortOrder) CacheIsScore() []bool { rv := make([]bool, 0, len(so)) for _, soi := range so { rv = append(rv, soi.RequiresScoring()) } return rv } func (so SortOrder) CacheDescending() []bool { rv := make([]bool, 0, len(so)) for _, soi := range so { rv = append(rv, soi.Descending()) } return rv } func (so SortOrder) Reverse() { for _, soi := range so { soi.Reverse() } } // SortFieldType lets you control some internal sort behavior // normally leaving this to the zero-value of SortFieldAuto is fine type SortFieldType int const ( // SortFieldAuto applies heuristics attempt to automatically sort correctly SortFieldAuto SortFieldType = iota // SortFieldAsString forces sort as string (no prefix coded terms removed) SortFieldAsString // SortFieldAsNumber forces sort as string (prefix coded terms with shift > 0 removed) SortFieldAsNumber // SortFieldAsDate forces sort as string (prefix coded terms with shift > 0 removed) SortFieldAsDate ) // SortFieldMode describes the behavior if the field has multiple values type SortFieldMode int const ( // SortFieldDefault uses the first (or only) value, this is the default zero-value SortFieldDefault SortFieldMode = iota // FIXME name is confusing // SortFieldMin uses the minimum value SortFieldMin // SortFieldMax uses the maximum value SortFieldMax ) // SortFieldMissing controls where documents missing a field value should be sorted type SortFieldMissing int const ( // SortFieldMissingLast sorts documents missing a field at the end SortFieldMissingLast SortFieldMissing = iota // SortFieldMissingFirst sorts documents missing a field at the beginning SortFieldMissingFirst ) // SortField will sort results by the value of a stored field // Field is the name of the field // Descending reverse the sort order (default false) // Type allows forcing of string/number/date behavior (default auto) // Mode controls behavior for multi-values fields (default first) // Missing controls behavior of missing values (default last) type SortField struct { Field string Desc bool Type SortFieldType Mode SortFieldMode Missing SortFieldMissing values [][]byte tmp [][]byte } // UpdateVisitor notifies this sort field that in this document // this field has the specified term func (s *SortField) UpdateVisitor(field string, term []byte) { if field == s.Field { s.values = append(s.values, term) } } // Value returns the sort value of the DocumentMatch // it also resets the state of this SortField for // processing the next document func (s *SortField) Value(i *DocumentMatch) string { iTerms := s.filterTermsByType(s.values) iTerm := s.filterTermsByMode(iTerms) s.values = s.values[:0] return iTerm } // Descending determines the order of the sort func (s *SortField) Descending() bool { return s.Desc } func (s *SortField) filterTermsByMode(terms [][]byte) string { if len(terms) == 1 || (len(terms) > 1 && s.Mode == SortFieldDefault) { return string(terms[0]) } else if len(terms) > 1 { switch s.Mode { case SortFieldMin: sort.Sort(BytesSlice(terms)) return string(terms[0]) case SortFieldMax: sort.Sort(BytesSlice(terms)) return string(terms[len(terms)-1]) } } // handle missing terms if s.Missing == SortFieldMissingLast { if s.Desc { return LowTerm } return HighTerm } if s.Desc { return HighTerm } return LowTerm } // filterTermsByType attempts to make one pass on the terms // if we are in auto-mode AND all the terms look like prefix-coded numbers // return only the terms which had shift of 0 // if we are in explicit number or date mode, return only valid // prefix coded numbers with shift of 0 func (s *SortField) filterTermsByType(terms [][]byte) [][]byte { stype := s.Type if stype == SortFieldAuto { allTermsPrefixCoded := true termsWithShiftZero := s.tmp[:0] for _, term := range terms { valid, shift := numeric.ValidPrefixCodedTermBytes(term) if valid && shift == 0 { termsWithShiftZero = append(termsWithShiftZero, term) } else if !valid { allTermsPrefixCoded = false } } if allTermsPrefixCoded { terms = termsWithShiftZero s.tmp = termsWithShiftZero[:0] } } else if stype == SortFieldAsNumber || stype == SortFieldAsDate { termsWithShiftZero := s.tmp[:0] for _, term := range terms { valid, shift := numeric.ValidPrefixCodedTermBytes(term) if valid && shift == 0 { termsWithShiftZero = append(termsWithShiftZero, term) } } terms = termsWithShiftZero s.tmp = termsWithShiftZero[:0] } return terms } // RequiresDocID says this SearchSort does not require the DocID be loaded func (s *SortField) RequiresDocID() bool { return false } // RequiresScoring says this SearchStore does not require scoring func (s *SortField) RequiresScoring() bool { return false } // RequiresFields says this SearchStore requires the specified stored field func (s *SortField) RequiresFields() []string { return []string{s.Field} } func (s *SortField) MarshalJSON() ([]byte, error) { // see if simple format can be used if s.Missing == SortFieldMissingLast && s.Mode == SortFieldDefault && s.Type == SortFieldAuto { if s.Desc { return json.Marshal("-" + s.Field) } return json.Marshal(s.Field) } sfm := map[string]interface{}{ "by": "field", "field": s.Field, } if s.Desc { sfm["desc"] = true } if s.Missing > SortFieldMissingLast { switch s.Missing { case SortFieldMissingFirst: sfm["missing"] = "first" } } if s.Mode > SortFieldDefault { switch s.Mode { case SortFieldMin: sfm["mode"] = "min" case SortFieldMax: sfm["mode"] = "max" } } if s.Type > SortFieldAuto { switch s.Type { case SortFieldAsString: sfm["type"] = "string" case SortFieldAsNumber: sfm["type"] = "number" case SortFieldAsDate: sfm["type"] = "date" } } return json.Marshal(sfm) } func (s *SortField) Copy() SearchSort { rv := *s return &rv } func (s *SortField) Reverse() { s.Desc = !s.Desc if s.Missing == SortFieldMissingFirst { s.Missing = SortFieldMissingLast } else { s.Missing = SortFieldMissingFirst } } // SortDocID will sort results by the document identifier type SortDocID struct { Desc bool } // UpdateVisitor is a no-op for SortDocID as it's value // is not dependent on any field terms func (s *SortDocID) UpdateVisitor(field string, term []byte) { } // Value returns the sort value of the DocumentMatch func (s *SortDocID) Value(i *DocumentMatch) string { return i.ID } // Descending determines the order of the sort func (s *SortDocID) Descending() bool { return s.Desc } // RequiresDocID says this SearchSort does require the DocID be loaded func (s *SortDocID) RequiresDocID() bool { return true } // RequiresScoring says this SearchStore does not require scoring func (s *SortDocID) RequiresScoring() bool { return false } // RequiresFields says this SearchStore does not require any stored fields func (s *SortDocID) RequiresFields() []string { return nil } func (s *SortDocID) MarshalJSON() ([]byte, error) { if s.Desc { return json.Marshal("-_id") } return json.Marshal("_id") } func (s *SortDocID) Copy() SearchSort { rv := *s return &rv } func (s *SortDocID) Reverse() { s.Desc = !s.Desc } // SortScore will sort results by the document match score type SortScore struct { Desc bool } // UpdateVisitor is a no-op for SortScore as it's value // is not dependent on any field terms func (s *SortScore) UpdateVisitor(field string, term []byte) { } // Value returns the sort value of the DocumentMatch func (s *SortScore) Value(i *DocumentMatch) string { return "_score" } // Descending determines the order of the sort func (s *SortScore) Descending() bool { return s.Desc } // RequiresDocID says this SearchSort does not require the DocID be loaded func (s *SortScore) RequiresDocID() bool { return false } // RequiresScoring says this SearchStore does require scoring func (s *SortScore) RequiresScoring() bool { return true } // RequiresFields says this SearchStore does not require any store fields func (s *SortScore) RequiresFields() []string { return nil } func (s *SortScore) MarshalJSON() ([]byte, error) { if s.Desc { return json.Marshal("-_score") } return json.Marshal("_score") } func (s *SortScore) Copy() SearchSort { rv := *s return &rv } func (s *SortScore) Reverse() { s.Desc = !s.Desc } var maxDistance = string(numeric.MustNewPrefixCodedInt64(math.MaxInt64, 0)) // NewSortGeoDistance creates SearchSort instance for sorting documents by // their distance from the specified point. func NewSortGeoDistance(field, unit string, lon, lat float64, desc bool) ( *SortGeoDistance, error) { rv := &SortGeoDistance{ Field: field, Desc: desc, Unit: unit, Lon: lon, Lat: lat, } var err error rv.unitMult, err = geo.ParseDistanceUnit(unit) if err != nil { return nil, err } return rv, nil } // SortGeoDistance will sort results by the distance of an // indexed geo point, from the provided location. // Field is the name of the field // Descending reverse the sort order (default false) type SortGeoDistance struct { Field string Desc bool Unit string values []string Lon float64 Lat float64 unitMult float64 } // UpdateVisitor notifies this sort field that in this document // this field has the specified term func (s *SortGeoDistance) UpdateVisitor(field string, term []byte) { if field == s.Field { s.values = append(s.values, string(term)) } } // Value returns the sort value of the DocumentMatch // it also resets the state of this SortField for // processing the next document func (s *SortGeoDistance) Value(i *DocumentMatch) string { iTerms := s.filterTermsByType(s.values) iTerm := s.filterTermsByMode(iTerms) s.values = s.values[:0] if iTerm == "" { return maxDistance } i64, err := numeric.PrefixCoded(iTerm).Int64() if err != nil { return maxDistance } docLon := geo.MortonUnhashLon(uint64(i64)) docLat := geo.MortonUnhashLat(uint64(i64)) dist := geo.Haversin(s.Lon, s.Lat, docLon, docLat) // dist is returned in km, so convert to m dist *= 1000 if s.unitMult != 0 { dist /= s.unitMult } distInt64 := numeric.Float64ToInt64(dist) return string(numeric.MustNewPrefixCodedInt64(distInt64, 0)) } // Descending determines the order of the sort func (s *SortGeoDistance) Descending() bool { return s.Desc } func (s *SortGeoDistance) filterTermsByMode(terms []string) string { if len(terms) >= 1 { return terms[0] } return "" } // filterTermsByType attempts to make one pass on the terms // return only valid prefix coded numbers with shift of 0 func (s *SortGeoDistance) filterTermsByType(terms []string) []string { var termsWithShiftZero []string for _, term := range terms { valid, shift := numeric.ValidPrefixCodedTerm(term) if valid && shift == 0 { termsWithShiftZero = append(termsWithShiftZero, term) } } return termsWithShiftZero } // RequiresDocID says this SearchSort does not require the DocID be loaded func (s *SortGeoDistance) RequiresDocID() bool { return false } // RequiresScoring says this SearchStore does not require scoring func (s *SortGeoDistance) RequiresScoring() bool { return false } // RequiresFields says this SearchStore requires the specified stored field func (s *SortGeoDistance) RequiresFields() []string { return []string{s.Field} } func (s *SortGeoDistance) MarshalJSON() ([]byte, error) { sfm := map[string]interface{}{ "by": "geo_distance", "field": s.Field, "location": map[string]interface{}{ "lon": s.Lon, "lat": s.Lat, }, } if s.Unit != "" { sfm["unit"] = s.Unit } if s.Desc { sfm["desc"] = true } return json.Marshal(sfm) } func (s *SortGeoDistance) Copy() SearchSort { rv := *s return &rv } func (s *SortGeoDistance) Reverse() { s.Desc = !s.Desc } type BytesSlice [][]byte func (p BytesSlice) Len() int { return len(p) } func (p BytesSlice) Less(i, j int) bool { return bytes.Compare(p[i], p[j]) < 0 } func (p BytesSlice) Swap(i, j int) { p[i], p[j] = p[j], p[i] }