From 566177301894f4c56393948c2ca75ad3b717a80e Mon Sep 17 00:00:00 2001 From: guillep2k <18600385+guillep2k@users.noreply.github.com> Date: Thu, 15 Aug 2019 12:38:55 -0300 Subject: [PATCH] Remove unique filter from repo indexer analyzer. (#7878) * Remove unique filter from repo indexer analyzer. * Bump repoIndexerLatestVersion to 4 * Correct fmt * make vendor to remove unique dependency --- modules/indexer/repo.go | 5 +- .../bleve/analysis/token/unique/unique.go | 53 ------------------- vendor/modules.txt | 1 - 3 files changed, 2 insertions(+), 57 deletions(-) delete mode 100644 vendor/github.com/blevesearch/bleve/analysis/token/unique/unique.go diff --git a/modules/indexer/repo.go b/modules/indexer/repo.go index 645a6fc88b..91ed173aa7 100644 --- a/modules/indexer/repo.go +++ b/modules/indexer/repo.go @@ -13,7 +13,6 @@ import ( "github.com/blevesearch/bleve" "github.com/blevesearch/bleve/analysis/analyzer/custom" "github.com/blevesearch/bleve/analysis/token/lowercase" - "github.com/blevesearch/bleve/analysis/token/unique" "github.com/blevesearch/bleve/analysis/tokenizer/unicode" "github.com/blevesearch/bleve/search/query" "github.com/ethantkoenig/rupture" @@ -23,7 +22,7 @@ const ( repoIndexerAnalyzer = "repoIndexerAnalyzer" repoIndexerDocType = "repoIndexerDocType" - repoIndexerLatestVersion = 3 + repoIndexerLatestVersion = 4 ) // repoIndexer (thread-safe) index for repository contents @@ -110,7 +109,7 @@ func createRepoIndexer(path string, latestVersion int) error { "type": custom.Name, "char_filters": []string{}, "tokenizer": unicode.Name, - "token_filters": []string{unicodeNormalizeName, lowercase.Name, unique.Name}, + "token_filters": []string{unicodeNormalizeName, lowercase.Name}, }); err != nil { return err } diff --git a/vendor/github.com/blevesearch/bleve/analysis/token/unique/unique.go b/vendor/github.com/blevesearch/bleve/analysis/token/unique/unique.go deleted file mode 100644 index c60e8c9793..0000000000 --- 
a/vendor/github.com/blevesearch/bleve/analysis/token/unique/unique.go +++ /dev/null @@ -1,53 +0,0 @@ -// Copyright (c) 2018 Couchbase, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package unique - -import ( - "github.com/blevesearch/bleve/analysis" - "github.com/blevesearch/bleve/registry" -) - -const Name = "unique" - -// UniqueTermFilter retains only the tokens which mark the first occurrence of -// a term. Tokens whose term appears in a preceding token are dropped. 
-type UniqueTermFilter struct{} - -func NewUniqueTermFilter() *UniqueTermFilter { - return &UniqueTermFilter{} -} - -func (f *UniqueTermFilter) Filter(input analysis.TokenStream) analysis.TokenStream { - encounteredTerms := make(map[string]struct{}, len(input)/4) - j := 0 - for _, token := range input { - term := string(token.Term) - if _, ok := encounteredTerms[term]; ok { - continue - } - encounteredTerms[term] = struct{}{} - input[j] = token - j++ - } - return input[:j] -} - -func UniqueTermFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { - return NewUniqueTermFilter(), nil -} - -func init() { - registry.RegisterTokenFilter(Name, UniqueTermFilterConstructor) -} diff --git a/vendor/modules.txt b/vendor/modules.txt index 703f161a8a..0f1de7d856 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -26,7 +26,6 @@ github.com/blevesearch/bleve github.com/blevesearch/bleve/analysis/analyzer/custom github.com/blevesearch/bleve/analysis/token/lowercase github.com/blevesearch/bleve/analysis/token/unicodenorm -github.com/blevesearch/bleve/analysis/token/unique github.com/blevesearch/bleve/analysis/tokenizer/unicode github.com/blevesearch/bleve/index/upsidedown github.com/blevesearch/bleve/mapping