Add wildcard support to REPO_INDEXER_EXTENSIONS

pull/7767/head
Guillermo Prandi 2019-08-07 01:26:12 -03:00
parent 021014acfe
commit 72a650c8e4
6 changed files with 42 additions and 29 deletions

View File

@ -177,8 +177,8 @@ Values containing `#` or `;` must be quoted using `` ` `` or `"""`.
- `REPO_INDEXER_ENABLED`: **false**: Enables code search (uses a lot of disk space, about 6 times more than the repository size). - `REPO_INDEXER_ENABLED`: **false**: Enables code search (uses a lot of disk space, about 6 times more than the repository size).
- `REPO_INDEXER_PATH`: **indexers/repos.bleve**: Index file used for code search. - `REPO_INDEXER_PATH`: **indexers/repos.bleve**: Index file used for code search.
- `REPO_INDEXER_EXTENSIONS`: **empty**: A comma separated list of file extensions to exclude from the index; a \`.' matches files with no extension. An empty list means do not exclude any files. - `REPO_INDEXER_PATTERNS`: **empty**: A comma separated list of file name patterns (see https://github.com/gobwas/glob) to **exclude** from the index. An empty list means do not exclude any files. Use `**.txt` to match any file with the `.txt` extension.
- `REPO_EXTENSIONS_LIST_INCLUDE`: **false**: If true, `REPO_INDEXER_EXTENSIONS` are the file extensions to include rather than exclude from the index. - `REPO_PATTERNS_INCLUDE`: **false**: If true, `REPO_INDEXER_PATTERNS` are the file name patterns to **include** rather than exclude from the index.
- `UPDATE_BUFFER_LEN`: **20**: Buffer length of index request. - `UPDATE_BUFFER_LEN`: **20**: Buffer length of index request.
- `MAX_FILE_SIZE`: **1048576**: Maximum size in bytes of files to be indexed. - `MAX_FILE_SIZE`: **1048576**: Maximum size in bytes of files to be indexed.

1
go.mod
View File

@ -54,6 +54,7 @@ require (
github.com/go-redis/redis v6.15.2+incompatible github.com/go-redis/redis v6.15.2+incompatible
github.com/go-sql-driver/mysql v1.4.1 github.com/go-sql-driver/mysql v1.4.1
github.com/go-xorm/xorm v0.7.4 github.com/go-xorm/xorm v0.7.4
github.com/gobwas/glob v0.2.3
github.com/gogits/chardet v0.0.0-20150115103509-2404f7772561 github.com/gogits/chardet v0.0.0-20150115103509-2404f7772561
github.com/gogs/cron v0.0.0-20171120032916-9f6c956d3e14 github.com/gogs/cron v0.0.0-20171120032916-9f6c956d3e14
github.com/google/go-github/v24 v24.0.1 github.com/google/go-github/v24 v24.0.1

2
go.sum
View File

@ -145,6 +145,8 @@ github.com/go-xorm/sqlfiddle v0.0.0-20180821085327-62ce714f951a h1:9wScpmSP5A3Bk
github.com/go-xorm/sqlfiddle v0.0.0-20180821085327-62ce714f951a/go.mod h1:56xuuqnHyryaerycW3BfssRdxQstACi0Epw/yC5E2xM= github.com/go-xorm/sqlfiddle v0.0.0-20180821085327-62ce714f951a/go.mod h1:56xuuqnHyryaerycW3BfssRdxQstACi0Epw/yC5E2xM=
github.com/go-xorm/xorm v0.7.4 h1:g/NgC590SzqV5VKmdRDNe/K3Holw3YJUCXX28r+rFGw= github.com/go-xorm/xorm v0.7.4 h1:g/NgC590SzqV5VKmdRDNe/K3Holw3YJUCXX28r+rFGw=
github.com/go-xorm/xorm v0.7.4/go.mod h1:vpza5fydeRgt+stvo9qgMhSNohYqmNt0I1/D6hkCekA= github.com/go-xorm/xorm v0.7.4/go.mod h1:vpza5fydeRgt+stvo9qgMhSNohYqmNt0I1/D6hkCekA=
github.com/gobwas/glob v0.2.3 h1:A4xDbljILXROh+kObIiy5kIaPYD8e96x1tgBhUI5J+Y=
github.com/gobwas/glob v0.2.3/go.mod h1:d3Ez4x06l9bZtSvzIay5+Yzi0fmZzPgnTbPcKjJAkT8=
github.com/gogits/chardet v0.0.0-20150115103509-2404f7772561 h1:deE7ritpK04PgtpyVOS2TYcQEld9qLCD5b5EbVNOuLA= github.com/gogits/chardet v0.0.0-20150115103509-2404f7772561 h1:deE7ritpK04PgtpyVOS2TYcQEld9qLCD5b5EbVNOuLA=
github.com/gogits/chardet v0.0.0-20150115103509-2404f7772561/go.mod h1:YgYOrVn3Nj9Tq0EvjmFbphRytDj7JNRoWSStJZWDJTQ= github.com/gogits/chardet v0.0.0-20150115103509-2404f7772561/go.mod h1:YgYOrVn3Nj9Tq0EvjmFbphRytDj7JNRoWSStJZWDJTQ=
github.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ= github.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ=

View File

@ -232,14 +232,16 @@ func addDelete(filename string, repo *Repository, batch rupture.FlushingBatch) e
} }
func isIndexable(entry *git.TreeEntry) bool { func isIndexable(entry *git.TreeEntry) bool {
if setting.Indexer.FileExtensions != nil { if setting.Indexer.FilePatterns != nil {
var ext string var found bool
parts := strings.Split(entry.Name(), ".") name := strings.ToLower(entry.Name())
cnt := len(parts) for _, g := range setting.Indexer.FilePatterns {
if cnt > 1 { if g.Match(name) {
ext = strings.ToLower(parts[cnt-1]) found = true
break
}
} }
if setting.Indexer.FileExtensions[ext] != setting.Indexer.IncludeExtensions { if found != setting.Indexer.IncludePatterns {
return false return false
} }
} }

View File

@ -8,6 +8,10 @@ import (
"path" "path"
"path/filepath" "path/filepath"
"strings" "strings"
"code.gitea.io/gitea/modules/log"
"github.com/gobwas/glob"
) )
// enumerates all the indexer queue types // enumerates all the indexer queue types
@ -30,8 +34,8 @@ var (
IssueQueueDir string IssueQueueDir string
IssueQueueConnStr string IssueQueueConnStr string
IssueQueueBatchNumber int IssueQueueBatchNumber int
FileExtensions map[string]bool FilePatterns []glob.Glob
IncludeExtensions bool IncludePatterns bool
}{ }{
IssueType: "bleve", IssueType: "bleve",
IssuePath: "indexers/issues.bleve", IssuePath: "indexers/issues.bleve",
@ -54,8 +58,8 @@ func newIndexerService() {
if !filepath.IsAbs(Indexer.RepoPath) { if !filepath.IsAbs(Indexer.RepoPath) {
Indexer.RepoPath = path.Join(AppWorkPath, Indexer.RepoPath) Indexer.RepoPath = path.Join(AppWorkPath, Indexer.RepoPath)
} }
Indexer.FileExtensions = extensionsFromString(sec.Key("REPO_INDEXER_EXTENSIONS").MustString("")) Indexer.FilePatterns = extensionsFromString(sec.Key("REPO_INDEXER_PATTERNS").MustString(""))
Indexer.IncludeExtensions = sec.Key("REPO_EXTENSIONS_LIST_INCLUDE").MustBool(false) Indexer.IncludePatterns = sec.Key("REPO_PATTERNS_INCLUDE").MustBool(false)
Indexer.UpdateQueueLength = sec.Key("UPDATE_BUFFER_LEN").MustInt(20) Indexer.UpdateQueueLength = sec.Key("UPDATE_BUFFER_LEN").MustInt(20)
Indexer.MaxIndexerFileSize = sec.Key("MAX_FILE_SIZE").MustInt64(1024 * 1024) Indexer.MaxIndexerFileSize = sec.Key("MAX_FILE_SIZE").MustInt64(1024 * 1024)
@ -65,25 +69,20 @@ func newIndexerService() {
Indexer.IssueQueueBatchNumber = sec.Key("ISSUE_INDEXER_QUEUE_BATCH_NUMBER").MustInt(20) Indexer.IssueQueueBatchNumber = sec.Key("ISSUE_INDEXER_QUEUE_BATCH_NUMBER").MustInt(20)
} }
func extensionsFromString(from string) map[string]bool { func extensionsFromString(from string) []glob.Glob {
extmap := make(map[string]bool) extarr := make([]glob.Glob, 0, 10)
for _, ext := range strings.Split(strings.ToLower(from), ",") { for _, expr := range strings.Split(strings.ToLower(from), ",") {
ext = strings.TrimSpace(ext) expr = strings.TrimSpace(expr)
// Accept *.txt, .txt and txt. Also use . to mean no ext if expr != "" {
if strings.HasPrefix(ext, "*.") { if g, err := glob.Compile(expr, '.', '/'); err != nil {
ext = ext[1:] log.Trace("Index file extensions: '%s': bad pattern: %v", expr, err)
} } else {
if ext == "." { extarr = append(extarr, g)
extmap[""] = true
} else {
ext = strings.TrimPrefix(ext, ".")
if ext != "" {
extmap[ext] = true
} }
} }
} }
if len(extmap) == 0 { if len(extarr) == 0 {
return nil return nil
} }
return extmap return extarr
} }

9
vendor/modules.txt vendored
View File

@ -158,6 +158,15 @@ github.com/go-redis/redis/internal/util
github.com/go-sql-driver/mysql github.com/go-sql-driver/mysql
# github.com/go-xorm/xorm v0.7.4 # github.com/go-xorm/xorm v0.7.4
github.com/go-xorm/xorm github.com/go-xorm/xorm
# github.com/gobwas/glob v0.2.3
github.com/gobwas/glob
github.com/gobwas/glob/compiler
github.com/gobwas/glob/syntax
github.com/gobwas/glob/match
github.com/gobwas/glob/syntax/ast
github.com/gobwas/glob/util/runes
github.com/gobwas/glob/syntax/lexer
github.com/gobwas/glob/util/strings
# github.com/gogits/chardet v0.0.0-20150115103509-2404f7772561 # github.com/gogits/chardet v0.0.0-20150115103509-2404f7772561
github.com/gogits/chardet github.com/gogits/chardet
# github.com/gogs/cron v0.0.0-20171120032916-9f6c956d3e14 # github.com/gogs/cron v0.0.0-20171120032916-9f6c956d3e14