Add wildcard support to REPO_INDEXER_EXTENSIONS

This commit is contained in:
Guillermo Prandi 2019-08-07 01:26:12 -03:00
parent 021014acfe
commit 72a650c8e4
6 changed files with 42 additions and 29 deletions

View File

@ -177,8 +177,8 @@ Values containing `#` or `;` must be quoted using `` ` `` or `"""`.
- `REPO_INDEXER_ENABLED`: **false**: Enables code search (uses a lot of disk space, about 6 times more than the repository size).
- `REPO_INDEXER_PATH`: **indexers/repos.bleve**: Index file used for code search.
- `REPO_INDEXER_EXTENSIONS`: **empty**: A comma-separated list of file extensions to exclude from the index; a `.` matches files with no extension. An empty list means do not exclude any files.
- `REPO_EXTENSIONS_LIST_INCLUDE`: **false**: If true, `REPO_INDEXER_EXTENSIONS` are the file extensions to include rather than exclude from the index.
- `REPO_INDEXER_PATTERNS`: **empty**: A comma-separated list of file name patterns (see https://github.com/gobwas/glob) to **exclude** from the index. An empty list means do not exclude any files. Use `**.txt` to match any file with a .txt extension.
- `REPO_PATTERNS_INCLUDE`: **false**: If true, `REPO_INDEXER_PATTERNS` are the file name patterns to **include** rather than exclude from the index.
- `UPDATE_BUFFER_LEN`: **20**: Buffer length of index request.
- `MAX_FILE_SIZE`: **1048576**: Maximum size in bytes of files to be indexed.

1
go.mod
View File

@ -54,6 +54,7 @@ require (
github.com/go-redis/redis v6.15.2+incompatible
github.com/go-sql-driver/mysql v1.4.1
github.com/go-xorm/xorm v0.7.4
github.com/gobwas/glob v0.2.3
github.com/gogits/chardet v0.0.0-20150115103509-2404f7772561
github.com/gogs/cron v0.0.0-20171120032916-9f6c956d3e14
github.com/google/go-github/v24 v24.0.1

2
go.sum
View File

@ -145,6 +145,8 @@ github.com/go-xorm/sqlfiddle v0.0.0-20180821085327-62ce714f951a h1:9wScpmSP5A3Bk
github.com/go-xorm/sqlfiddle v0.0.0-20180821085327-62ce714f951a/go.mod h1:56xuuqnHyryaerycW3BfssRdxQstACi0Epw/yC5E2xM=
github.com/go-xorm/xorm v0.7.4 h1:g/NgC590SzqV5VKmdRDNe/K3Holw3YJUCXX28r+rFGw=
github.com/go-xorm/xorm v0.7.4/go.mod h1:vpza5fydeRgt+stvo9qgMhSNohYqmNt0I1/D6hkCekA=
github.com/gobwas/glob v0.2.3 h1:A4xDbljILXROh+kObIiy5kIaPYD8e96x1tgBhUI5J+Y=
github.com/gobwas/glob v0.2.3/go.mod h1:d3Ez4x06l9bZtSvzIay5+Yzi0fmZzPgnTbPcKjJAkT8=
github.com/gogits/chardet v0.0.0-20150115103509-2404f7772561 h1:deE7ritpK04PgtpyVOS2TYcQEld9qLCD5b5EbVNOuLA=
github.com/gogits/chardet v0.0.0-20150115103509-2404f7772561/go.mod h1:YgYOrVn3Nj9Tq0EvjmFbphRytDj7JNRoWSStJZWDJTQ=
github.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ=

View File

@ -232,14 +232,16 @@ func addDelete(filename string, repo *Repository, batch rupture.FlushingBatch) e
}
func isIndexable(entry *git.TreeEntry) bool {
if setting.Indexer.FileExtensions != nil {
var ext string
parts := strings.Split(entry.Name(), ".")
cnt := len(parts)
if cnt > 1 {
ext = strings.ToLower(parts[cnt-1])
if setting.Indexer.FilePatterns != nil {
var found bool
name := strings.ToLower(entry.Name())
for _, g := range setting.Indexer.FilePatterns {
if g.Match(name) {
found = true
break
}
if setting.Indexer.FileExtensions[ext] != setting.Indexer.IncludeExtensions {
}
if found != setting.Indexer.IncludePatterns {
return false
}
}

View File

@ -8,6 +8,10 @@ import (
"path"
"path/filepath"
"strings"
"code.gitea.io/gitea/modules/log"
"github.com/gobwas/glob"
)
// enumerates all the indexer queue types
@ -30,8 +34,8 @@ var (
IssueQueueDir string
IssueQueueConnStr string
IssueQueueBatchNumber int
FileExtensions map[string]bool
IncludeExtensions bool
FilePatterns []glob.Glob
IncludePatterns bool
}{
IssueType: "bleve",
IssuePath: "indexers/issues.bleve",
@ -54,8 +58,8 @@ func newIndexerService() {
if !filepath.IsAbs(Indexer.RepoPath) {
Indexer.RepoPath = path.Join(AppWorkPath, Indexer.RepoPath)
}
Indexer.FileExtensions = extensionsFromString(sec.Key("REPO_INDEXER_EXTENSIONS").MustString(""))
Indexer.IncludeExtensions = sec.Key("REPO_EXTENSIONS_LIST_INCLUDE").MustBool(false)
Indexer.FilePatterns = extensionsFromString(sec.Key("REPO_INDEXER_PATTERNS").MustString(""))
Indexer.IncludePatterns = sec.Key("REPO_PATTERNS_INCLUDE").MustBool(false)
Indexer.UpdateQueueLength = sec.Key("UPDATE_BUFFER_LEN").MustInt(20)
Indexer.MaxIndexerFileSize = sec.Key("MAX_FILE_SIZE").MustInt64(1024 * 1024)
@ -65,25 +69,20 @@ func newIndexerService() {
Indexer.IssueQueueBatchNumber = sec.Key("ISSUE_INDEXER_QUEUE_BATCH_NUMBER").MustInt(20)
}
func extensionsFromString(from string) map[string]bool {
extmap := make(map[string]bool)
for _, ext := range strings.Split(strings.ToLower(from), ",") {
ext = strings.TrimSpace(ext)
// Accept *.txt, .txt and txt. Also use . to mean no ext
if strings.HasPrefix(ext, "*.") {
ext = ext[1:]
}
if ext == "." {
extmap[""] = true
func extensionsFromString(from string) []glob.Glob {
extarr := make([]glob.Glob, 0, 10)
for _, expr := range strings.Split(strings.ToLower(from), ",") {
expr = strings.TrimSpace(expr)
if expr != "" {
if g, err := glob.Compile(expr, '.', '/'); err != nil {
log.Trace("Index file extensions: '%s': bad pattern: %v", expr, err)
} else {
ext = strings.TrimPrefix(ext, ".")
if ext != "" {
extmap[ext] = true
extarr = append(extarr, g)
}
}
}
if len(extmap) == 0 {
if len(extarr) == 0 {
return nil
}
return extmap
return extarr
}

9
vendor/modules.txt vendored
View File

@ -158,6 +158,15 @@ github.com/go-redis/redis/internal/util
github.com/go-sql-driver/mysql
# github.com/go-xorm/xorm v0.7.4
github.com/go-xorm/xorm
# github.com/gobwas/glob v0.2.3
github.com/gobwas/glob
github.com/gobwas/glob/compiler
github.com/gobwas/glob/syntax
github.com/gobwas/glob/match
github.com/gobwas/glob/syntax/ast
github.com/gobwas/glob/util/runes
github.com/gobwas/glob/syntax/lexer
github.com/gobwas/glob/util/strings
# github.com/gogits/chardet v0.0.0-20150115103509-2404f7772561
github.com/gogits/chardet
# github.com/gogs/cron v0.0.0-20171120032916-9f6c956d3e14