mirror of https://github.com/go-gitea/gitea.git
Exclude generated files from language statistics (#11653)
* Update go-enry to v2.5.2pull/11657/head^2
parent
e8955173a9
commit
bd2335671f
2
go.mod
2
go.mod
|
@ -37,7 +37,7 @@ require (
|
|||
github.com/facebookgo/subset v0.0.0-20150612182917-8dac2c3c4870 // indirect
|
||||
github.com/gliderlabs/ssh v0.2.2
|
||||
github.com/glycerine/go-unsnap-stream v0.0.0-20190901134440-81cf024a9e0a // indirect
|
||||
github.com/go-enry/go-enry/v2 v2.3.0
|
||||
github.com/go-enry/go-enry/v2 v2.5.2
|
||||
github.com/go-git/go-billy/v5 v5.0.0
|
||||
github.com/go-git/go-git/v5 v5.0.0
|
||||
github.com/go-openapi/jsonreference v0.19.3 // indirect
|
||||
|
|
12
go.sum
12
go.sum
|
@ -193,10 +193,10 @@ github.com/glycerine/go-unsnap-stream v0.0.0-20190901134440-81cf024a9e0a h1:FQqo
|
|||
github.com/glycerine/go-unsnap-stream v0.0.0-20190901134440-81cf024a9e0a/go.mod h1:/20jfyN9Y5QPEAprSgKAUr+glWDY39ZiUEAYOEv5dsE=
|
||||
github.com/glycerine/goconvey v0.0.0-20190410193231-58a59202ab31 h1:gclg6gY70GLy3PbkQ1AERPfmLMMagS60DKF78eWwLn8=
|
||||
github.com/glycerine/goconvey v0.0.0-20190410193231-58a59202ab31/go.mod h1:Ogl1Tioa0aV7gstGFO7KhffUsb9M4ydbEbbxpcEDc24=
|
||||
github.com/go-enry/go-enry/v2 v2.3.0 h1:o8KwgY6uSplysrIpj+Y42J/xGPp90ogVpxE2Z3s8Unk=
|
||||
github.com/go-enry/go-enry/v2 v2.3.0/go.mod h1:+xFJwbqWi15bvqFHb2ELUWVRKFQtwB61+sDrkvvxxGI=
|
||||
github.com/go-enry/go-oniguruma v1.2.0 h1:oBO9XC1IDT9+AoWW5oFsa/7gFeOPacEqDbyXZKWXuDs=
|
||||
github.com/go-enry/go-oniguruma v1.2.0/go.mod h1:bWDhYP+S6xZQgiRL7wlTScFYBe023B6ilRZbCAD5Hf4=
|
||||
github.com/go-enry/go-enry/v2 v2.5.2 h1:3f3PFAO6JitWkPi1GQ5/m6Xu4gNL1U5soJ8QaYqJ0YQ=
|
||||
github.com/go-enry/go-enry/v2 v2.5.2/go.mod h1:GVzIiAytiS5uT/QiuakK7TF1u4xDab87Y8V5EJRpsIQ=
|
||||
github.com/go-enry/go-oniguruma v1.2.1 h1:k8aAMuJfMrqm/56SG2lV9Cfti6tC4x8673aHCcBk+eo=
|
||||
github.com/go-enry/go-oniguruma v1.2.1/go.mod h1:bWDhYP+S6xZQgiRL7wlTScFYBe023B6ilRZbCAD5Hf4=
|
||||
github.com/go-git/gcfg v1.5.0 h1:Q5ViNfGF8zFgyJWPqYwA7qGFoMTEiBmdlkcfRmpIMa4=
|
||||
github.com/go-git/gcfg v1.5.0/go.mod h1:5m20vg6GwYabIxaOonVkTdrILxQMpEShl1xiMF4ua+E=
|
||||
github.com/go-git/go-billy/v5 v5.0.0 h1:7NQHvd9FVid8VL4qVUMm8XifBK+2xCoZ2lSk0agRrHM=
|
||||
|
@ -616,8 +616,6 @@ github.com/tinylib/msgp v1.1.0/go.mod h1:+d+yLhGm8mzTaHzB+wgMYrodPfmZrzkirds8fDW
|
|||
github.com/tinylib/msgp v1.1.2 h1:gWmO7n0Ys2RBEb7GPYB9Ujq8Mk5p2U08lRnmMcGy6BQ=
|
||||
github.com/tinylib/msgp v1.1.2/go.mod h1:+d+yLhGm8mzTaHzB+wgMYrodPfmZrzkirds8fDWklFE=
|
||||
github.com/tmc/grpc-websocket-proxy v0.0.0-20190109142713-0ad062ec5ee5/go.mod h1:ncp9v5uamzpCO7NfCPTXjqaC+bZgJeR0sMTm6dMHP7U=
|
||||
github.com/toqueteos/trie v1.0.0 h1:8i6pXxNUXNRAqP246iibb7w/pSFquNTQ+uNfriG7vlk=
|
||||
github.com/toqueteos/trie v1.0.0/go.mod h1:Ywk48QhEqhU1+DwhMkJ2x7eeGxDHiGkAdc9+0DYcbsM=
|
||||
github.com/toqueteos/webbrowser v1.2.0 h1:tVP/gpK69Fx+qMJKsLE7TD8LuGWPnEV71wBN9rrstGQ=
|
||||
github.com/toqueteos/webbrowser v1.2.0/go.mod h1:XWoZq4cyp9WeUeak7w7LXRUQf1F1ATJMir8RTqb4ayM=
|
||||
github.com/tstranex/u2f v1.0.0 h1:HhJkSzDDlVSVIVt7pDJwCHQj67k7A5EeBgPmeD+pVsQ=
|
||||
|
@ -876,8 +874,6 @@ gopkg.in/testfixtures.v2 v2.5.0 h1:N08B7l2GzFQenyYbzqthDnKAA+cmb17iAZhhFxr7JHw=
|
|||
gopkg.in/testfixtures.v2 v2.5.0/go.mod h1:vyAq+MYCgNpR29qitQdLZhdbLFf4mR/2MFJRFoQZZ2M=
|
||||
gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 h1:uRGJdciOHaEIrze2W8Q3AKkepLTh2hOroT7a+7czfdQ=
|
||||
gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw=
|
||||
gopkg.in/toqueteos/substring.v1 v1.0.2 h1:urLqCeMm6x/eTuQa1oZerNw8N1KNOIp5hD5kGL7lFsE=
|
||||
gopkg.in/toqueteos/substring.v1 v1.0.2/go.mod h1:Eb2Z1UYehlVK8LYW2WBVR2rwbujsz3aX8XDrM1vbNew=
|
||||
gopkg.in/warnings.v0 v0.1.2 h1:wFXVbFY8DY5/xOe1ECiWdKCzZlxgshcYVNkBHstARME=
|
||||
gopkg.in/warnings.v0 v0.1.2/go.mod h1:jksf8JmL6Qr/oQM2OXTHunEvvTAsrWBLb6OOjuVWRNI=
|
||||
gopkg.in/yaml.v2 v2.0.0-20170812160011-eb3733d160e7/go.mod h1:JAlM8MvJe8wmxCU4Bli9HhUf9+ttbYbLASfIpnQbh74=
|
||||
|
|
|
@ -10,8 +10,8 @@ import (
|
|||
"github.com/go-enry/go-enry/v2"
|
||||
)
|
||||
|
||||
// GetCodeLanguageWithCallback detects code language based on file name and content using callback
|
||||
func GetCodeLanguageWithCallback(filename string, contentFunc func() ([]byte, error)) string {
|
||||
// GetCodeLanguage detects code language based on file name and content
|
||||
func GetCodeLanguage(filename string, content []byte) string {
|
||||
if language, ok := enry.GetLanguageByExtension(filename); ok {
|
||||
return language
|
||||
}
|
||||
|
@ -20,17 +20,9 @@ func GetCodeLanguageWithCallback(filename string, contentFunc func() ([]byte, er
|
|||
return language
|
||||
}
|
||||
|
||||
content, err := contentFunc()
|
||||
if err != nil {
|
||||
if len(content) == 0 {
|
||||
return enry.OtherLanguage
|
||||
}
|
||||
|
||||
return enry.GetLanguage(filepath.Base(filename), content)
|
||||
}
|
||||
|
||||
// GetCodeLanguage detects code language based on file name and content
|
||||
func GetCodeLanguage(filename string, content []byte) string {
|
||||
return GetCodeLanguageWithCallback(filename, func() ([]byte, error) {
|
||||
return content, nil
|
||||
})
|
||||
}
|
||||
|
|
|
@ -50,11 +50,15 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]float32, e
|
|||
return nil
|
||||
}
|
||||
|
||||
// If content can not be read just do detection by filename
|
||||
content, _ := readFile(f, fileSizeLimit)
|
||||
if enry.IsGenerated(f.Name, content) {
|
||||
return nil
|
||||
}
|
||||
|
||||
// TODO: Use .gitattributes file for linguist overrides
|
||||
|
||||
language := analyze.GetCodeLanguageWithCallback(f.Name, func() ([]byte, error) {
|
||||
return readFile(f, fileSizeLimit)
|
||||
})
|
||||
language := analyze.GetCodeLanguage(f.Name, content)
|
||||
if language == enry.OtherLanguage || language == "" {
|
||||
return nil
|
||||
}
|
||||
|
|
|
@ -1,26 +1,26 @@
|
|||
# go-enry [](https://pkg.go.dev/github.com/go-enry/go-enry/v2) [](https://github.com/go-enry/go-enry/actions?query=workflow%3ATest+branch%3Amaster) [](https://codecov.io/gh/go-enry/go-enry)
|
||||
|
||||
Programming language detector and toolbox to ignore binary or vendored files. *enry*, started as a port to _Go_ of the original [Linguist](https://github.com/github/linguist) _Ruby_ library, that has an improved *2x performance*.
|
||||
Programming language detector and toolbox to ignore binary or vendored files. _enry_, started as a port to _Go_ of the original [Linguist](https://github.com/github/linguist) _Ruby_ library, that has an improved _2x performance_.
|
||||
|
||||
* [CLI](#cli)
|
||||
* [Library](#library)
|
||||
* [Use cases](#use-cases)
|
||||
* [By filename](#by-filename)
|
||||
* [By text](#by-text)
|
||||
* [By file](#by-file)
|
||||
* [Filtering](#filtering-vendoring-binaries-etc)
|
||||
* [Coloring](#language-colors-and-groups)
|
||||
* [Languages](#languages)
|
||||
* [Go](#go)
|
||||
* [Java bindings](#java-bindings)
|
||||
* [Python bindings](#python-bindings)
|
||||
* [Divergences from linguist](#divergences-from-linguist)
|
||||
* [Benchmarks](#benchmarks)
|
||||
* [Why Enry?](#why-enry)
|
||||
* [Development](#development)
|
||||
* [Sync with github/linguist upstream](#sync-with-githublinguist-upstream)
|
||||
* [Misc](#misc)
|
||||
* [License](#license)
|
||||
- [CLI](#cli)
|
||||
- [Library](#library)
|
||||
- [Use cases](#use-cases)
|
||||
- [By filename](#by-filename)
|
||||
- [By text](#by-text)
|
||||
- [By file](#by-file)
|
||||
- [Filtering](#filtering-vendoring-binaries-etc)
|
||||
- [Coloring](#language-colors-and-groups)
|
||||
- [Languages](#languages)
|
||||
- [Go](#go)
|
||||
- [Java bindings](#java-bindings)
|
||||
- [Python bindings](#python-bindings)
|
||||
- [Divergences from linguist](#divergences-from-linguist)
|
||||
- [Benchmarks](#benchmarks)
|
||||
- [Why Enry?](#why-enry)
|
||||
- [Development](#development)
|
||||
- [Sync with github/linguist upstream](#sync-with-githublinguist-upstream)
|
||||
- [Misc](#misc)
|
||||
- [License](#license)
|
||||
|
||||
# CLI
|
||||
|
||||
|
@ -28,50 +28,62 @@ The CLI binary is hosted in a separate repository [go-enry/enry](https://github.
|
|||
|
||||
# Library
|
||||
|
||||
*enry* is also a Go library for guessing a programming language that exposes API through FFI to multiple programming environments.
|
||||
_enry_ is also a Go library for guessing a programming language that exposes API through FFI to multiple programming environments.
|
||||
|
||||
## Use cases
|
||||
|
||||
*enry* guesses a programming language using a sequence of matching *strategies* that are
|
||||
applied progressively to narrow down the possible options. Each *strategy* varies on the type
|
||||
_enry_ guesses a programming language using a sequence of matching _strategies_ that are
|
||||
applied progressively to narrow down the possible options. Each _strategy_ varies on the type
|
||||
of input data that it needs to make a decision: file name, extension, the first line of the file, the full content of the file, etc.
|
||||
|
||||
Depending on available input data, enry API can be roughly divided into the next categories or use cases.
|
||||
|
||||
### By filename
|
||||
Next functions require only a name of the file to make a guess:
|
||||
- `GetLanguageByExtension` uses only file extension (wich may be ambiguous)
|
||||
- `GetLanguageByFilename` useful for cases like `.gitignore`, `.bashrc`, etc
|
||||
- all [filtering helpers](#filtering)
|
||||
|
||||
Please note that such guesses are expected not to be very accurate.
|
||||
Next functions require only a name of the file to make a guess:
|
||||
|
||||
- `GetLanguageByExtension` uses only file extension (wich may be ambiguous)
|
||||
- `GetLanguageByFilename` useful for cases like `.gitignore`, `.bashrc`, etc
|
||||
- all [filtering helpers](#filtering)
|
||||
|
||||
Please note that such guesses are expected not to be very accurate.
|
||||
|
||||
### By text
|
||||
To make a guess only based on the content of the file or a text snippet, use
|
||||
- `GetLanguageByShebang` reads only the first line of text to identify the [shebang](https://en.wikipedia.org/wiki/Shebang_(Unix)).
|
||||
- `GetLanguageByModeline` for cases when Vim/Emacs modeline e.g. `/* vim: set ft=cpp: */` may be present at a head or a tail of the text.
|
||||
- `GetLanguageByClassifier` uses a Bayesian classifier trained on all the `./samples/` from Linguist.
|
||||
|
||||
It usually is a last-resort strategy that is used to disambiguate the guess of the previous strategies, and thus it requires a list of "candidate" guesses. One can provide a list of all known languages - keys from the `data.LanguagesLogProbabilities` as possible candidates if more intelligent hypotheses are not available, at the price of possibly suboptimal accuracy.
|
||||
To make a guess only based on the content of the file or a text snippet, use
|
||||
|
||||
- `GetLanguageByShebang` reads only the first line of text to identify the [shebang](<https://en.wikipedia.org/wiki/Shebang_(Unix)>).
|
||||
- `GetLanguageByModeline` for cases when Vim/Emacs modeline e.g. `/* vim: set ft=cpp: */` may be present at a head or a tail of the text.
|
||||
- `GetLanguageByClassifier` uses a Bayesian classifier trained on all the `./samples/` from Linguist.
|
||||
|
||||
It usually is a last-resort strategy that is used to disambiguate the guess of the previous strategies, and thus it requires a list of "candidate" guesses. One can provide a list of all known languages - keys from the `data.LanguagesLogProbabilities` as possible candidates if more intelligent hypotheses are not available, at the price of possibly suboptimal accuracy.
|
||||
|
||||
### By file
|
||||
|
||||
The most accurate guess would be one when both, the file name and the content are available:
|
||||
- `GetLanguagesByContent` only uses file extension and a set of regexp-based content heuristics.
|
||||
- `GetLanguages` uses the full set of matching strategies and is expected to be most accurate.
|
||||
|
||||
- `GetLanguagesByContent` only uses file extension and a set of regexp-based content heuristics.
|
||||
- `GetLanguages` uses the full set of matching strategies and is expected to be most accurate.
|
||||
|
||||
### Filtering: vendoring, binaries, etc
|
||||
*enry* expose a set of file-level helpers `Is*` to simplify filtering out the files that are less interesting for the purpose of source code analysis:
|
||||
- `IsBinary`
|
||||
- `IsVendor`
|
||||
- `IsConfiguration`
|
||||
- `IsDocumentation`
|
||||
- `IsDotFile`
|
||||
- `IsImage`
|
||||
|
||||
_enry_ expose a set of file-level helpers `Is*` to simplify filtering out the files that are less interesting for the purpose of source code analysis:
|
||||
|
||||
- `IsBinary`
|
||||
- `IsVendor`
|
||||
- `IsConfiguration`
|
||||
- `IsDocumentation`
|
||||
- `IsDotFile`
|
||||
- `IsImage`
|
||||
- `IsTest`
|
||||
- `IsGenerated`
|
||||
|
||||
### Language colors and groups
|
||||
*enry* exposes function to get language color to use for example in presenting statistics in graphs:
|
||||
- `GetColor`
|
||||
- `GetLanguageGroup` can be used to group similar languages together e.g. for `Less` this function will return `CSS`
|
||||
|
||||
_enry_ exposes function to get language color to use for example in presenting statistics in graphs:
|
||||
|
||||
- `GetColor`
|
||||
- `GetLanguageGroup` can be used to group similar languages together e.g. for `Less` this function will return `CSS`
|
||||
|
||||
## Languages
|
||||
|
||||
|
@ -136,39 +148,36 @@ Generated Python bindings using a C shared library and cffi are WIP under [src-d
|
|||
A library is going to be published on pypi as [enry](https://pypi.org/project/enry/) for
|
||||
macOS and linux platforms. Windows support is planned under [src-d/enry#150](https://github.com/src-d/enry/issues/150).
|
||||
|
||||
Divergences from Linguist
|
||||
------------
|
||||
## Divergences from Linguist
|
||||
|
||||
The `enry` library is based on the data from `github/linguist` version **v7.9.0**.
|
||||
|
||||
Parsing [linguist/samples](https://github.com/github/linguist/tree/master/samples) the following `enry` results are different from the Linguist:
|
||||
|
||||
* [Heuristics for ".es" extension](https://github.com/github/linguist/blob/e761f9b013e5b61161481fcb898b59721ee40e3d/lib/linguist/heuristics.yml#L103) in JavaScript could not be parsed, due to unsupported backreference in RE2 regexp engine.
|
||||
- [Heuristics for ".es" extension](https://github.com/github/linguist/blob/e761f9b013e5b61161481fcb898b59721ee40e3d/lib/linguist/heuristics.yml#L103) in JavaScript could not be parsed, due to unsupported backreference in RE2 regexp engine.
|
||||
|
||||
* [Heuristics for ".rno" extension](https://github.com/github/linguist/blob/3a1bd3c3d3e741a8aaec4704f782e06f5cd2a00d/lib/linguist/heuristics.yml#L365) in RUNOFF could not be parsed, due to unsupported lookahead in RE2 regexp engine.
|
||||
- [Heuristics for ".rno" extension](https://github.com/github/linguist/blob/3a1bd3c3d3e741a8aaec4704f782e06f5cd2a00d/lib/linguist/heuristics.yml#L365) in RUNOFF could not be parsed, due to unsupported lookahead in RE2 regexp engine.
|
||||
|
||||
* [Heuristics for ".inc" extension](https://github.com/github/linguist/blob/f0e2d0d7f1ce600b2a5acccaef6b149c87d8b99c/lib/linguist/heuristics.yml#L222) in NASL could not be parsed, due to unsupported possessive quantifier in RE2 regexp engine.
|
||||
- [Heuristics for ".inc" extension](https://github.com/github/linguist/blob/f0e2d0d7f1ce600b2a5acccaef6b149c87d8b99c/lib/linguist/heuristics.yml#L222) in NASL could not be parsed, due to unsupported possessive quantifier in RE2 regexp engine.
|
||||
|
||||
* As of [Linguist v5.3.2](https://github.com/github/linguist/releases/tag/v5.3.2) it is using [flex-based scanner in C for tokenization](https://github.com/github/linguist/pull/3846). Enry still uses [extract_token](https://github.com/github/linguist/pull/3846/files#diff-d5179df0b71620e3fac4535cd1368d15L60) regex-based algorithm. See [#193](https://github.com/src-d/enry/issues/193).
|
||||
- As of [Linguist v5.3.2](https://github.com/github/linguist/releases/tag/v5.3.2) it is using [flex-based scanner in C for tokenization](https://github.com/github/linguist/pull/3846). Enry still uses [extract_token](https://github.com/github/linguist/pull/3846/files#diff-d5179df0b71620e3fac4535cd1368d15L60) regex-based algorithm. See [#193](https://github.com/src-d/enry/issues/193).
|
||||
|
||||
* Bayesian classifier can't distinguish "SQL" from "PLpgSQL. See [#194](https://github.com/src-d/enry/issues/194).
|
||||
- Bayesian classifier can't distinguish "SQL" from "PLpgSQL. See [#194](https://github.com/src-d/enry/issues/194).
|
||||
|
||||
* Detection of [generated files](https://github.com/github/linguist/blob/bf95666fc15e49d556f2def4d0a85338423c25f3/lib/linguist/generated.rb#L53) is not supported yet.
|
||||
(Thus they are not excluded from CLI output). See [#213](https://github.com/src-d/enry/issues/213).
|
||||
- Detection of [generated files](https://github.com/github/linguist/blob/bf95666fc15e49d556f2def4d0a85338423c25f3/lib/linguist/generated.rb#L53) is not supported yet.
|
||||
(Thus they are not excluded from CLI output). See [#213](https://github.com/src-d/enry/issues/213).
|
||||
|
||||
* XML detection strategy is not implemented. See [#192](https://github.com/src-d/enry/issues/192).
|
||||
- XML detection strategy is not implemented. See [#192](https://github.com/src-d/enry/issues/192).
|
||||
|
||||
* Overriding languages and types though `.gitattributes` is not yet supported. See [#18](https://github.com/src-d/enry/issues/18).
|
||||
- Overriding languages and types though `.gitattributes` is not yet supported. See [#18](https://github.com/src-d/enry/issues/18).
|
||||
|
||||
* `enry` CLI output does NOT exclude `.gitignore`ed files and git submodules, as Linguist does
|
||||
- `enry` CLI output does NOT exclude `.gitignore`ed files and git submodules, as Linguist does
|
||||
|
||||
In all the cases above that have an issue number - we plan to update enry to match Linguist behavior.
|
||||
|
||||
## Benchmarks
|
||||
|
||||
Benchmarks
|
||||
------------
|
||||
|
||||
Enry's language detection has been compared with Linguist's on [*linguist/samples*](https://github.com/github/linguist/tree/master/samples).
|
||||
Enry's language detection has been compared with Linguist's on [_linguist/samples_](https://github.com/github/linguist/tree/master/samples).
|
||||
|
||||
We got these results:
|
||||
|
||||
|
@ -182,9 +191,7 @@ Go regexp engine being slower than Ruby's on, wich is based on [oniguruma](https
|
|||
|
||||
See [instructions](#misc) for running enry with oniguruma.
|
||||
|
||||
|
||||
Why Enry?
|
||||
------------
|
||||
## Why Enry?
|
||||
|
||||
In the movie [My Fair Lady](https://en.wikipedia.org/wiki/My_Fair_Lady), [Professor Henry Higgins](http://www.imdb.com/character/ch0011719/) is a linguist who at the very beginning of the movie enjoys guessing the origin of people based on their accent.
|
||||
|
||||
|
@ -199,10 +206,9 @@ To run the tests use:
|
|||
Setting `ENRY_TEST_REPO` to the path to existing checkout of Linguist will avoid cloning it and sepeed tests up.
|
||||
Setting `ENRY_DEBUG=1` will provide insight in the Bayesian classifier building done by `make code-generate`.
|
||||
|
||||
|
||||
### Sync with github/linguist upstream
|
||||
|
||||
*enry* re-uses parts of the original [github/linguist](https://github.com/github/linguist) to generate internal data structures.
|
||||
_enry_ re-uses parts of the original [github/linguist](https://github.com/github/linguist) to generate internal data structures.
|
||||
In order to update to the latest release of linguist do:
|
||||
|
||||
```bash
|
||||
|
@ -217,10 +223,10 @@ $ make code-generate
|
|||
|
||||
To stay in sync, enry needs to be updated when a new release of the linguist includes changes to any of the following files:
|
||||
|
||||
* [languages.yml](https://github.com/github/linguist/blob/master/lib/linguist/languages.yml)
|
||||
* [heuristics.yml](https://github.com/github/linguist/blob/master/lib/linguist/heuristics.yml)
|
||||
* [vendor.yml](https://github.com/github/linguist/blob/master/lib/linguist/vendor.yml)
|
||||
* [documentation.yml](https://github.com/github/linguist/blob/master/lib/linguist/documentation.yml)
|
||||
- [languages.yml](https://github.com/github/linguist/blob/master/lib/linguist/languages.yml)
|
||||
- [heuristics.yml](https://github.com/github/linguist/blob/master/lib/linguist/heuristics.yml)
|
||||
- [vendor.yml](https://github.com/github/linguist/blob/master/lib/linguist/vendor.yml)
|
||||
- [documentation.yml](https://github.com/github/linguist/blob/master/lib/linguist/documentation.yml)
|
||||
|
||||
There is no automation for detecting the changes in the linguist project, so this process above has to be done manually from time to time.
|
||||
|
||||
|
@ -229,8 +235,6 @@ the generated files (in [data](https://github.com/go-enry/go-enry/blob/master/da
|
|||
|
||||
Separating all the necessary "manual" code changes to a different PR that includes some background description and an update to the documentation on ["divergences from linguist"](#divergences-from-linguist) is very much appreciated as it simplifies the maintenance (review/release notes/etc).
|
||||
|
||||
|
||||
|
||||
## Misc
|
||||
|
||||
<details>
|
||||
|
@ -238,19 +242,20 @@ Separating all the necessary "manual" code changes to a different PR that includ
|
|||
|
||||
### Benchmark
|
||||
|
||||
All benchmark scripts are in [*benchmarks*](https://github.com/go-enry/go-enry/blob/master/benchmarks) directory.
|
||||
|
||||
All benchmark scripts are in [_benchmarks_](https://github.com/go-enry/go-enry/blob/master/benchmarks) directory.
|
||||
|
||||
#### Dependencies
|
||||
As benchmarks depend on Ruby and Github-Linguist gem make sure you have:
|
||||
- Ruby (e.g using [`rbenv`](https://github.com/rbenv/rbenv)), [`bundler`](https://bundler.io/) installed
|
||||
- Docker
|
||||
- [native dependencies](https://github.com/github/linguist/#dependencies) installed
|
||||
- Build the gem `cd .linguist && bundle install && rake build_gem && cd -`
|
||||
- Install it `gem install --no-rdoc --no-ri --local .linguist/github-linguist-*.gem`
|
||||
|
||||
As benchmarks depend on Ruby and Github-Linguist gem make sure you have:
|
||||
|
||||
- Ruby (e.g using [`rbenv`](https://github.com/rbenv/rbenv)), [`bundler`](https://bundler.io/) installed
|
||||
- Docker
|
||||
- [native dependencies](https://github.com/github/linguist/#dependencies) installed
|
||||
- Build the gem `cd .linguist && bundle install && rake build_gem && cd -`
|
||||
- Install it `gem install --no-rdoc --no-ri --local .linguist/github-linguist-*.gem`
|
||||
|
||||
#### Quick benchmark
|
||||
|
||||
To run quicker benchmarks
|
||||
|
||||
make benchmarks
|
||||
|
@ -259,19 +264,20 @@ to get average times for the primary detection function and strategies for the w
|
|||
|
||||
make benchmarks-samples
|
||||
|
||||
|
||||
#### Full benchmark
|
||||
|
||||
If you want to reproduce the same benchmarks as reported above:
|
||||
- Make sure all [dependencies](#benchmark-dependencies) are installed
|
||||
- Install [gnuplot](http://gnuplot.info) (in order to plot the histogram)
|
||||
- Run `ENRY_TEST_REPO="$PWD/.linguist" benchmarks/run.sh` (takes ~15h)
|
||||
|
||||
- Make sure all [dependencies](#benchmark-dependencies) are installed
|
||||
- Install [gnuplot](http://gnuplot.info) (in order to plot the histogram)
|
||||
- Run `ENRY_TEST_REPO="$PWD/.linguist" benchmarks/run.sh` (takes ~15h)
|
||||
|
||||
It will run the benchmarks for enry and Linguist, parse the output, create csv files and plot the histogram.
|
||||
|
||||
### Faster regexp engine (optional)
|
||||
|
||||
[Oniguruma](https://github.com/kkos/oniguruma) is CRuby's regular expression engine.
|
||||
It is very fast and performs better than the one built into Go runtime. *enry* supports swapping
|
||||
It is very fast and performs better than the one built into Go runtime. _enry_ supports swapping
|
||||
between those two engines thanks to [rubex](https://github.com/moovweb/rubex) project.
|
||||
The typical overall speedup from using Oniguruma is 1.5-2x. However, it requires CGo and the external shared library.
|
||||
On macOS with [Homebrew](https://brew.sh/), it is:
|
||||
|
@ -296,8 +302,6 @@ and then rebuild the project.
|
|||
|
||||
</details>
|
||||
|
||||
|
||||
License
|
||||
------------
|
||||
## License
|
||||
|
||||
Apache License, Version 2.0. See [LICENSE](LICENSE)
|
||||
|
|
|
@ -328,15 +328,13 @@ func getInterpreter(data []byte) (interpreter string) {
|
|||
return
|
||||
}
|
||||
|
||||
func getFirstLine(data []byte) []byte {
|
||||
buf := bufio.NewScanner(bytes.NewReader(data))
|
||||
buf.Scan()
|
||||
line := buf.Bytes()
|
||||
if err := buf.Err(); err != nil {
|
||||
return nil
|
||||
func getFirstLine(content []byte) []byte {
|
||||
nlpos := bytes.IndexByte(content, '\n')
|
||||
if nlpos < 0 {
|
||||
return content
|
||||
}
|
||||
|
||||
return line
|
||||
return content[:nlpos]
|
||||
}
|
||||
|
||||
func hasShebang(line []byte) bool {
|
||||
|
|
|
@ -3,24 +3,24 @@
|
|||
|
||||
package data
|
||||
|
||||
import "gopkg.in/toqueteos/substring.v1"
|
||||
import "github.com/go-enry/go-enry/v2/regex"
|
||||
|
||||
var DocumentationMatchers = substring.Or(
|
||||
substring.Regexp(`^[Dd]ocs?/`),
|
||||
substring.Regexp(`(^|/)[Dd]ocumentation/`),
|
||||
substring.Regexp(`(^|/)[Gg]roovydoc/`),
|
||||
substring.Regexp(`(^|/)[Jj]avadoc/`),
|
||||
substring.Regexp(`^[Mm]an/`),
|
||||
substring.Regexp(`^[Ee]xamples/`),
|
||||
substring.Regexp(`^[Dd]emos?/`),
|
||||
substring.Regexp(`(^|/)inst/doc/`),
|
||||
substring.Regexp(`(^|/)CHANGE(S|LOG)?(\.|$)`),
|
||||
substring.Regexp(`(^|/)CONTRIBUTING(\.|$)`),
|
||||
substring.Regexp(`(^|/)COPYING(\.|$)`),
|
||||
substring.Regexp(`(^|/)INSTALL(\.|$)`),
|
||||
substring.Regexp(`(^|/)LICEN[CS]E(\.|$)`),
|
||||
substring.Regexp(`(^|/)[Ll]icen[cs]e(\.|$)`),
|
||||
substring.Regexp(`(^|/)README(\.|$)`),
|
||||
substring.Regexp(`(^|/)[Rr]eadme(\.|$)`),
|
||||
substring.Regexp(`^[Ss]amples?/`),
|
||||
)
|
||||
var DocumentationMatchers = []regex.EnryRegexp{
|
||||
regex.MustCompile(`^[Dd]ocs?/`),
|
||||
regex.MustCompile(`(^|/)[Dd]ocumentation/`),
|
||||
regex.MustCompile(`(^|/)[Gg]roovydoc/`),
|
||||
regex.MustCompile(`(^|/)[Jj]avadoc/`),
|
||||
regex.MustCompile(`^[Mm]an/`),
|
||||
regex.MustCompile(`^[Ee]xamples/`),
|
||||
regex.MustCompile(`^[Dd]emos?/`),
|
||||
regex.MustCompile(`(^|/)inst/doc/`),
|
||||
regex.MustCompile(`(^|/)CHANGE(S|LOG)?(\.|$)`),
|
||||
regex.MustCompile(`(^|/)CONTRIBUTING(\.|$)`),
|
||||
regex.MustCompile(`(^|/)COPYING(\.|$)`),
|
||||
regex.MustCompile(`(^|/)INSTALL(\.|$)`),
|
||||
regex.MustCompile(`(^|/)LICEN[CS]E(\.|$)`),
|
||||
regex.MustCompile(`(^|/)[Ll]icen[cs]e(\.|$)`),
|
||||
regex.MustCompile(`(^|/)README(\.|$)`),
|
||||
regex.MustCompile(`(^|/)[Rr]eadme(\.|$)`),
|
||||
regex.MustCompile(`^[Ss]amples?/`),
|
||||
}
|
||||
|
|
|
@ -0,0 +1,823 @@
|
|||
package data
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"strings"
|
||||
|
||||
"github.com/go-enry/go-enry/v2/regex"
|
||||
)
|
||||
|
||||
// GeneratedCodeExtensions contains all extensions that belong to generated
|
||||
// files for sure.
|
||||
var GeneratedCodeExtensions = map[string]struct{}{
|
||||
// XCode files
|
||||
".nib": {},
|
||||
".xcworkspacedata": {},
|
||||
".xcuserstate": {},
|
||||
}
|
||||
|
||||
// GeneratedCodeNameMatcher is a function that tells whether the file with the
|
||||
// given name is generated.
|
||||
type GeneratedCodeNameMatcher func(string) bool
|
||||
|
||||
func nameMatches(pattern string) GeneratedCodeNameMatcher {
|
||||
r := regex.MustCompile(pattern)
|
||||
return func(name string) bool {
|
||||
return r.MatchString(name)
|
||||
}
|
||||
}
|
||||
|
||||
func nameContains(pattern string) GeneratedCodeNameMatcher {
|
||||
return func(name string) bool {
|
||||
return strings.Contains(name, pattern)
|
||||
}
|
||||
}
|
||||
|
||||
func nameEndsWith(pattern string) GeneratedCodeNameMatcher {
|
||||
return func(name string) bool {
|
||||
return strings.HasSuffix(name, pattern)
|
||||
}
|
||||
}
|
||||
|
||||
// GeneratedCodeNameMatchers are all the matchers that check whether the code
|
||||
// is generated based only on the file name.
|
||||
var GeneratedCodeNameMatchers = []GeneratedCodeNameMatcher{
|
||||
// Cocoa pods
|
||||
nameMatches(`(^Pods|\/Pods)\/`),
|
||||
|
||||
// Carthage build
|
||||
nameMatches(`(^|\/)Carthage\/Build\/`),
|
||||
|
||||
// NET designer file
|
||||
nameMatches(`(?i)\.designer\.(cs|vb)$`),
|
||||
|
||||
// Generated NET specflow feature file
|
||||
nameEndsWith(".feature.cs"),
|
||||
|
||||
// Node modules
|
||||
nameContains("node_modules/"),
|
||||
|
||||
// Go vendor
|
||||
nameMatches(`vendor\/([-0-9A-Za-z]+\.)+(com|edu|gov|in|me|net|org|fm|io)`),
|
||||
|
||||
// Go lock
|
||||
nameEndsWith("Gopkg.lock"),
|
||||
nameEndsWith("glide.lock"),
|
||||
|
||||
// Esy lock
|
||||
nameMatches(`(^|\/)(\w+\.)?esy.lock$`),
|
||||
|
||||
// NPM shrinkwrap
|
||||
nameEndsWith("npm-shrinkwrap.json"),
|
||||
|
||||
// NPM package lock
|
||||
nameEndsWith("package-lock.json"),
|
||||
|
||||
// Yarn plugnplay
|
||||
nameMatches(`(^|\/)\.pnp\.(c|m)?js$`),
|
||||
|
||||
// Godeps
|
||||
nameContains("Godeps/"),
|
||||
|
||||
// Composer lock
|
||||
nameEndsWith("composer.lock"),
|
||||
|
||||
// Generated by zephir
|
||||
nameMatches(`.\.zep\.(?:c|h|php)$`),
|
||||
|
||||
// Cargo lock
|
||||
nameEndsWith("Cargo.lock"),
|
||||
|
||||
// Pipenv lock
|
||||
nameEndsWith("Pipfile.lock"),
|
||||
|
||||
// GraphQL relay
|
||||
nameContains("__generated__/"),
|
||||
}
|
||||
|
||||
// GeneratedCodeMatcher checks whether the file with the given data is
|
||||
// generated code.
|
||||
type GeneratedCodeMatcher func(path, ext string, content []byte) bool
|
||||
|
||||
// GeneratedCodeMatchers is the list of all generated code matchers that
|
||||
// rely on checking the content of the file to make the guess.
|
||||
var GeneratedCodeMatchers = []GeneratedCodeMatcher{
|
||||
isMinifiedFile,
|
||||
hasSourceMapReference,
|
||||
isSourceMap,
|
||||
isCompiledCoffeeScript,
|
||||
isGeneratedNetDocfile,
|
||||
isGeneratedJavaScriptPEGParser,
|
||||
isGeneratedPostScript,
|
||||
isGeneratedGo,
|
||||
isGeneratedProtobuf,
|
||||
isGeneratedJavaScriptProtocolBuffer,
|
||||
isGeneratedApacheThrift,
|
||||
isGeneratedJNIHeader,
|
||||
isVCRCassette,
|
||||
isCompiledCythonFile,
|
||||
isGeneratedModule,
|
||||
isGeneratedUnity3DMeta,
|
||||
isGeneratedRacc,
|
||||
isGeneratedJFlex,
|
||||
isGeneratedGrammarKit,
|
||||
isGeneratedRoxygen2,
|
||||
isGeneratedJison,
|
||||
isGeneratedGRPCCpp,
|
||||
isGeneratedDart,
|
||||
isGeneratedPerlPPPortHeader,
|
||||
isGeneratedGameMakerStudio,
|
||||
isGeneratedGimp,
|
||||
isGeneratedVisualStudio6,
|
||||
isGeneratedHaxe,
|
||||
isGeneratedHTML,
|
||||
isGeneratedJooq,
|
||||
}
|
||||
|
||||
func canBeMinified(ext string) bool {
|
||||
return ext == ".js" || ext == ".css"
|
||||
}
|
||||
|
||||
// isMinifiedFile returns whether the file may be minified.
|
||||
// We consider a minified file any css or js file whose average number of chars
|
||||
// per line is more than 110.
|
||||
func isMinifiedFile(path, ext string, content []byte) bool {
|
||||
if !canBeMinified(ext) {
|
||||
return false
|
||||
}
|
||||
|
||||
var chars, lines uint64
|
||||
forEachLine(content, func(line []byte) {
|
||||
chars += uint64(len(line))
|
||||
lines++
|
||||
})
|
||||
|
||||
if lines == 0 {
|
||||
return false
|
||||
}
|
||||
|
||||
return chars/lines > 110
|
||||
}
|
||||
|
||||
var sourceMapRegex = regex.MustCompile(`^\/[*\/][\#@] source(?:Mapping)?URL|sourceURL=`)
|
||||
|
||||
// hasSourceMapReference returns whether the file contains a reference to a
|
||||
// source-map file.
|
||||
func hasSourceMapReference(_ string, ext string, content []byte) bool {
|
||||
if !canBeMinified(ext) {
|
||||
return false
|
||||
}
|
||||
|
||||
for _, line := range getLines(content, -2) {
|
||||
if sourceMapRegex.Match(line) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
var sourceMapRegexps = []regex.EnryRegexp{
|
||||
regex.MustCompile(`^{"version":\d+,`),
|
||||
regex.MustCompile(`^\/\*\* Begin line maps\. \*\*\/{`),
|
||||
}
|
||||
|
||||
// isSourceMap returns whether the file itself is a source map.
|
||||
func isSourceMap(path, _ string, content []byte) bool {
|
||||
if strings.HasSuffix(path, ".js.map") || strings.HasSuffix(path, ".css.map") {
|
||||
return true
|
||||
}
|
||||
|
||||
firstLine := getFirstLine(content)
|
||||
if len(firstLine) == 0 {
|
||||
return false
|
||||
}
|
||||
|
||||
for _, r := range sourceMapRegexps {
|
||||
if r.Match(firstLine) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
func isCompiledCoffeeScript(path, ext string, content []byte) bool {
|
||||
if ext != ".js" {
|
||||
return false
|
||||
}
|
||||
|
||||
firstLine := getFirstLine(content)
|
||||
lastLines := getLines(content, -2)
|
||||
if len(lastLines) < 2 {
|
||||
return false
|
||||
}
|
||||
|
||||
if string(firstLine) == "(function() {" &&
|
||||
string(lastLines[1]) == "}).call(this);" &&
|
||||
string(lastLines[0]) == "" {
|
||||
score := 0
|
||||
|
||||
forEachLine(content, func(line []byte) {
|
||||
if bytes.Contains(line, []byte("var ")) {
|
||||
// Underscored temp vars are likely to be Coffee
|
||||
score += 1 * countAppearancesInLine(line, "_fn", "_i", "_len", "_ref", "_results")
|
||||
|
||||
// bind and extend functions are very Coffee specific
|
||||
score += 3 * countAppearancesInLine(line, "__bind", "__extends", "__hasProp", "__indexOf", "__slice")
|
||||
}
|
||||
})
|
||||
|
||||
// Require a score of 3. This is fairly abritrary. Consider tweaking later.
|
||||
// See: https://github.com/github/linguist/blob/master/lib/linguist/generated.rb#L176-L213
|
||||
return score >= 3
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
func isGeneratedNetDocfile(_, ext string, content []byte) bool {
|
||||
if ext != ".xml" {
|
||||
return false
|
||||
}
|
||||
|
||||
lines := bytes.Split(content, []byte{'\n'})
|
||||
if len(lines) <= 3 {
|
||||
return false
|
||||
}
|
||||
|
||||
return bytes.Contains(lines[1], []byte("<doc>")) &&
|
||||
bytes.Contains(lines[2], []byte("<assembly>")) &&
|
||||
bytes.Contains(lines[len(lines)-2], []byte("</doc>"))
|
||||
}
|
||||
|
||||
var pegJavaScriptGeneratedRegex = regex.MustCompile(`^(?:[^\/]|\/[^\*])*\/\*(?:[^\*]|\*[^\/])*Generated by PEG.js`)
|
||||
|
||||
func isGeneratedJavaScriptPEGParser(_, ext string, content []byte) bool {
|
||||
if ext != ".js" {
|
||||
return false
|
||||
}
|
||||
|
||||
// PEG.js-generated parsers include a comment near the top of the file
|
||||
// that marks them as such.
|
||||
return pegJavaScriptGeneratedRegex.Match(bytes.Join(getLines(content, 5), []byte("")))
|
||||
}
|
||||
|
||||
var postScriptType1And42Regex = regex.MustCompile(`(\n|\r\n|\r)\s*(?:currentfile eexec\s+|\/sfnts\s+\[)`)
|
||||
|
||||
var postScriptRegexes = []regex.EnryRegexp{
|
||||
regex.MustCompile(`[0-9]|draw|mpage|ImageMagick|inkscape|MATLAB`),
|
||||
regex.MustCompile(`PCBNEW|pnmtops|\(Unknown\)|Serif Affinity|Filterimage -tops`),
|
||||
}
|
||||
|
||||
func isGeneratedPostScript(_, ext string, content []byte) bool {
|
||||
if ext != ".ps" && ext != ".eps" && ext != ".pfa" {
|
||||
return false
|
||||
}
|
||||
|
||||
// Type 1 and Type 42 fonts converted to PostScript are stored as hex-encoded byte streams; these
|
||||
// streams are always preceded the `eexec` operator (if Type 1), or the `/sfnts` key (if Type 42).
|
||||
if postScriptType1And42Regex.Match(content) {
|
||||
return true
|
||||
}
|
||||
|
||||
// We analyze the "%%Creator:" comment, which contains the author/generator
|
||||
// of the file. If there is one, it should be in one of the first few lines.
|
||||
var creator []byte
|
||||
for _, line := range getLines(content, 10) {
|
||||
if bytes.HasPrefix(line, []byte("%%Creator: ")) {
|
||||
creator = line
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if len(creator) == 0 {
|
||||
return false
|
||||
}
|
||||
|
||||
// EAGLE doesn't include a version number when it generates PostScript.
|
||||
// However, it does prepend its name to the document's "%%Title" field.
|
||||
if bytes.Contains(creator, []byte("EAGLE")) {
|
||||
for _, line := range getLines(content, 5) {
|
||||
if bytes.HasPrefix(line, []byte("%%Title: EAGLE Drawing ")) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Most generators write their version number, while human authors' or companies'
|
||||
// names don't contain numbers. So look if the line contains digits. Also
|
||||
// look for some special cases without version numbers.
|
||||
for _, r := range postScriptRegexes {
|
||||
if r.Match(creator) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
func isGeneratedGo(_, ext string, content []byte) bool {
|
||||
if ext != ".go" {
|
||||
return false
|
||||
}
|
||||
|
||||
lines := getLines(content, 40)
|
||||
if len(lines) <= 1 {
|
||||
return false
|
||||
}
|
||||
|
||||
for _, line := range lines {
|
||||
if bytes.Contains(line, []byte("Code generated by")) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
var protoExtensions = map[string]struct{}{
|
||||
".py": {},
|
||||
".java": {},
|
||||
".h": {},
|
||||
".cc": {},
|
||||
".cpp": {},
|
||||
".m": {},
|
||||
".rb": {},
|
||||
".php": {},
|
||||
}
|
||||
|
||||
func isGeneratedProtobuf(_, ext string, content []byte) bool {
|
||||
if _, ok := protoExtensions[ext]; !ok {
|
||||
return false
|
||||
}
|
||||
|
||||
lines := getLines(content, 3)
|
||||
if len(lines) <= 1 {
|
||||
return false
|
||||
}
|
||||
|
||||
for _, line := range lines {
|
||||
if bytes.Contains(line, []byte("Generated by the protocol buffer compiler. DO NOT EDIT!")) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
func isGeneratedJavaScriptProtocolBuffer(_, ext string, content []byte) bool {
|
||||
if ext != ".js" {
|
||||
return false
|
||||
}
|
||||
|
||||
lines := getLines(content, 6)
|
||||
if len(lines) < 6 {
|
||||
return false
|
||||
}
|
||||
|
||||
return bytes.Contains(lines[5], []byte("GENERATED CODE -- DO NOT EDIT!"))
|
||||
}
|
||||
|
||||
var apacheThriftExtensions = map[string]struct{}{
|
||||
".rb": {},
|
||||
".py": {},
|
||||
".go": {},
|
||||
".js": {},
|
||||
".m": {},
|
||||
".java": {},
|
||||
".h": {},
|
||||
".cc": {},
|
||||
".cpp": {},
|
||||
".php": {},
|
||||
}
|
||||
|
||||
func isGeneratedApacheThrift(_, ext string, content []byte) bool {
|
||||
if _, ok := apacheThriftExtensions[ext]; !ok {
|
||||
return false
|
||||
}
|
||||
|
||||
for _, line := range getLines(content, 6) {
|
||||
if bytes.Contains(line, []byte("Autogenerated by Thrift Compiler")) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
func isGeneratedJNIHeader(_, ext string, content []byte) bool {
|
||||
if ext != ".h" {
|
||||
return false
|
||||
}
|
||||
|
||||
lines := getLines(content, 2)
|
||||
if len(lines) < 2 {
|
||||
return false
|
||||
}
|
||||
|
||||
return bytes.Contains(lines[0], []byte("/* DO NOT EDIT THIS FILE - it is machine generated */")) &&
|
||||
bytes.Contains(lines[1], []byte("#include <jni.h>"))
|
||||
}
|
||||
|
||||
func isVCRCassette(_, ext string, content []byte) bool {
|
||||
if ext != ".yml" {
|
||||
return false
|
||||
}
|
||||
|
||||
lines := getLines(content, -2)
|
||||
if len(lines) < 2 {
|
||||
return false
|
||||
}
|
||||
|
||||
return bytes.Contains(lines[1], []byte("recorded_with: VCR"))
|
||||
}
|
||||
|
||||
func isCompiledCythonFile(_, ext string, content []byte) bool {
|
||||
if ext != ".c" && ext != ".cpp" {
|
||||
return false
|
||||
}
|
||||
|
||||
lines := getLines(content, 1)
|
||||
if len(lines) < 1 {
|
||||
return false
|
||||
}
|
||||
|
||||
return bytes.Contains(lines[0], []byte("Generated by Cython"))
|
||||
}
|
||||
|
||||
func isGeneratedModule(_, ext string, content []byte) bool {
|
||||
if ext != ".mod" {
|
||||
return false
|
||||
}
|
||||
|
||||
lines := getLines(content, 1)
|
||||
if len(lines) < 1 {
|
||||
return false
|
||||
}
|
||||
|
||||
return bytes.Contains(lines[0], []byte("PCBNEW-LibModule-V")) ||
|
||||
bytes.Contains(lines[0], []byte("GFORTRAN module version '"))
|
||||
}
|
||||
|
||||
func isGeneratedUnity3DMeta(_, ext string, content []byte) bool {
|
||||
if ext != ".meta" {
|
||||
return false
|
||||
}
|
||||
|
||||
lines := getLines(content, 1)
|
||||
if len(lines) < 1 {
|
||||
return false
|
||||
}
|
||||
|
||||
return bytes.Contains(lines[0], []byte("fileFormatVersion: "))
|
||||
}
|
||||
|
||||
func isGeneratedRacc(_, ext string, content []byte) bool {
|
||||
if ext != ".rb" {
|
||||
return false
|
||||
}
|
||||
|
||||
lines := getLines(content, 3)
|
||||
if len(lines) < 3 {
|
||||
return false
|
||||
}
|
||||
|
||||
return bytes.HasPrefix(lines[2], []byte("# This file is automatically generated by Racc"))
|
||||
}
|
||||
|
||||
func isGeneratedJFlex(_, ext string, content []byte) bool {
|
||||
if ext != ".java" {
|
||||
return false
|
||||
}
|
||||
|
||||
lines := getLines(content, 1)
|
||||
if len(lines) < 1 {
|
||||
return false
|
||||
}
|
||||
|
||||
return bytes.HasPrefix(lines[0], []byte("/* The following code was generated by JFlex "))
|
||||
}
|
||||
|
||||
func isGeneratedGrammarKit(_, ext string, content []byte) bool {
|
||||
if ext != ".java" {
|
||||
return false
|
||||
}
|
||||
|
||||
lines := getLines(content, 1)
|
||||
if len(lines) < 1 {
|
||||
return false
|
||||
}
|
||||
|
||||
return bytes.Contains(lines[0], []byte("// This is a generated file. Not intended for manual editing."))
|
||||
}
|
||||
|
||||
func isGeneratedRoxygen2(_, ext string, content []byte) bool {
|
||||
if ext != ".rd" {
|
||||
return false
|
||||
}
|
||||
|
||||
lines := getLines(content, 1)
|
||||
if len(lines) < 1 {
|
||||
return false
|
||||
}
|
||||
|
||||
return bytes.Contains(lines[0], []byte("% Generated by roxygen2: do not edit by hand"))
|
||||
}
|
||||
|
||||
func isGeneratedJison(_, ext string, content []byte) bool {
|
||||
if ext != ".js" {
|
||||
return false
|
||||
}
|
||||
|
||||
lines := getLines(content, 1)
|
||||
if len(lines) < 1 {
|
||||
return false
|
||||
}
|
||||
|
||||
return bytes.Contains(lines[0], []byte("/* parser generated by jison ")) ||
|
||||
bytes.Contains(lines[0], []byte("/* generated by jison-lex "))
|
||||
}
|
||||
|
||||
func isGeneratedGRPCCpp(_, ext string, content []byte) bool {
|
||||
switch ext {
|
||||
case ".cpp", ".hpp", ".h", ".cc":
|
||||
lines := getLines(content, 1)
|
||||
if len(lines) < 1 {
|
||||
return false
|
||||
}
|
||||
|
||||
return bytes.Contains(lines[0], []byte("// Generated by the gRPC"))
|
||||
default:
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
var dartRegex = regex.MustCompile(`generated code\W{2,3}do not modify`)
|
||||
|
||||
func isGeneratedDart(_, ext string, content []byte) bool {
|
||||
if ext != ".dart" {
|
||||
return false
|
||||
}
|
||||
|
||||
lines := getLines(content, 1)
|
||||
if len(lines) < 1 {
|
||||
return false
|
||||
}
|
||||
|
||||
return dartRegex.Match(bytes.ToLower(lines[0]))
|
||||
}
|
||||
|
||||
func isGeneratedPerlPPPortHeader(name, _ string, content []byte) bool {
|
||||
if !strings.HasSuffix(name, "ppport.h") {
|
||||
return false
|
||||
}
|
||||
|
||||
lines := getLines(content, 10)
|
||||
if len(lines) < 10 {
|
||||
return false
|
||||
}
|
||||
|
||||
return bytes.Contains(lines[8], []byte("Automatically created by Devel::PPPort"))
|
||||
}
|
||||
|
||||
var (
|
||||
gameMakerStudioFirstLineRegex = regex.MustCompile(`^\d\.\d\.\d.+\|\{`)
|
||||
gameMakerStudioThirdLineRegex = regex.MustCompile(`\"modelName\"\:\s*\"GM`)
|
||||
)
|
||||
|
||||
func isGeneratedGameMakerStudio(_, ext string, content []byte) bool {
|
||||
if ext != ".yy" && ext != ".yyp" {
|
||||
return false
|
||||
}
|
||||
|
||||
lines := getLines(content, 3)
|
||||
if len(lines) < 3 {
|
||||
return false
|
||||
}
|
||||
|
||||
return gameMakerStudioThirdLineRegex.Match(lines[2]) ||
|
||||
gameMakerStudioFirstLineRegex.Match(lines[0])
|
||||
}
|
||||
|
||||
var gimpRegexes = []regex.EnryRegexp{
|
||||
regex.MustCompile(`\/\* GIMP [a-zA-Z0-9\- ]+ C\-Source image dump \(.+?\.c\) \*\/`),
|
||||
regex.MustCompile(`\/\* GIMP header image file format \([a-zA-Z0-9\- ]+\)\: .+?\.h \*\/`),
|
||||
}
|
||||
|
||||
func isGeneratedGimp(_, ext string, content []byte) bool {
|
||||
if ext != ".c" && ext != ".h" {
|
||||
return false
|
||||
}
|
||||
|
||||
lines := getLines(content, 1)
|
||||
if len(lines) < 1 {
|
||||
return false
|
||||
}
|
||||
|
||||
for _, r := range gimpRegexes {
|
||||
if r.Match(lines[0]) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
func isGeneratedVisualStudio6(_, ext string, content []byte) bool {
|
||||
if ext != ".dsp" {
|
||||
return false
|
||||
}
|
||||
|
||||
for _, l := range getLines(content, 3) {
|
||||
if bytes.Contains(l, []byte("# Microsoft Developer Studio Generated Build File")) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
var haxeExtensions = map[string]struct{}{
|
||||
".js": {},
|
||||
".py": {},
|
||||
".lua": {},
|
||||
".cpp": {},
|
||||
".h": {},
|
||||
".java": {},
|
||||
".cs": {},
|
||||
".php": {},
|
||||
}
|
||||
|
||||
func isGeneratedHaxe(_, ext string, content []byte) bool {
|
||||
if _, ok := haxeExtensions[ext]; !ok {
|
||||
return false
|
||||
}
|
||||
|
||||
for _, l := range getLines(content, 3) {
|
||||
if bytes.Contains(l, []byte("Generated by Haxe")) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
var (
|
||||
doxygenRegex = regex.MustCompile(`<!--\s+Generated by Doxygen\s+[.0-9]+\s*-->`)
|
||||
htmlMetaRegex = regex.MustCompile(`<meta(\s+[^>]+)>`)
|
||||
htmlMetaContentRegex = regex.MustCompile(`\s+(name|content|value)\s*=\s*("[^"]+"|'[^']+'|[^\s"']+)`)
|
||||
orgModeMetaRegex = regex.MustCompile(`org\s+mode`)
|
||||
)
|
||||
|
||||
func isGeneratedHTML(_, ext string, content []byte) bool {
|
||||
if ext != ".html" && ext != ".htm" && ext != ".xhtml" {
|
||||
return false
|
||||
}
|
||||
|
||||
lines := getLines(content, 30)
|
||||
|
||||
// Pkgdown
|
||||
if len(lines) >= 2 {
|
||||
for _, l := range lines[:2] {
|
||||
if bytes.Contains(l, []byte("<!-- Generated by pkgdown: do not edit by hand -->")) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Mandoc
|
||||
if len(lines) > 2 &&
|
||||
bytes.HasPrefix(lines[2], []byte("<!-- This is an automatically generated file.")) {
|
||||
return true
|
||||
}
|
||||
|
||||
// Doxygen
|
||||
for _, l := range lines {
|
||||
if doxygenRegex.Match(l) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
// HTML tag: <meta name="generator" content="" />
|
||||
part := bytes.ToLower(bytes.Join(lines, []byte{' '}))
|
||||
part = bytes.ReplaceAll(part, []byte{'\n'}, []byte{})
|
||||
part = bytes.ReplaceAll(part, []byte{'\r'}, []byte{})
|
||||
matches := htmlMetaRegex.FindAll(part, -1)
|
||||
if len(matches) == 0 {
|
||||
return false
|
||||
}
|
||||
|
||||
for _, m := range matches {
|
||||
var name, value, content string
|
||||
ms := htmlMetaContentRegex.FindAllStringSubmatch(string(m), -1)
|
||||
for _, m := range ms {
|
||||
switch m[1] {
|
||||
case "name":
|
||||
name = m[2]
|
||||
case "value":
|
||||
value = m[2]
|
||||
case "content":
|
||||
content = m[2]
|
||||
}
|
||||
}
|
||||
|
||||
var val = value
|
||||
if val == "" {
|
||||
val = content
|
||||
}
|
||||
|
||||
name = strings.Trim(name, `"'`)
|
||||
val = strings.Trim(val, `"'`)
|
||||
|
||||
if name != "generator" || val == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
if strings.Contains(val, "jlatex2html") ||
|
||||
strings.Contains(val, "latex2html") ||
|
||||
strings.Contains(val, "groff") ||
|
||||
strings.Contains(val, "makeinfo") ||
|
||||
strings.Contains(val, "texi2html") ||
|
||||
strings.Contains(val, "ronn") ||
|
||||
orgModeMetaRegex.MatchString(val) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
func isGeneratedJooq(_, ext string, content []byte) bool {
|
||||
if ext != ".java" {
|
||||
return false
|
||||
}
|
||||
|
||||
for _, l := range getLines(content, 2) {
|
||||
if bytes.Contains(l, []byte("This file is generated by jOOQ.")) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
func getFirstLine(content []byte) []byte {
|
||||
lines := getLines(content, 1)
|
||||
if len(lines) > 0 {
|
||||
return lines[0]
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// getLines returns up to the first n lines. A negative index will return up to
|
||||
// the last n lines in reverse order.
|
||||
func getLines(content []byte, n int) [][]byte {
|
||||
var result [][]byte
|
||||
if n < 0 {
|
||||
for pos := len(content); pos > 0 && len(result) < -n; {
|
||||
nlpos := bytes.LastIndexByte(content[:pos], '\n')
|
||||
if nlpos+1 < len(content)-1 {
|
||||
result = append(result, content[nlpos+1:pos])
|
||||
}
|
||||
pos = nlpos
|
||||
}
|
||||
} else {
|
||||
for pos := 0; pos < len(content) && len(result) < n; {
|
||||
nlpos := bytes.IndexByte(content[pos:], '\n')
|
||||
if nlpos < 0 && pos < len(content) {
|
||||
nlpos = len(content)
|
||||
} else if nlpos >= 0 {
|
||||
nlpos += pos
|
||||
}
|
||||
|
||||
result = append(result, content[pos:nlpos])
|
||||
pos = nlpos + 1
|
||||
}
|
||||
}
|
||||
|
||||
return result
|
||||
}
|
||||
|
||||
func forEachLine(content []byte, cb func([]byte)) {
|
||||
var pos int
|
||||
for pos < len(content) {
|
||||
nlpos := bytes.IndexByte(content[pos:], '\n')
|
||||
if nlpos < 0 && pos < len(content) {
|
||||
nlpos = len(content)
|
||||
} else if nlpos >= 0 {
|
||||
nlpos += pos
|
||||
}
|
||||
|
||||
cb(content[pos:nlpos])
|
||||
pos = nlpos + 1
|
||||
}
|
||||
}
|
||||
|
||||
func countAppearancesInLine(line []byte, targets ...string) int {
|
||||
var count int
|
||||
for _, t := range targets {
|
||||
count += bytes.Count(line, []byte(t))
|
||||
}
|
||||
return count
|
||||
}
|
|
@ -0,0 +1,17 @@
|
|||
package data
|
||||
|
||||
import "github.com/go-enry/go-enry/v2/regex"
|
||||
|
||||
// TestMatchers is hand made collection of regexp used by the function `enry.IsTest`
|
||||
// to identify test files in different languages.
|
||||
var TestMatchers = []regex.EnryRegexp{
|
||||
regex.MustCompile(`(^|/)tests/.*Test\.php$`),
|
||||
regex.MustCompile(`(^|/)test/.*Test(s?)\.java$`),
|
||||
regex.MustCompile(`(^|/)test(/|/.*/)Test.*\.java$`),
|
||||
regex.MustCompile(`(^|/)test/.*(Test(s?)|Spec(s?))\.scala$`),
|
||||
regex.MustCompile(`(^|/)test_.*\.py$`),
|
||||
regex.MustCompile(`(^|/).*_test\.go$`),
|
||||
regex.MustCompile(`(^|/).*_(test|spec)\.rb$`),
|
||||
regex.MustCompile(`(^|/).*Test(s?)\.cs$`),
|
||||
regex.MustCompile(`(^|/).*\.(test|spec)\.(ts|tsx|js)$`),
|
||||
}
|
|
@ -3,167 +3,167 @@
|
|||
|
||||
package data
|
||||
|
||||
import "gopkg.in/toqueteos/substring.v1"
|
||||
import "github.com/go-enry/go-enry/v2/regex"
|
||||
|
||||
var VendorMatchers = substring.Or(
|
||||
substring.Regexp(`(^|/)cache/`),
|
||||
substring.Regexp(`^[Dd]ependencies/`),
|
||||
substring.Regexp(`(^|/)dist/`),
|
||||
substring.Regexp(`^deps/`),
|
||||
substring.Regexp(`(^|/)configure$`),
|
||||
substring.Regexp(`(^|/)config.guess$`),
|
||||
substring.Regexp(`(^|/)config.sub$`),
|
||||
substring.Regexp(`(^|/)aclocal.m4`),
|
||||
substring.Regexp(`(^|/)libtool.m4`),
|
||||
substring.Regexp(`(^|/)ltoptions.m4`),
|
||||
substring.Regexp(`(^|/)ltsugar.m4`),
|
||||
substring.Regexp(`(^|/)ltversion.m4`),
|
||||
substring.Regexp(`(^|/)lt~obsolete.m4`),
|
||||
substring.Regexp(`dotnet-install\.(ps1|sh)$`),
|
||||
substring.Regexp(`cpplint.py`),
|
||||
substring.Regexp(`node_modules/`),
|
||||
substring.Regexp(`(^|/)\.yarn/releases/`),
|
||||
substring.Regexp(`(^|/)_esy$`),
|
||||
substring.Regexp(`bower_components/`),
|
||||
substring.Regexp(`^rebar$`),
|
||||
substring.Regexp(`erlang.mk`),
|
||||
substring.Regexp(`Godeps/_workspace/`),
|
||||
substring.Regexp(`(^|/)testdata/`),
|
||||
substring.Regexp(`.indent.pro`),
|
||||
substring.Regexp(`(\.|-)min\.(js|css)$`),
|
||||
substring.Regexp(`([^\s]*)import\.(css|less|scss|styl)$`),
|
||||
substring.Regexp(`(^|/)bootstrap([^.]*)\.(js|css|less|scss|styl)$`),
|
||||
substring.Regexp(`(^|/)custom\.bootstrap([^\s]*)(js|css|less|scss|styl)$`),
|
||||
substring.Regexp(`(^|/)font-?awesome\.(css|less|scss|styl)$`),
|
||||
substring.Regexp(`(^|/)font-?awesome/.*\.(css|less|scss|styl)$`),
|
||||
substring.Regexp(`(^|/)foundation\.(css|less|scss|styl)$`),
|
||||
substring.Regexp(`(^|/)normalize\.(css|less|scss|styl)$`),
|
||||
substring.Regexp(`(^|/)skeleton\.(css|less|scss|styl)$`),
|
||||
substring.Regexp(`(^|/)[Bb]ourbon/.*\.(css|less|scss|styl)$`),
|
||||
substring.Regexp(`(^|/)animate\.(css|less|scss|styl)$`),
|
||||
substring.Regexp(`(^|/)materialize\.(css|less|scss|styl|js)$`),
|
||||
substring.Regexp(`(^|/)select2/.*\.(css|scss|js)$`),
|
||||
substring.Regexp(`(^|/)bulma\.(css|sass|scss)$`),
|
||||
substring.Regexp(`(3rd|[Tt]hird)[-_]?[Pp]arty/`),
|
||||
substring.Regexp(`vendors?/`),
|
||||
substring.Regexp(`extern(al)?/`),
|
||||
substring.Regexp(`(^|/)[Vv]+endor/`),
|
||||
substring.Regexp(`^debian/`),
|
||||
substring.Regexp(`run.n$`),
|
||||
substring.Regexp(`bootstrap-datepicker/`),
|
||||
substring.Regexp(`(^|/)jquery([^.]*)\.js$`),
|
||||
substring.Regexp(`(^|/)jquery\-\d\.\d+(\.\d+)?\.js$`),
|
||||
substring.Regexp(`(^|/)jquery\-ui(\-\d\.\d+(\.\d+)?)?(\.\w+)?\.(js|css)$`),
|
||||
substring.Regexp(`(^|/)jquery\.(ui|effects)\.([^.]*)\.(js|css)$`),
|
||||
substring.Regexp(`jquery.fn.gantt.js`),
|
||||
substring.Regexp(`jquery.fancybox.(js|css)`),
|
||||
substring.Regexp(`fuelux.js`),
|
||||
substring.Regexp(`(^|/)jquery\.fileupload(-\w+)?\.js$`),
|
||||
substring.Regexp(`jquery.dataTables.js`),
|
||||
substring.Regexp(`bootbox.js`),
|
||||
substring.Regexp(`pdf.worker.js`),
|
||||
substring.Regexp(`(^|/)slick\.\w+.js$`),
|
||||
substring.Regexp(`(^|/)Leaflet\.Coordinates-\d+\.\d+\.\d+\.src\.js$`),
|
||||
substring.Regexp(`leaflet.draw-src.js`),
|
||||
substring.Regexp(`leaflet.draw.css`),
|
||||
substring.Regexp(`Control.FullScreen.css`),
|
||||
substring.Regexp(`Control.FullScreen.js`),
|
||||
substring.Regexp(`leaflet.spin.js`),
|
||||
substring.Regexp(`wicket-leaflet.js`),
|
||||
substring.Regexp(`.sublime-project`),
|
||||
substring.Regexp(`.sublime-workspace`),
|
||||
substring.Regexp(`.vscode`),
|
||||
substring.Regexp(`(^|/)prototype(.*)\.js$`),
|
||||
substring.Regexp(`(^|/)effects\.js$`),
|
||||
substring.Regexp(`(^|/)controls\.js$`),
|
||||
substring.Regexp(`(^|/)dragdrop\.js$`),
|
||||
substring.Regexp(`(.*?)\.d\.ts$`),
|
||||
substring.Regexp(`(^|/)mootools([^.]*)\d+\.\d+.\d+([^.]*)\.js$`),
|
||||
substring.Regexp(`(^|/)dojo\.js$`),
|
||||
substring.Regexp(`(^|/)MochiKit\.js$`),
|
||||
substring.Regexp(`(^|/)yahoo-([^.]*)\.js$`),
|
||||
substring.Regexp(`(^|/)yui([^.]*)\.js$`),
|
||||
substring.Regexp(`(^|/)ckeditor\.js$`),
|
||||
substring.Regexp(`(^|/)tiny_mce([^.]*)\.js$`),
|
||||
substring.Regexp(`(^|/)tiny_mce/(langs|plugins|themes|utils)`),
|
||||
substring.Regexp(`(^|/)ace-builds/`),
|
||||
substring.Regexp(`(^|/)fontello(.*?)\.css$`),
|
||||
substring.Regexp(`(^|/)MathJax/`),
|
||||
substring.Regexp(`(^|/)Chart\.js$`),
|
||||
substring.Regexp(`(^|/)[Cc]ode[Mm]irror/(\d+\.\d+/)?(lib|mode|theme|addon|keymap|demo)`),
|
||||
substring.Regexp(`(^|/)shBrush([^.]*)\.js$`),
|
||||
substring.Regexp(`(^|/)shCore\.js$`),
|
||||
substring.Regexp(`(^|/)shLegacy\.js$`),
|
||||
substring.Regexp(`(^|/)angular([^.]*)\.js$`),
|
||||
substring.Regexp(`(^|\/)d3(\.v\d+)?([^.]*)\.js$`),
|
||||
substring.Regexp(`(^|/)react(-[^.]*)?\.js$`),
|
||||
substring.Regexp(`(^|/)flow-typed/.*\.js$`),
|
||||
substring.Regexp(`(^|/)modernizr\-\d\.\d+(\.\d+)?\.js$`),
|
||||
substring.Regexp(`(^|/)modernizr\.custom\.\d+\.js$`),
|
||||
substring.Regexp(`(^|/)knockout-(\d+\.){3}(debug\.)?js$`),
|
||||
substring.Regexp(`(^|/)docs?/_?(build|themes?|templates?|static)/`),
|
||||
substring.Regexp(`(^|/)admin_media/`),
|
||||
substring.Regexp(`(^|/)env/`),
|
||||
substring.Regexp(`^fabfile\.py$`),
|
||||
substring.Regexp(`^waf$`),
|
||||
substring.Regexp(`^.osx$`),
|
||||
substring.Regexp(`\.xctemplate/`),
|
||||
substring.Regexp(`\.imageset/`),
|
||||
substring.Regexp(`(^|/)Carthage/`),
|
||||
substring.Regexp(`(^|/)Sparkle/`),
|
||||
substring.Regexp(`Crashlytics.framework/`),
|
||||
substring.Regexp(`Fabric.framework/`),
|
||||
substring.Regexp(`BuddyBuildSDK.framework/`),
|
||||
substring.Regexp(`Realm.framework`),
|
||||
substring.Regexp(`RealmSwift.framework`),
|
||||
substring.Regexp(`gitattributes$`),
|
||||
substring.Regexp(`gitignore$`),
|
||||
substring.Regexp(`gitmodules$`),
|
||||
substring.Regexp(`(^|/)gradlew$`),
|
||||
substring.Regexp(`(^|/)gradlew\.bat$`),
|
||||
substring.Regexp(`(^|/)gradle/wrapper/`),
|
||||
substring.Regexp(`(^|/)mvnw$`),
|
||||
substring.Regexp(`(^|/)mvnw\.cmd$`),
|
||||
substring.Regexp(`(^|/)\.mvn/wrapper/`),
|
||||
substring.Regexp(`-vsdoc\.js$`),
|
||||
substring.Regexp(`\.intellisense\.js$`),
|
||||
substring.Regexp(`(^|/)jquery([^.]*)\.validate(\.unobtrusive)?\.js$`),
|
||||
substring.Regexp(`(^|/)jquery([^.]*)\.unobtrusive\-ajax\.js$`),
|
||||
substring.Regexp(`(^|/)[Mm]icrosoft([Mm]vc)?([Aa]jax|[Vv]alidation)(\.debug)?\.js$`),
|
||||
substring.Regexp(`^[Pp]ackages\/.+\.\d+\/`),
|
||||
substring.Regexp(`(^|/)extjs/.*?\.js$`),
|
||||
substring.Regexp(`(^|/)extjs/.*?\.xml$`),
|
||||
substring.Regexp(`(^|/)extjs/.*?\.txt$`),
|
||||
substring.Regexp(`(^|/)extjs/.*?\.html$`),
|
||||
substring.Regexp(`(^|/)extjs/.*?\.properties$`),
|
||||
substring.Regexp(`(^|/)extjs/.sencha/`),
|
||||
substring.Regexp(`(^|/)extjs/docs/`),
|
||||
substring.Regexp(`(^|/)extjs/builds/`),
|
||||
substring.Regexp(`(^|/)extjs/cmd/`),
|
||||
substring.Regexp(`(^|/)extjs/examples/`),
|
||||
substring.Regexp(`(^|/)extjs/locale/`),
|
||||
substring.Regexp(`(^|/)extjs/packages/`),
|
||||
substring.Regexp(`(^|/)extjs/plugins/`),
|
||||
substring.Regexp(`(^|/)extjs/resources/`),
|
||||
substring.Regexp(`(^|/)extjs/src/`),
|
||||
substring.Regexp(`(^|/)extjs/welcome/`),
|
||||
substring.Regexp(`(^|/)html5shiv\.js$`),
|
||||
substring.Regexp(`^[Tt]ests?/fixtures/`),
|
||||
substring.Regexp(`^[Ss]pecs?/fixtures/`),
|
||||
substring.Regexp(`(^|/)cordova([^.]*)\.js$`),
|
||||
substring.Regexp(`(^|/)cordova\-\d\.\d(\.\d)?\.js$`),
|
||||
substring.Regexp(`foundation(\..*)?\.js$`),
|
||||
substring.Regexp(`^Vagrantfile$`),
|
||||
substring.Regexp(`.[Dd][Ss]_[Ss]tore$`),
|
||||
substring.Regexp(`^vignettes/`),
|
||||
substring.Regexp(`^inst/extdata/`),
|
||||
substring.Regexp(`octicons.css`),
|
||||
substring.Regexp(`sprockets-octicons.scss`),
|
||||
substring.Regexp(`(^|/)activator$`),
|
||||
substring.Regexp(`(^|/)activator\.bat$`),
|
||||
substring.Regexp(`proguard.pro`),
|
||||
substring.Regexp(`proguard-rules.pro`),
|
||||
substring.Regexp(`^puphpet/`),
|
||||