diff --git a/modules/markup/html.go b/modules/markup/html.go index fc823b1f30..1ff7a41cbb 100644 --- a/modules/markup/html.go +++ b/modules/markup/html.go @@ -92,6 +92,32 @@ func getIssueFullPattern() *regexp.Regexp { return issueFullPattern } +// CustomLinkURLSchemes allows for additional schemes to be detected when parsing links within text +func CustomLinkURLSchemes(schemes []string) { + schemes = append(schemes, "http", "https") + withAuth := make([]string, 0, len(schemes)) + validScheme := regexp.MustCompile(`^[a-z]+$`) + for _, s := range schemes { + if !validScheme.MatchString(s) { + continue + } + without := false + for _, sna := range xurls.SchemesNoAuthority { + if s == sna { + without = true + break + } + } + if without { + s += ":" + } else { + s += "://" + } + withAuth = append(withAuth, s) + } + linkRegex, _ = xurls.StrictMatchingScheme(strings.Join(withAuth, "|")) +} + // IsSameDomain checks if given url string has the same hostname as current Gitea instance func IsSameDomain(s string) bool { if strings.HasPrefix(s, "/") { diff --git a/modules/markup/html_test.go b/modules/markup/html_test.go index 66e56f71a7..91ef320b40 100644 --- a/modules/markup/html_test.go +++ b/modules/markup/html_test.go @@ -89,6 +89,11 @@ func TestRender_links(t *testing.T) { } // Text that should be turned into URL + defaultCustom := setting.Markdown.CustomURLSchemes + setting.Markdown.CustomURLSchemes = []string{"ftp", "magnet"} + ReplaceSanitizer() + CustomLinkURLSchemes(setting.Markdown.CustomURLSchemes) + test( "https://www.example.com", `

https://www.example.com

`) @@ -131,6 +136,12 @@ func TestRender_links(t *testing.T) { test( "https://username:password@gitea.com", `

https://username:password@gitea.com

`) + test( + "ftp://gitea.com/file.txt", + `

ftp://gitea.com/file.txt

`) + test( + "magnet:?xt=urn:btih:5dee65101db281ac9c46344cd6b175cdcadabcde&dn=download", + `

magnet:?xt=urn:btih:5dee65101db281ac9c46344cd6b175cdcadabcde&dn=download

`) // Test that should *not* be turned into URL test( @@ -154,6 +165,14 @@ func TestRender_links(t *testing.T) { test( "www", `

www

`) + test( + "ftps://gitea.com", + `

ftps://gitea.com

`) + + // Restore previous settings + setting.Markdown.CustomURLSchemes = defaultCustom + ReplaceSanitizer() + CustomLinkURLSchemes(setting.Markdown.CustomURLSchemes) } func TestRender_email(t *testing.T) { diff --git a/modules/markup/markup.go b/modules/markup/markup.go index dc43b533c0..008b21ab97 100644 --- a/modules/markup/markup.go +++ b/modules/markup/markup.go @@ -9,12 +9,16 @@ import ( "strings" "code.gitea.io/gitea/modules/log" + "code.gitea.io/gitea/modules/setting" ) // Init initialize regexps for markdown parsing func Init() { getIssueFullPattern() NewSanitizer() + if len(setting.Markdown.CustomURLSchemes) > 0 { + CustomLinkURLSchemes(setting.Markdown.CustomURLSchemes) + } // since setting maybe changed extensions, this will reload all parser extensions mapping extParsers = make(map[string]Parser) diff --git a/modules/markup/sanitizer.go b/modules/markup/sanitizer.go index fd6f90b2ab..f873e8105e 100644 --- a/modules/markup/sanitizer.go +++ b/modules/markup/sanitizer.go @@ -28,22 +28,28 @@ var sanitizer = &Sanitizer{} // entire application lifecycle. func NewSanitizer() { sanitizer.init.Do(func() { - sanitizer.policy = bluemonday.UGCPolicy() - // We only want to allow HighlightJS specific classes for code blocks - sanitizer.policy.AllowAttrs("class").Matching(regexp.MustCompile(`^language-\w+$`)).OnElements("code") - - // Checkboxes - sanitizer.policy.AllowAttrs("type").Matching(regexp.MustCompile(`^checkbox$`)).OnElements("input") - sanitizer.policy.AllowAttrs("checked", "disabled").OnElements("input") - - // Custom URL-Schemes - sanitizer.policy.AllowURLSchemes(setting.Markdown.CustomURLSchemes...) - - // Allow keyword markup - sanitizer.policy.AllowAttrs("class").Matching(regexp.MustCompile(`^` + keywordClass + `$`)).OnElements("span") + ReplaceSanitizer() }) } +// ReplaceSanitizer replaces the current sanitizer to account for changes in settings +func ReplaceSanitizer() { + sanitizer = &Sanitizer{} + sanitizer.policy = bluemonday.UGCPolicy() + // We only want to allow HighlightJS specific classes for code blocks + sanitizer.policy.AllowAttrs("class").Matching(regexp.MustCompile(`^language-\w+$`)).OnElements("code") + + // Checkboxes + sanitizer.policy.AllowAttrs("type").Matching(regexp.MustCompile(`^checkbox$`)).OnElements("input") + sanitizer.policy.AllowAttrs("checked", "disabled").OnElements("input") + + // Custom URL-Schemes + sanitizer.policy.AllowURLSchemes(setting.Markdown.CustomURLSchemes...) + + // Allow keyword markup + sanitizer.policy.AllowAttrs("class").Matching(regexp.MustCompile(`^` + keywordClass + `$`)).OnElements("span") +} + // Sanitize takes a string that contains a HTML fragment or document and applies policy whitelist. func Sanitize(s string) string { NewSanitizer()