Rework file highlight rendering and fix yaml copy-paste (#19967)

* Rework file highlight rendering and fix yaml copy-paste

* use Split+Trim to replace tag parser

* remove unnecessary bytes.Count

* remove newLineInHTML = "&#10;"

Co-authored-by: wxiaoguang <wxiaoguang@gmail.com>
This commit is contained in:
silverwind 2022-07-30 21:17:43 +02:00 committed by GitHub
parent 0e61a74e5a
commit ae3dde1c87
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 179 additions and 145 deletions

View File

@ -10,6 +10,7 @@ import (
"bytes" "bytes"
"fmt" "fmt"
gohtml "html" gohtml "html"
"io"
"path/filepath" "path/filepath"
"strings" "strings"
"sync" "sync"
@ -26,7 +27,7 @@ import (
) )
// don't index files larger than this many bytes for performance purposes // don't index files larger than this many bytes for performance purposes
const sizeLimit = 1000000 const sizeLimit = 1024 * 1024
var ( var (
// For custom user mapping // For custom user mapping
@ -46,7 +47,6 @@ func NewContext() {
highlightMapping[keys[i].Name()] = keys[i].Value() highlightMapping[keys[i].Name()] = keys[i].Value()
} }
} }
// The size 512 is simply a conservative rule of thumb // The size 512 is simply a conservative rule of thumb
c, err := lru.New2Q(512) c, err := lru.New2Q(512)
if err != nil { if err != nil {
@ -60,7 +60,7 @@ func NewContext() {
func Code(fileName, language, code string) string { func Code(fileName, language, code string) string {
NewContext() NewContext()
// diff view newline will be passed as empty, change to literal \n so it can be copied // diff view newline will be passed as empty, change to literal '\n' so it can be copied
// preserve literal newline in blame view // preserve literal newline in blame view
if code == "" || code == "\n" { if code == "" || code == "\n" {
return "\n" return "\n"
@ -128,36 +128,32 @@ func CodeFromLexer(lexer chroma.Lexer, code string) string {
return code return code
} }
htmlw.Flush() _ = htmlw.Flush()
// Chroma will add newlines for certain lexers in order to highlight them properly // Chroma will add newlines for certain lexers in order to highlight them properly
// Once highlighted, strip them here so they don't cause copy/paste trouble in HTML output // Once highlighted, strip them here, so they don't cause copy/paste trouble in HTML output
return strings.TrimSuffix(htmlbuf.String(), "\n") return strings.TrimSuffix(htmlbuf.String(), "\n")
} }
// File returns a slice of chroma syntax highlighted lines of code // File returns a slice of chroma syntax highlighted HTML lines of code
func File(numLines int, fileName, language string, code []byte) []string { func File(fileName, language string, code []byte) ([]string, error) {
NewContext() NewContext()
if len(code) > sizeLimit { if len(code) > sizeLimit {
return plainText(string(code), numLines) return PlainText(code), nil
} }
formatter := html.New(html.WithClasses(true), formatter := html.New(html.WithClasses(true),
html.WithLineNumbers(false), html.WithLineNumbers(false),
html.PreventSurroundingPre(true), html.PreventSurroundingPre(true),
) )
if formatter == nil { htmlBuf := bytes.Buffer{}
log.Error("Couldn't create chroma formatter") htmlWriter := bufio.NewWriter(&htmlBuf)
return plainText(string(code), numLines)
}
htmlbuf := bytes.Buffer{}
htmlw := bufio.NewWriter(&htmlbuf)
var lexer chroma.Lexer var lexer chroma.Lexer
// provided language overrides everything // provided language overrides everything
if len(language) > 0 { if language != "" {
lexer = lexers.Get(language) lexer = lexers.Get(language)
} }
@ -168,9 +164,9 @@ func File(numLines int, fileName, language string, code []byte) []string {
} }
if lexer == nil { if lexer == nil {
language := analyze.GetCodeLanguage(fileName, code) guessLanguage := analyze.GetCodeLanguage(fileName, code)
lexer = lexers.Get(language) lexer = lexers.Get(guessLanguage)
if lexer == nil { if lexer == nil {
lexer = lexers.Match(fileName) lexer = lexers.Match(fileName)
if lexer == nil { if lexer == nil {
@ -181,54 +177,43 @@ func File(numLines int, fileName, language string, code []byte) []string {
iterator, err := lexer.Tokenise(nil, string(code)) iterator, err := lexer.Tokenise(nil, string(code))
if err != nil { if err != nil {
log.Error("Can't tokenize code: %v", err) return nil, fmt.Errorf("can't tokenize code: %w", err)
return plainText(string(code), numLines)
} }
err = formatter.Format(htmlw, styles.GitHub, iterator) err = formatter.Format(htmlWriter, styles.GitHub, iterator)
if err != nil { if err != nil {
log.Error("Can't format code: %v", err) return nil, fmt.Errorf("can't format code: %w", err)
return plainText(string(code), numLines)
} }
htmlw.Flush() _ = htmlWriter.Flush()
finalNewLine := false
if len(code) > 0 {
finalNewLine = code[len(code)-1] == '\n'
}
m := make([]string, 0, numLines) // at the moment, Chroma generates stable output `<span class="line"><span class="cl">...\n</span></span>` for each line
for _, v := range strings.SplitN(htmlbuf.String(), "\n", numLines) { htmlStr := htmlBuf.String()
content := v lines := strings.Split(htmlStr, `<span class="line"><span class="cl">`)
// need to keep lines that are only \n so copy/paste works properly in browser m := make([]string, 0, len(lines))
if content == "" { for i := 1; i < len(lines); i++ {
content = "\n" line := lines[i]
} else if content == `</span><span class="w">` { line = strings.TrimSuffix(line, "</span></span>")
content += "\n</span>" m = append(m, line)
} else if content == `</span></span><span class="line"><span class="cl">` {
content += "\n"
}
content = strings.TrimSuffix(content, `<span class="w">`)
content = strings.TrimPrefix(content, `</span>`)
m = append(m, content)
} }
if finalNewLine { return m, nil
m = append(m, "<span class=\"w\">\n</span>")
}
return m
} }
// return unhiglighted map // PlainText returns non-highlighted HTML for code
func plainText(code string, numLines int) []string { func PlainText(code []byte) []string {
m := make([]string, 0, numLines) r := bufio.NewReader(bytes.NewReader(code))
for _, v := range strings.SplitN(code, "\n", numLines) { m := make([]string, 0, bytes.Count(code, []byte{'\n'})+1)
content := v for {
// need to keep lines that are only \n so copy/paste works properly in browser content, err := r.ReadString('\n')
if content == "" { if err != nil && err != io.EOF {
content = "\n" log.Error("failed to read string from buffer: %v", err)
break
} }
m = append(m, gohtml.EscapeString(content)) if content == "" && err == io.EOF {
break
}
s := gohtml.EscapeString(content)
m = append(m, s)
} }
return m return m
} }

View File

@ -8,97 +8,146 @@ import (
"strings" "strings"
"testing" "testing"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/util"
"github.com/stretchr/testify/assert" "github.com/stretchr/testify/assert"
"gopkg.in/ini.v1"
) )
func lines(s string) []string {
return strings.Split(strings.ReplaceAll(strings.TrimSpace(s), `\n`, "\n"), "\n")
}
func TestFile(t *testing.T) { func TestFile(t *testing.T) {
setting.Cfg = ini.Empty()
tests := []struct { tests := []struct {
name string name string
numLines int code string
fileName string want []string
code string
want string
}{ }{
{ {
name: ".drone.yml", name: "empty.py",
numLines: 12, code: "",
fileName: ".drone.yml", want: lines(""),
code: util.Dedent(`
kind: pipeline
name: default
steps:
- name: test
image: golang:1.13
environment:
GOPROXY: https://goproxy.cn
commands:
- go get -u
- go build -v
- go test -v -race -coverprofile=coverage.txt -covermode=atomic
`),
want: util.Dedent(`
<span class="line"><span class="cl"><span class="nt">kind</span><span class="p">:</span><span class="w"> </span><span class="l">pipeline</span>
</span></span><span class="line"><span class="cl"><span class="w"></span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l">default</span>
</span></span><span class="line"><span class="cl">
</span></span><span class="line"><span class="cl"><span class="w"></span><span class="nt">steps</span><span class="p">:</span>
</span></span><span class="line"><span class="cl"><span class="w"></span>- <span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l">test</span>
</span></span><span class="line"><span class="cl"><span class="w"> </span><span class="nt">image</span><span class="p">:</span><span class="w"> </span><span class="l">golang:1.13</span>
</span></span><span class="line"><span class="cl"><span class="w"> </span><span class="nt">environment</span><span class="p">:</span>
</span></span><span class="line"><span class="cl"><span class="w"></span><span class="w"> </span><span class="nt">GOPROXY</span><span class="p">:</span><span class="w"> </span><span class="l">https://goproxy.cn</span>
</span></span><span class="line"><span class="cl"><span class="w"> </span><span class="nt">commands</span><span class="p">:</span>
</span></span><span class="line"><span class="cl"><span class="w"></span><span class="w"> </span>- <span class="l">go get -u</span>
</span></span><span class="line"><span class="cl"><span class="w"> </span>- <span class="l">go build -v</span>
</span></span><span class="line"><span class="cl"><span class="w"> </span>- <span class="l">go test -v -race -coverprofile=coverage.txt -covermode=atomic</span></span></span>
`),
}, },
{ {
name: ".drone.yml - trailing space", name: "tags.txt",
numLines: 13, code: "<>",
fileName: ".drone.yml", want: lines("&lt;&gt;"),
code: strings.Replace(util.Dedent(` },
kind: pipeline {
name: default name: "tags.py",
code: "<>",
want: lines(`<span class="o">&lt;</span><span class="o">&gt;</span>`),
},
{
name: "eol-no.py",
code: "a=1",
want: lines(`<span class="n">a</span><span class="o">=</span><span class="mi">1</span>`),
},
{
name: "eol-newline1.py",
code: "a=1\n",
want: lines(`<span class="n">a</span><span class="o">=</span><span class="mi">1</span>\n`),
},
{
name: "eol-newline2.py",
code: "a=1\n\n",
want: lines(`
<span class="n">a</span><span class="o">=</span><span class="mi">1</span>\n
\n
`,
),
},
{
name: "empty-line-with-space.py",
code: strings.ReplaceAll(strings.TrimSpace(`
def:
a=1
steps: b=''
- name: test {space}
image: golang:1.13 c=2
environment: `), "{space}", " "),
GOPROXY: https://goproxy.cn want: lines(`
commands: <span class="n">def</span><span class="p">:</span>\n
- go get -u <span class="n">a</span><span class="o">=</span><span class="mi">1</span>\n
- go build -v \n
- go test -v -race -coverprofile=coverage.txt -covermode=atomic <span class="n">b</span><span class="o">=</span><span class="sa"></span><span class="s1">&#39;</span><span class="s1">&#39;</span>\n
`)+"\n", "name: default", "name: default ", 1), \n
want: util.Dedent(` <span class="n">c</span><span class="o">=</span><span class="mi">2</span>`,
<span class="line"><span class="cl"><span class="nt">kind</span><span class="p">:</span><span class="w"> </span><span class="l">pipeline</span> ),
</span></span><span class="line"><span class="cl"><span class="w"></span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l">default </span>
</span></span><span class="line"><span class="cl">
</span></span><span class="line"><span class="cl"><span class="w"></span><span class="nt">steps</span><span class="p">:</span>
</span></span><span class="line"><span class="cl"><span class="w"></span>- <span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l">test</span>
</span></span><span class="line"><span class="cl"><span class="w"> </span><span class="nt">image</span><span class="p">:</span><span class="w"> </span><span class="l">golang:1.13</span>
</span></span><span class="line"><span class="cl"><span class="w"> </span><span class="nt">environment</span><span class="p">:</span>
</span></span><span class="line"><span class="cl"><span class="w"></span><span class="w"> </span><span class="nt">GOPROXY</span><span class="p">:</span><span class="w"> </span><span class="l">https://goproxy.cn</span>
</span></span><span class="line"><span class="cl"><span class="w"> </span><span class="nt">commands</span><span class="p">:</span>
</span></span><span class="line"><span class="cl"><span class="w"></span><span class="w"> </span>- <span class="l">go get -u</span>
</span></span><span class="line"><span class="cl"><span class="w"> </span>- <span class="l">go build -v</span>
</span></span><span class="line"><span class="cl"><span class="w"> </span>- <span class="l">go test -v -race -coverprofile=coverage.txt -covermode=atomic</span>
</span></span>
<span class="w">
</span>
`),
}, },
} }
for _, tt := range tests { for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) { t.Run(tt.name, func(t *testing.T) {
got := strings.Join(File(tt.numLines, tt.fileName, "", []byte(tt.code)), "\n") out, err := File(tt.name, "", []byte(tt.code))
assert.Equal(t, tt.want, got) assert.NoError(t, err)
expected := strings.Join(tt.want, "\n")
actual := strings.Join(out, "\n")
assert.Equal(t, strings.Count(actual, "<span"), strings.Count(actual, "</span>"))
assert.EqualValues(t, expected, actual)
})
}
}
func TestPlainText(t *testing.T) {
tests := []struct {
name string
code string
want []string
}{
{
name: "empty.py",
code: "",
want: lines(""),
},
{
name: "tags.py",
code: "<>",
want: lines("&lt;&gt;"),
},
{
name: "eol-no.py",
code: "a=1",
want: lines(`a=1`),
},
{
name: "eol-newline1.py",
code: "a=1\n",
want: lines(`a=1\n`),
},
{
name: "eol-newline2.py",
code: "a=1\n\n",
want: lines(`
a=1\n
\n
`),
},
{
name: "empty-line-with-space.py",
code: strings.ReplaceAll(strings.TrimSpace(`
def:
a=1
b=''
{space}
c=2
`), "{space}", " "),
want: lines(`
def:\n
a=1\n
\n
b=&#39;&#39;\n
\n
c=2`),
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
out := PlainText([]byte(tt.code))
expected := strings.Join(tt.want, "\n")
actual := strings.Join(out, "\n")
assert.EqualValues(t, expected, actual)
}) })
} }
} }

View File

@ -15,7 +15,6 @@ import (
"net/http" "net/http"
"net/url" "net/url"
"path" "path"
"strconv"
"strings" "strings"
"time" "time"
@ -57,15 +56,6 @@ type namedBlob struct {
blob *git.Blob blob *git.Blob
} }
func linesBytesCount(s []byte) int {
nl := []byte{'\n'}
n := bytes.Count(s, nl)
if len(s) > 0 && !bytes.HasSuffix(s, nl) {
n++
}
return n
}
// FIXME: There has to be a more efficient way of doing this // FIXME: There has to be a more efficient way of doing this
func getReadmeFileFromPath(commit *git.Commit, treePath string) (*namedBlob, error) { func getReadmeFileFromPath(commit *git.Commit, treePath string) (*namedBlob, error) {
tree, err := commit.SubTree(treePath) tree, err := commit.SubTree(treePath)
@ -555,8 +545,14 @@ func renderFile(ctx *context.Context, entry *git.TreeEntry, treeLink, rawLink st
) )
} else { } else {
buf, _ := io.ReadAll(rd) buf, _ := io.ReadAll(rd)
lineNums := linesBytesCount(buf)
ctx.Data["NumLines"] = strconv.Itoa(lineNums) // empty: 0 lines; "a": one line; "a\n": two lines; "a\nb": two lines;
// the NumLines is only used for the display on the UI: "xxx lines"
if len(buf) == 0 {
ctx.Data["NumLines"] = 0
} else {
ctx.Data["NumLines"] = bytes.Count(buf, []byte{'\n'}) + 1
}
ctx.Data["NumLinesSet"] = true ctx.Data["NumLinesSet"] = true
language := "" language := ""
@ -584,7 +580,11 @@ func renderFile(ctx *context.Context, entry *git.TreeEntry, treeLink, rawLink st
language = "" language = ""
} }
} }
fileContent := highlight.File(lineNums, blob.Name(), language, buf) fileContent, err := highlight.File(blob.Name(), language, buf)
if err != nil {
log.Error("highlight.File failed, fallback to plain text: %v", err)
fileContent = highlight.PlainText(buf)
}
status, _ := charset.EscapeControlReader(bytes.NewReader(buf), io.Discard) status, _ := charset.EscapeControlReader(bytes.NewReader(buf), io.Discard)
ctx.Data["EscapeStatus"] = status ctx.Data["EscapeStatus"] = status
statuses := make([]charset.EscapeStatus, len(fileContent)) statuses := make([]charset.EscapeStatus, len(fileContent))