// Copyright 2019 The Gitea Authors. All rights reserved. // Use of this source code is governed by a MIT-style // license that can be found in the LICENSE file. package mdstripper import ( "bytes" "io" "github.com/russross/blackfriday/v2" ) // MarkdownStripper extends blackfriday.Renderer type MarkdownStripper struct { links []string coallesce bool empty bool } const ( blackfridayExtensions = 0 | blackfriday.NoIntraEmphasis | blackfriday.Tables | blackfriday.FencedCode | blackfriday.Strikethrough | blackfriday.NoEmptyLineBeforeBlock | blackfriday.DefinitionLists | blackfriday.Footnotes | blackfriday.HeadingIDs | blackfriday.AutoHeadingIDs | // Not included in modules/markup/markdown/markdown.go; // required here to process inline links blackfriday.Autolink ) // StripMarkdown parses markdown content by removing all markup and code blocks // in order to extract links and other references func StripMarkdown(rawBytes []byte) (string, []string) { buf, links := StripMarkdownBytes(rawBytes) return string(buf), links } // StripMarkdownBytes parses markdown content by removing all markup and code blocks // in order to extract links and other references func StripMarkdownBytes(rawBytes []byte) ([]byte, []string) { stripper := &MarkdownStripper{ links: make([]string, 0, 10), empty: true, } parser := blackfriday.New(blackfriday.WithRenderer(stripper), blackfriday.WithExtensions(blackfridayExtensions)) ast := parser.Parse(rawBytes) var buf bytes.Buffer stripper.RenderHeader(&buf, ast) ast.Walk(func(node *blackfriday.Node, entering bool) blackfriday.WalkStatus { return stripper.RenderNode(&buf, node, entering) }) stripper.RenderFooter(&buf, ast) return buf.Bytes(), stripper.GetLinks() } // RenderNode is the main rendering method. It will be called once for // every leaf node and twice for every non-leaf node (first with // entering=true, then with entering=false). The method should write its // rendition of the node to the supplied writer w. func (r *MarkdownStripper) RenderNode(w io.Writer, node *blackfriday.Node, entering bool) blackfriday.WalkStatus { if !entering { return blackfriday.GoToNext } switch node.Type { case blackfriday.Text: r.processString(w, node.Literal, node.Parent == nil) return blackfriday.GoToNext case blackfriday.Link: r.processLink(w, node.LinkData.Destination) r.coallesce = false return blackfriday.SkipChildren } r.coallesce = false return blackfriday.GoToNext } // RenderHeader is a method that allows the renderer to produce some // content preceding the main body of the output document. func (r *MarkdownStripper) RenderHeader(w io.Writer, ast *blackfriday.Node) { } // RenderFooter is a symmetric counterpart of RenderHeader. func (r *MarkdownStripper) RenderFooter(w io.Writer, ast *blackfriday.Node) { } func (r *MarkdownStripper) doubleSpace(w io.Writer) { if !r.empty { _, _ = w.Write([]byte{'\n'}) } } func (r *MarkdownStripper) processString(w io.Writer, text []byte, coallesce bool) { // Always break-up words if !coallesce || !r.coallesce { r.doubleSpace(w) } _, _ = w.Write(text) r.coallesce = coallesce r.empty = false } func (r *MarkdownStripper) processLink(w io.Writer, link []byte) { // Links are processed out of band r.links = append(r.links, string(link)) r.coallesce = false } // GetLinks returns the list of link data collected while parsing func (r *MarkdownStripper) GetLinks() []string { return r.links }