@ -6,43 +6,39 @@ package mdstripper
import (
"bytes"
"io"
"github.com/russross/blackfriday"
"github.com/russross/blackfriday/v2 "
)
// MarkdownStripper extends blackfriday.Renderer
type MarkdownStripper struct {
blackfriday . Renderer
links [ ] string
coallesce bool
empty bool
}
const (
blackfridayExtensions = 0 |
blackfriday . EXTENSION_NO_INTRA_EMPHASIS |
blackfriday . EXTENSION_TABLES |
blackfriday . EXTENSION_FENCED_CODE |
blackfriday . EXTENSION_STRIKETHROUGH |
blackfriday . EXTENSION_NO_EMPTY_LINE_BEFORE_BLOCK |
blackfriday . EXTENSION_DEFINITION_LISTS |
blackfriday . EXTENSION_FOOTNOTES |
blackfriday . EXTENSION_HEADER_IDS |
blackfriday . EXTENSION_AUTO_HEADER_IDS |
blackfriday . NoIntraEmphasis |
blackfriday . Tables |
blackfriday . FencedCode |
blackfriday . Strikethrough |
blackfriday . NoEmptyLineBeforeBlock |
blackfriday . DefinitionLists |
blackfriday . Footnotes |
blackfriday . HeadingIDs |
blackfriday . AutoHeadingIDs |
// Not included in modules/markup/markdown/markdown.go;
// required here to process inline links
blackfriday . EXTENSION_AUTOLINK
blackfriday . Autolink
)
//revive:disable:var-naming Implementing the Rendering interface requires breaking some linting rules
// StripMarkdown parses markdown content by removing all markup and code blocks
// in order to extract links and other references
func StripMarkdown ( rawBytes [ ] byte ) ( string , [ ] string ) {
stripper := & MarkdownStripper {
links : make ( [ ] string , 0 , 10 ) ,
}
body := blackfriday . Markdown ( rawBytes , stripper , blackfridayExtensions )
return string ( body ) , stripper . GetLinks ( )
buf , links := StripMarkdownBytes ( rawBytes )
return string ( buf ) , links
}
// StripMarkdownBytes parses markdown content by removing all markup and code blocks
@ -50,205 +46,67 @@ func StripMarkdown(rawBytes []byte) (string, []string) {
func StripMarkdownBytes ( rawBytes [ ] byte ) ( [ ] byte , [ ] string ) {
stripper := & MarkdownStripper {
links : make ( [ ] string , 0 , 10 ) ,
empty : true ,
}
body := blackfriday . Markdown ( rawBytes , stripper , blackfridayExtensions )
return body , stripper . GetLinks ( )
}
// block-level callbacks
// BlockCode dummy function to proceed with rendering
func ( r * MarkdownStripper ) BlockCode ( out * bytes . Buffer , text [ ] byte , infoString string ) {
// Not rendered
r . coallesce = false
}
// BlockQuote dummy function to proceed with rendering
func ( r * MarkdownStripper ) BlockQuote ( out * bytes . Buffer , text [ ] byte ) {
// FIXME: perhaps it's better to leave out block quote for this?
r . processString ( out , text , false )
}
// BlockHtml dummy function to proceed with rendering
func ( r * MarkdownStripper ) BlockHtml ( out * bytes . Buffer , text [ ] byte ) { //nolint
// Not rendered
r . coallesce = false
}
// Header dummy function to proceed with rendering
func ( r * MarkdownStripper ) Header ( out * bytes . Buffer , text func ( ) bool , level int , id string ) {
text ( )
r . coallesce = false
}
// HRule dummy function to proceed with rendering
func ( r * MarkdownStripper ) HRule ( out * bytes . Buffer ) {
// Not rendered
r . coallesce = false
}
// List dummy function to proceed with rendering
func ( r * MarkdownStripper ) List ( out * bytes . Buffer , text func ( ) bool , flags int ) {
text ( )
r . coallesce = false
}
// ListItem dummy function to proceed with rendering
func ( r * MarkdownStripper ) ListItem ( out * bytes . Buffer , text [ ] byte , flags int ) {
r . processString ( out , text , false )
}
// Paragraph dummy function to proceed with rendering
func ( r * MarkdownStripper ) Paragraph ( out * bytes . Buffer , text func ( ) bool ) {
text ( )
r . coallesce = false
}
// Table dummy function to proceed with rendering
func ( r * MarkdownStripper ) Table ( out * bytes . Buffer , header [ ] byte , body [ ] byte , columnData [ ] int ) {
r . processString ( out , header , false )
r . processString ( out , body , false )
}
// TableRow dummy function to proceed with rendering
func ( r * MarkdownStripper ) TableRow ( out * bytes . Buffer , text [ ] byte ) {
r . processString ( out , text , false )
}
// TableHeaderCell dummy function to proceed with rendering
func ( r * MarkdownStripper ) TableHeaderCell ( out * bytes . Buffer , text [ ] byte , flags int ) {
r . processString ( out , text , false )
}
// TableCell dummy function to proceed with rendering
func ( r * MarkdownStripper ) TableCell ( out * bytes . Buffer , text [ ] byte , flags int ) {
r . processString ( out , text , false )
}
// Footnotes dummy function to proceed with rendering
func ( r * MarkdownStripper ) Footnotes ( out * bytes . Buffer , text func ( ) bool ) {
text ( )
}
// FootnoteItem dummy function to proceed with rendering
func ( r * MarkdownStripper ) FootnoteItem ( out * bytes . Buffer , name , text [ ] byte , flags int ) {
r . processString ( out , text , false )
}
// TitleBlock dummy function to proceed with rendering
func ( r * MarkdownStripper ) TitleBlock ( out * bytes . Buffer , text [ ] byte ) {
r . processString ( out , text , false )
}
// Span-level callbacks
// AutoLink dummy function to proceed with rendering
func ( r * MarkdownStripper ) AutoLink ( out * bytes . Buffer , link [ ] byte , kind int ) {
r . processLink ( out , link , [ ] byte { } )
}
// CodeSpan dummy function to proceed with rendering
func ( r * MarkdownStripper ) CodeSpan ( out * bytes . Buffer , text [ ] byte ) {
// Not rendered
r . coallesce = false
}
// DoubleEmphasis dummy function to proceed with rendering
func ( r * MarkdownStripper ) DoubleEmphasis ( out * bytes . Buffer , text [ ] byte ) {
r . processString ( out , text , false )
}
// Emphasis dummy function to proceed with rendering
func ( r * MarkdownStripper ) Emphasis ( out * bytes . Buffer , text [ ] byte ) {
r . processString ( out , text , false )
}
// Image dummy function to proceed with rendering
func ( r * MarkdownStripper ) Image ( out * bytes . Buffer , link [ ] byte , title [ ] byte , alt [ ] byte ) {
// Not rendered
r . coallesce = false
}
// LineBreak dummy function to proceed with rendering
func ( r * MarkdownStripper ) LineBreak ( out * bytes . Buffer ) {
// Not rendered
r . coallesce = false
}
// Link dummy function to proceed with rendering
func ( r * MarkdownStripper ) Link ( out * bytes . Buffer , link [ ] byte , title [ ] byte , content [ ] byte ) {
r . processLink ( out , link , content )
}
// RawHtmlTag dummy function to proceed with rendering
func ( r * MarkdownStripper ) RawHtmlTag ( out * bytes . Buffer , tag [ ] byte ) { //nolint
// Not rendered
r . coallesce = false
}
// TripleEmphasis dummy function to proceed with rendering
func ( r * MarkdownStripper ) TripleEmphasis ( out * bytes . Buffer , text [ ] byte ) {
r . processString ( out , text , false )
}
// StrikeThrough dummy function to proceed with rendering
func ( r * MarkdownStripper ) StrikeThrough ( out * bytes . Buffer , text [ ] byte ) {
r . processString ( out , text , false )
}
// FootnoteRef dummy function to proceed with rendering
func ( r * MarkdownStripper ) FootnoteRef ( out * bytes . Buffer , ref [ ] byte , id int ) {
// Not rendered
r . coallesce = false
}
// Low-level callbacks
// Entity dummy function to proceed with rendering
func ( r * MarkdownStripper ) Entity ( out * bytes . Buffer , entity [ ] byte ) {
// FIXME: literal entities are not parsed; perhaps they should
r . coallesce = false
}
// NormalText dummy function to proceed with rendering
func ( r * MarkdownStripper ) NormalText ( out * bytes . Buffer , text [ ] byte ) {
r . processString ( out , text , true )
}
// Header and footer
// DocumentHeader dummy function to proceed with rendering
func ( r * MarkdownStripper ) DocumentHeader ( out * bytes . Buffer ) {
parser := blackfriday . New ( blackfriday . WithRenderer ( stripper ) , blackfriday . WithExtensions ( blackfridayExtensions ) )
ast := parser . Parse ( rawBytes )
var buf bytes . Buffer
stripper . RenderHeader ( & buf , ast )
ast . Walk ( func ( node * blackfriday . Node , entering bool ) blackfriday . WalkStatus {
return stripper . RenderNode ( & buf , node , entering )
} )
stripper . RenderFooter ( & buf , ast )
return buf . Bytes ( ) , stripper . GetLinks ( )
}
// RenderNode is the main rendering method. It will be called once for
// every leaf node and twice for every non-leaf node (first with
// entering=true, then with entering=false). The method should write its
// rendition of the node to the supplied writer w.
func ( r * MarkdownStripper ) RenderNode ( w io . Writer , node * blackfriday . Node , entering bool ) blackfriday . WalkStatus {
if ! entering {
return blackfriday . GoToNext
}
switch node . Type {
case blackfriday . Text :
r . processString ( w , node . Literal , node . Parent == nil )
return blackfriday . GoToNext
case blackfriday . Link :
r . processLink ( w , node . LinkData . Destination )
r . coallesce = false
return blackfriday . SkipChildren
}
r . coallesce = false
return blackfriday . GoToNext
}
// DocumentFooter dummy function to proceed with rendering
func ( r * MarkdownStripper ) DocumentFooter ( out * bytes . Buffer ) {
r . coallesce = false
// RenderHeader is a method that allows the renderer to produce some
// content preceding the main body of the output document.
func ( r * MarkdownStripper ) RenderHeader ( w io . Writer , ast * blackfriday . Node ) {
}
// GetFlags returns rendering flags
func ( r * MarkdownStripper ) GetFlags ( ) int {
return 0
// RenderFooter is a symmetric counterpart of RenderHeader.
func ( r * MarkdownStripper ) RenderFooter ( w io . Writer , ast * blackfriday . Node ) {
}
//revive:enable:var-naming
func doubleSpace ( out * bytes . Buffer ) {
if out . Len ( ) > 0 {
out . WriteByte ( '\n' )
func ( r * MarkdownStripper ) doubleSpace ( w io . Writer ) {
if ! r . empty {
_ , _ = w . Write ( [ ] byte { '\n' } )
}
}
func ( r * MarkdownStripper ) processString ( out * bytes . Buff er, text [ ] byte , coallesce bool ) {
func ( r * MarkdownStripper ) processString ( w io . Writer , text [ ] byte , coallesce bool ) {
// Always break-up words
if ! coallesce || ! r . coallesce {
doubleSpace ( out )
r . doubleSpace ( w )
}
out . Write ( text )
_ , _ = w . Write ( text )
r . coallesce = coallesce
r . empty = false
}
func ( r * MarkdownStripper ) processLink ( out * bytes . Buffer , link [ ] byte , content [ ] byte ) {
func ( r * MarkdownStripper ) processLink ( w io . Writer , link [ ] byte ) {
// Links are processed out of band
r . links = append ( r . links , string ( link ) )
r . coallesce = false