//
// Blackfriday Markdown Processor
// Available at http://github.com/russross/blackfriday
//
// Copyright © 2011 Russ Ross <russ@russross.com>.
// Distributed under the Simplified BSD License.
// See README.md for details.
//

package blackfriday

import (
	"bytes"
	"fmt"
	"io"
	"strings"
	"unicode/utf8"
)
//
// Markdown parsing and processing
//

// Version is the version string of the package. It appears in the rendered
// document when the CompletePage flag is on.
const Version = "2.0"
// Extensions is a bitwise OR'ed collection of enabled Blackfriday
// extensions.

// These are the supported markdown parsing extensions.
// OR these values together to select multiple extensions.
const (
	// NoExtensions occupies the first line of the block (iota == 0), so the
	// flag constants below start at 1<<1 and each following line implicitly
	// repeats the expression with the next higher bit.
	NoExtensions           Extensions = 0
	NoIntraEmphasis        Extensions = 1 << iota // Ignore emphasis markers inside words
	Tables                                        // Render tables
	FencedCode                                    // Render fenced code blocks
	Autolink                                      // Detect embedded URLs that are not explicitly marked
	Strikethrough                                 // Strikethrough text using ~~test~~
	LaxHTMLBlocks                                 // Loosen up HTML block parsing rules
	SpaceHeadings                                 // Be strict about prefix heading rules
	HardLineBreak                                 // Translate newlines into line breaks
	TabSizeEight                                  // Expand tabs to eight spaces instead of four
	Footnotes                                     // Pandoc-style footnotes
	NoEmptyLineBeforeBlock                        // No need to insert an empty line to start a (code, quote, ordered list, unordered list) block
	HeadingIDs                                    // Specify heading IDs with {#id}
	Titleblock                                    // Pandoc-style title block
	AutoHeadingIDs                                // Create the heading ID from the text
	BackslashLineBreak                            // Translate trailing backslashes into line breaks
	DefinitionLists                               // Render definition lists

	// CommonHTMLFlags is the set of HTML renderer flags that Run passes to
	// the default HTML renderer.
	CommonHTMLFlags HTMLFlags = UseXHTML | Smartypants |
		SmartypantsFractions | SmartypantsDashes | SmartypantsLatexDashes

	// CommonExtensions is the set of parser extensions that Run enables
	// before applying caller-supplied options.
	CommonExtensions Extensions = NoIntraEmphasis | Tables | FencedCode |
		Autolink | Strikethrough | SpaceHeadings | HeadingIDs |
		BackslashLineBreak | DefinitionLists
)
// ListType contains bitwise OR'ed flags for list and list item objects.

// These are the possible flag values for the ListItem renderer.
// Multiple flag values may be ORed together.
// These are mostly of interest if you are writing a new output format.

// CellAlignFlags holds a type of alignment in a table cell.

// These are the possible flag values for the table cell renderer.
// Only a single one of these values will be used; they are not ORed together.
// These are mostly of interest if you are writing a new output format.

// The size of a tab stop, in spaces.
const (
	TabSizeDefault = 4
	TabSizeDouble  = 8
)
// blockTags is a set of tags that are recognized as HTML block tags.
// Any of these can be included in markdown text without special escaping.
var blockTags = map[string]struct{}{
	"blockquote": {},
	"del":        {},
	"div":        {},
	"dl":         {},
	"fieldset":   {},
	"form":       {},
	"h1":         {},
	"h2":         {},
	"h3":         {},
	"h4":         {},
	"h5":         {},
	"h6":         {},
	"iframe":     {},
	"ins":        {},
	"math":       {},
	"noscript":   {},
	"ol":         {},
	"pre":        {},
	"p":          {},
	"script":     {},
	"style":      {},
	"table":      {},
	"ul":         {},

	// HTML5
	"address":    {},
	"article":    {},
	"aside":      {},
	"canvas":     {},
	"figcaption": {},
	"figure":     {},
	"footer":     {},
	"header":     {},
	"hgroup":     {},
	"main":       {},
	"nav":        {},
	"output":     {},
	"progress":   {},
	"section":    {},
	"video":      {},
}
Renderer is the rendering interface. This is mostly of interest if you are implementing a new rendering format. Only an HTML implementation is provided in this repository, see the README for external implementations.
RenderNode is the main rendering method. It will be called once for every leaf node and twice for every non-leaf node (first with entering=true, then with entering=false). The method should write its rendition of the node to the supplied writer w.
	RenderNode(w io.Writer, node *Node, entering bool) WalkStatus
RenderHeader is a method that allows the renderer to produce some content preceding the main body of the output document. The header is understood in the broad sense here. For example, the default HTML renderer will write not only the HTML document preamble, but also the table of contents if it was requested. The method will be passed an entire document tree, in case a particular implementation needs to inspect it to produce output. The output should be written to the supplied writer w. If your implementation has no header to write, supply an empty implementation.
	RenderHeader(w io.Writer, ast *Node)
RenderFooter is a symmetric counterpart of RenderHeader.
	RenderFooter(w io.Writer, ast *Node)
}
Callback functions for inline parsing. One such function is defined for each character that triggers a response when parsing inline data.
type inlineParser func(p *Markdown, data []byte, offset int) (int, *Node)
// Markdown is a type that holds extensions and the runtime state used by
// Parse, and the renderer. You cannot use it directly; construct it with New.
	// Footnotes need to be ordered as well as available to quickly check for
	// presence. If a ref is also a footnote, it's stored both in refs and here
	// in notes. Slice is nil if footnotes not enabled.
	notes []*reference

	doc                  *Node
	tip                  *Node // = doc
	oldTip               *Node
	lastMatchedContainer *Node // = doc
	allClosed            bool
}

func ( *Markdown) ( string) ( *reference,  bool) {
	if .referenceOverride != nil {
		,  := .referenceOverride()
		if  {
			if  == nil {
				return nil, false
			}
			return &reference{
				link:     []byte(.Link),
				title:    []byte(.Title),
				noteID:   0,
				hasBlock: false,
				text:     []byte(.Text)}, true
		}
refs are case insensitive
	,  = .refs[strings.ToLower()]
	return , 
}

func ( *Markdown) ( *Node) {
	 := .Parent
	.open = false
	.tip = 
}

func ( *Markdown) ( NodeType,  uint32) *Node {
	return .addExistingChild(NewNode(), )
}

func ( *Markdown) ( *Node,  uint32) *Node {
	for !.tip.canContain(.Type) {
		.finalize(.tip)
	}
	.tip.AppendChild()
	.tip = 
	return 
}

func ( *Markdown) () {
	if !.allClosed {
		for .oldTip != .lastMatchedContainer {
			 := .oldTip.Parent
			.finalize(.oldTip)
			.oldTip = 
		}
		.allClosed = true
	}
}
//
//
// Public interface
//
//

// Reference represents the details of a link.
// See the documentation in Options for more details on use-case.
//
// Link is usually the URL the reference points to.
// Title is the alternate text describing the link in more detail.
// Text is the optional text to override the ref with if the syntax used was
// [refid][]

ReferenceOverrideFunc is expected to be called with a reference string and return either a valid Reference type that the reference string maps to or nil. If overridden is false, the default reference logic will be executed. See the documentation in Options for more details on use-case.
type ReferenceOverrideFunc func(reference string) (ref *Reference, overridden bool)
// New constructs a Markdown processor. You can use the same With* functions as
// for Run() to customize the parser's behavior and the renderer.
func ( ...Option) *Markdown {
	var  Markdown
	for ,  := range  {
		(&)
	}
	.refs = make(map[string]*reference)
	.maxNesting = 16
	.insideLink = false
	 := NewNode(Document)
	.doc = 
	.tip = 
	.oldTip = 
	.lastMatchedContainer = 
Option customizes the Markdown processor's default behavior.
type Option func(*Markdown)
WithRenderer allows you to override the default renderer.
func ( Renderer) Option {
	return func( *Markdown) {
		.renderer = 
	}
}
WithExtensions allows you to pick some of the many extensions provided by Blackfriday. You can bitwise OR them.
func ( Extensions) Option {
	return func( *Markdown) {
		.extensions = 
	}
}
// WithNoExtensions turns off all extensions and custom behavior.

// WithRefOverride sets an optional function callback that is called every
// time a reference is resolved.
//
// In Markdown, the link reference syntax can be made to resolve a link to
// a reference instead of an inline URL, in one of the following ways:
//
//  * [link text][refid]
//  * [refid][]
//
// Usually, the refid is defined at the bottom of the Markdown document. If
// this override function is provided, the refid is passed to the override
// function first, before consulting the defined refids at the bottom. If
// the override function indicates an override did not occur, the refids at
// the bottom will be used to fill in the link details.

Run is the main entry point to Blackfriday. It parses and renders a block of markdown-encoded text. The simplest invocation of Run takes one argument, input: output := Run(input) This will parse the input with CommonExtensions enabled and render it with the default HTMLRenderer (with CommonHTMLFlags). Variadic arguments opts can customize the default behavior. Since Markdown type does not contain exported fields, you can not use it directly. Instead, use the With* functions. For example, this will call the most basic functionality, with no extensions: output := Run(input, WithNoExtensions()) You can use any number of With* arguments, even contradicting ones. They will be applied in order of appearance and the latter will override the former: output := Run(input, WithNoExtensions(), WithExtensions(exts), WithRenderer(yourRenderer))
func ( []byte,  ...Option) []byte {
	 := NewHTMLRenderer(HTMLRendererParameters{
		Flags: CommonHTMLFlags,
	})
	 := []Option{WithRenderer(), WithExtensions(CommonExtensions)}
	 = append(, ...)
	 := New(...)
	 := .Parse()
	var  bytes.Buffer
	.renderer.RenderHeader(&, )
	.Walk(func( *Node,  bool) WalkStatus {
		return .renderer.RenderNode(&, , )
	})
	.renderer.RenderFooter(&, )
	return .Bytes()
}
// Parse is an entry point to the parsing part of Blackfriday. It takes an
// input markdown document and produces a syntax tree for its contents. This
// tree can then be rendered with a default or custom renderer, or
// analyzed/transformed by the caller to whatever non-standard needs they have.
// The return value is the root node of the syntax tree.
func ( *Markdown) ( []byte) *Node {
Walk the tree and finish up some of unfinished blocks
	for .tip != nil {
		.finalize(.tip)
Walk the tree again and process inline markdown in each block
	.doc.Walk(func( *Node,  bool) WalkStatus {
		if .Type == Paragraph || .Type == Heading || .Type == TableCell {
			.inline(, .content)
			.content = nil
		}
		return GoToNext
	})
	.parseRefsToAST()
	return .doc
}

func ( *Markdown) () {
	if .extensions&Footnotes == 0 || len(.notes) == 0 {
		return
	}
	.tip = .doc
	 := .addBlock(List, nil)
	.IsFootnotesList = true
	.ListFlags = ListTypeOrdered
Note: this loop is intentionally explicit, not range-form. This is because the body of the loop will append nested footnotes to p.notes and we need to process those late additions. Range form would only walk over the fixed initial set.
	for  := 0;  < len(.notes); ++ {
		 := .notes[]
		.addExistingChild(.footnote, 0)
		 := .footnote
		.ListFlags =  | ListTypeOrdered
		.RefLink = .link
		if .hasBlock {
			 |= ListItemContainsBlock
			.block(.title)
		} else {
			.inline(, .title)
		}
		 &^= ListItemBeginningOfList | ListItemContainsBlock
	}
	 := .Parent
	finalizeList()
	.tip = 
	.Walk(func( *Node,  bool) WalkStatus {
		if .Type == Paragraph || .Type == Heading {
			.inline(, .content)
			.content = nil
		}
		return GoToNext
	})
}
//
// Link references
//
// This section implements support for references that (usually) appear
// as footnotes in a document, and can be referenced anywhere in the document.
// The basic format is:
//
//    [1]: http://www.google.com/ "Google"
//    [2]: http://www.github.com/ "Github"
//
// Anywhere in the document, the reference can be linked by referring to its
// label, i.e., 1 and 2 in this example, as in:
//
//    This library is hosted on [Github][2], a git hosting site.
//
// Actual footnotes as specified in Pandoc and supported by some other Markdown
// libraries such as php-markdown are also taken care of. They look like this:
//
//    This sentence needs a bit of further explanation.[^note]
//
//    [^note]: This is the explanation.
//
// Footnotes should be placed at the end of the document in an ordered list.
// Finally, there are inline footnotes such as:
//
//    Inline footnotes^[Also supported.] provide a quick inline explanation,
//    but are rendered at the bottom of the document.
//

// reference holds all information necessary for reference-style links or
// footnotes.
//
// Consider this markdown with reference-style links:
//
//     [link][ref]
//
//     [ref]: /url/ "tooltip title"
//
// It will be ultimately converted to this HTML:
//
//     <p><a href="/url/" title="tooltip title">link</a></p>
//
// And a reference structure will be populated as follows:
//
//     p.refs["ref"] = &reference{
//         link: "/url/",
//         title: "tooltip title",
//     }
//
// Alternatively, reference can contain information about a footnote. Consider
// this markdown:
//
//     Text needing a footnote.[^a]
//
//     [^a]: This is the note
//
// A reference structure will be populated as follows:
//
//     p.refs["a"] = &reference{
//         link: "a",
//         title: "This is the note",
//         noteID: <some positive int>,
//     }
//
// TODO: As you can see, this calls for splitting into two dedicated
// structures, one for refs and one for footnotes.
type reference struct {
	// link is the link target of a reference-style link. For a footnote the
	// field is reused to hold the footnote id, since footnotes have no link.
	link     []byte
	// title is the link title. For a footnote it is not really a title but
	// the text contained in the footnote.
	title    []byte
	noteID   int // 0 if not a footnote ref
	// hasBlock selects how a footnote's text is parsed when the footnote list
	// is built: as block-level content when true, as inline content otherwise.
	hasBlock bool
	footnote *Node // a link to the Item node within a list of footnotes

	text []byte // only gets populated by refOverride feature with Reference.Text
}

func ( *reference) () string {
	return fmt.Sprintf("{link: %q, title: %q, text: %q, noteID: %d, hasBlock: %v}",
		.link, .title, .text, .noteID, .hasBlock)
}
// Check whether or not data starts with a reference link.
// If so, it is parsed and stored in the list of references
// (in the render struct).
// Returns the number of bytes to skip to move past it,
// or zero if the first line is not a reference.
up to 3 optional leading spaces
	if len() < 4 {
		return 0
	}
	 := 0
	for  < 3 && [] == ' ' {
		++
	}

	 := 0
id part: anything but a newline between brackets
	if [] != '[' {
		return 0
	}
	++
	if .extensions&Footnotes != 0 {
we can set it to anything here because the proper noteIds will be assigned later during the second pass. It just has to be != 0
			 = 1
			++
		}
	}
	 := 
	for  < len() && [] != '\n' && [] != '\r' && [] != ']' {
		++
	}
	if  >= len() || [] != ']' {
		return 0
	}
footnotes can have empty ID, like this: [^], but a reference can not be empty like this: []. Break early if it's not a footnote and there's no ID
	if  == 0 &&  ==  {
		return 0
spacer: colon (space | tab)* newline? (space | tab)*
	++
	if  >= len() || [] != ':' {
		return 0
	}
	++
	for  < len() && ([] == ' ' || [] == '\t') {
		++
	}
	if  < len() && ([] == '\n' || [] == '\r') {
		++
		if  < len() && [] == '\n' && [-1] == '\r' {
			++
		}
	}
	for  < len() && ([] == ' ' || [] == '\t') {
		++
	}
	if  >= len() {
		return 0
	}

	var (
		,    int
		,  int
		               int
		                   []byte
		              bool
	)

	if .extensions&Footnotes != 0 &&  != 0 {
		, , ,  = scanFootnote(, , , )
		 = 
	} else {
		, , , ,  = scanLinkRef(, , )
	}
	if  == 0 {
		return 0
	}
a valid ref has been found

	 := &reference{
		noteID:   ,
		hasBlock: ,
	}

reusing the link field for the id since footnotes don't have links
if footnote, it's not really a title, it's the contained text
		.title = 
	} else {
		.link = [:]
		.title = [:]
	}
id matches are case-insensitive
	 := string(bytes.ToLower([:]))

	.refs[] = 

	return 
}

link: whitespace-free sequence, optionally between angle brackets
	if [] == '<' {
		++
	}
	 = 
	for  < len() && [] != ' ' && [] != '\t' && [] != '\n' && [] != '\r' {
		++
	}
	 = 
	if [] == '<' && [-1] == '>' {
		++
		--
	}
optional spacer: (space | tab)* (newline | '\'' | '"' | '(' )
	for  < len() && ([] == ' ' || [] == '\t') {
		++
	}
	if  < len() && [] != '\n' && [] != '\r' && [] != '\'' && [] != '"' && [] != '(' {
		return
	}
compute end-of-line
	if  >= len() || [] == '\r' || [] == '\n' {
		 = 
	}
	if +1 < len() && [] == '\r' && [+1] == '\n' {
		++
	}
optional (space|tab)* spacer after a newline
	if  > 0 {
		 =  + 1
		for  < len() && ([] == ' ' || [] == '\t') {
			++
		}
	}
optional title: any non-newline sequence enclosed in '"() alone on its line
	if +1 < len() && ([] == '\'' || [] == '"' || [] == '(') {
		++
		 = 
look for EOL
		for  < len() && [] != '\n' && [] != '\r' {
			++
		}
		if +1 < len() && [] == '\n' && [+1] == '\r' {
			 =  + 1
		} else {
			 = 
		}
step back
		--
		for  >  && ([] == ' ' || [] == '\t') {
			--
		}
		if  >  && ([] == '\'' || [] == '"' || [] == ')') {
			 = 
			 = 
		}
	}

	return
}
// The first bit of this logic is the same as Parser.listItem, but the rest
// is much simpler. This function simply finds the entire block and shifts it
// over by one tab if it is indeed a block (just returns the line if it's not).
// blockEnd is the end of the section in the input buffer, and contents is the
// extracted text that was shifted over one tab. It will need to be rendered at
// the end of the document.
func ( *Markdown,  []byte, ,  int) (,  int,  []byte,  bool) {
	if  == 0 || len() == 0 {
		return
	}
skip leading whitespace on first line
	for  < len() && [] == ' ' {
		++
	}

	 = 
find the end of the line
	 = 
	for  < len() && [-1] != '\n' {
		++
	}
get working buffer
	var  bytes.Buffer
put the first line into the working buffer
	.Write([:])
	 = 
process the following lines
	 := false

:
	for  < len() {
		++
find the end of this line
		for  < len() && [-1] != '\n' {
			++
		}
if it is an empty line, guess that it is part of this item and move on to the next line
		if .isEmpty([:]) > 0 {
			 = true
			 = 
			continue
		}

		 := 0
this is the end of the block. we don't want to include this last line in the index.
			break 
		}
if there were blank lines before this one, insert a new one now
		if  {
			.WriteByte('\n')
			 = false
		}
get rid of that first tab, write to buffer
		.Write([+ : ])
		 = true

		 = 
	}

	if [-1] != '\n' {
		.WriteByte('\n')
	}

	 = .Bytes()

	return
}
//
//
// Miscellaneous helper functions
//
//

// ispunct tests if a character is a punctuation symbol.
// Taken from a private function in regexp in the stdlib.
//
// NOTE(review): the function name was missing from this copy of the file;
// "ispunct" follows the naming of the sibling helpers — confirm.
func ispunct(c byte) bool {
	for _, r := range []byte("!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~") {
		if c == r {
			return true
		}
	}
	return false
}
// isspace tests if a character is a whitespace character, i.e. either a
// horizontal or a vertical whitespace character.
//
// NOTE(review): the name "isspace" was missing from this copy of the file and
// follows the naming of the sibling helpers — confirm.
func isspace(c byte) bool {
	return ishorizontalspace(c) || isverticalspace(c)
}

// ishorizontalspace tests if a character is a horizontal whitespace
// character: a space or a tab.
func ishorizontalspace(c byte) bool {
	return c == ' ' || c == '\t'
}

// isverticalspace tests if a character is a vertical whitespace character:
// newline, carriage return, form feed or vertical tab.
func isverticalspace(c byte) bool {
	return c == '\n' || c == '\r' || c == '\f' || c == '\v'
}
// isletter tests if a character is an ASCII letter (a-z or A-Z).
func isletter(c byte) bool {
	return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')
}

// isalnum tests if a character is an ASCII letter or digit.
// TODO: check when this is looking for ASCII alnum and when it should use
// unicode
func isalnum(c byte) bool {
	return (c >= '0' && c <= '9') || isletter(c)
}
// Replace tab characters with spaces, aligning to the next TAB_SIZE column.
// Always ends output with a newline.
first, check for common cases: no tabs, or only tabs at beginning of line
	,  := 0, 0
	 := false
	for  = 0;  < len(); ++ {
		if [] == '\t' {
			if  ==  {
				++
			} else {
				 = true
				break
			}
		}
	}
no need to decode runes if all tabs are at the beginning of the line
	if ! {
		for  = 0;  < *; ++ {
			.WriteByte(' ')
		}
		.Write([:])
		return
	}
the slow case: we need to count runes to figure out how many spaces to insert for each tab
	 := 0
	 = 0
	for  < len() {
		 := 
		for  < len() && [] != '\t' {
			,  := utf8.DecodeRune([:])
			 += 
			++
		}

		if  >  {
			.Write([:])
		}

		if  >= len() {
			break
		}

		for {
			.WriteByte(' ')
			++
			if % == 0 {
				break
			}
		}

		++
	}
}
// isIndented finds if a line counts as indented or not.
// Returns number of characters the indent is (0 = not indented):
// 1 when the line starts with a tab, indentSize when it starts with at least
// indentSize spaces, and 0 otherwise (including for an empty line).
//
// NOTE(review): the function and parameter names were missing from this copy
// of the file; "isIndented" is a reconstruction — confirm against the callers.
func isIndented(data []byte, indentSize int) int {
	if len(data) == 0 {
		return 0
	}
	if data[0] == '\t' {
		return 1
	}
	if len(data) < indentSize {
		return 0
	}
	for i := 0; i < indentSize; i++ {
		if data[i] != ' ' {
			return 0
		}
	}
	return indentSize
}
// Create a URL-safe slug for fragments.
func ( []byte) []byte {
	if len() == 0 {
		return 
	}
	 := make([]byte, 0, len())
	 := false

	for ,  := range  {
		if isalnum() {
			 = false
			 = append(, )
		} else if  {
			continue
		} else {
			 = append(, '-')
			 = true
		}
	}
	var ,  int
	var  byte
	for ,  = range  {
		if  != '-' {
			break
		}
	}
	for  = len() - 1;  > 0; -- {
		if [] != '-' {
			break
		}
	}
	return [ : +1]