Blackfriday Markdown Processor Available at http://github.com/russross/blackfriday Copyright © 2011 Russ Ross <russ@russross.com>. Distributed under the Simplified BSD License. See README.md for details.
Functions to parse inline elements.

package blackfriday

import (
	
	
	
)

// Package-level patterns used while parsing inline elements.
// urlRe is a URL sub-pattern (http, https, ftp scheme or a leading slash).
// anchorRe matches a whole anchor element at the start of its input and embeds
// urlRe for both the href value and the link text.
// htmlEntityRe matches one named or numeric character reference, following the
// grammar spelled out in the text that precedes it.
// NOTE(review): that explanatory text has lost its comment markers in this
// view; it must be a comment again before this block can compile.
var (
	urlRe    = `((https?|ftp):\/\/|\/)[-A-Za-z0-9+&@#\/%?=~_|!:,.;\(\)]+`
	anchorRe = regexp.MustCompile(`^(<a\shref="` + urlRe + `"(\stitle="[^"<>]+")?\s?>` + urlRe + `<\/a>)`)
https://www.w3.org/TR/html5/syntax.html#character-references highest unicode code point in 17 planes (2^20): 1,114,112d = 7 dec digits or 6 hex digits named entity references can be 2-31 characters with stuff like &lt; at one end and &CounterClockwiseContourIntegral; at the other. There are also sometimes numbers at the end, although this isn't inherent in the specification; there are never numbers anywhere else in current character references, though; see &frac34; and &blk12;, etc. https://www.w3.org/TR/html5/syntax.html#named-character-references entity := "&" (named group | number ref) ";" named group := [a-zA-Z]{2,31}[0-9]{0,2} number ref := "#" (dec ref | hex ref) dec ref := [0-9]{1,7} hex ref := ("x" | "X") [0-9a-fA-F]{1,6}
	htmlEntityRe = regexp.MustCompile(`&([a-zA-Z]{2,31}[0-9]{0,2}|#([0-9]{1,7}|[xX][0-9a-fA-F]{1,6}));`)
)
Functions to parse text within a block. Each function returns the number of chars taken care of; data is the complete block being rendered; offset is the number of valid chars before the current cursor.

handlers might call us recursively: enforce a maximum depth
	if .nesting >= .maxNesting || len() == 0 {
		return
	}
	.nesting++
	,  := 0, 0
	for  < len() {
		 := .inlineCallback[[]]
		if  != nil {
No action from the callback.
				++
Copy inactive chars into the output.
				.AppendChild(text([:]))
				if  != nil {
					.AppendChild()
Skip past whatever the callback used.
				 =  + 
				 = 
			}
		} else {
			++
		}
	}
	if  < len() {
		if [-1] == '\n' {
			--
		}
		.AppendChild(text([:]))
	}
	.nesting--
}
single and double emphasis parsing
// Inline handler for emphasis markers.
// Depending on how many marker bytes open the span it delegates to
// helperEmphasis, helperDoubleEmphasis or helperTripleEmphasis and adds 1, 2
// or 3 to the count the helper reports; every failure path returns (0, nil).
// The '~' checks reject the tilde in the single and triple cases, so it is
// only honoured in the doubled form.
// NOTE(review): the declared names (receiver, parameters, locals) and the
// condition that opens the first branch are absent from this view; restore
// them from the upstream file before changing any logic here.
func ( *Markdown,  []byte,  int) (int, *Node) {
	 = [:]
	 := [0]

whitespace cannot follow an opening emphasis; strikethrough only takes two characters '~~'
		if  == '~' || isspace([1]) {
			return 0, nil
		}
		,  := helperEmphasis(, [1:], )
		if  == 0 {
			return 0, nil
		}

		return  + 1, 
	}

	if len() > 3 && [1] ==  && [2] !=  {
		if isspace([2]) {
			return 0, nil
		}
		,  := helperDoubleEmphasis(, [2:], )
		if  == 0 {
			return 0, nil
		}

		return  + 2, 
	}

	if len() > 4 && [1] ==  && [2] ==  && [3] !=  {
		if  == '~' || isspace([3]) {
			return 0, nil
		}
		,  := helperTripleEmphasis(, , 3, )
		if  == 0 {
			return 0, nil
		}

		return  + 3, 
	}

	return 0, nil
}

// Inline handler for a backtick-delimited code span.
// It counts the opening run of backticks, scans forward for a closing run,
// and returns (0, nil) when the input ends before one is found. Leading and
// trailing space bytes (only ' ', not other whitespace) are trimmed from the
// enclosed content. On success it returns a Code node whose Literal is the
// trimmed slice; the final return yields a nil node instead, presumably when
// nothing is left after trimming.
// NOTE(review): declared identifiers are missing in this view, so which
// counter each comparison refers to cannot be confirmed from here.
func ( *Markdown,  []byte,  int) (int, *Node) {
	 = [:]

	 := 0
count the number of backticks in the delimiter
	for  < len() && [] == '`' {
		++
	}
find the next delimiter
	,  := 0, 0
	for  = ;  < len() &&  < ; ++ {
		if [] == '`' {
			++
		} else {
			 = 0
		}
	}
no matching delimiter?
	if  <  &&  >= len() {
		return 0, nil
	}
trim outside whitespace
	 := 
	for  <  && [] == ' ' {
		++
	}

	 :=  - 
	for  >  && [-1] == ' ' {
		--
	}
render the code span
	if  !=  {
		 := NewNode(Code)
		.Literal = [:]
		return , 
	}

	return , nil
}
newline preceded by two spaces becomes <br>
// Inline handler for a run of space bytes.
// It skips consecutive ' ' bytes; if a '\n' follows and the difference tested
// against 2 is large enough, it returns a Hardbreak node and a count that
// includes the newline. With a newline but a smaller difference it returns a
// nil node, and when no newline follows the spaces it returns (0, nil).
// NOTE(review): declared identifiers are missing in this view.
func ( *Markdown,  []byte,  int) (int, *Node) {
	 := 
	for  < len() && [] == ' ' {
		++
	}

	if  < len() && [] == '\n' {
		if - >= 2 {
			return  -  + 1, NewNode(Hardbreak)
		}
		return  - , nil
	}
	return 0, nil
}
newline without two spaces works when HardLineBreak is enabled
// Inline handler that returns (1, Hardbreak node) when the HardLineBreak
// extension bit is set on the receiver, and (0, nil) otherwise.
// NOTE(review): declared identifiers are missing in this view.
func ( *Markdown,  []byte,  int) (int, *Node) {
	if .extensions&HardLineBreak != 0 {
		return 1, NewNode(Hardbreak)
	}
	return 0, nil
}

// linkType classifies the bracketed construct being parsed by the link
// handler in this file.
type linkType int

// The meanings below follow the case comments in the link handler.
const (
	linkNormal linkType = iota // [text]: regular link
	linkImg                     // ![alt]: image
	linkDeferredFootnote        // [^refId]: footnote defined elsewhere
	linkInlineFootnote          // ^[text]: footnote written inline
)

// Predicate used when deciding whether a reference-style link follows
// (presumably the isReferenceStyleLink called by the link handler; the
// declared name is missing in this view).
// It is always false for linkDeferredFootnote. Otherwise it is true when the
// examined byte is '[', at least one more byte follows it, and that next byte
// is not '^'.
func ( []byte,  int,  linkType) bool {
	if  == linkDeferredFootnote {
		return false
	}
	return  < len()-1 && [] == '[' && [+1] != '^'
}

// Inline handler that forwards to link when the byte after the current
// position is '[' (and such a byte exists); otherwise it returns (0, nil).
// NOTE(review): declared identifiers are missing in this view, and this body
// is textually identical to the handler that follows it, so which trigger
// character each one serves cannot be told from here.
func ( *Markdown,  []byte,  int) (int, *Node) {
	if  < len()-1 && [+1] == '[' {
		return link(, , )
	}
	return 0, nil
}

// Inline handler that forwards to link when the byte after the current
// position is '[' (and such a byte exists); otherwise it returns (0, nil).
// NOTE(review): declared identifiers are missing in this view, and this body
// is textually identical to the handler that precedes it, so which trigger
// character each one serves cannot be told from here.
func ( *Markdown,  []byte,  int) (int, *Node) {
	if  < len()-1 && [+1] == '[' {
		return link(, , )
	}
	return 0, nil
}
'[': parse a link or an image or a footnote
no links allowed inside regular links, footnote, and deferred footnotes
	if .insideLink && ( > 0 && [-1] == '[' || len()-1 >  && [+1] == '^') {
		return 0, nil
	}

	var  linkType
special case: ![^text] == deferred footnote (that follows something with an exclamation point)
	case .extensions&Footnotes != 0 && len()-1 >  && [+1] == '^':
![alt] == image
	case  >= 0 && [] == '!':
		 = linkImg
^[text] == inline footnote [^refId] == deferred footnote
	case .extensions&Footnotes != 0:
		if  >= 0 && [] == '^' {
			 = linkInlineFootnote
			++
		} else if len()-1 >  && [+1] == '^' {
			 = linkDeferredFootnote
[text] == regular link
	default:
		 = linkNormal
	}

	 = [:]

	var (
		                       = 1
		                  int
		, ,  []byte
		               = false
	)

	if  == linkDeferredFootnote {
		++
	}
look for the matching closing bracket
	for  := 1;  > 0 &&  < len(); ++ {
		switch {
		case [] == '\n':
			 = true

		case [-1] == '\\':
			continue

		case [] == '[':
			++

		case [] == ']':
			--
			if  <= 0 {
				-- // compensate for extra i++ in for loop
			}
		}
	}

	if  >= len() {
		return 0, nil
	}

	 := 
	++
	var  *Node
skip any amount of whitespace or newline (this is much more lax than original markdown syntax)
	for  < len() && isspace([]) {
		++
	}
inline style link
	switch {
skip initial whitespace
		++

		for  < len() && isspace([]) {
			++
		}

		 := 
look for link end: ' " )
	:
		for  < len() {
			switch {
			case [] == '\\':
				 += 2

			case [] == ')' || [] == '\'' || [] == '"':
				break 

			default:
				++
			}
		}

		if  >= len() {
			return 0, nil
		}
		 := 
look for title end if present
		,  := 0, 0
		if [] == '\'' || [] == '"' {
			++
			 = 

		:
			for  < len() {
				switch {
				case [] == '\\':
					 += 2

				case [] == ')':
					break 

				default:
					++
				}
			}

			if  >= len() {
				return 0, nil
			}
skip whitespace after title
			 =  - 1
			for  >  && isspace([]) {
				--
			}
check for closing quote presence
			if [] != '\'' && [] != '"' {
				,  = 0, 0
				 = 
			}
		}
remove whitespace at the end of the link
		for  >  && isspace([-1]) {
			--
		}
remove optional angle brackets around the link
		if [] == '<' {
			++
		}
		if [-1] == '>' {
			--
		}
build escaped link and title
		if  >  {
			 = [:]
		}

		if  >  {
			 = [:]
		}

		++
reference style link
	case isReferenceStyleLink(, , ):
		var  []byte
		 := false
look for the id
		++
		 := 
		for  < len() && [] != ']' {
			++
		}
		if  >= len() {
			return 0, nil
		}
		 := 
find the reference
		if  ==  {
			if  {
				var  bytes.Buffer

				for  := 1;  < ; ++ {
					switch {
					case [] != '\n':
						.WriteByte([])
					case [-1] != ' ':
						.WriteByte(' ')
					}
				}

				 = .Bytes()
			} else {
				 = [1:]
				 = true
			}
		} else {
			 = [:]
		}
find the reference with matching id
		,  := .getRef(string())
		if ! {
			return 0, nil
		}
keep link and title from reference
		 = .link
		 = .title
		if  {
			 = .text
		}
		++
shortcut reference style link or reference or inline footnote
	default:
		var  []byte
craft the id
		if  {
			var  bytes.Buffer

			for  := 1;  < ; ++ {
				switch {
				case [] != '\n':
					.WriteByte([])
				case [-1] != ' ':
					.WriteByte(' ')
				}
			}

			 = .Bytes()
		} else {
			if  == linkDeferredFootnote {
				 = [2:] // get rid of the ^
			} else {
				 = [1:]
			}
		}

		 = NewNode(Item)
create a new reference
			 = len(.notes) + 1

			var  []byte
			if len() > 0 {
				if len() < 16 {
					 = make([]byte, len())
				} else {
					 = make([]byte, 16)
				}
				copy(, slugify())
			} else {
				 = append([]byte("footnote-"), []byte(strconv.Itoa())...)
			}

			 := &reference{
				noteID:   ,
				hasBlock: false,
				link:     ,
				title:    ,
				footnote: ,
			}

			.notes = append(.notes, )

			 = .link
			 = .title
find the reference with matching id
			,  := .getRef(string())
			if ! {
				return 0, nil
			}

			if  == linkDeferredFootnote {
				.noteID = len(.notes) + 1
				.footnote = 
				.notes = append(.notes, )
			}
keep link and title from reference
if inline footnote, title == footnote contents
			 = .title
			 = .noteID
		}
rewind the whitespace
		 =  + 1
	}

	var  []byte
	if  == linkNormal ||  == linkImg {
		if len() > 0 {
			var  bytes.Buffer
			unescapeText(&, )
			 = .Bytes()
		}
links need something to click on and somewhere to go
		if len() == 0 || ( == linkNormal &&  <= 1) {
			return 0, nil
		}
	}
call the relevant rendering function
	var  *Node
	switch  {
	case linkNormal:
		 = NewNode(Link)
		.Destination = normalizeURI()
		.Title = 
		if len() > 0 {
			.AppendChild(text())
links cannot contain other links, so turn off link parsing temporarily and recurse
			 := .insideLink
			.insideLink = true
			.inline(, [1:])
			.insideLink = 
		}

	case linkImg:
		 = NewNode(Image)
		.Destination = 
		.Title = 
		.AppendChild(text([1:]))
		++

	case linkInlineFootnote, linkDeferredFootnote:
		 = NewNode(Link)
		.Destination = 
		.Title = 
		.NoteID = 
		.Footnote = 
		if  == linkInlineFootnote {
			++
		}

	default:
		return 0, nil
	}

	return , 
}

// Measures an HTML comment at the start of the input (presumably the
// inlineHTMLComment method called by the '<' handler; the declared name is
// missing in this view).
// It returns 0 when the input is shorter than 5 bytes or does not begin with
// the four bytes '<', '!', '-', '-'. Otherwise it scans for the byte sequence
// '-', '-', '>' and returns the index of that '>' plus one, or 0 when the
// input ends first.
// NOTE(review): the statement that initialises the scan index and the brace
// closing the scan loop are absent from this view.
func ( *Markdown) ( []byte) int {
	if len() < 5 {
		return 0
	}
	if [0] != '<' || [1] != '!' || [2] != '-' || [3] != '-' {
		return 0
	}
scan for an end-of-comment marker, across lines if necessary
	for  < len() && !([-2] == '-' && [-1] == '-' && [] == '>') {
		++
no end-of-comment marker
	if  >= len() {
		return 0
	}
	return  + 1
}

// Removes a leading mailto scheme from the input (presumably the stripMailto
// helper called by the '<' handler; the declared name is missing in this view).
// The longer form with two slashes (9 bytes) is tested first, then the plain
// 7-byte form; input with neither prefix is returned unchanged. The result is
// a sub-slice of the argument, not a copy.
func ( []byte) []byte {
	if bytes.HasPrefix(, []byte("mailto://")) {
		return [9:]
	} else if bytes.HasPrefix(, []byte("mailto:")) {
		return [7:]
	} else {
		return 
	}
}
autolinkType specifies a kind of autolink that gets detected.
These are the possible flag values for the autolink renderer.
'<' when tags or autolinks are allowed
// Inline handler for '<': recognises autolinks, raw HTML tags and HTML
// comments.
// tagLength supplies an autolink kind and a length; a positive result from
// inlineHTMLComment overrides that length. When the length exceeds 2:
//   - for an autolink, the text between the angle brackets is unescaped and,
//     if non-empty, becomes the Destination of a Link node; emailAutolink
//     destinations get a mailto: prefix, and the visible child text has any
//     mailto scheme removed by stripMailto;
//   - otherwise the matched bytes become the Literal of an HTMLSpan node.
// All remaining paths return a nil node.
// NOTE(review): declared identifiers are missing in this view.
func ( *Markdown,  []byte,  int) (int, *Node) {
	 = [:]
	,  := tagLength()
	if  := .inlineHTMLComment();  > 0 {
		 = 
	}
	if  > 2 {
		if  != notAutolink {
			var  bytes.Buffer
			unescapeText(&, [1:+1-2])
			if .Len() > 0 {
				 := .Bytes()
				 := NewNode(Link)
				.Destination = 
				if  == emailAutolink {
					.Destination = append([]byte("mailto:"), ...)
				}
				.AppendChild(text(stripMailto()))
				return , 
			}
		} else {
			 := NewNode(HTMLSpan)
			.Literal = [:]
			return , 
		}
	}

	return , nil
}
'\\' backslash escape
// escapeChars lists the bytes that may follow a backslash to form an escape;
// the backslash handler looks the next byte up here with bytes.IndexByte.
var escapeChars = []byte("\\`*_{}[]()#+-.!:|&<>~")

// Inline handler for a backslash.
// With more than one byte available:
//   - if the BackslashLineBreak extension is on and the next byte is '\n',
//     it returns (2, Hardbreak node);
//   - if the next byte is not in escapeChars, it returns (0, nil);
//   - otherwise it returns 2 and a text node holding just that next byte.
// With one byte or fewer available it returns (2, nil).
// NOTE(review): declared identifiers are missing in this view.
func ( *Markdown,  []byte,  int) (int, *Node) {
	 = [:]

	if len() > 1 {
		if .extensions&BackslashLineBreak != 0 && [1] == '\n' {
			return 2, NewNode(Hardbreak)
		}
		if bytes.IndexByte(escapeChars, [1]) < 0 {
			return 0, nil
		}

		return 2, text([1:2])
	}

	return 2, nil
}

// Copies the input into the buffer with backslash escapes removed (presumably
// the unescapeText helper called elsewhere in this file; the declared name is
// missing in this view).
// Each pass writes the run of bytes up to the next backslash, then writes the
// byte after that backslash verbatim and skips both. A backslash that is the
// last byte of the input is dropped, because the loop breaks before writing.
func ( *bytes.Buffer,  []byte) {
	 := 0
	for  < len() {
		 := 
		for  < len() && [] != '\\' {
			++
		}

		if  >  {
			.Write([:])
		}

		if +1 >= len() {
			break
		}

		.WriteByte([+1])
		 += 2
	}
}
'&' escaped when it doesn't belong to an entity; valid entities are assumed to be anything matching &#?[A-Za-z0-9]+;
// Inline handler for '&'.
// Starting after the ampersand it accepts an optional '#', then a run of
// alphanumeric bytes, and requires a ';' to follow; without the ';' it
// returns (0, nil) so the ampersand is treated as a lone character.
// On success it returns a text node for the entity.
// NOTE(review): declared identifiers and the statement that slices out the
// entity bytes are missing in this view.
func ( *Markdown,  []byte,  int) (int, *Node) {
	 = [:]

	 := 1

	if  < len() && [] == '#' {
		++
	}

	for  < len() && isalnum([]) {
		++
	}

	if  < len() && [] == ';' {
		++ // real entity
	} else {
		return 0, nil // lone '&'
	}

undo &amp; escaping or it will be converted to &amp;amp; by another escaper in the renderer
	if bytes.Equal(, []byte("&amp;")) {
		 = []byte{'&'}
	}

	return , text()
}

// Reports whether the last htmlEntityRe match in a prefix of the input ends
// exactly at a given offset (presumably the linkEndsWithEntity helper called
// by the autolink code; the declared name is missing in this view).
// FindAllIndex with n = -1 collects every match; a nil result means there is
// no entity at all and yields false.
func ( []byte,  int) bool {
	 := htmlEntityRe.FindAllIndex([:], -1)
	return  != nil && [len()-1][1] == 
}
hasPrefixCaseInsensitive is a custom implementation of strings.HasPrefix(strings.ToLower(s), prefix) we rolled our own because ToLower pulls in a huge machinery of lowercasing anything from Unicode and that's very slow. Since this func will only be used on ASCII protocol prefixes, we can take shortcuts.
// ASCII-only, case-insensitive prefix test (see the rationale text above).
// It returns false at once when the first argument is shorter than the
// second. Otherwise each ranged byte must equal its counterpart either
// directly or after 'a'-'A' is added to the counterpart; the first mismatch
// returns false.
// NOTE(review): declared identifiers are missing in this view, so which
// argument is ranged over and which is indexed cannot be confirmed from here.
func (,  []byte) bool {
	if len() < len() {
		return false
	}
	 := byte('a' - 'A')
	for ,  := range  {
		if  != [] &&  != []+ {
			return false
		}
	}
	return true
}

// protocolPrefixes lists the lowercase scheme prefixes that are tried, via
// hasPrefixCaseInsensitive, before autoLink is called.
var protocolPrefixes = [][]byte{
	[]byte("http://"),
	[]byte("https://"),
	[]byte("ftp://"),
	[]byte("file://"),
	[]byte("mailto:"),
}

// shortestPrefix is used as a minimum-length guard before any prefix in
// protocolPrefixes is compared.
const shortestPrefix = 6 // len("ftp://"), the shortest of the above

quick check to rule out most false hits
	if .insideLink || len() < +shortestPrefix {
		return 0, nil
	}
	for ,  := range protocolPrefixes {
		 :=  + 8 // 8 is the len() of the longest prefix
		if  > len() {
			 = len()
		}
		if hasPrefixCaseInsensitive([:], ) {
			return autoLink(, , )
		}
	}
	return 0, nil
}

Now a more expensive check to see if we're not inside an anchor element
	 := 
	 := 0
	for  > 0 && [] != '<' {
		--
		++
	}

	 := anchorRe.Find([:])
	if  != nil {
		 := NewNode(HTMLSpan)
		.Literal = [:]
		return len() - , 
	}
scan backward for a word boundary
	 := 0
	for - > 0 &&  <= 7 && isletter([--1]) {
		++
	}
	if  > 6 { // longest supported protocol is "mailto" which has 6 letters
		return 0, nil
	}

	 := 
	 = [-:]

	if !isSafeLink() {
		return 0, nil
	}

	 := 0
	for  < len() && !isEndOfLink([]) {
		++
	}
Skip punctuation at the end of the link
	if ([-1] == '.' || [-1] == ',') && [-2] != '\\' {
		--
	}
But don't skip semicolon if it's a part of escaped entity:
	if [-1] == ';' && [-2] != '\\' && !linkEndsWithEntity(, ) {
		--
	}
See if the link finishes with a punctuation sign that can be closed.
	var  byte
	switch [-1] {
	case '"':
		 = '"'
	case '\'':
		 = '\''
	case ')':
		 = '('
	case ']':
		 = '['
	case '}':
		 = '{'
	default:
		 = 0
	}

	if  != 0 {
		 :=  -  +  - 2

		 := 1
Try to close the final punctuation sign in this same line; * if we managed to close it outside of the URL, that means that it's * not part of the URL. If it closes inside the URL, that means it * is part of the URL. * * Examples: * * foo http:www.pokemon.com/Pikachu_(Electric) bar * => http:www.pokemon.com/Pikachu_(Electric) * * foo (http:www.pokemon.com/Pikachu_(Electric)) bar * => http:www.pokemon.com/Pikachu_(Electric) * * foo http:www.pokemon.com/Pikachu_(Electric)) bar * => http:www.pokemon.com/Pikachu_(Electric)) * * (foo http:www.pokemon.com/Pikachu_(Electric)) bar * => foo http:www.pokemon.com/Pikachu_(Electric)

		for  >= 0 && [] != '\n' &&  != 0 {
			if [] == [-1] {
				++
			}

			if [] ==  {
				--
			}

			--
		}

		if  == 0 {
			--
		}
	}

	var  bytes.Buffer
	unescapeText(&, [:])

	if .Len() > 0 {
		 := NewNode(Link)
		.Destination = .Bytes()
		.AppendChild(text(.Bytes()))
		return , 
	}

	return , nil
}

// Reports whether a byte terminates an autolink: true for anything isspace
// accepts and for '<' (presumably the isEndOfLink helper used by the autolink
// scanner; the declared name is missing in this view).
func ( byte) bool {
	return isspace() ||  == '<'
}

// validUris holds lowercase scheme prefixes and validPaths holds path
// prefixes; the safe-link predicate that follows ranges over validPaths and
// compares against entries of this kind.
var validUris = [][]byte{[]byte("http://"), []byte("https://"), []byte("ftp://"), []byte("mailto://")}
var validPaths = [][]byte{[]byte("/"), []byte("./"), []byte("../")}

// Reports whether a link target is considered safe (presumably the isSafeLink
// helper called by the autolink code; the declared name is missing in this
// view).
// First pass: for each entry of validPaths, the input is accepted when it
// starts with that entry and either ends there or continues with an
// alphanumeric byte.
// Second pass: the input is accepted when its lowercased leading bytes equal
// a prefix and the byte right after the prefix is alphanumeric.
// Anything else is rejected.
// NOTE(review): the loop header of the second pass is absent from this view;
// it presumably ranges over validUris.
func ( []byte) bool {
	for ,  := range validPaths {
		if len() >= len() && bytes.Equal([:len()], ) {
			if len() == len() {
				return true
			} else if isalnum([len()]) {
				return true
			}
		}
	}

TODO: handle unicode here case-insensitive prefix test
		if len() > len() && bytes.Equal(bytes.ToLower([:len()]), ) && isalnum([len()]) {
			return true
		}
	}

	return false
}
return the length of the given tag, or 0 if it's not valid
// Classifies and measures a '<'-introduced construct (presumably the
// tagLength helper called by the '<' handler; the declared name is missing in
// this view). It returns an autolinkType and a length, where length 0 means
// nothing valid was found.
//
// Steps visible in the body:
//   - inputs shorter than 3 bytes, not starting with '<', or whose first byte
//     after '<' (or after the two bytes '<' and '/') is not alphanumeric
//     yield (notAutolink, 0);
//   - a run of alphanumerics, '.', '+' and '-' is scanned as a scheme
//     candidate; if '@' follows, isMailtoAutoLink is tried and success yields
//     emailAutolink;
//   - if ':' follows, the kind becomes normalAutolink and the rest is scanned
//     up to '>', with a backslash skipping two bytes and whitespace or a quote
//     character aborting the autolink;
//   - otherwise the next '>' is located with bytes.IndexByte and the length
//     up to and including it is returned.
//
// NOTE(review): bytes.IndexByte returns -1 when no '>' exists. The result is
// added with += and then tested with < 0, so a positive running index would
// hide the not-found case. Identifiers are missing in this view, so confirm
// against the upstream file whether both lines use the same variable.
func ( []byte) ( autolinkType,  int) {
	var ,  int
a valid tag can't be shorter than 3 chars
	if len() < 3 {
		return notAutolink, 0
	}
begins with a '<' optionally followed by '/', followed by letter or number
	if [0] != '<' {
		return notAutolink, 0
	}
	if [1] == '/' {
		 = 2
	} else {
		 = 1
	}

	if !isalnum([]) {
		return notAutolink, 0
	}
scheme test
	 = notAutolink
try to find the beginning of an URI
	for  < len() && (isalnum([]) || [] == '.' || [] == '+' || [] == '-') {
		++
	}

	if  > 1 &&  < len() && [] == '@' {
		if  = isMailtoAutoLink([:]);  != 0 {
			return emailAutolink,  + 
		}
	}

	if  > 2 &&  < len() && [] == ':' {
		 = normalAutolink
		++
	}
complete autolink test: no whitespace or ' or "
	switch {
	case  >= len():
		 = notAutolink
	case  != notAutolink:
		 = 

		for  < len() {
			if [] == '\\' {
				 += 2
			} else if [] == '>' || [] == '\'' || [] == '"' || isspace([]) {
				break
			} else {
				++
			}

		}

		if  >= len() {
			return , 0
		}
		if  >  && [] == '>' {
			return ,  + 1
		}
one of the forbidden chars has been found
		 = notAutolink
	}
	 += bytes.IndexByte([:], '>')
	if  < 0 {
		return , 0
	}
	return ,  + 1
}
look for the address part of a mail autolink and '>'; this is less strict than the original markdown e-mail address matching
// Scans an e-mail style autolink body up to its closing '>' (presumably the
// isMailtoAutoLink helper called by the tag-length code; the declared name is
// missing in this view).
// Alphanumerics and '-', '.', '_' are accepted; each '@' increments a
// counter. On '>' it returns the index plus one if the counter equals 1, and
// 0 otherwise. Any other byte, or running out of input before '>', returns 0.
func ( []byte) int {
	 := 0
address is assumed to be: [-@._a-zA-Z0-9]+ with exactly one '@'
	for  := 0;  < len(); ++ {
		if isalnum([]) {
			continue
		}

		switch [] {
		case '@':
			++

		case '-', '.', '_':
			break

		case '>':
			if  == 1 {
				return  + 1
			}
			return 0
		default:
			return 0
		}
	}

	return 0
}
look for the next emph char, skipping other constructs
// Finds the next emphasis marker byte while stepping over constructs that may
// contain it (presumably the helperFindEmphChar used by the emphasis helpers;
// the declared name is missing in this view). A return of 0 means not found.
//
// The scan stops at the marker byte, a backtick or '['. A stop preceded by a
// backslash is skipped as escaped. A stop on the marker itself returns its
// index. The two remaining sections skip a code span (up to the next
// backtick) and a link (bracketed text, optional spaces or newlines, then a
// bracketed or parenthesised target). Each section records a value when it
// sees the marker inside the skipped region and returns a recorded value if
// the construct turns out to be unterminated or not a link.
// NOTE(review): the branch headers that select the code-span and link
// sections, and one closing brace, are absent from this view.
func ( []byte,  byte) int {
	 := 0

	for  < len() {
		for  < len() && [] !=  && [] != '`' && [] != '[' {
			++
		}
		if  >= len() {
			return 0
do not count escaped chars
		if  != 0 && [-1] == '\\' {
			++
			continue
		}
		if [] ==  {
			return 
		}

skip a code span
			 := 0
			++
			for  < len() && [] != '`' {
				if  == 0 && [] ==  {
					 = 
				}
				++
			}
			if  >= len() {
				return 
			}
			++
skip a link
			 := 0
			++
			for  < len() && [] != ']' {
				if  == 0 && [] ==  {
					 = 
				}
				++
			}
			++
			for  < len() && ([] == ' ' || [] == '\n') {
				++
			}
			if  >= len() {
				return 
			}
			if [] != '[' && [] != '(' { // not a link
				if  > 0 {
					return 
				}
				continue
			}
			 := []
			++
			for  < len() && [] !=  {
				if  == 0 && [] ==  {
					return 
				}
				++
			}
			if  >= len() {
				return 
			}
			++
		}
	}
	return 0
}

// Parses single emphasis (presumably the helperEmphasis called by the
// emphasis handler; the declared name is missing in this view).
// It repeatedly asks helperFindEmphChar for the next marker and gives up with
// (0, nil) when none is found or the index runs past the input. A marker
// immediately followed by a second marker byte is skipped. A marker not
// preceded by whitespace closes the span, unless NoIntraEmphasis is on and
// the marker is followed by something other than end of input, whitespace or
// punctuation. On success an Emph node is created, the enclosed bytes are
// parsed into it through the receiver's inline method, and the returned count
// includes the closing marker.
func ( *Markdown,  []byte,  byte) (int, *Node) {
	 := 0
skip one symbol if coming from emph3
	if len() > 1 && [0] ==  && [1] ==  {
		 = 1
	}

	for  < len() {
		 := helperFindEmphChar([:], )
		if  == 0 {
			return 0, nil
		}
		 += 
		if  >= len() {
			return 0, nil
		}

		if +1 < len() && [+1] ==  {
			++
			continue
		}

		if [] ==  && !isspace([-1]) {

			if .extensions&NoIntraEmphasis != 0 {
				if !(+1 == len() || isspace([+1]) || ispunct([+1])) {
					continue
				}
			}

			 := NewNode(Emph)
			.inline(, [:])
			return  + 1, 
		}
	}

	return 0, nil
}

// Parses double emphasis (presumably the helperDoubleEmphasis called by the
// emphasis handler; the declared name is missing in this view).
// It looks for two consecutive marker bytes that are not at index 0 and are
// not preceded by whitespace. On success it builds a Strong node, or a Del
// node when the marker is '~', parses the enclosed bytes into it through the
// receiver's inline method, and returns a count that includes both closing
// bytes. It returns (0, nil) when helperFindEmphChar finds no further marker
// or the input is exhausted.
func ( *Markdown,  []byte,  byte) (int, *Node) {
	 := 0

	for  < len() {
		 := helperFindEmphChar([:], )
		if  == 0 {
			return 0, nil
		}
		 += 

		if +1 < len() && [] ==  && [+1] ==  &&  > 0 && !isspace([-1]) {
			 := Strong
			if  == '~' {
				 = Del
			}
			 := NewNode()
			.inline(, [:])
			return  + 2, 
		}
		++
	}
	return 0, nil
}

// Parses triple emphasis (presumably the helperTripleEmphasis called by the
// emphasis handler; the declared name is missing in this view).
// For each marker found by helperFindEmphChar that is not preceded by
// whitespace, one of three outcomes applies:
//   - a triple closer builds a Strong node with an Emph child, parses the
//     enclosed bytes into the Emph node and returns a count including the
//     three closing bytes;
//   - a double closer re-parses from two bytes earlier with helperEmphasis
//     and subtracts 2 from its result;
//   - a single closer re-parses from one byte earlier with
//     helperDoubleEmphasis and subtracts 1 from its result.
// A zero result from either helper, or no marker at all, yields (0, nil).
// NOTE(review): the case conditions of the switch are absent from this view.
func ( *Markdown,  []byte,  int,  byte) (int, *Node) {
	 := 0
	 := 
	 = [:]

	for  < len() {
		 := helperFindEmphChar([:], )
		if  == 0 {
			return 0, nil
		}
		 += 
skip whitespace preceded symbols
		if [] !=  || isspace([-1]) {
			continue
		}

		switch {
triple symbol found
			 := NewNode(Strong)
			 := NewNode(Emph)
			.AppendChild()
			.inline(, [:])
			return  + 3, 
double symbol found, hand over to emph1
			,  := helperEmphasis(, [-2:], )
			if  == 0 {
				return 0, nil
			}
			return  - 2, 
single symbol found, hand over to emph2
			,  := helperDoubleEmphasis(, [-1:], )
			if  == 0 {
				return 0, nil
			}
			return  - 1, 
		}
	}
	return 0, nil
}

// Builds a Text node and assigns its Literal field (presumably the text
// helper called throughout this file; the declared name is missing in this
// view). The Literal is assigned directly, with no copy visible here.
func ( []byte) *Node {
	 := NewNode(Text)
	.Literal = 
	return 
}

func ( []byte) []byte {
	return  // TODO: implement