Source File
token.go
Belonging Package
golang.org/x/net/html
package html
import (
	"bytes"
	"errors"
	"io"
	"strconv"
	"strings"

	"golang.org/x/net/html/atom"
)
// ErrBufferExceeded means that the buffering limit was exceeded.
var ErrBufferExceeded = errors.New("max buffer exceeded")
func ( TokenType) () string {
switch {
case ErrorToken:
return "Error"
case TextToken:
return "Text"
case StartTagToken:
return "StartTag"
case EndTagToken:
return "EndTag"
case SelfClosingTagToken:
return "SelfClosingTag"
case CommentToken:
return "Comment"
case DoctypeToken:
return "Doctype"
}
return "Invalid(" + strconv.Itoa(int()) + ")"
}
func ( Token) () string {
switch .Type {
case ErrorToken:
return ""
case TextToken:
return EscapeString(.Data)
case StartTagToken:
return "<" + .tagString() + ">"
case EndTagToken:
return "</" + .tagString() + ">"
case SelfClosingTagToken:
return "<" + .tagString() + "/>"
case CommentToken:
return "<!--" + .Data + "-->"
case DoctypeToken:
return "<!DOCTYPE " + .Data + ">"
}
return "Invalid(" + strconv.Itoa(int(.Type)) + ")"
}
pendingAttr [2]span
attr [][2]span
func ( *Tokenizer) ( bool) {
.allowCDATA =
}
func ( *Tokenizer) () {
defer func() {
.data.end = .raw.end
}()
var byte
:
= .readByte()
if .err != nil {
return
}
if == '<' {
goto
}
goto
:
= .readByte()
if .err != nil {
return
}
switch {
case '/':
goto
case '!':
goto
}
.raw.end--
goto
:
if .readRawEndTag() || .err != nil {
return
}
goto
:
= .readByte()
if .err != nil {
return
}
if == '-' {
goto
}
.raw.end--
goto
:
= .readByte()
if .err != nil {
return
}
if == '-' {
goto
}
.raw.end--
goto
:
= .readByte()
if .err != nil {
return
}
switch {
case '-':
goto
case '<':
goto
}
goto
:
= .readByte()
if .err != nil {
return
}
switch {
case '-':
goto
case '<':
goto
}
goto
:
= .readByte()
if .err != nil {
return
}
switch {
case '-':
goto
case '<':
goto
case '>':
goto
}
goto
:
= .readByte()
if .err != nil {
return
}
if == '/' {
goto
}
if 'a' <= && <= 'z' || 'A' <= && <= 'Z' {
goto
}
.raw.end--
goto
:
if .readRawEndTag() || .err != nil {
return
}
goto
:
.raw.end--
for := 0; < len("script"); ++ {
= .readByte()
if .err != nil {
return
}
if != "script"[] && != "SCRIPT"[] {
.raw.end--
goto
}
}
= .readByte()
if .err != nil {
return
}
switch {
case ' ', '\n', '\r', '\t', '\f', '/', '>':
goto
}
.raw.end--
goto
:
= .readByte()
if .err != nil {
return
}
switch {
case '-':
goto
case '<':
goto
}
goto
:
= .readByte()
if .err != nil {
return
}
switch {
case '-':
goto
case '<':
goto
}
goto
:
= .readByte()
if .err != nil {
return
}
switch {
case '-':
goto
case '<':
goto
case '>':
goto
}
goto
:
= .readByte()
if .err != nil {
return
}
if == '/' {
goto
}
.raw.end--
goto
:
if .readRawEndTag() {
.raw.end += len("</script>")
goto
}
if .err != nil {
return
}
goto
}
func ( *Tokenizer) () TokenType {
.data.start = .raw.end
var [2]byte
for := 0; < 2; ++ {
[] = .readByte()
if .err != nil {
.data.end = .raw.end
return CommentToken
}
}
if [0] == '-' && [1] == '-' {
.readComment()
return CommentToken
}
.raw.end -= 2
if .readDoctype() {
return DoctypeToken
}
if .allowCDATA && .readCDATA() {
.convertNUL = true
return TextToken
.readUntilCloseAngle()
return CommentToken
}
, := .buf[.data.start], false
if 'A' <= && <= 'Z' {
+= 'a' - 'A'
}
switch {
case 'i':
= .startTagIn("iframe")
case 'n':
= .startTagIn("noembed", "noframes", "noscript")
case 'p':
= .startTagIn("plaintext")
case 's':
= .startTagIn("script", "style")
case 't':
= .startTagIn("textarea", "title")
case 'x':
= .startTagIn("xmp")
}
if {
.rawTag = strings.ToLower(string(.buf[.data.start:.data.end]))
if .err == nil && .buf[.raw.end-2] == '/' {
return SelfClosingTagToken
}
return StartTagToken
}
.readTagName()
if .skipWhiteSpace(); .err != nil {
return
}
for {
:= .readByte()
if .err != nil || == '>' {
break
}
.raw.end--
.readTagAttrKey()
if && .pendingAttr[0].start != .pendingAttr[0].end {
.attr = append(.attr, .pendingAttr)
}
if .skipWhiteSpace(); .err != nil {
break
}
}
}
func ( *Tokenizer) () {
.pendingAttr[1].start = .raw.end
.pendingAttr[1].end = .raw.end
if .skipWhiteSpace(); .err != nil {
return
}
:= .readByte()
if .err != nil {
return
}
if != '=' {
.raw.end--
return
}
if .skipWhiteSpace(); .err != nil {
return
}
:= .readByte()
if .err != nil {
return
}
switch {
case '>':
.raw.end--
return
case '\'', '"':
.pendingAttr[1].start = .raw.end
for {
:= .readByte()
if .err != nil {
.pendingAttr[1].end = .raw.end
return
}
if == {
.pendingAttr[1].end = .raw.end - 1
return
}
}
default:
.pendingAttr[1].start = .raw.end - 1
for {
:= .readByte()
if .err != nil {
.pendingAttr[1].end = .raw.end
return
}
switch {
case ' ', '\n', '\r', '\t', '\f':
.pendingAttr[1].end = .raw.end - 1
return
case '>':
.raw.end--
.pendingAttr[1].end = .raw.end
return
}
}
}
}
= .readByte()
if .err != nil {
break
}
var TokenType
switch {
case 'a' <= && <= 'z' || 'A' <= && <= 'Z':
= StartTagToken
case == '/':
= EndTagToken
.tt = CommentToken
return .tt
}
if 'a' <= && <= 'z' || 'A' <= && <= 'Z' {
.readTag(false)
if .err != nil {
.tt = ErrorToken
} else {
.tt = EndTagToken
}
return .tt
}
.raw.end--
.readUntilCloseAngle()
.tt = CommentToken
return .tt
case CommentToken:
if == '!' {
.tt = .readMarkupDeclaration()
return .tt
}
.raw.end--
.readUntilCloseAngle()
.tt = CommentToken
return .tt
}
}
if .raw.start < .raw.end {
.data.end = .raw.end
.tt = TextToken
return .tt
}
.tt = ErrorToken
return .tt
}
// convertNewlines converts "\r" and "\r\n" in s to "\n".
// The conversion happens in place, but the resulting slice may be shorter.
func convertNewlines(s []byte) []byte {
	for i, c := range s {
		if c != '\r' {
			continue
		}

		src := i + 1
		if src >= len(s) || s[src] != '\n' {
			// Lone '\r': rewrite in place, no length change yet.
			s[i] = '\n'
			continue
		}

		// Found a "\r\n": from here on, compact the remainder of the
		// slice with a read (src) and write (dst) cursor.
		dst := i
		for src < len(s) {
			if s[src] == '\r' {
				if src+1 < len(s) && s[src+1] == '\n' {
					src++
				}
				s[dst] = '\n'
			} else {
				s[dst] = s[src]
			}
			src++
			dst++
		}
		return s[:dst]
	}
	return s
}
// nul and replacement are used by Text to substitute NUL bytes with the
// Unicode replacement character U+FFFD.
var (
	nul         = []byte("\x00")
	replacement = []byte("\ufffd")
)
func ( *Tokenizer) () []byte {
switch .tt {
case TextToken, CommentToken, DoctypeToken:
:= .buf[.data.start:.data.end]
.data.start = .raw.end
.data.end = .raw.end
= convertNewlines()
if (.convertNUL || .tt == CommentToken) && bytes.Contains(, nul) {
= bytes.Replace(, nul, replacement, -1)
}
if !.textIsRaw {
= unescape(, false)
}
return
}
return nil
}
func ( *Tokenizer) () (, []byte, bool) {
if .nAttrReturned < len(.attr) {
switch .tt {
case StartTagToken, SelfClosingTagToken:
:= .attr[.nAttrReturned]
.nAttrReturned++
= .buf[[0].start:[0].end]
= .buf[[1].start:[1].end]
return lower(), unescape(convertNewlines(), true), .nAttrReturned < len(.attr)
}
}
return nil, nil, false
}
func ( *Tokenizer) () Token {
:= Token{Type: .tt}
switch .tt {
case TextToken, CommentToken, DoctypeToken:
.Data = string(.Text())
case StartTagToken, SelfClosingTagToken, EndTagToken:
, := .TagName()
for {
var , []byte
, , = .TagAttr()
.Attr = append(.Attr, Attribute{"", atom.String(), string()})
}
if := atom.Lookup(); != 0 {
.DataAtom, .Data = , .String()
} else {
.DataAtom, .Data = 0, string()
}
}
return
}
func ( io.Reader) *Tokenizer {
return NewTokenizerFragment(, "")
}
![]() |
The pages are generated with Golds v0.3.2-preview. (GOOS=darwin GOARCH=amd64) Golds is a Go 101 project developed by Tapir Liu. PR and bug reports are welcome and can be submitted to the issue list. Please follow @Go100and1 (reachable from the left QR code) to get the latest news of Golds. |