package css_lexer

import (
	"strings"
	"unicode/utf8"

	// NOTE(review): the import paths in this block were lost. "strings" and
	// "unicode/utf8" are certain from their uses in this file; the path for
	// the "logger" package is presumed from the esbuild reference in the
	// package comment — confirm it against the module.
	"github.com/evanw/esbuild/internal/logger"
)
// The lexer converts a source file to a stream of tokens. Unlike esbuild's
// JavaScript lexer, this CSS lexer runs to completion before the CSS parser
// begins, resulting in a single array of all tokens in the file.

// T identifies the kind of a CSS token.
type T uint8

const (
	// eof is the sentinel code point the lexer stores once the input is
	// exhausted.
	eof = -1

	// replacementCharacter is U+FFFD, returned in place of escapes that do
	// not name a usable code point.
	replacementCharacter = 0xFFFD
)

// Token kinds, numbered consecutively from zero. tokenToString lists one
// description per kind in this same order, so the two must be kept in sync.
const (
	TEndOfFile T = iota

	TAtKeyword
	TBadString
	TBadURL
	TCDC // "-->"
	TCDO // "<!--"
	TCloseBrace
	TCloseBracket
	TCloseParen
	TColon
	TComma
	TDelim
	TDelimAmpersand
	TDelimAsterisk
	TDelimBar
	TDelimCaret
	TDelimDollar
	TDelimDot
	TDelimEquals
	TDelimExclamation
	TDelimGreaterThan
	TDelimPlus
	TDelimSlash
	TDelimTilde
	TDimension
	TFunction
	THash
	TIdent
	TNumber
	TOpenBrace
	TOpenBracket
	TOpenParen
	TPercentage
	TSemicolon
	TString
	TURL
	TWhitespace
)

// tokenToString holds a human-readable description for each token kind.
// Entries appear in the same order as the token kind constants; punctuation
// tokens are described by their quoted source text.
var tokenToString = []string{
	`end of file`,
	`@-keyword`,
	`bad string token`,
	`bad URL token`,
	`"-->"`,
	`"<!--"`,
	`"}"`,
	`"]"`,
	`")"`,
	`":"`,
	`","`,
	`delimiter`,
	`"&"`,
	`"*"`,
	`"|"`,
	`"^"`,
	`"$"`,
	`"."`,
	`"="`,
	`"!"`,
	`">"`,
	`"+"`,
	`"/"`,
	`"~"`,
	`dimension`,
	`function token`,
	`hash token`,
	`identifier`,
	`number`,
	`"{"`,
	`"["`,
	`"("`,
	`percentage`,
	`";"`,
	`string token`,
	`URL token`,
	`whitespace`,
}

func ( T) () string {
	return tokenToString[]
}
This token struct is designed to be memory-efficient. It just references a range in the input file instead of directly containing the substring of text since a range takes up less memory than a string.
type Token struct {
	Range      logger.Range // 8 bytes
	UnitOffset uint16       // 2 bytes
	Kind       T            // 1 byte
	IsID       bool         // 1 byte
}

func ( Token) ( string) string {
	 := [.Range.Loc.Start:.Range.End()]

	switch .Kind {
	case TIdent, TDimension:
		return decodeEscapesInToken()

	case TAtKeyword, THash:
		return decodeEscapesInToken([1:])

	case TFunction:
		return decodeEscapesInToken([:len()-1])

	case TString:
		return decodeEscapesInToken([1 : len()-1])

	case TURL:
		 := 4
		 := len() - 1
Trim leading and trailing whitespace
		for  <  && isWhitespace(rune([])) {
			++
		}
		for  <  && isWhitespace(rune([-1])) {
			--
		}

		return decodeEscapesInToken([:])
	}

	return 
}

// lexer holds the state of one tokenization pass over a single source file.
type lexer struct {
	// log receives the errors and warnings reported while scanning.
	log       logger.Log
	// source is the file being tokenized; its Contents string is scanned.
	source    logger.Source
	// current is the byte offset in source.Contents just past codePoint.
	current   int
	// codePoint is the most recently decoded code point, or eof once the
	// input is exhausted.
	codePoint rune
	// Token is the token currently being built.
	Token     Token
}

func ( logger.Log,  logger.Source) ( []Token) {
	 := lexer{
		log:    ,
		source: ,
	}
	.step()
The U+FEFF character is usually a zero-width non-breaking space. However, when it's used at the start of a text stream it is called a BOM (byte order mark) instead and indicates that the text stream is UTF-8 encoded. This is problematic for us because CSS does not treat U+FEFF as whitespace. Only " \t\r\n\f" characters are treated as whitespace. Skip over the BOM if it is present so it doesn't cause us trouble when we try to parse it.
	if .codePoint == '\uFEFF' {
		.step()
	}

	.next()
	for .Token.Kind != TEndOfFile {
		 = append(, .Token)
		.next()
	}
	return
}

func ( *lexer) () {
	,  := utf8.DecodeRuneInString(.source.Contents[.current:])
Use -1 to indicate the end of the file
	if  == 0 {
		 = eof
	}

	.codePoint = 
	.Token.Range.Len = int32(.current) - .Token.Range.Loc.Start
	.current += 
}

Reference: https://www.w3.org/TR/css-syntax-3/

	for {
		.Token = Token{Range: logger.Range{Loc: logger.Loc{Start: .Token.Range.End()}}}

		switch .codePoint {
		case eof:
			.Token.Kind = TEndOfFile

		case '/':
			.step()
			switch .codePoint {
			case '*':
				.step()
				.consumeToEndOfMultiLineComment(.Token.Range)
				continue
			case '/':
				.step()
				.consumeToEndOfSingleLineComment()
				continue
			}
			.Token.Kind = TDelimSlash

		case ' ', '\t', '\n', '\r', '\f':
			.step()
			for {
				if isWhitespace(.codePoint) {
					.step()
				} else if .codePoint == '/' && .current < len(.source.Contents) && .source.Contents[.current] == '*' {
					 := logger.Range{Loc: logger.Loc{Start: .Token.Range.End()}, Len: 2}
					.step()
					.step()
					.consumeToEndOfMultiLineComment()
				} else {
					break
				}
			}
			.Token.Kind = TWhitespace

		case '"', '\'':
			.Token.Kind = .consumeString()

		case '#':
			.step()
			if IsNameContinue(.codePoint) || .isValidEscape() {
				.Token.Kind = THash
				if .wouldStartIdentifier() {
					.Token.IsID = true
				}
				.consumeName()
			} else {
				.Token.Kind = TDelim
			}

		case '(':
			.step()
			.Token.Kind = TOpenParen

		case ')':
			.step()
			.Token.Kind = TCloseParen

		case '[':
			.step()
			.Token.Kind = TOpenBracket

		case ']':
			.step()
			.Token.Kind = TCloseBracket

		case '{':
			.step()
			.Token.Kind = TOpenBrace

		case '}':
			.step()
			.Token.Kind = TCloseBrace

		case ',':
			.step()
			.Token.Kind = TComma

		case ':':
			.step()
			.Token.Kind = TColon

		case ';':
			.step()
			.Token.Kind = TSemicolon

		case '+':
			if .wouldStartNumber() {
				.Token.Kind = .consumeNumeric()
			} else {
				.step()
				.Token.Kind = TDelimPlus
			}

		case '.':
			if .wouldStartNumber() {
				.Token.Kind = .consumeNumeric()
			} else {
				.step()
				.Token.Kind = TDelimDot
			}

		case '-':
			if .wouldStartNumber() {
				.Token.Kind = .consumeNumeric()
			} else if .current+2 <= len(.source.Contents) && .source.Contents[.current:.current+2] == "->" {
				.step()
				.step()
				.step()
				.Token.Kind = TCDC
			} else if .wouldStartIdentifier() {
				.consumeName()
				.Token.Kind = TIdent
			} else {
				.step()
				.Token.Kind = TDelim
			}

		case '<':
			if .current+3 <= len(.source.Contents) && .source.Contents[.current:.current+3] == "!--" {
				.step()
				.step()
				.step()
				.step()
				.Token.Kind = TCDO
			} else {
				.step()
				.Token.Kind = TDelim
			}

		case '@':
			.step()
			if .wouldStartIdentifier() {
				.consumeName()
				.Token.Kind = TAtKeyword
			} else {
				.Token.Kind = TDelim
			}

		case '\\':
			if .isValidEscape() {
				.Token.Kind = .consumeIdentLike()
			} else {
				.step()
				.log.AddRangeError(&.source, .Token.Range, "Invalid escape")
				.Token.Kind = TDelim
			}

		case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
			.Token.Kind = .consumeNumeric()

		case '>':
			.step()
			.Token.Kind = TDelimGreaterThan

		case '~':
			.step()
			.Token.Kind = TDelimTilde

		case '&':
			.step()
			.Token.Kind = TDelimAmpersand

		case '*':
			.step()
			.Token.Kind = TDelimAsterisk

		case '|':
			.step()
			.Token.Kind = TDelimBar

		case '!':
			.step()
			.Token.Kind = TDelimExclamation

		case '=':
			.step()
			.Token.Kind = TDelimEquals

		case '^':
			.step()
			.Token.Kind = TDelimCaret

		case '$':
			.step()
			.Token.Kind = TDelimDollar

		default:
			if IsNameStart(.codePoint) {
				.Token.Kind = .consumeIdentLike()
			} else {
				.step()
				.Token.Kind = TDelim
			}
		}

		return
	}
}

func ( *lexer) ( logger.Range) {
	for {
		switch .codePoint {
		case '*':
			.step()
			if .codePoint == '/' {
				.step()
				return
			}

		case eof: // This indicates the end of the file
			.log.AddErrorWithNotes(&.source, logger.Loc{Start: .Token.Range.End()}, "Expected \"*/\" to terminate multi-line comment",
				[]logger.MsgData{logger.RangeData(&.source, , "The multi-line comment starts here")})
			return

		default:
			.step()
		}
	}
}

func ( *lexer) () {
	for !isNewline(.codePoint) && .codePoint != eof {
		.step()
	}
	.log.AddRangeWarning(&.source, .Token.Range, "Comments in CSS use \"/* ... */\" instead of \"//\"")
}

func ( *lexer) () bool {
	if .codePoint != '\\' {
		return false
	}
	,  := utf8.DecodeRuneInString(.source.Contents[.current:])
	return !isNewline()
}

func ( *lexer) () bool {
	if IsNameStart(.codePoint) {
		return true
	}

	if .codePoint == '-' {
		,  := utf8.DecodeRuneInString(.source.Contents[.current:])
		if IsNameStart() ||  == '-' {
			return true
		}
		if  == '\\' {
			, _ = utf8.DecodeRuneInString(.source.Contents[.current+:])
			return !isNewline()
		}
		return false
	}

	return .isValidEscape()
}

func ( string) bool {
	if len() > 0 {
		,  := utf8.DecodeRuneInString()
		if IsNameStart() {
			return true
		} else if  == '-' {
			if ,  := utf8.DecodeRuneInString([:]); IsNameStart() ||  == '-' {
				return true
			}
		}
	}
	return false
}

func ( *lexer) () bool {
	if .codePoint >= '0' && .codePoint <= '9' {
		return true
	} else if .codePoint == '.' {
		 := .source.Contents
		if .current < len() {
			 := [.current]
			return  >= '0' &&  <= '9'
		}
	} else if .codePoint == '+' || .codePoint == '-' {
		 := .source.Contents
		 := len()
		if .current <  {
			 := [.current]
			if  >= '0' &&  <= '9' {
				return true
			}
			if  == '.' && .current+1 <  {
				 = [.current+1]
				return  >= '0' &&  <= '9'
			}
		}
	}
	return false
}

Common case: no escapes, identifier is a substring of the input
	for IsNameContinue(.codePoint) {
		.step()
	}
	 := .source.Contents[.Token.Range.Loc.Start:.Token.Range.End()]
	if !.isValidEscape() {
		return 
	}
Uncommon case: escapes, identifier is allocated
	 := strings.Builder{}
	.WriteString()
	.WriteRune(.consumeEscape())
	for {
		if IsNameContinue(.codePoint) {
			.WriteRune(.codePoint)
			.step()
		} else if .isValidEscape() {
			.WriteRune(.consumeEscape())
		} else {
			break
		}
	}
	return .String()
}

func ( *lexer) () rune {
	.step() // Skip the backslash
	 := .codePoint

	if ,  := isHex();  {
		.step()
		for  := 0;  < 5; ++ {
			if ,  := isHex(.codePoint);  {
				.step()
				 = *16 + 
			} else {
				break
			}
		}
		if isWhitespace(.codePoint) {
			.step()
		}
		if  == 0 || ( >= 0xD800 &&  <= 0xDFFF) ||  > 0x10FFFF {
			return replacementCharacter
		}
		return rune()
	}

	if  == eof {
		return replacementCharacter
	}

	.step()
	return 
}

func ( *lexer) () T {
	 := .consumeName()

	if .codePoint == '(' {
		.step()
		if len() == 3 {
			, ,  := [0], [1], [2]
			if ( == 'u' ||  == 'U') && ( == 'r' ||  == 'R') && ( == 'l' ||  == 'L') {
				for isWhitespace(.codePoint) {
					.step()
				}
				if .codePoint != '"' && .codePoint != '\'' {
					return .consumeURL()
				}
			}
		}
		return TFunction
	}

	return TIdent
}

func ( *lexer) () T {
:
	for {
		switch .codePoint {
		case ')':
			.step()
			return TURL

		case eof:
			 := logger.Loc{Start: .Token.Range.End()}
			.log.AddError(&.source, , "Expected \")\" to end URL token")
			return TBadURL

		case ' ', '\t', '\n', '\r', '\f':
			.step()
			for isWhitespace(.codePoint) {
				.step()
			}
			if .codePoint != ')' {
				 := logger.Loc{Start: .Token.Range.End()}
				.log.AddError(&.source, , "Expected \")\" to end URL token")
				break 
			}
			.step()
			return TURL

		case '"', '\'', '(':
			 := logger.Range{Loc: logger.Loc{Start: .Token.Range.End()}, Len: 1}
			.log.AddRangeError(&.source, , "Expected \")\" to end URL token")
			break 

		case '\\':
			if !.isValidEscape() {
				 := logger.Range{Loc: logger.Loc{Start: .Token.Range.End()}, Len: 1}
				.log.AddRangeError(&.source, , "Invalid escape")
				break 
			}
			.consumeEscape()

		default:
			if isNonPrintable(.codePoint) {
				 := logger.Range{Loc: logger.Loc{Start: .Token.Range.End()}, Len: 1}
				.log.AddRangeError(&.source, , "Unexpected non-printable character in URL token")
			}
			.step()
		}
	}
Consume the remnants of a bad url
	for {
		switch .codePoint {
		case ')', eof:
			.step()
			return TBadURL

		case '\\':
			if .isValidEscape() {
				.consumeEscape()
			}
		}
		.step()
	}
}

func ( *lexer) () T {
	 := .codePoint
	.step()

	for {
		switch .codePoint {
		case '\\':
			.step()
Handle Windows CRLF
			if .codePoint == '\r' {
				.step()
				if .codePoint == '\n' {
					.step()
				}
				continue
			}
Otherwise, fall through to ignore the character after the backslash

		case eof:
			.log.AddError(&.source, logger.Loc{Start: .Token.Range.End()}, "Unterminated string token")
			return TBadString

		case '\n', '\r', '\f':
			.log.AddError(&.source, logger.Loc{Start: .Token.Range.End()}, "Unterminated string token")
			return TBadString

		case :
			.step()
			return TString
		}
		.step()
	}
}

Skip over leading sign
	if .codePoint == '+' || .codePoint == '-' {
		.step()
	}
Skip over leading digits
	for .codePoint >= '0' && .codePoint <= '9' {
		.step()
	}
Skip over digits after dot
	if .codePoint == '.' {
		.step()
		for .codePoint >= '0' && .codePoint <= '9' {
			.step()
		}
	}
Skip over exponent
	if .codePoint == 'e' || .codePoint == 'E' {
		 := .source.Contents
Look ahead before advancing to make sure this is an exponent, not a unit
		if .current < len() {
			 := [.current]
			if ( == '+' ||  == '-') && .current+1 < len() {
				 = [.current+1]
			}
Only consume this if it's an exponent
			if  >= '0' &&  <= '9' {
				.step()
				if .codePoint == '+' || .codePoint == '-' {
					.step()
				}
				for .codePoint >= '0' && .codePoint <= '9' {
					.step()
				}
			}
		}
	}
Determine the numeric type
	if .wouldStartIdentifier() {
		.Token.UnitOffset = uint16(.Token.Range.Len)
		.consumeName()
		return TDimension
	}
	if .codePoint == '%' {
		.step()
		return TPercentage
	}
	return TNumber
}

// IsNameStart reports whether c may begin a name: an ASCII letter, an
// underscore, or any code point at or above 0x80.
func IsNameStart(c rune) bool {
	return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_' || c >= 0x80
}

func ( rune) bool {
	return IsNameStart() || ( >= '0' &&  <= '9') ||  == '-'
}

// isNewline reports whether c is a line feed, carriage return, or form feed.
func isNewline(c rune) bool {
	switch c {
	case '\n', '\r', '\f':
		return true
	}
	return false
}

// isWhitespace reports whether c is a space, tab, line feed, carriage return,
// or form feed. No other code point counts as whitespace.
func isWhitespace(c rune) bool {
	switch c {
	case ' ', '\t', '\n', '\r', '\f':
		return true
	}
	return false
}

// isHex reports whether c is an ASCII hexadecimal digit (either letter case)
// and, if so, returns its numeric value. For any other code point it returns
// 0 and false.
func isHex(c rune) (int, bool) {
	if c >= '0' && c <= '9' {
		return int(c - '0'), true
	}
	if c >= 'a' && c <= 'f' {
		return int(c + (10 - 'a')), true
	}
	if c >= 'A' && c <= 'F' {
		return int(c + (10 - 'A')), true
	}
	return 0, false
}

// isNonPrintable reports whether c is at or below 0x08, equal to 0x0B, in the
// range 0x0E through 0x1F, or equal to 0x7F.
func isNonPrintable(c rune) bool {
	return c <= 0x08 || c == 0x0B || (c >= 0x0E && c <= 0x1F) || c == 0x7F
}

func ( string) string {
	 := 0

	for  < len() {
		if [] == '\\' {
			break
		}
		++
	}

	if  == len() {
		return 
	}

	 := strings.Builder{}
	.WriteString([:])
	 = [:]

	for len() > 0 {
		,  := utf8.DecodeRuneInString()
		 = [:]

		if  != '\\' {
			.WriteRune()
			continue
		}

		if len() == 0 {
			.WriteRune(replacementCharacter)
			continue
		}

		,  = utf8.DecodeRuneInString()
		 = [:]
		,  := isHex()

		if ! {
			if  == '\n' ||  == '\f' {
				continue
			}
Handle Windows CRLF
			if  == '\r' {
				,  = utf8.DecodeRuneInString()
				if  == '\n' {
					 = [:]
				}
				continue
			}
If we get here, this is not a valid escape. However, this is still allowed. In this case the backslash is just ignored.
			.WriteRune()
			continue
		}
Parse up to five additional hex characters (so six in total)
		for  := 0;  < 5 && len() > 0; ++ {
			,  = utf8.DecodeRuneInString()
			if ,  := isHex();  {
				 = [:]
				 = *16 + 
			} else {
				break
			}
		}

		if len() > 0 {
			,  = utf8.DecodeRuneInString()
			if isWhitespace() {
				 = [:]
			}
		}

		if  == 0 || ( >= 0xD800 &&  <= 0xDFFF) ||  > 0x10FFFF {
			.WriteRune(replacementCharacter)
			continue
		}

		.WriteRune(rune())
	}

	return .String()