Source File: scanner.go
Belonging Package: go/scanner
package scanner
import (
	"bytes"
	"fmt"
	"go/token"
	"path/filepath"
	"unicode"
	"unicode/utf8"
)
// ErrorHandler is the signature of a callback that receives a source
// position and an error message.
// NOTE(review): elsewhere in this file a stored handler (the err field) is
// called with exactly these two arguments whenever it is non-nil.
type ErrorHandler func(pos token.Position, msg string)
ch rune // current character
offset int // character offset
rdOffset int // reading offset (position after current character)
lineOffset int // current line offset
insertSemi bool // insert a semicolon before next newline
ErrorCount int // number of errors encountered
}
// bom is the Unicode byte order mark (U+FEFF). Code in this file reports it
// as "illegal byte order mark" when it is decoded at an offset greater than
// zero, and skips it when it is the first character read during setup.
const bom = 0xFEFF // byte order mark, only permitted as very first character
, = utf8.DecodeRune(.src[.rdOffset:])
if == utf8.RuneError && == 1 {
.error(.offset, "illegal UTF-8 encoding")
} else if == bom && .offset > 0 {
.error(.offset, "illegal byte order mark")
}
}
.rdOffset +=
.ch =
} else {
.offset = len(.src)
if .ch == '\n' {
.lineOffset = .offset
.file.AddLine(.offset)
}
.ch = -1 // eof
}
}
// Mode is an unsigned bit set; each constant below occupies one bit,
// generated with iota.
type Mode uint
const (
// ScanComments is bit 0 (value 1).
ScanComments Mode = 1 << iota // return comments as COMMENT tokens
// dontInsertSemis is bit 1 (value 2). It is unexported; the scanning code
// only updates its insertSemi state when this bit is clear.
dontInsertSemis // do not automatically insert semicolons - for testing only
)
if .Size() != len() {
panic(fmt.Sprintf("file size (%d) does not match src len (%d)", .Size(), len()))
}
.file =
.dir, _ = filepath.Split(.Name())
.src =
.err =
.mode =
.ch = ' '
.offset = 0
.rdOffset = 0
.lineOffset = 0
.insertSemi = false
.ErrorCount = 0
.next()
if .ch == bom {
.next() // ignore BOM at file beginning
}
}
// NOTE(review): the receiver name, method name and parameter names on the
// signature line appear to have been stripped during extraction. Call sites
// in this file invoke a two-argument method as .error(offset, message), so
// this is presumably that method - confirm against upstream.
//
// Reports one error: if a handler is installed (the err field is non-nil) it
// is called with a position obtained from the file; ErrorCount is
// incremented whether or not a handler exists.
func ( *Scanner) ( int, string) {
if .err != nil {
.err(.file.Position(.file.Pos()), )
}
.ErrorCount++
}
// NOTE(review): identifiers on the signature line appear to have been
// stripped during extraction. Call sites pass an offset, a format string and
// arguments to .errorf, so this is presumably that method - confirm.
//
// Formatting variant of the error reporter: the trailing arguments are
// rendered with fmt.Sprintf and the result is forwarded to .error.
func ( *Scanner) ( int, string, ...interface{}) {
.error(, fmt.Sprintf(, ...))
}
:= .offset - 1 // position of initial '/'
:= -1 // position immediately following the comment; < 0 means invalid comment
:= 0
if >= 0 /* implies valid comment */ && ([1] == '*' || == .lineOffset) && bytes.HasPrefix([2:], prefix) {
.updateLineInfo(, , )
}
if > 0 {
= stripCR(, [1] == '*')
}
return string()
}
// prefix is the byte sequence "line " (with trailing space). The comment
// scanning code tests for it with bytes.HasPrefix, starting two bytes into
// the comment text, before handing the comment to updateLineInfo.
var prefix = []byte("line ")
if [1] == '*' {
= [:len()-2] // lop off trailing "*/"
}
= [7:] // lop off leading "//line " or "/*line "
+= 7
, , := trailingDigits()
if == 0 {
return // ignore (not a line directive)
.error(+, "invalid line number: "+string([:]))
return
}
var , int
, , := trailingDigits([:-1])
return true
return false
}
.next() // consume '/'
}
return false
}
// isLetter reports whether ch counts as a letter for identifier scanning:
// an ASCII letter, an underscore, or a non-ASCII rune that Unicode
// classifies as a letter. Negative values (such as the -1 end-of-file
// marker) are never letters.
func isLetter(ch rune) bool {
	if ch >= utf8.RuneSelf {
		return unicode.IsLetter(ch)
	}
	// Setting bit 0x20 folds 'A'..'Z' onto 'a'..'z', so one range check
	// covers both cases.
	lc := ch | ('a' - 'A')
	return ch == '_' || ('a' <= lc && lc <= 'z')
}
// isDigit reports whether ch counts as a digit for identifier scanning:
// an ASCII decimal digit, or a non-ASCII rune that Unicode classifies as a
// decimal digit.
func isDigit(ch rune) bool {
	if ch >= utf8.RuneSelf {
		return unicode.IsDigit(ch)
	}
	return '0' <= ch && ch <= '9'
}
// NOTE(review): the receiver, method name and the local on the first body
// line appear to have been stripped during extraction; the scanning code
// calls .scanIdentifier() with no arguments and uses a string result, so
// this is presumably that method - confirm against upstream.
//
// Consumes characters for as long as the current one satisfies isLetter or
// isDigit, then returns a slice of the source ending at the current offset,
// converted to a string.
func ( *Scanner) () string {
// presumably remembers the starting offset for the slice below
:= .offset
for isLetter(.ch) || isDigit(.ch) {
.next()
}
return string(.src[:.offset])
}
// digitVal returns the numeric value of ch interpreted as a hexadecimal
// digit ('0'-'9', 'a'-'f', 'A'-'F'). Any other rune yields 16, which is
// larger than every legal digit value, so callers can reject it with a
// single comparison against the base.
func digitVal(ch rune) int {
	// Setting bit 0x20 folds upper-case ASCII letters onto lower case.
	switch lc := ch | ('a' - 'A'); {
	case '0' <= ch && ch <= '9':
		return int(ch - '0')
	case 'a' <= lc && lc <= 'f':
		return int(lc - 'a' + 10)
	}
	return 16 // larger than any legal digit val
}
// lower sets bit 0x20 ('a' - 'A') in ch. For an ASCII letter this yields
// the lower-case form; for any other rune the result is only meaningful in
// range comparisons against lower-case ASCII letters.
func lower(ch rune) rune { return ('a' - 'A') | ch } // returns lower-case ch iff ch is ASCII letter
// isDecimal reports whether ch is an ASCII decimal digit, '0' through '9'.
func isDecimal(ch rune) bool { return '0' <= ch && ch <= '9' }
// isHex reports whether ch is an ASCII hexadecimal digit: '0'-'9', 'a'-'f'
// or 'A'-'F'.
func isHex(ch rune) bool {
	// Setting bit 0x20 folds upper-case ASCII letters onto lower case.
	lc := ch | ('a' - 'A')
	return '0' <= ch && ch <= '9' || 'a' <= lc && lc <= 'f'
}
// NOTE(review): receiver, method, parameter, result and local names appear
// to have been stripped during extraction; the number scanner calls
// .digits(10, nil) and ORs the result into a bit set, so this is presumably
// that method - confirm against upstream.
//
// Consumes a run of digits and '_' separators. Each character consumed
// contributes a flag that is ORed into the named result: 1 for a digit, 2
// for an underscore.
func ( *Scanner) ( int, *int) ( int) {
// Bases up to 10: accept every decimal digit so that out-of-range digits
// can be reported rather than ending the literal.
if <= 10 {
// presumably the first rune that is NOT a valid digit for this base
:= rune('0' + )
for isDecimal(.ch) || .ch == '_' {
:= 1
if .ch == '_' {
= 2
} else if .ch >= && * < 0 {
// only the first out-of-range digit is recorded (guarded by the < 0 test)
* = int(.offset) // record invalid rune offset
}
|=
.next()
}
} else {
// Larger bases: accept hexadecimal digits and '_'.
for isHex(.ch) || .ch == '_' {
:= 1
if .ch == '_' {
= 2
}
|=
.next()
}
}
return
}
// NOTE(review): identifiers throughout this method appear to have been
// stripped during extraction; the scanning code assigns two results from
// .scanNumber(), so this is presumably that method. In the visible text no
// statement sets the token kind to INT or changes the base/prefix between
// the declarations and the exponent handling, although both are tested
// later - lines seem to be missing from the interior. Confirm against
// upstream before relying on this block.
//
// Returns a token kind together with the literal text sliced from the
// source between the starting offset and the current offset.
func ( *Scanner) () (token.Token, string) {
:= .offset
:= token.ILLEGAL
:= 10 // number base
:= rune(0) // one of 0 (decimal), '0' (0-octal), 'x', 'o', or 'b'
:= 0 // bit 0: digit present, bit 1: '_' present
:= -1 // index of invalid digit in literal, or < 0
// Exponent: 'e'/'E' or 'p'/'P' (case folded with lower).
if := lower(.ch); == 'e' || == 'p' {
switch {
case == 'e' && != 0 && != '0':
.errorf(.offset, "%q exponent requires decimal mantissa", .ch)
case == 'p' && != 'x':
.errorf(.offset, "%q exponent requires hexadecimal mantissa", .ch)
}
.next()
= token.FLOAT
// optional exponent sign
if .ch == '+' || .ch == '-' {
.next()
}
// exponent digits are always scanned in base 10
:= .digits(10, nil)
|=
// bit 0 clear means no digit was consumed
if &1 == 0 {
.error(.offset, "exponent has no digits")
}
} else if == 'x' && == token.FLOAT {
.error(.offset, "hexadecimal mantissa requires a 'p' exponent")
}
// A trailing 'i' turns the literal into an imaginary one.
if .ch == 'i' {
= token.IMAG
.next()
}
:= string(.src[:.offset])
if == token.INT && >= 0 {
.errorf(, "invalid digit %q in %s", [-], litname())
}
// bit 1 set means at least one '_' was seen; validate separator placement
if &2 != 0 {
if := invalidSep(); >= 0 {
.error(+, "'_' must separate successive digits")
}
}
return ,
}
// litname returns the human-readable name of the numeric literal kind
// selected by prefix, for use in error messages. 'x' denotes hexadecimal,
// 'o' and '0' denote octal, 'b' denotes binary; every other value
// (including 0, meaning no prefix) denotes decimal.
func litname(prefix rune) string {
	switch prefix {
	case 'x':
		return "hexadecimal literal"
	case 'o', '0':
		return "octal literal"
	case 'b':
		return "binary literal"
	}
	return "decimal literal"
}
// NOTE(review): identifiers throughout this method appear to have been
// stripped during extraction; the rune-literal code calls
// .scanEscape('\'') and tests a bool result, so this is presumably that
// method - confirm against upstream.
//
// Validates the escape sequence starting at the current character. Returns
// true when the sequence is well formed; otherwise it reports an error and
// returns false.
func ( *Scanner) ( rune) bool {
:= .offset
var int
var , uint32
switch .ch {
// Single-character escapes; the stripped final case value is presumably the
// rune parameter (the active quote character) - confirm.
case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\', :
.next()
return true
// The three values assigned in each case below are consumed by the digit
// loop and the range check that follow: presumably digit count, base and
// maximum value, in that order.
case '0', '1', '2', '3', '4', '5', '6', '7':
// octal: the first digit is left unconsumed so the loop reads it
, , = 3, 8, 255
case 'x':
.next()
, , = 2, 16, 255
case 'u':
.next()
, , = 4, 16, unicode.MaxRune
case 'U':
.next()
, , = 8, 16, unicode.MaxRune
default:
:= "unknown escape sequence"
// a negative current character is the end-of-file marker
if .ch < 0 {
= "escape sequence not terminated"
}
.error(, )
return false
}
var uint32
// Accumulate the digits; digitVal yields 16 for a non-digit, which fails the
// comparison below for every base used here.
for > 0 {
:= uint32(digitVal(.ch))
if >= {
:= fmt.Sprintf("illegal character %#U in escape sequence", .ch)
if .ch < 0 {
= "escape sequence not terminated"
}
.error(.offset, )
return false
}
= * +
.next()
--
}
// Reject values above the maximum and the range 0xD800 up to (not including) 0xE000.
if > || 0xD800 <= && < 0xE000 {
.error(, "escape sequence is invalid Unicode code point")
return false
}
return true
}
if {
.error(, "rune literal not terminated")
= false
}
break
}
.next()
if == '\'' {
break
}
++
if == '\\' {
if !.scanEscape('\'') {
= false
:= .offset - 1
:= false
for {
:= .ch
if < 0 {
.error(, "raw string literal not terminated")
break
}
.next()
if == '`' {
break
}
if == '\r' {
= true
}
}
:= .src[:.offset]
if {
= stripCR(, false)
}
return string()
}
// NOTE(review): the receiver and method names appear to have been stripped
// during extraction, and no call site is visible in this file; presumably a
// whitespace-skipping helper - confirm against upstream.
//
// Advances past spaces, tabs and carriage returns. Because && binds tighter
// than ||, the !insertSemi condition applies only to '\n': a newline is
// skipped only while insertSemi is false.
func ( *Scanner) () {
for .ch == ' ' || .ch == '\t' || .ch == '\n' && !.insertSemi || .ch == '\r' {
.next()
}
}
// NOTE(review): identifiers appear to have been stripped during extraction;
// call sites pass two tokens to .switch2 (for example token.MUL and
// token.MUL_ASSIGN), so this is presumably that method - confirm.
//
// Chooses between two tokens: when the current character is '=' it is
// consumed and one token is returned, otherwise the other is returned
// without consuming anything. Which parameter maps to which branch is not
// recoverable from the stripped text.
func ( *Scanner) (, token.Token) token.Token {
if .ch == '=' {
.next()
return
}
return
}
// NOTE(review): identifiers appear to have been stripped during extraction;
// call sites pass (token, token, rune, token) to .switch3 (for example
// token.ADD, token.ADD_ASSIGN, '+', token.INC), so this is presumably that
// method - confirm.
//
// Three-way choice: '=' is checked first, then a second character
// (presumably the rune parameter); whichever matches is consumed and its
// token returned. With no match a token is returned without consuming
// input.
func ( *Scanner) (, token.Token, rune, token.Token) token.Token {
if .ch == '=' {
.next()
return
}
if .ch == {
.next()
return
}
return
}
// NOTE(review): identifiers appear to have been stripped during extraction;
// call sites pass (token, token, rune, token, token) to .switch4 (for
// example token.LSS, token.LEQ, '<', token.SHL, token.SHL_ASSIGN), so this
// is presumably that method - confirm.
//
// Four-way choice: '=' is checked first; otherwise, if the current character
// equals a second character (presumably the rune parameter) it is consumed
// and a following '=' is checked as well. Each path returns a distinct
// token; with no match nothing is consumed.
func ( *Scanner) (, token.Token, rune, , token.Token) token.Token {
if .ch == '=' {
.next()
return
}
if .ch == {
.next()
// doubled operator optionally followed by '='
if .ch == '=' {
.next()
return
}
return
}
return
}
:= false
switch := .ch; {
case isLetter():
= .scanIdentifier()
= token.Lookup()
switch {
case token.IDENT, token.BREAK, token.CONTINUE, token.FALLTHROUGH, token.RETURN:
= true
}
} else {
= true
= token.IDENT
}
case isDecimal() || == '.' && isDecimal(rune(.peek())):
= true
, = .scanNumber()
default:
.next() // always make progress
switch {
case -1:
if .insertSemi {
.insertSemi = false // EOF consumed
return , token.SEMICOLON, "\n"
}
= token.EOF
= token.PERIOD
if .ch == '.' && .peek() == '.' {
.next()
.next() // consume last '.'
= token.ELLIPSIS
}
case ',':
= token.COMMA
case ';':
= token.SEMICOLON
= ";"
case '(':
= token.LPAREN
case ')':
= true
= token.RPAREN
case '[':
= token.LBRACK
case ']':
= true
= token.RBRACK
case '{':
= token.LBRACE
case '}':
= true
= token.RBRACE
case '+':
= .switch3(token.ADD, token.ADD_ASSIGN, '+', token.INC)
if == token.INC {
= true
}
case '-':
= .switch3(token.SUB, token.SUB_ASSIGN, '-', token.DEC)
if == token.DEC {
= true
}
case '*':
= .switch2(token.MUL, token.MUL_ASSIGN)
case '/':
.insertSemi = false // newline consumed
goto
}
= token.COMMENT
=
} else {
= .switch2(token.QUO, token.QUO_ASSIGN)
}
case '%':
= .switch2(token.REM, token.REM_ASSIGN)
case '^':
= .switch2(token.XOR, token.XOR_ASSIGN)
case '<':
if .ch == '-' {
.next()
= token.ARROW
} else {
= .switch4(token.LSS, token.LEQ, '<', token.SHL, token.SHL_ASSIGN)
}
case '>':
= .switch4(token.GTR, token.GEQ, '>', token.SHR, token.SHR_ASSIGN)
case '=':
= .switch2(token.ASSIGN, token.EQL)
case '!':
= .switch2(token.NOT, token.NEQ)
case '&':
if .ch == '^' {
.next()
= .switch2(token.AND_NOT, token.AND_NOT_ASSIGN)
} else {
= .switch3(token.AND, token.AND_ASSIGN, '&', token.LAND)
}
case '|':
= .switch3(token.OR, token.OR_ASSIGN, '|', token.LOR)
if != bom {
.errorf(.file.Offset(), "illegal character %#U", )
}
= .insertSemi // preserve insertSemi info
= token.ILLEGAL
= string()
}
}
if .mode&dontInsertSemis == 0 {
.insertSemi =
}
return
The pages are generated with Golds v0.3.2-preview (GOOS=darwin GOARCH=amd64). Golds is a Go 101 project developed by Tapir Liu. PRs and bug reports are welcome and can be submitted to the issue list. Please follow @Go100and1 (reachable from the QR code on the left) to get the latest news about Golds.