Source File
parse.go
Belonging Package
regexp/syntax
package syntax
import (
)
ErrInternalError ErrorCode = "regexp/syntax: internal error"
ErrInvalidCharClass ErrorCode = "invalid character class"
ErrInvalidCharRange ErrorCode = "invalid character class range"
ErrInvalidEscape ErrorCode = "invalid escape sequence"
ErrInvalidNamedCapture ErrorCode = "invalid named capture"
ErrInvalidPerlOp ErrorCode = "invalid or unsupported Perl syntax"
ErrInvalidRepeatOp ErrorCode = "invalid nested repetition operator"
ErrInvalidRepeatSize ErrorCode = "invalid repeat count"
ErrInvalidUTF8 ErrorCode = "invalid UTF-8"
ErrMissingBracket ErrorCode = "missing closing ]"
ErrMissingParen ErrorCode = "missing closing )"
ErrMissingRepeatArgument ErrorCode = "missing argument to repetition operator"
ErrTrailingBackslash ErrorCode = "trailing backslash at end of expression"
ErrUnexpectedParen ErrorCode = "unexpected )"
)
func ( ErrorCode) () string {
return string()
}
// Flags control the behavior of the parser and record information about
// regexp context. Every named option occupies its own bit, so options can be
// combined with |.
type Flags uint16
// The first eight constants are parser options. WasDollar and Simple record
// facts about an already-parsed regexp. MatchNL, Perl and POSIX are
// convenience combinations of the option bits.
const (
FoldCase Flags = 1 << iota // case-insensitive match
Literal // treat pattern as literal string
ClassNL // allow character classes like [^a-z] and [[:space:]] to match newline
DotNL // allow . to match newline
OneLine // treat ^ and $ as only matching at beginning and end of text
NonGreedy // make repetition operators default to non-greedy
PerlX // allow Perl extensions
UnicodeGroups // allow \p{Han}, \P{Han} for Unicode group and negation
WasDollar // regexp OpEndText was $, not \z
Simple // regexp contains no counted repetition
// MatchNL lets both character classes and . match newline.
MatchNL = ClassNL | DotNL
Perl = ClassNL | OneLine | PerlX | UnicodeGroups // as close to Perl as possible
// POSIX has no option bits set.
POSIX Flags = 0 // POSIX syntax
)
// Pseudo-operators that exist only as markers on the parser's stack. They are
// numbered from opPseudo upward; the repetition code rejects a stack top whose
// Op is >= opPseudo with ErrMissingRepeatArgument.
const (
opLeftParen = opPseudo + iota // marker pushed when a group is opened
opVerticalBar // marker pushed between alternatives
)
// parser holds the working state of a regexp parse.
type parser struct {
flags Flags // parse mode flags
stack []*Regexp // stack of parsed expressions
// free heads a list of discarded Regexp nodes kept for reuse; the nodes are
// chained through Sub0[0].
free *Regexp
numCap int // number of capturing groups seen
// wholeRegexp is the text attached to ErrUnexpectedParen errors.
// NOTE(review): presumably the complete pattern being parsed — confirm where
// it is assigned.
wholeRegexp string
tmpClass []rune // temporary char class work space
}
func ( *parser) ( Op) *Regexp {
:= .free
if != nil {
.free = .Sub0[0]
* = Regexp{}
} else {
= new(Regexp)
}
.Op =
return
}
func ( *parser) ( *Regexp) {
.Sub0[0] = .free
.free =
}
if .maybeConcat(.Rune[0], .flags&^FoldCase) {
return nil
}
.Op = OpLiteral
.Rune = .Rune[:1]
.Flags = .flags &^ FoldCase
} else if .Op == OpCharClass && len(.Rune) == 4 &&
.Rune[0] == .Rune[1] && .Rune[2] == .Rune[3] &&
unicode.SimpleFold(.Rune[0]) == .Rune[2] &&
unicode.SimpleFold(.Rune[2]) == .Rune[0] ||
.Op == OpCharClass && len(.Rune) == 2 &&
.Rune[0]+1 == .Rune[1] &&
unicode.SimpleFold(.Rune[0]) == .Rune[1] &&
if .maybeConcat(.Rune[0], .flags|FoldCase) {
return nil
}
.maybeConcat(-1, 0)
}
.stack = append(.stack, )
return
}
func ( rune) rune {
if < minFold || > maxFold {
return
}
:=
:=
for = unicode.SimpleFold(); != ; = unicode.SimpleFold() {
if > {
=
}
}
return
}
return "", &Error{ErrInvalidRepeatOp, [:len()-len()]}
}
}
:= len(.stack)
if == 0 {
return "", &Error{ErrMissingRepeatArgument, [:len()-len()]}
}
:= .stack[-1]
if .Op >= opPseudo {
return "", &Error{ErrMissingRepeatArgument, [:len()-len()]}
}
:= .newRegexp()
.Min =
.Max =
.Flags =
.Sub = .Sub0[:1]
.Sub[0] =
.stack[-1] =
if == OpRepeat && ( >= 2 || >= 2) && !repeatIsValid(, 1000) {
return "", &Error{ErrInvalidRepeatSize, [:len()-len()]}
}
return , nil
}
func ( *parser) () *Regexp {
.maybeConcat(-1, 0)
func ( *Regexp) {
switch .Op {
case OpCharClass:
.Rune = cleanClass(&.Rune)
if len(.Rune) == 2 && .Rune[0] == 0 && .Rune[1] == unicode.MaxRune {
.Rune = nil
.Op = OpAnyChar
return
}
if len(.Rune) == 4 && .Rune[0] == 0 && .Rune[1] == '\n'-1 && .Rune[2] == '\n'+1 && .Rune[3] == unicode.MaxRune {
.Rune = nil
.Op = OpAnyCharNotNL
return
}
func ( *parser) ( []*Regexp, Op) *Regexp {
if len() == 1 {
return [0]
}
:= .newRegexp()
.Sub = .Sub0[:0]
for , := range {
if .Op == {
.Sub = append(.Sub, .Sub...)
.reuse()
} else {
.Sub = append(.Sub, )
}
}
if == OpAlternate {
.Sub = .factor(.Sub)
if len(.Sub) == 1 {
:=
= .Sub[0]
.reuse()
}
}
return
}
= [:]
continue
}
}
}
= append(, [])
=
=
=
}
=
= 0
= [:0]
var *Regexp
var *Regexp
if < len() {
= .leadingRegexp([])
(isCharClass() || (.Op == OpRepeat && .Min == .Max && isCharClass(.Sub[0]))) {
continue
}
}
= append(, [])
:=
for := ; < ; ++ {
:= != // prefix came from sub[start]
[] = .removeLeadingRegexp([], )
}
:= .collapse([:], OpAlternate) // recurse
:= .newRegexp(OpConcat)
.Sub = append(.Sub[:0], , )
= append(, )
}
=
=
}
=
= 0
= [:0]
if < len() && isCharClass([]) {
continue
}
} else if == +1 {
= append(, [])
= 0
= [:0]
for := range {
if +1 < len() && [].Op == OpEmptyMatch && [+1].Op == OpEmptyMatch {
continue
}
= append(, [])
}
=
return
}
func ( *parser) ( *Regexp, bool) *Regexp {
if .Op == OpConcat && len(.Sub) > 0 {
if {
.reuse(.Sub[0])
}
.Sub = .Sub[:copy(.Sub, .Sub[1:])]
switch len(.Sub) {
case 0:
.Op = OpEmptyMatch
.Sub = nil
case 1:
:=
= .Sub[0]
.reuse()
}
return
}
if {
.reuse()
}
return .newRegexp(OpEmptyMatch)
}
func ( string, Flags) *Regexp {
:= &Regexp{Op: OpLiteral}
.Flags =
.Rune = .Rune0[:0] // use local storage for small strings
for , := range {
if := checkUTF8(); != nil {
return nil,
}
return literalRegexp(, ), nil
}
if , = .parsePerlFlags(); != nil {
return nil,
}
break
}
.numCap++
.op(opLeftParen).Cap = .numCap
= [1:]
case '|':
if = .parseVerticalBar(); != nil {
return nil,
}
= [1:]
case ')':
if = .parseRightParen(); != nil {
return nil,
}
= [1:]
case '^':
if .flags&OneLine != 0 {
.op(OpBeginText)
} else {
.op(OpBeginLine)
}
= [1:]
case '$':
if .flags&OneLine != 0 {
.op(OpEndText).Flags |= WasDollar
} else {
.op(OpEndLine)
}
= [1:]
case '.':
if .flags&DotNL != 0 {
.op(OpAnyChar)
} else {
.op(OpAnyCharNotNL)
}
= [1:]
case '[':
if , = .parseClass(); != nil {
return nil,
}
case '*', '+', '?':
:=
switch [0] {
case '*':
= OpStar
case '+':
= OpPlus
case '?':
= OpQuest
}
:= [1:]
if , = .repeat(, 0, 0, , , ); != nil {
return nil,
}
=
=
case '{':
= OpRepeat
:=
, , , := .parseRepeat()
.literal('{')
= [1:]
break
}
return nil, &Error{ErrInvalidRepeatSize, [:len()-len()]}
}
if , = .repeat(, , , , , ); != nil {
return nil,
}
=
=
case '\\':
if .flags&PerlX != 0 && len() >= 2 {
switch [1] {
case 'A':
.op(OpBeginText)
= [2:]
break
case 'b':
.op(OpWordBoundary)
= [2:]
break
case 'B':
.op(OpNoWordBoundary)
= [2:]
break
return nil, &Error{ErrInvalidEscape, [:2]}
if , , = .parseEscape(); != nil {
return nil,
}
.literal()
}
=
}
.concat()
= -1
}
}
if == "" || [0] != '}' {
return
}
= [1:]
= true
return
}
:= strings.IndexRune(, '>')
if < 0 {
if = checkUTF8(); != nil {
return "",
}
return "", &Error{ErrInvalidNamedCapture, }
}
:= [:+1] // "(?P<name>"
:= [4:] // "name"
if = checkUTF8(); != nil {
return "",
}
if !isValidCaptureName() {
return "", &Error{ErrInvalidNamedCapture, }
}
case '-':
if < 0 {
break
}
= ^
= false
case ':', ')':
if < 0 {
if ! {
break
}
= ^
}
.op(opLeftParen)
}
.flags =
return , nil
}
}
return "", &Error{ErrInvalidPerlOp, [:len()-len()]}
}
if len() >= 2 && [0] == '0' && '0' <= [1] && [1] <= '9' {
return
}
:=
for != "" && '0' <= [0] && [0] <= '9' {
= [1:]
}
=
if >= 1e8 {
= -1
break
}
= *10 + int([]) - '0'
}
return
}
if !.swapVerticalBar() {
.op(opVerticalBar)
}
return nil
}
:= len(.stack)
if >= 3 && .stack[-2].Op == opVerticalBar && isCharClass(.stack[-1]) && isCharClass(.stack[-3]) {
:= .stack[-1]
.stack = .stack[:len(.stack)-1]
}
.alternate()
:= len(.stack)
if < 2 {
return &Error{ErrUnexpectedParen, .wholeRegexp}
}
:= .stack[-1]
:= .stack[-2]
.stack = .stack[:-2]
if .Op != opLeftParen {
return &Error{ErrUnexpectedParen, .wholeRegexp}
return , , nil
}
if == "" || [0] < '0' || [0] > '7' {
break
}
fallthrough
case 'a':
return '\a', ,
case 'f':
return '\f', ,
case 'n':
return '\n', ,
case 'r':
return '\r', ,
case 't':
return '\t', ,
case 'v':
return '\v', ,
}
return 0, "", &Error{ErrInvalidEscape, [:len()-len()]}
}
func ( *parser) ( string, []rune) ( []rune, string, error) {
if len() < 2 || [0] != '[' || [1] != ':' {
return
}
:= strings.Index([2:], ":]")
if < 0 {
return
}
+= 2
, := [0:+2], [+2:]
:= posixGroup[]
if .sign == 0 {
return nil, "", &Error{ErrInvalidCharRange, }
}
return .appendGroup(, ), , nil
}
func ( *parser) ( []rune, charGroup) []rune {
if .flags&FoldCase == 0 {
if .sign < 0 {
= appendNegatedClass(, .class)
} else {
= appendClass(, .class)
}
} else {
:= .tmpClass[:0]
= appendFoldedClass(, .class)
.tmpClass =
= cleanClass(&.tmpClass)
if .sign < 0 {
= appendNegatedClass(, )
} else {
= appendClass(, )
}
}
return
}
// anyTable is a range table covering every code point: R16 spans 0 through
// 1<<16-1 and R32 spans 1<<16 through unicode.MaxRune. The code that resolves
// Unicode class names returns it, as both table and fold table, for "Any".
var anyTable = &unicode.RangeTable{
R16: []unicode.Range16{{Lo: 0, Hi: 1<<16 - 1, Stride: 1}},
R32: []unicode.Range32{{Lo: 1 << 16, Hi: unicode.MaxRune, Stride: 1}},
}
if == "Any" {
return anyTable, anyTable
}
if := unicode.Categories[]; != nil {
return , unicode.FoldCategory[]
}
if := unicode.Scripts[]; != nil {
return , unicode.FoldScript[]
}
return nil, nil
}
if != "" && [0] == '^' {
= -
= [1:]
}
, := unicodeTable()
if == nil {
return nil, "", &Error{ErrInvalidCharRange, }
}
if .flags&FoldCase == 0 || == nil {
if > 0 {
= appendTable(, )
} else {
= appendNegatedTable(, )
}
:= .tmpClass[:0]
= appendTable(, )
= appendTable(, )
.tmpClass =
= cleanClass(&.tmpClass)
if > 0 {
= appendClass(, )
} else {
= appendNegatedClass(, )
}
}
return , , nil
}
if != "" && [0] == '-' && .flags&PerlX == 0 && ! && (len() == 1 || [1] != ']') {
, := utf8.DecodeRuneInString([1:])
return "", &Error{Code: ErrInvalidCharRange, Expr: [:1+]}
}
= false
if len() > 2 && [0] == '[' && [1] == ':' {
, , := .parseNamedClass(, )
if != nil {
return "",
}
if != nil {
, = ,
continue
}
}
, , := .parseUnicodeClass(, )
if != nil {
return "",
}
if != nil {
, = ,
continue
}
if , := .parsePerlClassEscape(, ); != nil {
, = ,
continue
}
:=
var , rune
if , , = .parseClassChar(, ); != nil {
return "",
}
if len() >= 2 && [0] == '-' && [1] != ']' {
= [1:]
if , , = .parseClassChar(, ); != nil {
return "",
}
if < {
= [:len()-len()]
return "", &Error{Code: ErrInvalidCharRange, Expr: }
}
}
if .flags&FoldCase == 0 {
= appendRange(, , )
} else {
= appendFoldedRange(, , )
}
}
= [1:] // chop ]
.Rune =
= cleanClass(&.Rune)
if < 0 {
= negateClass()
}
.Rune =
.push()
return , nil
}
:= 2 // write index
for := 2; < len(); += 2 {
, := [], [+1]
if > [-1] {
[-1] =
}
continue
[] =
[+1] =
+= 2
}
return [:]
}
func ( []rune, rune, Flags) []rune {
if &FoldCase != 0 {
return appendFoldedRange(, , )
}
return appendRange(, , )
}
return appendRange(, , )
}
return appendRange(, , )
}
= appendRange(, , minFold-1)
= minFold
}
= appendRange(, maxFold+1, )
= maxFold
}
for := ; <= ; ++ {
= appendRange(, , )
:= unicode.SimpleFold()
for != {
= appendRange(, , )
= unicode.SimpleFold()
}
}
return
}
func ( []rune, []rune) []rune {
for := 0; < len(); += 2 {
= appendRange(, [], [+1])
}
return
}
func ( []rune, []rune) []rune {
for := 0; < len(); += 2 {
= appendFoldedRange(, [], [+1])
}
return
}
func ( []rune, []rune) []rune {
:= '\u0000'
for := 0; < len(); += 2 {
, := [], [+1]
if <= -1 {
= appendRange(, , -1)
}
= + 1
}
if <= unicode.MaxRune {
= appendRange(, , unicode.MaxRune)
}
return
}
func ( []rune, *unicode.RangeTable) []rune {
for , := range .R16 {
, , := rune(.Lo), rune(.Hi), rune(.Stride)
if == 1 {
= appendRange(, , )
continue
}
for := ; <= ; += {
= appendRange(, , )
}
}
for , := range .R32 {
, , := rune(.Lo), rune(.Hi), rune(.Stride)
if == 1 {
= appendRange(, , )
continue
}
for := ; <= ; += {
= appendRange(, , )
}
}
return
}
func ( []rune, *unicode.RangeTable) []rune {
:= '\u0000' // lo end of next class to add
for , := range .R16 {
, , := rune(.Lo), rune(.Hi), rune(.Stride)
if == 1 {
if <= -1 {
= appendRange(, , -1)
}
= + 1
continue
}
for := ; <= ; += {
if <= -1 {
= appendRange(, , -1)
}
= + 1
}
}
for , := range .R32 {
, , := rune(.Lo), rune(.Hi), rune(.Stride)
if == 1 {
if <= -1 {
= appendRange(, , -1)
}
= + 1
continue
}
for := ; <= ; += {
if <= -1 {
= appendRange(, , -1)
}
= + 1
}
}
if <= unicode.MaxRune {
= appendRange(, , unicode.MaxRune)
}
return
}
// ranges implements sort.Interface on a []rune that is interpreted as a
// sequence of (lo, hi) pairs. Element i of the sequence occupies slice
// positions 2*i and 2*i+1.
type ranges struct {
	p *[]rune
}

// Less orders pairs by increasing lo; pairs with equal lo are ordered by
// decreasing hi, so the widest range comes first.
func (ra ranges) Less(i, j int) bool {
	p := *ra.p
	i *= 2
	j *= 2
	return p[i] < p[j] || p[i] == p[j] && p[i+1] > p[j+1]
}

// Len returns the number of (lo, hi) pairs.
func (ra ranges) Len() int {
	return len(*ra.p) / 2
}

// Swap exchanges pairs i and j.
func (ra ranges) Swap(i, j int) {
	p := *ra.p
	i *= 2
	j *= 2
	p[i], p[i+1], p[j], p[j+1] = p[j], p[j+1], p[i], p[i+1]
}
func ( string) error {
for != "" {
, := utf8.DecodeRuneInString()
if == utf8.RuneError && == 1 {
return &Error{Code: ErrInvalidUTF8, Expr: }
}
= [:]
}
return nil
}
func ( string) ( rune, string, error) {
, := utf8.DecodeRuneInString()
if == utf8.RuneError && == 1 {
return 0, "", &Error{Code: ErrInvalidUTF8, Expr: }
}
return , [:], nil
}
// isalnum reports whether c is an ASCII digit or an ASCII letter.
func isalnum(c rune) bool {
	return '0' <= c && c <= '9' || 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z'
}
// unhex returns the numeric value of the hexadecimal digit c (0 through 15),
// accepting both lower- and upper-case letters. It returns -1 if c is not a
// hexadecimal digit.
func unhex(c rune) rune {
	if '0' <= c && c <= '9' {
		return c - '0'
	}
	if 'a' <= c && c <= 'f' {
		return c - 'a' + 10
	}
	if 'A' <= c && c <= 'F' {
		return c - 'A' + 10
	}
	return -1
}
The pages are generated with Golds v0.3.2-preview (GOOS=darwin GOARCH=amd64). Golds is a Go 101 project developed by Tapir Liu. PRs and bug reports are welcome and can be submitted to the issue list. Please follow @Go100and1 (reachable from the QR code on the left) to get the latest news of Golds.