Copyright 2020 The Go Authors. All rights reserved. Use of this source code is governed by a BSD-style license that can be found in the LICENSE file.
License regexp syntax and parsing See LRE doc comment in regexp.go for syntax.

package match

import (
	
	
	
	
	
)
A SyntaxError reports a syntax error during parsing.
type SyntaxError struct {
	File    string
	Offset  int
	Context string
	Err     string
}

func ( *SyntaxError) () string {
	var  string
	if .File != "" {
		 = fmt.Sprintf("%s:#%d: syntax error", .File, .Offset)
	} else {
		 = fmt.Sprintf("syntax error at offset %d", .Offset)
	}
	if .Context != "" {
		 += " near `" + .Context + "`"
	}
	 += ": " + .Err
	return 
}
reSyntaxError returns a *SyntaxError with context.
func ( string,  int,  error) error {
	 := [:]
	 :=  - 10
	for  >= 0 && [] != ' ' {
		--
	}
	if  >= 0 && - < 20 {
		 = [+1 : ]
	} else {
		if  < 0 {
			 = 0
		}
		 = [:]
	}
	return &SyntaxError{
		Offset:  ,
		Context: ,
		Err:     .Error(),
	}
}
A reSyntax is a regexp syntax tree.
type reSyntax struct {
	op  reOp        // opcode
	sub []*reSyntax // subexpressions (opConcat, opAlternate, opWild, opQuest)
	w   []WordID    // words (opWords)
	n   int32       // wildcard count (opWild)
}
A reOp is the opcode for a regexp syntax tree node.
pseudo-ops during parsing
string returns a text form for the regexp syntax. The dictionary d supplies the word literals.
func ( *reSyntax) ( *Dict) string {
	var  bytes.Buffer
	rePrint(&, , )
	return strings.Trim(.String(), "\n")
}
nl guarantees b ends with a complete, non-empty line with no trailing spaces or has no lines at all.
func ( *bytes.Buffer) {
	 := .Bytes()
	if len() == 0 || [len()-1] == '\n' {
		return
	}
	 := len()
	for  > 0 && [-1] == ' ' {
		--
	}
	if  < len() {
		.Truncate()
	}
	.WriteByte('\n')
}
rePrint prints re to b, using d for words.
func ( *bytes.Buffer,  *reSyntax,  *Dict) {
	switch .op {
	case opEmpty:
		.WriteString("(( ))")
	case opConcat:
		if len(.sub) == 0 {
			.WriteString("«empty concat»")
		}
		for ,  := range .sub {
			if  > 0 && .Len() > 0 && .Bytes()[.Len()-1] != '\n' {
				.WriteString(" ")
			}
			(, , )
		}

	case opAlternate:
		nl()
		.WriteString("((")
		for ,  := range .sub {
			if  > 0 {
				.WriteString(" || ")
			}
			(, , )
		}
		.WriteString("))\n")

	case opWild:
		fmt.Fprintf(, "__%d__", .n)

	case opQuest:
		 := .sub[0]
		nl()
		if .op == opAlternate {
			(, , )
			.Truncate(.Len() - 1) // strip \n
		} else {
			.WriteString("((")
			(, , )
			.WriteString("))")
		}
		.WriteString("??\n")

	case opWords:
		if len(.w) == 0 {
			.WriteString("«empty opWords»")
		}
		for ,  := range .w {
			if  > 0 && .Len() > 0 && .Bytes()[.Len()-1] != '\n' {
				.WriteString(" ")
			}
			 := .Words()[]
			if  == "" {
				.WriteString("''")
			} else {
				.WriteString(.Words()[])
			}
		}
	}
}
A reParser is the regexp parser state.
type reParser struct {
	dict  *Dict
	stack []*reSyntax
}
reParse parses a license regexp s and returns a reSyntax parse tree. reParse adds words to the dictionary d, so it is not safe to call reParse from concurrent goroutines using the same dictionary. If strict is false, the rules about operators at the start or end of line are ignored, to make trivial test expressions easier to write.
func ( *Dict,  string,  bool) (*reSyntax, error) {
	var  reParser
	.dict = 

	 := 0
	 := 0
	 := 0
	for  < len() {
		switch {
		case strings.HasPrefix([:], "(("):
			if  && !atBOL(, ) {
				return nil, reSyntaxError(, , fmt.Errorf("(( not at beginning of line"))
			}
			.words([:], "((")
			.push(&reSyntax{op: opLeftParen})
			 += 2
			 = 
			++

		case strings.HasPrefix([:], "||"):
			if  &&  == 0 {
				return nil, reSyntaxError(, , fmt.Errorf("|| outside (( ))"))
			}
			.words([:], "||")
			if  := .verticalBar();  != nil {
				return nil, reSyntaxError(, , )
			}
			 += 2
			 = 

)) must be followed by ?? or end line
			if  {
				 :=  + 2
				for  < len() && ([] == ' ' || [] == '\t') {
					++
				}
				if  < len() && [] != '\n' && (+1 >= len() || [] != '?' || [+1] != '?') {
					return nil, reSyntaxError(, , fmt.Errorf(")) not at end of line"))
				}
			}

			.words([:], "))")
			if  := .rightParen();  != nil {
				return nil, reSyntaxError(, , )
			}
			 += 2
			 = 
			--

?? must be preceded by )) on same line and must end the line.
			if  {
				 := 
				for  > 0 && ([-1] == ' ' || [-1] == '\t') {
					--
				}
				if  < 2 || [-1] != ')' || [-2] != ')' {
					return nil, reSyntaxError(, , fmt.Errorf("?? not preceded by ))"))
				}
			}
			if  && !atEOL(, +2) {
				return nil, reSyntaxError(, , fmt.Errorf("?? not at end of line"))
			}

			.words([:], "??")
			if  := .quest();  != nil {
				return nil, reSyntaxError(, , )
			}
			 += 2
			 = 

		case strings.HasPrefix([:], "__"):
			 :=  + 2
			for  < len() && '0' <= [] && [] <= '9' {
				++
			}
			if  == +2 {
				++
				continue
			}
			if !strings.HasPrefix([:], "__") {
				++
				continue
			}
			,  := strconv.Atoi([+2 : ])
			if  != nil {
				return nil, reSyntaxError(, , errors.New("invalid wildcard count "+[:+2]))
			}
			.words([:], "__")
			.push(&reSyntax{op: opWild, n: int32()})
			 =  + 2
			 = 

		case strings.HasPrefix([:], "//**"):
			 := strings.Index([+4:], "**//")
			if  < 0 {
				return nil, reSyntaxError(, , errors.New("opening //** without closing **//"))
			}
			.words([:], "//** **//")
			 += 4 +  + 4
			 = 

		default:
			++
		}
	}

	.words([:], "")
	.concat()
pop vertical bar
		.stack = .stack[:len(.stack)-1]
	}
	.alternate()

	 := len(.stack)
	if  != 1 {
		return nil, reSyntaxError(, len(), fmt.Errorf("missing )) at end"))
	}
	return .stack[0], nil
}
atBOL reports whether i is at the beginning of a line (ignoring spaces) in s.
func ( string,  int) bool {
	for  > 0 && ([-1] == ' ' || [-1] == '\t') {
		--
	}
	return  == 0 || [-1] == '\n'
}
atEOL reports whether i is at the end of a line (ignoring spaces) in s.
func ( string,  int) bool {
	for  < len() && ([] == ' ' || [] == '\t' || [] == '\r') {
		++
	}
	return  >= len() || [] == '\n'
}
push pushes the regexp re onto the parse stack and returns the regexp.
func ( *reParser) ( *reSyntax) *reSyntax {
	.stack = append(.stack, )
	return 
}
words handles a block of words in the input.
func ( *reParser) (,  string) {
	 := .dict.InsertSplit()
	if len() == 0 {
		return
	}
If the next operator is ??, we need to keep the last word separate, so that the ?? will only apply to that word. There are no other operators that grab the last word.
	var  Word
	if  == "??" {
		,  = [:len()-1], [len()-1]
	}
Add the words (with last possibly removed) into an opWords. If there's one atop the stack, use it. Otherwise, add one.
	if len() > 0 {
		var  *reSyntax
		if len(.stack) > 0 && .stack[len(.stack)-1].op == opWords {
			 = .stack[len(.stack)-1]
		} else {
			 = .push(&reSyntax{op: opWords})
		}
		for ,  := range  {
			.w = append(.w, .ID)
		}
	}
Add the last word if needed.
	if  == "??" {
		.stack = append(.stack, &reSyntax{op: opWords, w: []WordID{.ID}})
	}
}
verticalBar handles a || in the input.
func ( *reParser) () error {
	.concat()
The concatenation we just parsed is on top of the stack. If it sits above an opVerticalBar, swap it below (things below an opVerticalBar become an alternation). Otherwise, push a new vertical bar.
	if !.swapVerticalBar() {
		.push(&reSyntax{op: opVerticalBar})
	}

	return nil
}
If the top of the stack is an element followed by an opVerticalBar swapVerticalBar swaps the two and returns true. Otherwise it returns false.
func ( *reParser) () bool {
	 := len(.stack)
	if  >= 2 {
		 := .stack[-1]
		 := .stack[-2]
		if .op == opVerticalBar {
			.stack[-2] = 
			.stack[-1] = 
			return true
		}
	}
	return false
}
rightParen handles a )) in the input.
func ( *reParser) () error {
	.concat()
pop vertical bar
		.stack = .stack[:len(.stack)-1]
	}
	.alternate()

	 := len(.stack)
	if  < 2 {
		return fmt.Errorf("unexpected ))")
	}
	 := .stack[-1]
	 := .stack[-2]
	.stack = .stack[:-2]
	if .op != opLeftParen {
		return fmt.Errorf("unexpected ))")
	}

	.push()
	return nil
}
quest replaces the top stack element with itself made optional.
func ( *reParser) () error {
	 := len(.stack)
	if  == 0 {
		return fmt.Errorf("missing argument to ??")
	}
	 := .stack[-1]
	if .op >= opPseudo {
		return fmt.Errorf("missing argument to ??")
	}
Repeated ?? don't accomplish anything new.
		return nil
	}

	.stack[-1] = &reSyntax{op: opQuest, sub: []*reSyntax{}}
	return nil
}
concat replaces the top of the stack (above the topmost '||' or '((') with its concatenation.
Scan down to find pseudo-operator || or ((.
	 := len(.stack)
	for  > 0 && .stack[-1].op < opPseudo {
		--
	}
	 := .stack[:]
	.stack = .stack[:]
Empty concatenation is special case.
	if len() == 0 {
		return .push(&reSyntax{op: opEmpty})
	}

	return .push(.collapse(opConcat, ))
}
alternate replaces the top of the stack (above the topmost '((') with its alternation.
Scan down to find pseudo-operator ((. There are no || above ((.
	 := len(.stack)
	for  > 0 && .stack[-1].op < opPseudo {
		--
	}
	 := .stack[:]
	.stack = .stack[:]

	return .push(.collapse(opAlternate, ))
}
collapse returns the result of applying op to sub. If sub contains op nodes, they all get hoisted up so that there is never a concat of a concat or an alternate of an alternate.
func ( *reParser) ( reOp,  []*reSyntax) *reSyntax {
	if len() == 1 {
		return [0]
	}
	 := &reSyntax{op: }
	for ,  := range  {
		if .op ==  {
			.sub = append(.sub, .sub...)
		} else {
			.sub = append(.sub, )
		}
	}
	return 
}
leadingPhrases returns the set of possible initial phrases in any match of the given re syntax.
func ( *reSyntax) () []phrase {
	switch .op {
	default:
		panic("bad op in phrases")

	case opWild:
		return []phrase{{BadWord, BadWord}, {AnyWord, BadWord}, {AnyWord, AnyWord}}

	case opEmpty:
		return []phrase{{BadWord, BadWord}}

	case opWords:
		 := .w
		var  phrase
		if len() == 0 {
			 = phrase{BadWord, BadWord}
		} else if len() == 1 {
			 = phrase{[0], BadWord}
		} else {
			 = phrase{[0], [1]}
		}
		return []phrase{}

	case opQuest:
		 := .sub[0].()
		for ,  := range  {
			if [0] == BadWord {
				return 
			}
		}
		 = append(, phrase{BadWord, BadWord})
		return 

	case opAlternate:
		var  []phrase
		 := make(map[phrase]bool)
		for ,  := range .sub {
			for ,  := range .() {
				if ![] {
					[] = true
					 = append(, )
				}
			}
		}
		return 

	case opConcat:
		 := []phrase{{BadWord, BadWord}}
		for ,  := range .sub {
			 := true
			for ,  := range  {
				if [1] == BadWord {
					 = false
				}
			}
			if  {
				break
			}
			 := .()
			 := make(map[phrase]bool)
			var  []phrase
			for ,  := range  {
				if [1] != BadWord {
					if ![] {
						[] = true
						 = append(, )
					}
					continue
				}
				for ,  := range  {
					var  phrase
					if [0] == BadWord {
						 = 
					} else {
						 = phrase{[0], [0]}
					}
					if ![] {
						[] = true
						 = append(, )
					}
				}
			}
			 = 
		}
		return 
	}