Copyright 2014 The Go Authors. All rights reserved. Use of this source code is governed by a BSD-style license that can be found in the LICENSE file.

package regexp

import (
	
	
	
	
)
"One-pass" regexp execution. Some regexps can be analyzed to determine that they never need backtracking: they are guaranteed to run in one pass over the string without bothering to save all the usual NFA state. Detect those and execute them more quickly.
A onePassProg is a compiled one-pass regular expression program. It is the same as syntax.Prog except for the use of onePassInst.
type onePassProg struct {
	Inst   []onePassInst
	Start  int // index of start instruction
	NumCap int // number of InstCapture insts in re
}
A onePassInst is a single instruction in a one-pass regular expression program. It is the same as syntax.Inst except for the new 'Next' field.
type onePassInst struct {
	syntax.Inst
	Next []uint32
}
OnePassPrefix returns a literal string that all matches for the regexp must start with. Complete is true if the prefix is the entire match. Pc is the index of the last rune instruction in the string. The OnePassPrefix skips over the mandatory EmptyBeginText
func ( *syntax.Prog) ( string,  bool,  uint32) {
	 := &.Inst[.Start]
	if .Op != syntax.InstEmptyWidth || (syntax.EmptyOp(.Arg))&syntax.EmptyBeginText == 0 {
		return "", .Op == syntax.InstMatch, uint32(.Start)
	}
	 = .Out
	 = &.Inst[]
	for .Op == syntax.InstNop {
		 = .Out
		 = &.Inst[]
Avoid allocation of buffer if prefix is empty.
	if iop() != syntax.InstRune || len(.Rune) != 1 {
		return "", .Op == syntax.InstMatch, uint32(.Start)
	}
Have prefix; gather characters.
	var  strings.Builder
	for iop() == syntax.InstRune && len(.Rune) == 1 && syntax.Flags(.Arg)&syntax.FoldCase == 0 {
		.WriteRune(.Rune[0])
		,  = .Out, &.Inst[.Out]
	}
	if .Op == syntax.InstEmptyWidth &&
		syntax.EmptyOp(.Arg)&syntax.EmptyEndText != 0 &&
		.Inst[.Out].Op == syntax.InstMatch {
		 = true
	}
	return .String(), , 
}
OnePassNext selects the next actionable state of the prog, based on the input character. It should only be called when i.Op == InstAlt or InstAltMatch, and from the one-pass machine. One of the alternates may ultimately lead without input to end of line. If the instruction is InstAltMatch the path to the InstMatch is in i.Out, the normal node in i.Next.
func ( *onePassInst,  rune) uint32 {
	 := .MatchRunePos()
	if  >= 0 {
		return .Next[]
	}
	if .Op == syntax.InstAltMatch {
		return .Out
	}
	return 0
}

func ( *syntax.Inst) syntax.InstOp {
	 := .Op
	switch  {
	case syntax.InstRune1, syntax.InstRuneAny, syntax.InstRuneAnyNotNL:
		 = syntax.InstRune
	}
	return 
}
Sparse Array implementation is used as a queueOnePass.
type queueOnePass struct {
	sparse          []uint32
	dense           []uint32
	size, nextIndex uint32
}

func ( *queueOnePass) () bool {
	return .nextIndex >= .size
}

func ( *queueOnePass) () ( uint32) {
	 = .dense[.nextIndex]
	.nextIndex++
	return
}

func ( *queueOnePass) () {
	.size = 0
	.nextIndex = 0
}

func ( *queueOnePass) ( uint32) bool {
	if  >= uint32(len(.sparse)) {
		return false
	}
	return .sparse[] < .size && .dense[.sparse[]] == 
}

func ( *queueOnePass) ( uint32) {
	if !.contains() {
		.insertNew()
	}
}

func ( *queueOnePass) ( uint32) {
	if  >= uint32(len(.sparse)) {
		return
	}
	.sparse[] = .size
	.dense[.size] = 
	.size++
}

func ( int) ( *queueOnePass) {
	return &queueOnePass{
		sparse: make([]uint32, ),
		dense:  make([]uint32, ),
	}
}
mergeRuneSets merges two non-intersecting runesets, and returns the merged result, and a NextIp array. The idea is that if a rune matches the OnePassRunes at index i, NextIp[i/2] is the target. If the input sets intersect, an empty runeset and a NextIp array with the single element mergeFailed is returned. The code assumes that both inputs contain ordered and non-intersecting rune pairs.
const mergeFailed = uint32(0xffffffff)

var (
	noRune = []rune{}
	noNext = []uint32{mergeFailed}
)

func (,  *[]rune, ,  uint32) ([]rune, []uint32) {
	 := len(*)
	 := len(*)
	if &0x1 != 0 || &0x1 != 0 {
		panic("mergeRuneSets odd length []rune")
	}
	var (
		,  int
	)
	 := make([]rune, 0)
	 := make([]uint32, 0)
	 := true
	defer func() {
		if ! {
			 = nil
			 = nil
		}
	}()

	 := -1
	 := func( *int,  *[]rune,  uint32) bool {
		if  > 0 && (*)[*] <= [] {
			return false
		}
		 = append(, (*)[*], (*)[*+1])
		* += 2
		 += 2
		 = append(, )
		return true
	}

	for  <  ||  <  {
		switch {
		case  >= :
			 = (&, , )
		case  >= :
			 = (&, , )
		case (*)[] < (*)[]:
			 = (&, , )
		default:
			 = (&, , )
		}
		if ! {
			return noRune, noNext
		}
	}
	return , 
}
cleanupOnePass drops working memory, and restores certain shortcut instructions.
func ( *onePassProg,  *syntax.Prog) {
	for ,  := range .Inst {
		switch .Op {
		case syntax.InstAlt, syntax.InstAltMatch, syntax.InstRune:
		case syntax.InstCapture, syntax.InstEmptyWidth, syntax.InstNop, syntax.InstMatch, syntax.InstFail:
			.Inst[].Next = nil
		case syntax.InstRune1, syntax.InstRuneAny, syntax.InstRuneAnyNotNL:
			.Inst[].Next = nil
			.Inst[] = onePassInst{Inst: }
		}
	}
}
onePassCopy creates a copy of the original Prog, as we'll be modifying it
func ( *syntax.Prog) *onePassProg {
	 := &onePassProg{
		Start:  .Start,
		NumCap: .NumCap,
		Inst:   make([]onePassInst, len(.Inst)),
	}
	for ,  := range .Inst {
		.Inst[] = onePassInst{Inst: }
	}
rewrites one or more common Prog constructs that enable some otherwise non-onepass Progs to be onepass. A:BD (for example) means an InstAlt at ip A, that points to ips B & C. A:BC + B:DA => A:BC + B:CD A:BC + B:DC => A:DC + B:DC
	for  := range .Inst {
		switch .Inst[].Op {
		default:
			continue
A:Bx + B:Ay
			 := &.Inst[].Out
make sure a target is another Alt
			 := .Inst[*]
			if !(.Op == syntax.InstAlt || .Op == syntax.InstAltMatch) {
				,  = , 
				 = .Inst[*]
				if !(.Op == syntax.InstAlt || .Op == syntax.InstAltMatch) {
					continue
				}
			}
Analyzing both legs pointing to Alts is for another day
too complicated
				continue
simple empty transition loop A:BC + B:DA => A:BC + B:DC
			 := &.Inst[*].Out
			 := &.Inst[*].Arg
			 := false
			if .Out == uint32() {
				 = true
			} else if .Arg == uint32() {
				 = true
				,  = , 
			}
			if  {
				* = *
			}
empty transition to common target A:BC + B:DC => A:DC + B:DC
			if * == * {
				* = *
			}
		}
	}
	return 
}
runeSlice exists to permit sorting the case-folded rune sets.
type runeSlice []rune

func ( runeSlice) () int           { return len() }
func ( runeSlice) (,  int) bool { return [] < [] }
func ( runeSlice) (,  int)      { [], [] = [], [] }

var anyRuneNotNL = []rune{0, '\n' - 1, '\n' + 1, unicode.MaxRune}
var anyRune = []rune{0, unicode.MaxRune}
makeOnePass creates a onepass Prog, if possible. It is possible if at any alt, the match engine can always tell which branch to take. The routine may modify p if it is turned into a onepass Prog. If it isn't possible for this to be a onepass Prog, the Prog nil is returned. makeOnePass is recursive to the size of the Prog.
If the machine is very long, it's not worth the time to check if we can use one pass.
	if len(.Inst) >= 1000 {
		return nil
	}

	var (
		    = newQueue(len(.Inst))
		   = newQueue(len(.Inst))
		        func(uint32, []bool) bool
		 = make([][]rune, len(.Inst))
	)
check that paths from Alt instructions are unambiguous, and rebuild the new program as a onepass program
	 = func( uint32,  []bool) ( bool) {
		 = true
		 := &.Inst[]
		if .contains() {
			return
		}
		.insert()
		switch .Op {
		case syntax.InstAlt, syntax.InstAltMatch:
check no-input paths to InstMatch
			 := [.Out]
			 := [.Arg]
			if  &&  {
				 = false
				break
Match on empty goes in inst.Out
			if  {
				.Out, .Arg = .Arg, .Out
				,  = , 
			}
			if  {
				[] = true
				.Op = syntax.InstAltMatch
			}
build a dispatch operator from the two legs of the alt.
			[], .Next = mergeRuneSets(
				&[.Out], &[.Arg], .Out, .Arg)
			if len(.Next) > 0 && .Next[0] == mergeFailed {
				 = false
				break
			}
		case syntax.InstCapture, syntax.InstNop:
			 = (.Out, )
pass matching runes back through these no-ops.
			[] = append([]rune{}, [.Out]...)
			.Next = make([]uint32, len([])/2+1)
			for  := range .Next {
				.Next[] = .Out
			}
		case syntax.InstEmptyWidth:
			 = (.Out, )
			[] = [.Out]
			[] = append([]rune{}, [.Out]...)
			.Next = make([]uint32, len([])/2+1)
			for  := range .Next {
				.Next[] = .Out
			}
		case syntax.InstMatch, syntax.InstFail:
			[] = .Op == syntax.InstMatch
		case syntax.InstRune:
			[] = false
			if len(.Next) > 0 {
				break
			}
			.insert(.Out)
			if len(.Rune) == 0 {
				[] = []rune{}
				.Next = []uint32{.Out}
				break
			}
			 := make([]rune, 0)
			if len(.Rune) == 1 && syntax.Flags(.Arg)&syntax.FoldCase != 0 {
				 := .Rune[0]
				 = append(, , )
				for  := unicode.SimpleFold();  != ;  = unicode.SimpleFold() {
					 = append(, , )
				}
				sort.Sort(runeSlice())
			} else {
				 = append(, .Rune...)
			}
			[] = 
			.Next = make([]uint32, len([])/2+1)
			for  := range .Next {
				.Next[] = .Out
			}
			.Op = syntax.InstRune
		case syntax.InstRune1:
			[] = false
			if len(.Next) > 0 {
				break
			}
			.insert(.Out)
expand case-folded runes
			if syntax.Flags(.Arg)&syntax.FoldCase != 0 {
				 := .Rune[0]
				 = append(, , )
				for  := unicode.SimpleFold();  != ;  = unicode.SimpleFold() {
					 = append(, , )
				}
				sort.Sort(runeSlice())
			} else {
				 = append(, .Rune[0], .Rune[0])
			}
			[] = 
			.Next = make([]uint32, len([])/2+1)
			for  := range .Next {
				.Next[] = .Out
			}
			.Op = syntax.InstRune
		case syntax.InstRuneAny:
			[] = false
			if len(.Next) > 0 {
				break
			}
			.insert(.Out)
			[] = append([]rune{}, anyRune...)
			.Next = []uint32{.Out}
		case syntax.InstRuneAnyNotNL:
			[] = false
			if len(.Next) > 0 {
				break
			}
			.insert(.Out)
			[] = append([]rune{}, anyRuneNotNL...)
			.Next = make([]uint32, len([])/2+1)
			for  := range .Next {
				.Next[] = .Out
			}
		}
		return
	}

	.clear()
	.insert(uint32(.Start))
	 := make([]bool, len(.Inst))
	for !.empty() {
		.clear()
		 := .next()
		if !(, ) {
			 = nil
			break
		}
	}
	if  != nil {
		for  := range .Inst {
			.Inst[].Rune = []
		}
	}
	return 
}
compileOnePass returns a new *syntax.Prog suitable for onePass execution if the original Prog can be recharacterized as a one-pass regexp program, or syntax.nil if the Prog cannot be converted. For a one pass prog, the fundamental condition that must be true is: at any InstAlt, there must be no ambiguity about what branch to take.
func ( *syntax.Prog) ( *onePassProg) {
	if .Start == 0 {
		return nil
onepass regexp is anchored
every instruction leading to InstMatch must be EmptyEndText
	for ,  := range .Inst {
		 := .Inst[.Out].Op
		switch .Op {
		default:
			if  == syntax.InstMatch {
				return nil
			}
		case syntax.InstAlt, syntax.InstAltMatch:
			if  == syntax.InstMatch || .Inst[.Arg].Op == syntax.InstMatch {
				return nil
			}
		case syntax.InstEmptyWidth:
			if  == syntax.InstMatch {
				if syntax.EmptyOp(.Arg)&syntax.EmptyEndText == syntax.EmptyEndText {
					continue
				}
				return nil
			}
		}
Creates a slightly optimized copy of the original Prog that cleans up some Prog idioms that block valid onepass programs
	 = onePassCopy()
checkAmbiguity on InstAlts, build onepass Prog if possible
	 = makeOnePass()

	if  != nil {
		cleanupOnePass(, )
	}
	return