// Copyright 2020 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Exported LRE interface.

package match

import (
	"fmt"
)
// An LRE is a compiled license regular expression.
//
// TODO: Move this comment somewhere non-internal later.
//
// A license regular expression (LRE) is a pattern syntax intended for
// describing large English texts such as software licenses, with minor
// allowed variations. The pattern syntax and the matching are word-based
// and case-insensitive; punctuation is ignored in the pattern and in
// the matched text.
//
// The valid LRE patterns are:
//
//	word            - a single case-insensitive word
//	__N__           - any sequence of up to N words
//	expr1 expr2     - concatenation
//	expr1 || expr2  - alternation
//	(( expr ))      - grouping
//	expr??          - zero or one instances of expr
//	/* text */      - a comment
//
// To make patterns harder to misread in large texts:
//
//	- || must only appear inside (( ))
//	- ?? must only follow (( ))
//	- (( must be at the start of a line, preceded only by spaces
//	- )) must be at the end of a line, followed only by spaces and ??.
//
// For example:
//
//	/* https://en.wikipedia.org/wiki/Filler_text */
//	Now is
//	((not))??
//	the time for all good
//	((men || women || people))
//	to come to the aid of their
//	__1__.
ParseLRE parses the string s as a license regexp. The file name is used in error messages if non-empty.
func ( *Dict, ,  string) (*LRE, error) {
	,  := reParse(, , true)
	if  != nil {
		return nil, 
	}
	,  := .compile(nil, 0)
	if  != nil {
		return nil, 
	}
	return &LRE{dict: , file: , syntax: , prog: }, nil
}
Dict returns the Dict used by the LRE.
func ( *LRE) () *Dict {
	return .dict
}
File returns the file name passed to ParseLRE.
func ( *LRE) () string {
	return .file
}
Match reports whether text matches the license regexp.
func ( *LRE) ( string) bool {
	.onceDFA.Do(.compile)
	,  := .dfa.match(.dict, , .dict.Split())
	return  >= 0
}
compile initializes lre.dfa. It is invoked lazily (in Match) because most LREs end up only being inputs to a MultiLRE; we never need their DFAs directly.
func ( *LRE) () {
	.dfa = reCompileDFA(.prog)
}
A MultiLRE matches multiple LREs simultaneously against a text. It is more efficient than matching each LRE in sequence against the text.
type MultiLRE struct {
	dict *Dict // dict shared by all LREs
	dfa  reDFA // compiled DFA for all LREs
start contains the two-word phrases where a match can validly start, to allow for faster scans over non-license text.
	start map[phrase]struct{}
}
A phrase is a phrase of up to two words. The zero-word phrase is phrase{NoWord, NoWord}. A single-word phrase w is phrase{w, NoWord}.
type phrase [2]WordID
NewMultiLRE returns a MultiLRE looking for the given LREs. All the LREs must have been parsed using the same Dict; if not, NewMultiLRE panics.
func ( []*LRE) ( *MultiLRE,  error) {
	if len() == 0 {
		return &MultiLRE{}, nil
	}

	 := [0].dict
	for ,  := range [1:] {
		if .dict !=  {
			panic("MultiRE: LREs parsed with different Dicts")
		}
	}

	var  []reProg
	for ,  := range  {
		 = append(, .prog)
	}

	 := make(map[phrase]struct{})
	for ,  := range  {
		 := .syntax.leadingPhrases()
		if len() == 0 {
			return nil, fmt.Errorf("%s: no leading phrases", .File())
		}
		for ,  := range  {
			if [0] == BadWord {
				return nil, fmt.Errorf("%s: invalid pattern: matches empty text", .File())
			}
			if [0] == AnyWord {
				if [1] == BadWord {
					return nil, fmt.Errorf("%s: invalid pattern: matches a single wildcard", .File())
				}
				if [1] == AnyWord {
					return nil, fmt.Errorf("%s: invalid pattern: begins with two wildcards", .File())
				}
				return nil, fmt.Errorf("%s: invalid pattern: begins with wildcard phrase: __ %s", .File(), .Words()[[1]])
			}
			if [1] == BadWord {
				return nil, fmt.Errorf("%s: invalid pattern: matches single word %s", .File(), .Words()[[0]])
			}
			if [1] == AnyWord {
				return nil, fmt.Errorf("%s: invalid pattern: begins with wildcard phrase: %s __", .File(), .Words()[[0]])
			}
			[] = struct{}{}
		}
	}

	 := reCompileMulti()
	 := reCompileDFA()

	return &MultiLRE{, , }, nil
}
Dict returns the Dict used by the MultiLRE.
func ( *MultiLRE) () *Dict {
	return .dict
}
A Matches is a collection of all leftmost-longest, non-overlapping matches in text.
type Matches struct {
	Text  string  // the entire text
	Words []Word  // the text, split into Words
	List  []Match // the matches
}
// A Match records the position of a single match in a text.
type Match struct {
	ID    int // index of LRE in list passed to NewMultiLRE
	Start int // word index of start of match
	End   int // word index of end of match
}
// Match reports all leftmost-longest, non-overlapping matches in text.
// It always returns a non-nil *Matches, in order to return the split text.
// Check len(matches.List) to see whether any matches were found.
func ( *MultiLRE) ( string) *Matches {
	 := &Matches{
		Text:  ,
		Words: .dict.Split(),
	}
	 := phrase{BadWord, BadWord}
	for  := 0;  < len(.Words); ++ {
		[0], [1] = [1], .Words[].ID
		if ,  := .start[];  {
			,  := .dfa.match(.dict, , .Words[-1:])
			if  >= 0 &&  > 0 {
				 +=  - 1 // translate from index in m.Words[i-1:] to index in m.Words
				.List = append(.List, Match{ID: int(), Start:  - 1, End: })
Continue search at end of match.
				 =  - 1 // loop will i++
				[0] = BadWord
				continue
			}
		}
	}
	return