// Copyright 2020 The Go Authors. All rights reserved. Use of this source code is governed by a BSD-style license that can be found in the LICENSE file.

package licensecheck

import (
	
	
	
	
	
	

	
)

// builtinScanner is initialized lazily, because init is fairly expensive, and delaying it lets us see the init in test cpu profiles.
// BuiltinLicenses returns the list of licenses built into the package. That is, the built-in checker is equivalent to New(BuiltinLicenses()).
// Return a copy so caller cannot change list entries.
	 := append([]License{}, builtinLREs...)
	 := make(map[string]Type)
	for ,  := range  {
		[.ID] = .Type
	}
// Fill in Type from builtinLREs.
		if ,  := [.ID];  {
			.Type = 
		} else {
			.Type = Unknown
		}
		 = append(, )
	}
	return 
}
// A Scanner matches a set of known licenses.
// NewScanner returns a new Scanner that recognizes the given set of licenses.
// NOTE(review): the function name, the parameter name, and every local
// identifier are missing from this text — presumably this is
// NewScanner(licenses []License). Restore them before compiling.
//
// The visible code allocates a Scanner with new, invokes init on it, and
// returns nil together with a second value when the value checked against nil
// is non-nil (presumably the error from init). Otherwise it returns a value
// with a nil error (presumably the newly built Scanner).
func ( []License) (*Scanner, error) {
	 := new(Scanner)
	 := .init()
	if  != nil {
		return nil, 
	}
	return , nil
}

// NOTE(review): the method name, receiver name, parameter name, and local
// identifiers are missing from this text — presumably this is
// (*Scanner).init(licenses []License), since init is what the other blocks in
// this file call. Restore the names before compiling.
//
// Method on *Scanner that takes a []License and returns an error. It fills
// the urls and licenses fields and stores a compiled multi-LRE matcher in the
// re field. It returns an error when an LRE fails to parse, when
// match.NewMultiLRE fails, or when the value checked after that call is nil
// ("missing lre").
func ( *Scanner) ( []License) error {
	// Fresh dictionary, seeded with the words "copyright" and "http".
	 := new(match.Dict)
	.Insert("copyright")
	.Insert("http")
	var  []*match.LRE
	.urls = make(map[string]License)
	for ,  := range  {
		// Entries with a non-empty URL are recorded in the urls map under that URL.
		if .URL != "" {
			.urls[.URL] = 
		}
		// Entries with a non-empty LRE extend the licenses field and are parsed
		// with match.ParseLRE; a parse failure aborts with an error naming the ID.
		if .LRE != "" {
			.licenses = append(.licenses, )
			,  := match.ParseLRE(, .ID, .LRE)
			if  != nil {
				return fmt.Errorf("parsing %v: %v", .ID, )
			}
			 = append(, )
		}
	}
	// Combine the parsed expressions into a single matcher.
	,  := match.NewMultiLRE()
	if  != nil {
		return 
	}
	// A nil value here is reported as an explicit error rather than stored.
	if  == nil {
		return errors.New("missing lre")
	}
	.re = 
	return nil
}

// maxCopyrightWords is subtracted from a match's Start in the scanner's Scan
// method to pick where its look-back loop begins; presumably it bounds how
// many words before a match are searched for a "copyright" word — confirm
// against the restored identifiers.
const maxCopyrightWords = 50
// Scan computes the coverage of the text according to the license set compiled into the package. An input text may match multiple licenses. If that happens, Match contains only disjoint matches. If multiple licenses match a particular section of the input, the best match is chosen so the returned coverage describes at most one match for each section of the input.
// NOTE(review): the function name and parameter name are missing from this
// text — presumably this is Scan(text []byte). Restore them before compiling.
//
// Package-level function that takes a []byte and returns the Coverage produced
// by calling the Scan method on builtinScanner.
func ( []byte) Coverage {
	return builtinScanner.Scan()
}

// urlScanRE matches a URL at the very start of its input (the pattern is
// anchored with ^), case-insensitively: an http:// or https:// scheme, a host
// name ending in .org or .com, one or more path segments drawn from
// letters, digits and the characters - _ . # ? =, and an optional trailing slash.
var urlScanRE = regexp.MustCompile(`^(?i)https?://[-a-z0-9_.]+\.(org|com)(/[-a-z0-9_.#?=]+)+/?`)
// Scan is like the top-level function Scan, but it uses the set of licenses in the Scanner instead of the built-in license set.
// NOTE(review): identifiers (method name, receiver, parameter, locals) and the
// "//" markers of the original inline comments are missing from this block;
// the bare prose lines below are those former comments. The comments added
// here describe only what the remaining tokens show. Restore the names and
// comment markers before compiling.
//
// Method on *Scanner that takes a []byte and returns a Coverage (presumably
// Scan). It runs the compiled matcher over the input, walks the resulting
// match list to record license matches and URL matches, and sets a percentage
// on the Coverage.
func ( *Scanner) ( []byte) Coverage {
	// When the compared value is builtinScanner, its init is run through
	// builtinScannerOnce.Do (presumably a sync.Once, so at most once) using
	// BuiltinLicenses(); an init error panics.
	if  == builtinScanner {
		builtinScannerOnce.Do(func() {
			if  := builtinScanner.init(BuiltinLicenses());  != nil {
				panic("licensecheck: initializing Scan: " + .Error())
			}
		})
	}

	// Run the matcher; the argument is converted to a string for the call.
	 := .re.Match(string()) // TODO remove conversion

	var  Coverage
	 := .Words
	 := 0
	 := 0
	// Look up the dictionary entries for "copyright" and "http".
	 := .re.Dict().Lookup("copyright")
	 := .re.Dict().Lookup("http")
Add sentinel match trigger URL scan from last match to end of text.
	// The sentinel is recognisable by ID -1; its Start is a len(...) value.
	.List = append(.List, match.Match{Start: len(), ID: -1})

	for ,  := range .List {
		// Look-back: begin at most maxCopyrightWords entries before the match
		// start (raised to a lower bound by the comparison below), find the
		// first entry whose ID equals the compared value — presumably the
		// "copyright" word — and move the match start there.
		if .Start < len() &&  < .Start &&  >= 0 {
			 := .Start - maxCopyrightWords
			if  <  {
				 = 
			}
			for  := ;  < .Start; ++ {
				if [].ID ==  {
					.Start = 
					break
				}
			}
		}
Pick up any URLs before m.Start.
		for  := ;  < .Start; ++ {
			 := &[]
			// NOTE(review): the nested block below closes one more brace than it
			// opens; an enclosing `if` header appears to be missing here
			// (presumably a test against the "http" word ID) — confirm.
Potential URL match. urlRE only considers a match at the start of the input string. Only accept URLs that end before the next scan match.
				if  := urlScanRE.FindIndex([.Lo:]);  != nil && (.Start == len() || int(.Lo)+[1] <= int([.Start].Lo)) {
					// Convert the regexp's relative indexes to absolute offsets by adding Lo.
					,  := int(.Lo)+[0], int(.Lo)+[1]
					// Only text that licenseURL recognises is recorded, flagged IsURL.
					if ,  := .licenseURL(string([:]));  {
						.Match = append(.Match, Match{
							ID:    .ID,
							Type:  .Type,
							Start: ,
							End:   ,
							IsURL: true,
						})
						// Skip forward over entries whose Hi offset does not exceed the
						// compared bound, add the number skipped to a running total, and
						// step back once because the loop header increments again.
						 := 
						for  < .Start && int([].Hi) <=  {
							++
						}
						 +=  - 
						-- // counter loop i++
					}
				}
			}
		}

		if .ID < 0 { // sentinel added above
			break
		}

		// Start offset in bytes. A match starting at index 0 gets offset 0;
		// otherwise bytes.LastIndexByte looks for the last '\n' in a slice that
		// begins at the previous entry's Hi, and a hit moves the offset to just
		// after that newline (presumably the start of the match's first line).
		 := int([.Start].Lo) // byte offset (unlike m.Start)
		if .Start == 0 {
			 = 0
		} else {
			 := int([.Start-1].Hi)
			if  := bytes.LastIndexByte([:], '\n');  >= 0 {
				 =  +  + 1
			}
		}
		// End offset in bytes. A match whose End equals the len(...) value gets a
		// len(...) value; otherwise bytes.IndexByte looks for the first '\n' in a
		// slice bounded by the next entry's Lo, and a hit moves the offset to
		// just after that newline (presumably the end of the match's last line).
		 := int([.End-1].Hi) // byte offset (unlike m.End)
		if .End == len() {
			 = len()
		} else {
			 := int([.End].Lo)
			if  := bytes.IndexByte([:], '\n');  >= 0 {
				 =  +  + 1
			}
		}
		// The match ID indexes the licenses field to obtain the reported ID and Type.
		 := &.licenses[.ID]
		.Match = append(.Match, Match{
			ID:    .ID,
			Type:  .Type,
			Start: ,
			End:   ,
		})
		// Accumulate the match length (End - Start) and remember where it ended.
		 += .End - .Start
		 = .End
	}

	// Percent is 100 times one value divided by a length, computed only when
	// that length is non-zero.
	if len() > 0 { // len(words)==0 should be impossible, but avoid NaN
		.Percent = 100.0 * float64() / float64(len())
	}

	return 
}
// licenseURL reports whether url is a known URL, and returns its name if it is.
// We need to canonicalize the text for lookup. First, trim the leading http:// or https:// and the trailing /. Then we lower-case it.
	 = strings.TrimPrefix(, "http://")
	 = strings.TrimPrefix(, "https://")
	 = strings.TrimSuffix(, "/")
	 = strings.TrimSuffix(, "/legalcode") // Common for CC licenses.
	 = strings.ToLower()
	,  := .urls[]
	if  {
		return , true
	}
// Try trimming one more path element, so that the ported URL https://creativecommons.org/licenses/by/3.0/us/ is recognized as the known unported URL https://creativecommons.org/licenses/by/3.0
	if  := strings.LastIndex(, "/");  >= 0 {
		if ,  = .urls[[:]];  {
			return , true
		}
	}

	return License{}, false