// Copyright 2020 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Package licenses detects licenses and determines whether they are redistributable.
// The functions in this package do not return errors; instead, they log any problems
// they encounter and fail closed by reporting that the module or package is not
// redistributable.
//
// Example (modproxy):
//
//	d := licenses.NewDetector(modulePath, version, zipReader, log.Infof)
//	modRedist := d.ModuleIsRedistributable()
//
// Example (discovery):
//
//	d := licenses.NewDetector(modulePath, version, zipReader, log.Infof)
//	modRedist := d.ModuleIsRedistributable()
//	lics := d.AllLicenses()
//	pkgRedist, pkgMetas := d.PackageInfo(pkgSubdir)
package licenses

import (
	
	
	
	
	
	
	
	
	
	

	
	oldlicensecheck 
	
	modzip 
	
)
//go:generate rm -f exceptions.gen.go
//go:generate go run gen_exceptions.go

	// coverageThreshold is the minimum percentage of the file that must contain license text.

	// unknownLicenseType is for text in a license file that's not recognized.
	unknownLicenseType = "UNKNOWN"
)
// maxLicenseSize is the maximum allowable size (in bytes) for a license file.
// There are some license files larger than 1 million bytes:
// https://github.com/vmware/vic/LICENSE and github.com/goharbor/harbor/LICENSE,
// for example.
// It is a var rather than a const so that tests can change it.
// Metadata holds information extracted from a license file.

	// Types is the set of license types, as determined by the licensecheck package.

	// FilePath is the '/'-separated path to the license file in the module zip,
	// relative to the contents directory.

	// The output of oldlicensecheck.Cover.
// A License is a classified license file path and its contents.
type License struct {
	// Metadata is embedded, so its fields (for example Types and FilePath)
	// are read directly on a License value.
	*Metadata
	// Contents is the raw text of the license file. It is left nil when the
	// file could not be read, and is cleared for licenses whose types are
	// not redistributable.
	Contents []byte
}
RemoveNonRedistributableData methods removes the license contents if the license is non-redistributable.
func ( *License) () {
	if !Redistributable(.Types) {
		.Contents = nil
	}
}

var (
	FileNames = []string{
		"COPYING",
		"COPYING.md",
		"COPYING.markdown",
		"COPYING.txt",
		"LICENCE",
		"LICENCE.md",
		"LICENCE.markdown",
		"LICENCE.txt",
		"LICENSE",
		"LICENSE.md",
		"LICENSE.markdown",
		"LICENSE.txt",
		"LICENSE-2.0.txt",
		"LICENCE-2.0.txt",
		"LICENSE-APACHE",
		"LICENCE-APACHE",
		"LICENSE-APACHE-2.0.txt",
		"LICENCE-APACHE-2.0.txt",
		"LICENSE-MIT",
		"LICENCE-MIT",
		"LICENSE.MIT",
		"LICENCE.MIT",
		"LICENSE.code",
		"LICENCE.code",
		"LICENSE.docs",
		"LICENCE.docs",
		"LICENSE.rst",
		"LICENCE.rst",
		"MIT-LICENSE",
		"MIT-LICENCE",
		"MIT-LICENSE.md",
		"MIT-LICENCE.md",
		"MIT-LICENSE.markdown",
		"MIT-LICENCE.markdown",
		"MIT-LICENSE.txt",
		"MIT-LICENCE.txt",
		"MIT_LICENSE",
		"MIT_LICENCE",
		"UNLICENSE",
		"UNLICENCE",
	}
	// standardRedistributableLicenseTypes is the list of license types, as
	// reported by licensecheck, that allow redistribution, and also have a
	// name that is an OSI or SPDX identifier.

		// Licenses acceptable by OSI.
		"AFL-3.0",
		"AGPL-3.0",
		"AGPL-3.0-only",
		"AGPL-3.0-or-later",
		"Apache-1.1",
		"Apache-2.0",
		"Artistic-2.0",
		"BlueOak-1.0.0",
		"0BSD",
		"BSD-1-Clause",
		"BSD-2-Clause",
		"BSD-2-Clause-Patent",
		"BSD-2-Clause-Views",
		"BSD-3-Clause",
		"BSD-3-Clause-Clear",
		"BSD-3-Clause-Open-MPI",
		"BSD-4-Clause",
		"BSD-4-Clause-UC",
		"BSL-1.0",
		"CC-BY-3.0",
		"CC-BY-4.0",
		"CC-BY-SA-3.0",
		"CC-BY-SA-4.0",
		"CC0-1.0",
		"EPL-1.0",
		"EPL-2.0",
		"EUPL-1.2",
		"GPL-2.0",
		"GPL-2.0-only",
		"GPL-2.0-or-later",
		"GPL-3.0",
		"GPL-3.0-only",
		"GPL-3.0-or-later",
		"HPND",
		"ISC",
		"JSON",
		"LGPL-2.1",
		"LGPL-2.1-or-later",
		"LGPL-3.0",
		"LGPL-3.0-or-later",
		"MIT",
		"MIT-0",
		"MPL-2.0",
		"MPL-2.0-no-copyleft-exception",
		"NIST-PD",
		"NIST-PD-fallback",
		"NCSA",
		"OpenSSL",
		"OSL-3.0",
		"PostgreSQL", // TODO: ask legal
		"Python-2.0",
		"Unlicense",
		"UPL-1.0",
		"Zlib",
	}
	// These aren't technically licenses, but they are recognized by
	// licensecheck and safe to ignore.
	ignorableLicenseTypes = map[string]bool{
		"CC-Notice":          true,
		"GooglePatentClause": true,
		"GooglePatentsFile":  true,
		"blessing":           true,
		"OFL-1.1":            true, // concerns fonts only
	}
	// redistributableLicenseTypes is the set of license types, as reported by
	// licensecheck, that allow redistribution. It consists of the standard
	// types along with some exception types.

	// Add here all other types defined in the exceptions.

	// exceptionTypes is a map from License IDs from LREs in the exception
	// directory to license types. Any type mentioned in an exception should
	// be redistributable. If not, there's a problem.
	// Sanity check at start-up: every license type that an exception maps to
	// must itself be in the redistributable set. A mismatch is a programming
	// error in the exception data, so the process is aborted.
	for _, exTypes := range exceptionTypes {
		for _, t := range exTypes {
			if !redistributableLicenseTypes[t] {
				log.Fatalf(context.Background(), "%s is an exception type that is not redistributable.", t)
			}
		}
	}
}
// nonOSILicenses lists licenses that are not approved by OSI.
// License types in this set are given an spdx.org link rather than an
// opensource.org link when the list of accepted licenses is built.
//
// NOTE(review): the list of redistributable types in this file spells its
// entries "NIST-PD" and "NIST-PD-fallback"; confirm whether the "NIST" key
// below still matches any reported license type.
var nonOSILicenses = map[string]bool{
	"BlueOak-1.0.0":      true,
	"BSD-2-Clause-Views": true,
	"CC-BY-3.0":          true,
	"CC-BY-4.0":          true,
	"CC-BY-SA-3.0":       true,
	"CC-BY-SA-4.0":       true,
	"CC0-1.0":            true,
	"JSON":               true,
	"NIST":               true,
	"OpenSSL":            true,
}
fileNamesLowercase has all the entries of FileNames, downcased and made a set for fast case-insensitive matching.
var fileNamesLowercase = map[string]bool{}

func () {
	for ,  := range FileNames {
		fileNamesLowercase[strings.ToLower()] = true
	}
}
// AcceptedLicenseInfo describes a license that is accepted by the discovery site.
AcceptedLicenses returns a sorted slice of license types that are accepted as redistributable. Its result is intended to be displayed to users.
func () []AcceptedLicenseInfo {
	var  []AcceptedLicenseInfo
	for ,  := range standardRedistributableLicenseTypes {
		var  string
		if nonOSILicenses[] {
			 = fmt.Sprintf("https://spdx.org/licenses/%s.html", )
		} else {
			 = fmt.Sprintf("https://opensource.org/licenses/%s", )
		}
		 = append(, AcceptedLicenseInfo{, })
	}
	sort.Slice(, func(,  int) bool { return [].Name < [].Name })
	return 
}

// OmitExceptions causes the list of exceptions to be omitted from license detection.
// It is intended only to speed up testing, and must be set before the first use of this package.

// A Detector detects licenses in a module and its packages.
type Detector struct {
	// modulePath, version, zr and logf are supplied by the caller of
	// NewDetector; logf is replaced by a no-op when the caller passes nil.
	// moduleRedist and moduleLicenses are filled in by computeModuleInfo.
	// allLicenses and licsByDir are computed lazily: allLicenses stays nil
	// until computeAllLicenseInfo has run.
	modulePath     string
	version        string
	zr             *zip.Reader
	logf           func(string, ...interface{})
	moduleRedist   bool
	moduleLicenses []*License // licenses at module root directory, or list from exceptions
	allLicenses    []*License
	licsByDir      map[string][]*License // from directory to list of licenses
}
NewDetector returns a Detector for the given module and version. zr should be the zip file for that module and version. logf is for logging; if nil, no logging is done.
func (,  string,  *zip.Reader,  func(string, ...interface{})) *Detector {
	if  == nil {
		 = func(string, ...interface{}) {}
	}
	 := &Detector{
		modulePath: ,
		version:    ,
		zr:         ,
		logf:       ,
	}
	.computeModuleInfo()
	return 
}
// ModuleIsRedistributable reports whether the given module is redistributable.
ModuleLicenses returns the licenses that apply to the module.
func ( *Detector) () []*License {
	return .moduleLicenses
}
AllLicenses returns all the licenses detected in the entire module, including package licenses.
func ( *Detector) () []*License {
	if .allLicenses == nil {
		.computeAllLicenseInfo()
	}
	return .allLicenses
}
// PackageInfo reports whether the package at dir, a directory relative to the
// module root, is redistributable. It also returns all the licenses that apply
// to the package.
func ( *Detector) ( string) ( bool,  []*License) {
	 := filepath.ToSlash(filepath.Clean())
	if path.IsAbs() || strings.HasPrefix(, "..") {
		return false, nil
	}
	if .allLicenses == nil {
		.computeAllLicenseInfo()
	// Collect all the license metadata for directories dir and above, excluding the root.

		// Append a slash so that prefix a/b does not match a/bc/d.
		if strings.HasPrefix(+"/", +"/") {
			 = append(, ...)
		}
	// A package is redistributable if its module is, and if other licenses on
	// the path to the root are redistributable. Note that this is not the same
	// as asking if the module licenses plus the package licenses are
	// redistributable. A module that is granted an exception (see DetectFiles)
	// may have licenses that are non-redistributable.
	 := types()
	// A package's licenses include the ones we've already computed, as well
	// as the module licenses.
	return , append(, .moduleLicenses...)
}
// computeModuleInfo determines values for the moduleRedist and moduleLicenses fields of d.

	// Check that all licenses in the contents directory are redistributable.
computeAllLicenseInfo collects all the detected licenses in the zip and stores them in the allLicenses field of d. It also maps detected licenses to their directories, to optimize Detector.PackageInfo.
func ( *Detector) () {
	.allLicenses = []*License{}
	.allLicenses = append(.allLicenses, .moduleLicenses...)
	 := .detectFiles(.Files(NonRootFiles))
	.allLicenses = append(.allLicenses, ...)
	.licsByDir = map[string][]*License{}
	for ,  := range  {
		 := path.Dir(.FilePath)
		.licsByDir[] = append(.licsByDir[], )
	}
}
// WhichFiles describes which files from the zip should be returned by Detector.Files.

	// Only files from the root (contents) directory.

	// Only files that are not in the root directory.

	// All files; the union of root and non-root.

// Files returns a list of license files from the zip. The which argument
// determines the location of the files considered.
func ( *Detector) ( WhichFiles) []*zip.File {
	 := contentsDir(.modulePath, .version)
	 := pathPrefix()
	var  []*zip.File
	for ,  := range .zr.File {
		if !fileNamesLowercase[strings.ToLower(path.Base(.Name))] {
			continue
		}
		if !strings.HasPrefix(.Name, ) {
			.logf("potential license file %q found outside of the expected path %q", .Name, )
			continue
		// Skip files we should ignore.
		if ignoreFiles[.modulePath+" "+strings.TrimPrefix(.Name, )] {
			continue
		}
			// Skip f since it's not at root.
			continue
		}
			// Skip f since it is at root.
			continue
		}
			// Skip if f is in the vendor directory.
			continue
		}
			// Skip if the file path is bad.
			.logf("module.CheckFilePath(%q): %v", .Name, )
			continue
		}
		 = append(, )
	}
	return 
}
// isVendoredFile reports if the given file is in a proper subdirectory nested
// under a 'vendor' directory, to allow for Go packages named 'vendor'.
//
// e.g. isVendoredFile("vendor/LICENSE") == false, and
// isVendoredFile("vendor/foo/LICENSE") == true
//
// name is a '/'-separated path. Only the first "vendor" path element is
// considered.
func isVendoredFile(name string) bool {
	var vendorOffset int
	if strings.HasPrefix(name, "vendor/") {
		vendorOffset = len("vendor/")
	} else if i := strings.Index(name, "/vendor/"); i >= 0 {
		vendorOffset = i + len("/vendor/")
	} else {
		// no vendor directory
		return false
	}
	// check if the file is in a proper subdirectory of vendor
	return strings.Contains(name[vendorOffset:], "/")
}
detectFiles runs DetectFile on each of the given files. If a file cannot be read, the error is logged and a license of type unknown is added.
func ( *Detector) ( []*zip.File) []*License {
	 := pathPrefix(contentsDir(.modulePath, .version))
	var  []*License
	for ,  := range  {
		,  := readZipFile()
		if  != nil {
			.logf("reading zip file %s: %v", .Name, )
			 = append(, &License{
				Metadata: &Metadata{
					Types:    []string{unknownLicenseType},
					FilePath: strings.TrimPrefix(.Name, ),
				},
			})
			continue
		}
		,  := DetectFile(, .Name, .logf)
		 = append(, &License{
			Metadata: &Metadata{
				Types:    ,
				FilePath: strings.TrimPrefix(.Name, ),
				Coverage: ,
			},
			Contents: ,
		})
	}
	return 
}
DetectFile return the set of license types for the given file contents. It also returns the licensecheck coverage information. The filename is used solely for logging.
func ( []byte,  string,  func(string, ...interface{})) ([]string, licensecheck.Coverage) {
	if  == nil {
		 = func(string, ...interface{}) {}
	}
	 := scanner().Scan()
	if .Percent < float64(coverageThreshold) {
		("%s license coverage too low (%+v), skipping", , )
		return []string{unknownLicenseType}, 
	}
	 := make(map[string]bool)
	for ,  := range .Match {
		 := exceptionTypes[.ID]
		if  == nil {
			 = []string{.ID}
		}
		for ,  := range  {
			[] = true
		}
	}
	if len() == 0 {
		("%s failed to classify license (%+v), skipping", , )
		return []string{unknownLicenseType}, 
	}
	return setToSortedSlice(), 
}
Redistributable reports whether the set of license types establishes that a module or package is redistributable. All the licenses we see that are relevant must be redistributable, and we must see at least one such license.
func ( []string) bool {
	 := false
	for ,  := range  {
		if ignorableLicenseTypes[] {
			continue
		}
		if !redistributableLicenseTypes[] {
			return false
		}
		 = true
	}
	return 
}

func ( []*License) []string {
	var  []string
	for ,  := range  {
		 = append(, .Types...)
	}
	return 
}

// setToSortedSlice returns the keys of set in increasing order.
//
// Every key is included, whatever boolean it maps to. The result is nil for
// an empty or nil map.
func setToSortedSlice(set map[string]bool) []string {
	var keys []string
	for k := range set {
		keys = append(keys, k)
	}
	sort.Strings(keys)
	return keys
}

func ( *zip.File) ([]byte, error) {
	if .UncompressedSize64 > maxLicenseSize {
		return nil, fmt.Errorf("file size %d exceeds max license size %d", .UncompressedSize64, maxLicenseSize)
	}
	,  := .Open()
	if  != nil {
		return nil, 
	}
	defer .Close()
	return ioutil.ReadAll(io.LimitReader(, int64(maxLicenseSize)))
}

// contentsDir returns the name of the directory inside the module zip under
// which the module's files are stored: the module path and the version joined
// by "@".
func contentsDir(modulePath, version string) string {
	return modulePath + "@" + version
}
// pathPrefix appends a "/" to its argument if the argument is non-empty.
func ( string) string {
	if  != "" {
		return  + "/"
	}
	return ""