// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package language

import (
	"bytes"
	"errors"
	"fmt"
	"sort"

	"golang.org/x/text/internal/tag"
)
// isAlpha reports whether b is a letter rather than a digit.
// b must be an ASCII letter or digit; under that precondition every letter
// sorts after '9', so a single comparison is sufficient.
func isAlpha(b byte) bool {
	return b > '9'
}
// isAlphaNum reports whether s contains only ASCII letters or digits.
// An empty slice yields true.
func isAlphaNum(s []byte) bool {
	for _, c := range s {
		if !('a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || '0' <= c && c <= '9') {
			return false
		}
	}
	return true
}
// ErrSyntax is returned by any of the parsing functions when the
// input is not well-formed, according to BCP 47.
// TODO: return the position at which the syntax error occurred?
var ErrSyntax = errors.New("language: tag is not well-formed")

// ErrDuplicateKey is returned when a tag contains the same key twice with
// different values in the -u section.
var ErrDuplicateKey = errors.New("language: different values for same key in -u extension")
// ValueError is returned by any of the parsing functions when the
// input is well-formed but the respective subtag is not recognized
// as a valid value.
type ValueError struct {
	v [8]byte // the offending subtag, zero-padded on the right
}

// NewValueError creates a new ValueError for the given subtag.
// Only the first 8 bytes of tag are retained.
func NewValueError(tag []byte) ValueError {
	var e ValueError
	copy(e.v[:], tag)
	return e
}

// tag returns the stored subtag with its zero padding removed.
func (e ValueError) tag() []byte {
	n := bytes.IndexByte(e.v[:], 0)
	if n == -1 {
		// No padding byte: the subtag fills the whole array.
		n = len(e.v)
	}
	return e.v[:n]
}

// Error implements the error interface.
func (e ValueError) Error() string {
	return fmt.Sprintf("language: subtag %q is well-formed but unknown", e.tag())
}

// Subtag returns the subtag for which the error occurred.
func (e ValueError) Subtag() string {
	return string(e.tag())
}
scanner is used to scan BCP 47 tokens, which are separated by _ or -.
type scanner struct {
	b     []byte
	bytes [max99thPercentileSize]byte
	token []byte
	start int // start position of the current token
	end   int // end position of the current token
	next  int // next point for scan
	err   error
	done  bool
}

func ( string) scanner {
	 := scanner{}
	if len() <= len(.bytes) {
		.b = .bytes[:copy(.bytes[:], )]
	} else {
		.b = []byte()
	}
	.init()
	return 
}
makeScanner returns a scanner using b as the input buffer. b is not copied and may be modified by the scanner routines.
func ( []byte) scanner {
	 := scanner{b: }
	.init()
	return 
}

func ( *scanner) () {
	for ,  := range .b {
		if  == '_' {
			.b[] = '-'
		}
	}
	.scan()
}
restToLower converts the string between start and end to lower case.
func ( *scanner) (,  int) {
	for  := ;  < ; ++ {
		 := .b[]
		if 'A' <=  &&  <= 'Z' {
			.b[] += 'a' - 'A'
		}
	}
}

func ( *scanner) ( error) {
	if .err == nil || ( == ErrSyntax && .err != ErrSyntax) {
		.err = 
	}
}
resizeRange shrinks or grows the array at position oldStart such that a new string of size newSize can fit between oldStart and oldEnd. Sets the scan point to after the resized range.
func ( *scanner) (, ,  int) {
	.start = 
	if  :=  + ;  !=  {
		 :=  - 
		if  < cap(.b) {
			 := make([]byte, len(.b)+)
			copy(, .b[:])
			copy([:], .b[:])
			.b = 
		} else {
			.b = append(.b[:], .b[:]...)
		}
		.next =  + (.next - .end)
		.end = 
	}
}
replace replaces the current token with repl.
func ( *scanner) ( string) {
	.resizeRange(.start, .end, len())
	copy(.b[.start:], )
}
gobble removes the current token from the input. Caller must call scan after calling gobble.
func ( *scanner) ( error) {
	.setError()
	if .start == 0 {
		.b = .b[:+copy(.b, .b[.next:])]
		.end = 0
	} else {
		.b = .b[:.start-1+copy(.b[.start-1:], .b[.end:])]
		.end = .start - 1
	}
	.next = .start
}
deleteRange removes the given range from s.b before the current token.
func ( *scanner) (,  int) {
	.b = .b[:+copy(.b[:], .b[:])]
	 :=  - 
	.next -= 
	.start -= 
	.end -= 
}
scan parses the next token of a BCP 47 string. Tokens that are larger than 8 characters or include non-alphanumeric characters result in an error and are gobbled and removed from the output. It returns the end position of the last token consumed.
func ( *scanner) () ( int) {
	 = .end
	.token = nil
	for .start = .next; .next < len(.b); {
		 := bytes.IndexByte(.b[.next:], '-')
		if  == -1 {
			.end = len(.b)
			.next = len(.b)
			 = .end - .start
		} else {
			.end = .next + 
			.next = .end + 1
		}
		 := .b[.start:.end]
		if  < 1 ||  > 8 || !isAlphaNum() {
			.gobble(ErrSyntax)
			continue
		}
		.token = 
		return 
	}
	if  := len(.b);  > 0 && .b[-1] == '-' {
		.setError(ErrSyntax)
		.b = .b[:len(.b)-1]
	}
	.done = true
	return 
}
acceptMinSize parses multiple tokens of the given size or greater. It returns the end position of the last token consumed.
func ( *scanner) ( int) ( int) {
	 = .end
	.scan()
	for ; len(.token) >= ; .scan() {
		 = .end
	}
	return 
}
Parse parses the given BCP 47 string and returns a valid Tag. If parsing failed it returns an error and any part of the tag that could be parsed. If parsing succeeded but an unknown value was found, it returns ValueError. The Tag returned in this case is just stripped of the unknown value. All other values are preserved. It accepts tags in the BCP 47 format and extensions to this standard defined in https://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
TODO: consider supporting old-style locale key-value pairs.
	if  == "" {
		return Und, ErrSyntax
	}
	if len() <= maxAltTaglen {
		 := [maxAltTaglen]byte{}
Generating invalid UTF-8 is okay as it won't match.
			if 'A' <=  &&  <= 'Z' {
				 += 'a' - 'A'
			} else if  == '_' {
				 = '-'
			}
			[] = byte()
		}
		if ,  := grandfathered();  {
			return , nil
		}
	}
	 := makeScannerString()
	return parse(&, )
}

func ( *scanner,  string) ( Tag,  error) {
	 = Und
	var  int
	if  := len(.token);  <= 1 {
		.toLower(0, len(.b))
		if  == 0 || .token[0] != 'x' {
			return , ErrSyntax
		}
		 = parseExtensions()
	} else if  >= 4 {
		return Und, ErrSyntax
	} else { // the usual case
		,  = parseTag()
		if  := len(.token);  == 1 {
			.pExt = uint16()
			 = parseExtensions()
		} else if  < len(.b) {
			.setError(ErrSyntax)
			.b = .b[:]
		}
	}
	if int(.pVariant) < len(.b) {
		if  < len() {
			 = [:]
		}
		if len() > 0 && tag.Compare(, .b) == 0 {
			.str = 
		} else {
			.str = string(.b)
		}
	} else {
		.pVariant, .pExt = 0, 0
	}
	return , .err
}
parseTag parses language, script, region and variants. It returns a Tag and the end position in the input that was parsed.
func ( *scanner) ( Tag,  int) {
TODO: set an error if an unknown lang, script or region is encountered.
	.LangID,  = getLangID(.token)
	.setError()
	.replace(.LangID.String())
	 := .start
	 = .scan()
From http://tools.ietf.org/html/bcp47, <lang>-<extlang> tags are equivalent to a tag of the form <extlang>.
		,  := getLangID(.token)
		if  != 0 {
			.LangID = 
			copy(.b[:], .String())
			.b[+3] = '-'
			.start =  + 4
		}
		.gobble()
		 = .scan()
	}
	if len(.token) == 4 && isAlpha(.token[0]) {
		.ScriptID,  = getScriptID(script, .token)
		if .ScriptID == 0 {
			.gobble()
		}
		 = .scan()
	}
	if  := len(.token);  >= 2 &&  <= 3 {
		.RegionID,  = getRegionID(.token)
		if .RegionID == 0 {
			.gobble()
		} else {
			.replace(.RegionID.String())
		}
		 = .scan()
	}
	.toLower(.start, len(.b))
	.pVariant = byte()
	 = parseVariants(, , )
	.pExt = uint16()
	return , 
}

// separator is the single '-' byte passed to bytes.Join when subtags are
// re-joined after sorting.
var separator = []byte{'-'}
parseVariants scans tokens as long as each token is a valid variant string. Duplicate variants are removed.
func ( *scanner,  int,  Tag) int {
	 := .start
	 := [4]uint8{}
	 := [4][]byte{}
	 := [:0]
	 := [:0]
	 := -1
	 := false
TODO: measure the impact of needing this conversion and redesign the data structure if there is an issue.
		,  := variantIndex[string(.token)]
unknown variant TODO: allow user-defined variants?
			.gobble(NewValueError(.token))
			continue
		}
		 = append(, )
		 = append(, .token)
		if ! {
			if  < int() {
				 = int()
			} else {
There is no legal combinations of more than 7 variants (and this is by no means a useful sequence).
				const  = 8
				if len() >  {
					break
				}
			}
		}
		 = .end
	}
	if  {
		sort.Sort(variantsSort{, })
		,  := 0, -1
		for ,  := range  {
			 := int()
Remove duplicates.
				continue
			}
			[] = []
			[] = []
			++
			 = 
		}
		if  := bytes.Join([:], separator); len() == 0 {
			 =  - 1
		} else {
			.resizeRange(, , len())
			copy(.b[.start:], )
			 = .end
		}
	}
	return 
}

// variantsSort implements sort.Interface over two parallel slices, ordering
// by the values in i and keeping v in step.
type variantsSort struct {
	i []uint8  // sort keys
	v [][]byte // values, parallel to i
}

// Len returns the number of elements.
func (s variantsSort) Len() int {
	return len(s.i)
}

// Swap exchanges elements j and k in both slices.
func (s variantsSort) Swap(j, k int) {
	s.i[j], s.i[k] = s.i[k], s.i[j]
	s.v[j], s.v[k] = s.v[k], s.v[j]
}

// Less reports whether the key of element j is smaller than that of k.
func (s variantsSort) Less(j, k int) bool {
	return s.i[j] < s.i[k]
}

// bytesSort implements sort.Interface for byte slices, comparing only the
// first n bytes of each element. Every element must be at least n bytes long.
type bytesSort struct {
	b [][]byte
	n int // first n bytes to compare
}

// Len returns the number of elements.
func (b bytesSort) Len() int {
	return len(b.b)
}

// Swap exchanges elements i and j.
func (b bytesSort) Swap(i, j int) {
	b.b[i], b.b[j] = b.b[j], b.b[i]
}

// Less reports whether the first n bytes of element i sort before those of
// element j. Elements with equal prefixes are not less than one another.
func (b bytesSort) Less(i, j int) bool {
	for k := 0; k < b.n; k++ {
		if b.b[i][k] == b.b[j][k] {
			continue
		}
		return b.b[i][k] < b.b[j][k]
	}
	return false
}
parseExtensions parses and normalizes the extensions in the buffer. It returns the last position of scan.b that is part of any extension. It also trims scan.b to remove excess parts accordingly.
func ( *scanner) int {
	 := .start
	 := [][]byte{}
	 := []byte{}
	 := .end
	for len(.token) == 1 {
		 := .start
		 := .token[0]
		 = parseExtension()
		 := .b[:]
		if len() < 3 || ( != 'x' && len() < 4) {
			.setError(ErrSyntax)
			 = 
			continue
		} else if  ==  && ( == 'x' || .start == len(.b)) {
			.b = .b[:]
			return 
		} else if  == 'x' {
			 = 
			break
		}
		 = append(, )
	}
	sort.Sort(bytesSort{, 1})
	if len() > 0 {
		 = append(, )
	}
	.b = .b[:]
	if len() > 0 {
		.b = append(.b, bytes.Join(, separator)...)
Strip trailing '-'.
		.b = .b[:-1]
	}
	return 
}
parseExtension parses a single extension and returns the position of the extension end.
func ( *scanner) int {
	,  := .start, .end
	switch .token[0] {
	case 'u':
		 := 
		.scan()
		for  := []byte{}; len(.token) > 2; .scan() {
Attributes are unsorted. Start over from scratch.
				 :=  + 1
				.next = 
				 := [][]byte{}
				for .scan(); len(.token) > 2; .scan() {
					 = append(, .token)
					 = .end
				}
				sort.Sort(bytesSort{, 3})
				copy(.b[:], bytes.Join(, separator))
				break
			}
			 = .token
			 = .end
		}
		var ,  []byte
		for  := ; len(.token) == 2;  =  {
			 = .token
			 := .end
TODO: check key value validity
We have an invalid key or the keys are not sorted. Start scanning keys from scratch and reorder.
				 :=  + 1
				.next = 
				 := [][]byte{}
				for .scan(); len(.token) == 2; {
					,  := .start, .end
					 = .acceptMinSize(3)
					if  !=  {
						 = append(, .b[:])
					} else {
						.setError(ErrSyntax)
						 = 
					}
				}
				sort.Stable(bytesSort{, 2})
				if  := len();  > 0 {
					 := 0
					for  := 1;  < ; ++ {
						if !bytes.Equal([][:2], [][:2]) {
							++
							[] = []
						} else if !bytes.Equal([], []) {
							.setError(ErrDuplicateKey)
						}
					}
					 = [:+1]
				}
				 := bytes.Join(, separator)
				if  :=  + len();  <  {
					.deleteRange(, )
					 = 
				}
				copy(.b[:], )
				break
			}
		}
	case 't':
		.scan()
		if  := len(.token);  >= 2 &&  <= 3 && isAlpha(.token[1]) {
			_,  = parseTag()
			.toLower(, )
		}
		for len(.token) == 2 && !isAlpha(.token[1]) {
			 = .acceptMinSize(3)
		}
	case 'x':
		 = .acceptMinSize(1)
	default:
		 = .acceptMinSize(2)
	}
	return 
}
getExtension returns the name, body and end position of the extension.
func ( string,  int) ( int,  string) {
	if [] == '-' {
		++
	}
	if [] == 'x' {
		return len(), [:]
	}
	 = nextExtension(, )
	return , [:]
}
nextExtension finds the next extension within the string, searching for the -<char>- pattern from position p. In the fast majority of cases, language tags will have at most one extension and extensions tend to be small.
func ( string,  int) int {
	for  := len() - 3;  < ; {
		if [] == '-' {
			if [+2] == '-' {
				return 
			}
			 += 3
		} else {
			++
		}
	}
	return len()