// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Note: the file data_test.go that is generated should not be checked in.
//go:generate go run maketables.go triegen.go
//go:generate go test -tags test

// Package norm contains types and functions for normalizing Unicode strings.
package norm // import "golang.org/x/text/unicode/norm"

import (
	

	
)
// A Form denotes a canonical representation of Unicode code points.
// The Unicode-defined normalization and equivalence forms are:
//
//	NFC   Unicode Normalization Form C
//	NFD   Unicode Normalization Form D
//	NFKC  Unicode Normalization Form KC
//	NFKD  Unicode Normalization Form KD
//
// For a Form f, this documentation uses the notation f(x) to mean
// the bytes or string x converted to the given form.
// A position n in x is called a boundary if conversion to the form can
// proceed independently on both sides:
//
//	f(x) == append(f(x[0:n]), f(x[n:])...)
//
// References: https://unicode.org/reports/tr15/ and
// https://unicode.org/notes/tn5/.
type Form int

// The normalization forms, numbered consecutively starting at zero.
const (
	NFC  = Form(iota) // Unicode Normalization Form C
	NFD               // Unicode Normalization Form D
	NFKC              // Unicode Normalization Form KC
	NFKD              // Unicode Normalization Form KD
)
// Bytes returns f(b). May return b if f(b) = b.
func ( Form) ( []byte) []byte {
	// NOTE(review): identifier names are missing from this block; restore them
	// before building. Comments describe only what the remaining tokens show.
	 := inputBytes()
	 := formTable[]
	// Fast path: run quickSpan over the whole input with atEOF = true. When its
	// second result is set, return early (presumably the input itself).
	,  := .quickSpan(, 0, len(), true)
	if  {
		return 
	}
	// Slow path: allocate an output slice whose capacity is the input length,
	// copy a prefix of the input into it, and let doAppendInner finish the job
	// through a reorderBuffer that flushes with appendFlush.
	 := make([]byte, , len())
	copy(, [0:])
	 := reorderBuffer{f: *, src: , nsrc: len(), out: , flushF: appendFlush}
	return doAppendInner(&, )
}
// String returns f(s).
func ( Form) ( string) string {
	// NOTE(review): identifier names are missing from this block; restore them
	// before building. Comments describe only what the remaining tokens show.
	 := inputString()
	 := formTable[]
	// Fast path: run quickSpan over the whole input with atEOF = true. When its
	// second result is set, return early (presumably the input itself).
	,  := .quickSpan(, 0, len(), true)
	if  {
		return 
	}
	// Slow path: build the result in a byte slice whose capacity is the input
	// length, seeded with a prefix of the input, then convert the bytes
	// produced by doAppendInner back to a string.
	 := make([]byte, , len())
	copy(, [0:])
	 := reorderBuffer{f: *, src: , nsrc: len(), out: , flushF: appendFlush}
	return string(doAppendInner(&, ))
}
// IsNormal returns true if b == f(b).
func ( Form) ( []byte) bool {
	// NOTE(review): identifier names are missing from this block; restore them
	// before building. Comments describe only what the remaining tokens show.
	 := inputBytes()
	 := formTable[]
	// Fast path: quickSpan over the whole input; when its second result is set
	// the answer is true without further work.
	,  := .quickSpan(, 0, len(), true)
	if  {
		return true
	}
	// No output buffer is supplied (nil): the flush callback is cmpNormalBytes,
	// so flushing compares instead of appending.
	 := reorderBuffer{f: *, src: , nsrc: len()}
	.setFlusher(nil, cmpNormalBytes)
	for  < len() {
		// Point .out at a tail slice before each segment; cmpNormalBytes reads
		// .out as the bytes to compare against.
		.out = [:]
		// A negative result from decomposeSegment means "not normalized".
		if  = decomposeSegment(&, , true);  < 0 {
			return false
		}
		// Skip ahead with quickSpan; its second result is deliberately ignored.
		, _ = .f.quickSpan(.src, , len(), true)
	}
	return true
}

func ( *reorderBuffer) bool {
	// NOTE(review): identifier names are missing from this block; restore them
	// before building. IsNormal passes a function named cmpNormalBytes to
	// setFlusher; this is presumably that function.
	//
	// Reports whether the bytes of the buffered runes equal the leading bytes
	// of .out.
	 := .out
	for  := 0;  < .nrune; ++ {
		 := .rune[]
		// Fewer bytes remain than this rune occupies: mismatch.
		if int(.size) > len() {
			return false
		}
		// Compare the rune's bytes, .byte[pos : pos+size], one at a time,
		// consuming one byte of the comparison slice per match.
		 := .pos
		 :=  + .size
		for ;  < ; ++ {
			if [0] != .byte[] {
				return false
			}
			 = [1:]
		}
	}
	return true
}
// IsNormalString returns true if s == f(s).
func ( Form) ( string) bool {
	// NOTE(review): identifier names are missing from this block; restore them
	// before building. Comments describe only what the remaining tokens show.
	 := inputString()
	 := formTable[]
	// Fast path: quickSpan over the whole input; when its second result is set
	// the answer is true without further work.
	,  := .quickSpan(, 0, len(), true)
	if  {
		return true
	}
	 := reorderBuffer{f: *, src: , nsrc: len()}
	// The flush callback is a closure that compares the buffered runes' bytes
	// against an indexed value, advancing an index captured from this function
	// (presumably a position in the input string).
	.setFlusher(nil, func( *reorderBuffer) bool {
		for  := 0;  < .nrune; ++ {
			 := .rune[]
			// The rune would extend past the end of the compared value.
			if +int(.size) > len() {
				return false
			}
			 := .pos
			 :=  + .size
			for ;  < ; ++ {
				if [] != .byte[] {
					return false
				}
				++
			}
		}
		return true
	})
	for  < len() {
		// A negative result from decomposeSegment means "not normalized".
		if  = decomposeSegment(&, , true);  < 0 {
			return false
		}
		// Skip ahead with quickSpan; its second result is deliberately ignored.
		, _ = .f.quickSpan(.src, , len(), true)
	}
	return true
}
// patchTail fixes a case where a rune may be incorrectly normalized
// if it is followed by illegal continuation bytes. It patches the buffer in
// place and reports whether the decomposition is still in progress.
func ( *reorderBuffer) bool {
	// NOTE(review): identifier names are missing from this block; restore them
	// before building. Comments describe only what the remaining tokens show.
	//
	// Locate the last rune start in .out; with no rune start (-1) or a
	// zero-size rune there is nothing to patch.
	,  := lastRuneStart(&.f, .out)
	if  == -1 || .size == 0 {
		return true
	}
	 :=  + int(.size)
	 := len(.out) - 
	// NOTE(review): the line that opens the block closed after "return false"
	// below is missing here, and the next line is comment text that has lost
	// its "//" marker.
Potentially allocating memory. However, this only happens with ill-formed UTF-8.
		// Save a tail of .out, truncate .out, decompose and flush what
		// remains, then re-append the saved tail.
		 := make([]byte, 0)
		 = append(, .out[len(.out)-:]...)
		.out = .out[:]
		decomposeToLastBoundary()
		.doFlush()
		.out = append(.out, ...)
		return false
	}
	// Cut a slice off .out, decompose what remains back to its last boundary,
	// then advance the stream-safe state before re-inserting.
	 := .out[:]
	.out = .out[:]
	decomposeToLastBoundary()
	if  := .ss.next();  == ssStarter {
		// Starter: flush the pending runes and restart the state.
		.doFlush()
		.ss.first()
	} else if  == ssOverflow {
		// Overflow: flush, insert a CGJ and reset the state to zero.
		.doFlush()
		.insertCGJ()
		.ss = 0
	}
	.insertUnsafe(inputBytes(), 0, )
	return true
}

func ( *reorderBuffer,  int) int {
	// NOTE(review): identifier names are missing from this block; restore them
	// before building. Callers in this file invoke a function named
	// appendQuick with (buffer, position); this is presumably that function.
	//
	// Nothing to do when the compared value already equals .nsrc.
	if .nsrc ==  {
		return 
	}
	// quickSpan from the given position up to .nsrc, then append a slice of
	// .src to .out.
	,  := .f.quickSpan(.src, , .nsrc, true)
	.out = .src.appendSlice(.out, , )
	return 
}
// Append returns f(append(out, b...)).
// The buffer out must be nil, empty, or equal to f(out).
func ( Form) ( []byte,  ...byte) []byte {
	// NOTE(review): identifier names are missing from this block; restore them
	// before building.
	//
	// Thin wrapper: wraps a byte slice with inputBytes and delegates to the
	// doAppend method together with a length.
	return .doAppend(, inputBytes(), len())
}

func ( Form) ( []byte,  input,  int) []byte {
	// NOTE(review): identifier names are missing from this block; restore them
	// before building. Comments describe only what the remaining tokens show.
	//
	// A zero compared value returns early.
	if  == 0 {
		return 
	}
	// NOTE(review): the next line is comment text that has lost its "//"
	// marker. The receiver of the quickSpan call below is not declared in the
	// visible lines; a form-table lookup (as in Bytes) appears to be missing.
Attempt to do a quickSpan first so we can avoid initializing the reorderBuffer.
	if len() == 0 {
		// Append the quick-spanned prefix directly; its second result is
		// ignored.
		,  := .quickSpan(, 0, , true)
		 = .appendSlice(, 0, )
		if  ==  {
			return 
		}
		// Otherwise hand over to doAppendInner through a reorderBuffer that
		// flushes with appendFlush.
		 := reorderBuffer{f: *, src: , nsrc: , out: , flushF: appendFlush}
		return doAppendInner(&, )
	}
	// The len() test above was non-zero: use the package-level doAppend,
	// starting at position 0.
	 := reorderBuffer{f: *, src: , nsrc: }
	return doAppend(&, , 0)
}

func ( *reorderBuffer,  []byte,  int) []byte {
	// NOTE(review): identifier names are missing from this block; restore them
	// before building. Comments describe only what the remaining tokens show.
	.setFlusher(, appendFlush)
	,  := .src, .nsrc
	// Boolean flag: true when the measured slice is non-empty. It is
	// reassigned from patchTail's result in the block below.
	 := len() > 0
	// NOTE(review): the line that opens the block closed three lines below is
	// missing here, and the next line is comment text that has lost its "//"
	// marker.
Move leading non-starters to destination.
		.out = .appendSlice(.out, , )
		 = 
		 = patchTail()
	}
	 := &.f
	if  {
		var  Properties
		if  <  {
			 = .info(, )
			// With no boundary before the rune, or leading non-starters, the
			// segment is decomposed; when the compared value is 0,
			// decomposeToLastBoundary runs first.
			if !.BoundaryBefore() || .nLeadingNonStarters() > 0 {
				if  == 0 {
					decomposeToLastBoundary()
				}
				 = decomposeSegment(, , true)
			}
		}
		if .size == 0 {
			// NOTE(review): the next line is comment text that has lost its
			// "//" marker.
Append incomplete UTF-8 encoding.
			return .appendSlice(.out, , )
		}
		// Runes are still buffered: continue in doAppendInner without the
		// appendQuick step.
		if .nrune > 0 {
			return doAppendInner(, )
		}
	}
	 = appendQuick(, )
	return doAppendInner(, )
}

func ( *reorderBuffer,  int) []byte {
	// NOTE(review): identifier names are missing from this block; restore them
	// before building. Callers in this file invoke a function named
	// doAppendInner with (buffer, position); this is presumably that function.
	//
	// Alternate decomposeSegment and appendQuick while the loop condition
	// holds (a bound is taken from .nsrc), then return the output buffer.
	for  := .nsrc;  < ; {
		 = decomposeSegment(, , true)
		 = appendQuick(, )
	}
	return .out
}
// AppendString returns f(append(out, []byte(s)...)).
// The buffer out must be nil, empty, or equal to f(out).
func ( Form) ( []byte,  string) []byte {
	// NOTE(review): identifier names are missing from this block; restore them
	// before building.
	//
	// Thin wrapper: wraps a string with inputString and delegates to the
	// doAppend method together with a length.
	return .doAppend(, inputString(), len())
}
// QuickSpan returns a boundary n such that b[0:n] == f(b[0:n]).
// It is not guaranteed to return the largest such n.
func ( Form) ( []byte) int {
	// NOTE(review): identifier names are missing from this block; restore them
	// before building.
	//
	// Runs quickSpan on the form-table entry over the whole byte slice with
	// atEOF = true and returns one of its two results.
	,  := formTable[].quickSpan(inputBytes(), 0, len(), true)
	return 
}
// Span implements transform.SpanningTransformer. It returns a boundary n such
// that b[0:n] == f(b[0:n]). It is not guaranteed to return the largest such n.
func ( Form) ( []byte,  bool) ( int,  error) {
	// NOTE(review): identifier names are missing from this block; restore them
	// before building. Comments describe only what the remaining tokens show.
	//
	// The bool parameter is forwarded to quickSpan as its last argument.
	,  := formTable[].quickSpan(inputBytes(), 0, len(), )
	// The error result is set only when the compared value is below len().
	if  < len() {
		// A false flag selects ErrEndOfSpan, otherwise ErrShortSrc. Which
		// boolean is tested is not visible here (presumably quickSpan's second
		// result) -- TODO confirm.
		if ! {
			 = transform.ErrEndOfSpan
		} else {
			 = transform.ErrShortSrc
		}
	}
	return , 
}
// SpanString returns a boundary n such that s[0:n] == f(s[0:n]).
// It is not guaranteed to return the largest such n.
func ( Form) ( string,  bool) ( int,  error) {
	// NOTE(review): identifier names are missing from this block; restore them
	// before building. Comments describe only what the remaining tokens show.
	//
	// The bool parameter is forwarded to quickSpan as its last argument.
	,  := formTable[].quickSpan(inputString(), 0, len(), )
	// The error result is set only when the compared value is below len().
	if  < len() {
		// A false flag selects ErrEndOfSpan, otherwise ErrShortSrc. Which
		// boolean is tested is not visible here (presumably quickSpan's second
		// result) -- TODO confirm.
		if ! {
			 = transform.ErrEndOfSpan
		} else {
			 = transform.ErrShortSrc
		}
	}
	return , 
}
// quickSpan returns a boundary n such that src[0:n] == f(src[0:n]) and
// whether any non-normalized parts were found. If atEOF is false, n will
// not point past the last segment if this segment might become
// non-normalized by appending other runes.
func ( *formInfo) ( input, ,  int,  bool) ( int,  bool) {
	// NOTE(review): identifier names are missing from this block; restore them
	// before building. Comments describe only what the remaining tokens show.
	//
	// Loop state: a uint8 that is later assigned from .ccc, a stream-safe
	// state starting at zero, and one more variable whose initializer is
	// missing.
	var  uint8
	 := streamSafe(0)
	 := 
	for  = ;  < ; {
		// ASCII fast path: when skipASCII moves the position, several state
		// variables are reassigned (two of them to 0) and the loop restarts.
		if  := .skipASCII(, );  !=  {
			 = 
			 =  - 1
			 = 0
			 = 0
			continue
		}
		 := .info(, )
		if .size == 0 {
			// NOTE(review): an inner "if" opener appears to be missing before
			// the next line, which is comment text without its "//" marker.
include incomplete runes
				return , true
			}
			return , true
		// NOTE(review): the closing brace of the size check appears to be
		// missing here; the next line is comment text without its "//" marker.
This block needs to be before the next, because it is possible to have an overflow for runes that are starters (e.g. with U+FF9E).
		// Advance the stream-safe state and act on its result: an overflow
		// returns with a false second result, as does the ccc comparison
		// failing on success.
		switch .next() {
		case ssStarter:
			 = 
		case ssOverflow:
			return , false
		case ssSuccess:
			if  > .ccc {
				return , false
			}
		}
		// Quick-check property: isYesC when the form is composing, isYesD
		// otherwise. The break leaves the enclosing for loop.
		if .composing {
			if !.isYesC() {
				break
			}
		} else {
			if !.isYesD() {
				break
			}
		}
		// Remember this rune's ccc and advance past it.
		 = .ccc
		 += int(.size)
	}
	// When the two compared values are equal the second result is true, and a
	// false flag first triggers one more assignment; otherwise it is false.
	if  ==  {
		if ! {
			 = 
		}
		return , true
	}
	return , false
}
// QuickSpanString returns a boundary n such that s[0:n] == f(s[0:n]).
// It is not guaranteed to return the largest such n.
func ( Form) ( string) int {
	// NOTE(review): identifier names are missing from this block; restore them
	// before building.
	//
	// Runs quickSpan on the form-table entry over the whole string with
	// atEOF = true and returns one of its two results.
	,  := formTable[].quickSpan(inputString(), 0, len(), true)
	return 
}
// FirstBoundary returns the position i of the first boundary in b
// or -1 if b contains no boundary.
func ( Form) ( []byte) int {
	// NOTE(review): identifier names are missing from this block; restore them
	// before building.
	//
	// Thin wrapper: wraps a byte slice with inputBytes and delegates to
	// firstBoundary together with a length.
	return .firstBoundary(inputBytes(), len())
}

func ( Form) ( input,  int) int {
	// NOTE(review): identifier names are missing from this block; restore them
	// before building. Comments describe only what the remaining tokens show.
	//
	// Skip any continuation bytes at offset 0; returns -1 when the resulting
	// value is not below the value it is compared with.
	 := .skipContinuationBytes(0)
	if  >=  {
		return -1
	}
	 := formTable[]
	// NOTE(review): the next line is comment text that has lost its "//"
	// marker. It refers to "ss", but no stream-safe state is declared in the
	// visible lines, so a declaration appears to be missing here.
We should call ss.first here, but we can't as the first rune is skipped already. This means FirstBoundary can't really determine CGJ insertion points correctly. Luckily it doesn't have to.
	for {
		 := .info(, )
		// A zero-size rune yields -1.
		if .size == 0 {
			return -1
		}
		// Any state other than ssSuccess ends the scan with a return.
		if  := .next();  != ssSuccess {
			return 
		}
		 += int(.size)
		if  >=  {
			// The >= test above held: return -1 unless the last rune has a
			// boundary after it or isMax reports true.
			if !.BoundaryAfter() && !.isMax() {
				return -1
			}
			return 
		}
	}
}
// FirstBoundaryInString returns the position i of the first boundary in s
// or -1 if s contains no boundary.
//
// NOTE(review): the definition documented by the paragraph above is missing
// from this file as extracted.

// NextBoundary reports the index of the boundary between the first and next
// segment in b or -1 if atEOF is false and there are not enough bytes to
// determine this boundary.
func ( Form) ( []byte,  bool) int {
	// NOTE(review): identifier names are missing from this block; restore them
	// before building.
	//
	// Thin wrapper: wraps a byte slice with inputBytes and delegates to
	// nextBoundary together with a length and one more argument.
	return .nextBoundary(inputBytes(), len(), )
}
// NextBoundaryInString reports the index of the boundary between the first and
// next segment in s or -1 if atEOF is false and there are not enough bytes to
// determine this boundary.
func ( Form) ( string,  bool) int {
	// NOTE(review): identifier names are missing from this block; restore them
	// before building.
	//
	// Thin wrapper: wraps a string with inputString and delegates to
	// nextBoundary together with a length and one more argument.
	return .nextBoundary(inputString(), len(), )
}

func ( Form) ( input,  int,  bool) int {
	// NOTE(review): identifier names are missing from this block; restore them
	// before building. Comments describe only what the remaining tokens show.
	//
	// Zero case: 0 when the tested flag is set, -1 otherwise.
	if  == 0 {
		if  {
			return 0
		}
		return -1
	}
	 := formTable[]
	 := .info(, 0)
	// Zero-size first rune: 1 when the tested flag is set, -1 otherwise.
	if .size == 0 {
		if  {
			return 1
		}
		return -1
	}
	// Start a fresh stream-safe state, then walk the following runes.
	 := streamSafe(0)
	.first()

	for  := int(.size);  < ;  += int(.size) {
		 = .info(, )
		if .size == 0 {
			if  {
				return 
			}
			return -1
		// NOTE(review): the closing brace of the size check appears to be
		// missing here; the next line is comment text without its "//" marker.
TODO: Using streamSafe to determine the boundary isn't the same as using BoundaryBefore. Determine which should be used.
		// Any state other than ssSuccess ends the scan with a return.
		if  := .next();  != ssSuccess {
			return 
		}
	}
	// Loop ran to completion: -1 only when a flag is false and the last rune
	// neither has a boundary after it nor reports isMax.
	if ! && !.BoundaryAfter() && !.isMax() {
		return -1
	}
	return 
}
// LastBoundary returns the position i of the last boundary in b
// or -1 if b contains no boundary.
func ( Form) ( []byte) int {
	// NOTE(review): identifier names are missing from this block; restore them
	// before building.
	//
	// Thin wrapper: delegates to lastBoundary with the form-table entry.
	return lastBoundary(formTable[], )
}

func ( *formInfo,  []byte) int {
	// NOTE(review): identifier names are missing from this block; restore them
	// before building. Comments describe only what the remaining tokens show.
	 := len()
	// Find the last rune start; -1 from lastRuneStart means none exists.
	,  := lastRuneStart(, )
	if  == -1 {
		return -1
	}
	if .size == 0 { // ends with incomplete rune
		if  == 0 { // starts with incomplete rune
			return -1
		}
		// Retry on a shorter prefix of the input.
		 = 
		,  = lastRuneStart(, [:])
		if  == -1 { // incomplete UTF-8 encoding or non-starter bytes without a starter
			return 
		}
	}
	if +int(.size) !=  { // trailing non-starter bytes: illegal UTF-8
		return 
	}
	if .BoundaryAfter() {
		return 
	}
	// Walk backwards rune by rune with a stream-safe state until it reports
	// ssStarter, an overflow occurs, or the loop position goes negative.
	 := streamSafe(0)
	 := .backwards()
	for  = ;  >= 0 &&  != ssStarter;  =  {
		,  = lastRuneStart(, [:])
		if  = .backwards();  == ssOverflow {
			break
		}
		if +int(.size) !=  {
			if  == -1 { // no boundary found
				return -1
			}
			return  // boundary after an illegal UTF-8 encoding
		}
	}
	return 
}
// decomposeSegment scans the first segment in src into rb. It inserts 0x034f
// (Grapheme Joiner) when it encounters a sequence of more than 30 non-starters
// and returns the number of bytes consumed from src or iShortDst or iShortSrc.
	// Force one character to be consumed.
	 := .f.info(.src, )
	if .size == 0 {
		return 0
	}
		// TODO: this could be removed if we don't support merging.
		if .nrune > 0 {
			goto 
		}
	} else if  == ssOverflow {
		.insertCGJ()
		goto 
	}
	if  := .insertFlush(.src, , );  != iSuccess {
		return int()
	}
	for {
		 += int(.size)
		if  >= .nsrc {
			if ! && !.BoundaryAfter() {
				return int(iShortSrc)
			}
			break
		}
		 = .f.info(.src, )
		if .size == 0 {
			if ! {
				return int(iShortSrc)
			}
			break
		}
		if  := .ss.next();  == ssStarter {
			break
		} else if  == ssOverflow {
			.insertCGJ()
			break
		}
		if  := .insertFlush(.src, , );  != iSuccess {
			return int()
		}
	}
:
	if !.doFlush() {
		return int(iShortDst)
	}
	return 
}
// lastRuneStart returns the runeInfo and position of the last
// rune in buf or the zero runeInfo and -1 if no rune was found.
func ( *formInfo,  []byte) (Properties, int) {
	// NOTE(review): identifier names are missing from this block; restore them
	// before building. Comments describe only what the remaining tokens show.
	//
	// Scan backwards from the last byte until utf8.RuneStart accepts a byte.
	 := len() - 1
	for ;  >= 0 && !utf8.RuneStart([]); -- {
	}
	// No rune start anywhere: zero Properties and -1.
	if  < 0 {
		return Properties{}, -1
	}
	// Otherwise look up the rune's properties via .info on the byte input.
	return .info(inputBytes(), ), 
}
// decomposeToLastBoundary finds an open segment at the end of the buffer
// and scans it into rb. Returns the buffer minus the last segment.
func ( *reorderBuffer) {
	 := &.f
	,  := lastRuneStart(, .out)
		// illegal trailing continuation bytes
		return
	}
	if .BoundaryAfter() {
		return
	}
	var  [maxNonStarters + 1]Properties // stores runeInfo in reverse order
	 := 0
	 := streamSafe(0)
	 := len(.out)
	for {
		[] = 
		 := .backwards()
			// Note that if we have an overflow, the string we are appending to
			// is not correctly normalized. In this case the behavior is undefined.
			break
		}
		++
		 -= int(.size)
		if  == ssStarter ||  < 0 {
			break
		}
		,  = lastRuneStart(, .out[:])
		if int(.size) != - {
			break
		}
	}
	// Copy bytes for insertion as we may need to overwrite rb.out.
	var  [maxBufferSize * utf8.UTFMax]byte
	 := [:copy([:], .out[:])]
	.out = .out[:]
	for --;  >= 0; -- {
		 = []
		.insertUnsafe(inputBytes(), 0, )
		 = [.size:]
	}