// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Package bidirule implements the Bidi Rule defined by RFC 5893.
//
// This package is under development. The API may change without notice and
// without preserving backward compatibility.
package bidirule

import (
	
	

	
	
)
// This file contains an implementation of RFC 5893: Right-to-Left Scripts for
// Internationalized Domain Names for Applications (IDNA).
//
// A label is an individual component of a domain name. Labels are usually
// shown separated by dots; for example, the domain name "www.example.com" is
// composed of three labels: "www", "example", and "com".
//
// An RTL label is a label that contains at least one character of class R, AL,
// or AN. An LTR label is any label that is not an RTL label.
//
// A "Bidi domain name" is a domain name that contains at least one RTL label.
//
// The following guarantees can be made based on the above:
//
//   o In a domain name consisting of only labels that satisfy the rule, the
//     requirements of Section 3 are satisfied. Note that even LTR labels and
//     pure ASCII labels have to be tested.
//
//   o In a domain name consisting of only LDH labels (as defined in the
//     Definitions document [RFC5890]) and labels that satisfy the rule, the
//     requirements of Section 3 are satisfied as long as a label that starts
//     with an ASCII digit does not come after a right-to-left label.
//
// No guarantee is given for other combinations.
// ErrInvalid indicates a label is invalid according to the Bidi Rule.
	// [2.1] The first character must be a character with Bidi property L, R, or
	// AL. If it has the R or AL property, it is an RTL label; if it has the L
	// property, it is an LTR label.
	ruleInitial: {
		{ruleLTRFinal, 1 << bidi.L},
		{ruleRTLFinal, 1<<bidi.R | 1<<bidi.AL},
	},
		// [2.3] In an RTL label, the end of the label must be a character with
		// Bidi property R, AL, EN, or AN, followed by zero or more characters
		// with Bidi property NSM.
		{ruleRTLFinal, 1<<bidi.R | 1<<bidi.AL | 1<<bidi.EN | 1<<bidi.AN},
		// [2.2] In an RTL label, only characters with the Bidi properties R, AL,
		// AN, EN, ES, CS, ET, ON, BN, or NSM are allowed.
		// We exclude the entries from [2.3].
		{ruleRTL, 1<<bidi.ES | 1<<bidi.CS | 1<<bidi.ET | 1<<bidi.ON | 1<<bidi.BN | 1<<bidi.NSM},
	},
		// [2.3] In an RTL label, the end of the label must be a character with
		// Bidi property R, AL, EN, or AN, followed by zero or more characters
		// with Bidi property NSM.
		{ruleRTLFinal, 1<<bidi.R | 1<<bidi.AL | 1<<bidi.EN | 1<<bidi.AN | 1<<bidi.NSM},
		// [2.2] In an RTL label, only characters with the Bidi properties R, AL,
		// AN, EN, ES, CS, ET, ON, BN, or NSM are allowed.
		// We exclude the entries from [2.3] and NSM.
		{ruleRTL, 1<<bidi.ES | 1<<bidi.CS | 1<<bidi.ET | 1<<bidi.ON | 1<<bidi.BN},
	},
		// [2.6] In an LTR label, the end of the label must be a character with
		// Bidi property L or EN, followed by zero or more characters with Bidi
		// property NSM.
		{ruleLTRFinal, 1<<bidi.L | 1<<bidi.EN},
		// [2.5] In an LTR label, only characters with the Bidi properties L, EN,
		// ES, CS, ET, ON, BN, or NSM are allowed.
		// We exclude the entries from [2.6].
		{ruleLTR, 1<<bidi.ES | 1<<bidi.CS | 1<<bidi.ET | 1<<bidi.ON | 1<<bidi.BN | 1<<bidi.NSM},
	},
		// [2.6] In an LTR label, the end of the label must be a character with
		// Bidi property L or EN, followed by zero or more characters with Bidi
		// property NSM.
		{ruleLTRFinal, 1<<bidi.L | 1<<bidi.EN | 1<<bidi.NSM},
		// [2.5] In an LTR label, only characters with the Bidi properties L, EN,
		// ES, CS, ET, ON, BN, or NSM are allowed.
		// We exclude the entries from [2.6] and NSM.
		{ruleLTR, 1<<bidi.ES | 1<<bidi.CS | 1<<bidi.ET | 1<<bidi.ON | 1<<bidi.BN},
	},
	ruleInvalid: {
		{ruleInvalid, 0},
		{ruleInvalid, 0},
	},
}
// [2.4] In an RTL label, if an EN is present, no AN may be present, and vice
// versa.
// exclusiveRTL is the pair of class bits (EN and AN) that must not both be
// present in a Transformer's seen mask; advance and advanceString mark the
// label invalid as soon as seen contains both.
const exclusiveRTL uint16 = 1<<bidi.EN | 1<<bidi.AN
// From RFC 5893:
// An RTL label is a label that contains at least one character of type
// R, AL, or AN.
//
// An LTR label is any label that is not an RTL label.

// Direction reports the direction of the given label as defined by RFC 5893.
// The Bidi Rule does not have to be applied to labels of the category
// LeftToRight.
// NOTE(review): the function name and every parameter/local identifier are
// blank in this block as it stands, so it will not compile until they are
// restored. The comments here describe only the control flow still visible.
//
// The loop calls bidi.Lookup once per iteration and returns bidi.RightToLeft
// as soon as a looked-up element has class R, AL, or AN. If the loop ends
// without finding one, the result is bidi.LeftToRight.
func ( []byte) bidi.Direction {
	for  := 0;  < len(); {
		,  := bidi.Lookup([:])
		if  == 0 {
			// Presumably a zero size from Lookup: step forward by one.
			// NOTE(review): the string variant of this loop follows the
			// increment with `continue`; here execution falls through to the
			// class check below — confirm that this is intended.
			++
		}
		 := .Class()
		// Any one of the three right-to-left classes decides the result.
		if  == bidi.R ||  == bidi.AL ||  == bidi.AN {
			return bidi.RightToLeft
		}
		 += 
	}
	return bidi.LeftToRight
}
// DirectionString reports the direction of the given label as defined by RFC
// 5893. The Bidi Rule does not have to be applied to labels of the category
// LeftToRight.
// NOTE(review): the function name and every parameter/local identifier are
// blank in this block as it stands, so it will not compile until they are
// restored. The comments here describe only the control flow still visible.
//
// The loop calls bidi.LookupString once per iteration and returns
// bidi.RightToLeft as soon as a looked-up element has class R, AL, or AN. If
// the loop ends without finding one, the result is bidi.LeftToRight.
func ( string) bidi.Direction {
	for  := 0;  < len(); {
		,  := bidi.LookupString([:])
		if  == 0 {
			// Presumably a zero size from LookupString: step forward by one
			// and skip the class check for this position.
			++
			continue
		}
		 := .Class()
		// Any one of the three right-to-left classes decides the result.
		if  == bidi.R ||  == bidi.AL ||  == bidi.AN {
			return bidi.RightToLeft
		}
		 += 
	}
	return bidi.LeftToRight
}
// Valid reports whether b conforms to the BiDi rule.
// NOTE(review): the function name and all parameter/local identifiers are
// blank in this block, so it will not compile until they are restored.
//
// Runs advance on a fresh zero-valued Transformer. The result is false when
// advance reports failure through its bool result, or when its int result is
// below a length (presumably that of the input, i.e. not everything was
// consumed). Otherwise the result is whatever isFinal reports.
func ( []byte) bool {
	var  Transformer
	if ,  := .advance(); ! ||  < len() {
		return false
	}
	return .isFinal()
}
// ValidString reports whether s conforms to the BiDi rule.
// NOTE(review): the function name and all parameter/local identifiers are
// blank in this block, so it will not compile until they are restored.
//
// Runs advanceString on a fresh zero-valued Transformer. The result is false
// when advanceString reports failure through its bool result, or when its int
// result is below a length (presumably that of the input, i.e. not everything
// was consumed). Otherwise the result is whatever isFinal reports.
func ( string) bool {
	var  Transformer
	if ,  := .advanceString(); ! ||  < len() {
		return false
	}
	return .isFinal()
}
// New returns a Transformer that verifies that input adheres to the Bidi Rule.
// NOTE(review): the function name is blank in this block, so it will not
// compile until it is restored.
//
// Returns a pointer to a newly allocated, zero-valued Transformer; no fields
// are set explicitly.
func () *Transformer {
	return &Transformer{}
}
// Transformer implements transform.Transformer.

// A rule can only be violated for "Bidi Domain names", that is, once one of
// the following categories has been observed.
// NOTE(review): the method name, receiver name and constant name are blank in
// this block, so it will not compile until they are restored. Given the
// `.isRTL()` calls elsewhere in this file, this is presumably isRTL — confirm.
//
// Reports whether the receiver's seen mask contains at least one of the class
// bits for R, AL, or AN.
func ( *Transformer) () bool {
	// Bit mask with one bit per right-to-left class.
	const  = 1<<bidi.R | 1<<bidi.AL | 1<<bidi.AN
	return .seen& != 0
}
// Reset implements transform.Transformer.
// NOTE(review): the method and receiver names are blank in this line, so it
// will not compile until they are restored.
//
// Overwrites the pointed-to Transformer with its zero value, discarding all
// previously accumulated fields.
func ( *Transformer) () { * = Transformer{} }
// Transform implements transform.Transformer. This Transformer has state and
// needs to be reset between uses.
// NOTE(review): the method name and all receiver/parameter/result/local
// identifiers are blank in this block, so it will not compile until they are
// restored. Which variable plays which role below is therefore an assumption.
//
// Visible flow:
//  1. If one length is smaller than the other, a slice is cut down to that
//     length, a bool is forced to false and transform.ErrShortDst is recorded
//     (presumably: src is truncated to fit dst and atEOF is cleared).
//  2. Span is called to validate the (possibly truncated) input.
//  3. copy transfers a prefix of one slice into another (presumably the
//     accepted part of src into dst).
//  4. The error from Span replaces the recorded one under the condition
//     documented inline.
func ( *Transformer) (,  []byte,  bool) (,  int,  error) {
	if len() < len() {
		 = [:len()]
		 = false
		 = transform.ErrShortDst
	}
	,  := .Span(, )
	copy(, [:])
	// && binds tighter than ||: the assignment happens when the first operand
	// is nil, or when the second one is non-nil and is not
	// transform.ErrShortSrc.
	if  == nil ||  != nil &&  != transform.ErrShortSrc {
		 = 
	}
	return , , 
}
// Span returns the first n bytes of src that conform to the Bidi rule.
// NOTE(review): the method name and all receiver/parameter/result/local
// identifiers are blank in this block, so it will not compile until they are
// restored.
//
// Visible flow: if the state is already ruleInvalid and isRTL() holds, the
// method returns (0, ErrInvalid) without calling advance. Otherwise it calls
// advance and maps the outcome to an error:
//   - advance's bool result is false: ErrInvalid
//   - advance's int result is below a length (presumably the input was not
//     fully consumed): transform.ErrShortSrc when a bool (presumably atEOF)
//     is false, ErrInvalid otherwise
//   - isFinal() is false: ErrInvalid
//
// When no case matches, the error result is left unassigned by the switch.
func ( *Transformer) ( []byte,  bool) ( int,  error) {
	if .state == ruleInvalid && .isRTL() {
		return 0, ErrInvalid
	}
	,  := .advance()
	switch {
	case !:
		 = ErrInvalid
	case  < len():
		if ! {
			 = transform.ErrShortSrc
			// Leave the switch so the ErrInvalid assignment below is skipped.
			break
		}
		 = ErrInvalid
	case !.isFinal():
		 = ErrInvalid
	}
	return , 
}
// Precomputing the ASCII values decreases running time for the ASCII fast path
// by about 30%.
// asciiTable holds one bidi.Properties value for each of the 128 ASCII code
// points. advance and advanceString index it by byte value for bytes below
// utf8.RuneSelf instead of calling into the bidi lookup functions.
var asciiTable [128]bidi.Properties

// NOTE(review): the function name and local identifiers are blank here, so
// this will not compile until they are restored. It takes no arguments,
// returns nothing and only fills asciiTable, so it is presumably the package
// init function — confirm.
func () {
	for  := range asciiTable {
		// bidi.LookupRune yields two results; one of them (necessarily the
		// bidi.Properties value, given the table's element type) is stored.
		,  := bidi.LookupRune(rune())
		asciiTable[] = 
	}
}

// NOTE(review): the method name and all receiver/parameter/result/local
// identifiers are blank in this block, two comment lines inside it have lost
// their comment markers, and the branch that handles short lookup sizes
// appears to have lost a line (see the note there). It will not compile or
// behave as intended until restored.
//
// Visible flow per iteration: a byte below utf8.RuneSelf takes its properties
// from asciiTable with size 1; any other byte goes through bidi.Lookup. The
// element's class is turned into a single bit and OR-ed into seen. If seen
// then contains both bits of exclusiveRTL, state becomes ruleInvalid and the
// method returns false. Otherwise the transitions row for the current state
// is consulted: the first of its two entries whose mask contains the class
// bit supplies the next state. If neither matches, state becomes ruleInvalid
// and the method returns false only when isRTL() holds. When the loop runs to
// completion the bool result is true.
func ( *Transformer) ( []byte) ( int,  bool) {
	var  bidi.Properties
	var  int
	for  < len() {
		if [] < utf8.RuneSelf {
			,  = asciiTable[[]], 1
		} else {
			,  = bidi.Lookup([:])
			if  <= 1 {
				// NOTE(review): the string variant of this method has an
				// inner `if  == 1 {` at this point, separating invalid UTF-8
				// (returns false) from an incomplete encoding (returns true).
				// That line seems to be missing here, which leaves the second
				// return below outside this if.
We always consider invalid UTF-8 to be invalid, even if the string has not yet been determined to be RTL. TODO: is this correct?
					return , false
				}
				return , true // incomplete UTF-8 encoding
			}
TODO: using CompactClass would result in noticeable speedup. See unicode/bidi/prop.go:Properties.CompactClass.
		// One bit per Bidi class; accumulated in seen across the whole input.
		 := uint16(1 << .Class())
		.seen |= 
		// Both EN and AN have now been observed.
		if .seen&exclusiveRTL == exclusiveRTL {
			.state = ruleInvalid
			return , false
		}
		switch  := transitions[.state]; {
		case [0].mask& != 0:
			.state = [0].next
		case [1].mask& != 0:
			.state = [1].next
		default:
			.state = ruleInvalid
			// Stop early only if an R, AL, or AN class has been seen;
			// otherwise keep scanning in the ruleInvalid state.
			if .isRTL() {
				return , false
			}
		}
		 += 
	}
	return , true
}

func ( *Transformer) ( string) ( int,  bool) {
	var  bidi.Properties
	var  int
	for  < len() {
		if [] < utf8.RuneSelf {
			,  = asciiTable[[]], 1
		} else {
			,  = bidi.LookupString([:])
			if  <= 1 {
				if  == 1 {
					return , false // invalid UTF-8
				}
				return , true // incomplete UTF-8 encoding
			}
		// TODO: using CompactClass would result in a noticeable speedup.
		// See unicode/bidi/prop.go:Properties.CompactClass.
		 := uint16(1 << .Class())
		.seen |= 
		if .seen&exclusiveRTL == exclusiveRTL {
			.state = ruleInvalid
			return , false
		}
		switch  := transitions[.state]; {
		case [0].mask& != 0:
			.state = [0].next
		case [1].mask& != 0:
			.state = [1].next
		default:
			.state = ruleInvalid
			if .isRTL() {
				return , false
			}
		}
		 += 
	}
	return , true