Copyright 2009 The Go Authors. All rights reserved. Use of this source code is governed by a BSD-style license that can be found in the LICENSE file.
This file provides Go implementations of elementary multi-precision arithmetic operations on word vectors. These have the suffix _g. These are needed for platforms without assembly implementations of these routines. This file also contains elementary operations that can be implemented sufficiently efficiently in Go.

package big

import 
A Word represents a single digit of a multi-precision unsigned integer.
type Word uint

const (
	_S = _W / 8 // word size in bytes

	_W = bits.UintSize // word size in bits
	_B = 1 << _W       // digit base
	_M = _B - 1        // digit mask
)
Many of the loops in this file are of the form for i := 0; i < len(z) && i < len(x) && i < len(y); i++ i < len(z) is the real condition. However, checking i < len(x) && i < len(y) as well is faster than having the compiler do a bounds check in the body of the loop; remarkably it is even faster than hoisting the bounds check out of the loop, by doing something like _, _ = x[len(z)-1], y[len(z)-1] There are other ways to hoist the bounds check out of the loop, but the compiler's BCE isn't powerful enough for them (yet?). See the discussion in CL 164966.
---------------------------------------------------------------------------- Elementary operations on words These operations are used by the vector operations below.
z1<<_W + z0 = x*y
func (,  Word) (,  Word) {
	,  := bits.Mul(uint(), uint())
	return Word(), Word()
}
z1<<_W + z0 = x*y + c
func (, ,  Word) (,  Word) {
	,  := bits.Mul(uint(), uint())
	var  uint
	,  = bits.Add(, uint(), 0)
	return Word( + ), Word()
}
nlz returns the number of leading zeros in x. Wraps bits.LeadingZeros call for convenience.
func ( Word) uint {
	return uint(bits.LeadingZeros(uint()))
}
The resulting carry c is either 0 or 1.
The comment near the top of this file discusses this for loop condition.
	for  := 0;  < len() &&  < len() &&  < len(); ++ {
		,  := bits.Add(uint([]), uint([]), uint())
		[] = Word()
		 = Word()
	}
	return
}
The resulting carry c is either 0 or 1.
The comment near the top of this file discusses this for loop condition.
	for  := 0;  < len() &&  < len() &&  < len(); ++ {
		,  := bits.Sub(uint([]), uint([]), uint())
		[] = Word()
		 = Word()
	}
	return
}
The resulting carry c is either 0 or 1.
func (,  []Word,  Word) ( Word) {
The comment near the top of this file discusses this for loop condition.
	for  := 0;  < len() &&  < len(); ++ {
		,  := bits.Add(uint([]), uint(), 0)
		[] = Word()
		 = Word()
	}
	return
}
addVWlarge is addVW, but intended for large z. The only difference is that we check on every iteration whether we are done with carries, and if so, switch to a much faster copy instead. This is only a good idea for large z, because the overhead of the check and the function call outweigh the benefits when z is small.
func (,  []Word,  Word) ( Word) {
The comment near the top of this file discusses this for loop condition.
	for  := 0;  < len() &&  < len(); ++ {
		if  == 0 {
			copy([:], [:])
			return
		}
		,  := bits.Add(uint([]), uint(), 0)
		[] = Word()
		 = Word()
	}
	return
}

func (,  []Word,  Word) ( Word) {
The comment near the top of this file discusses this for loop condition.
	for  := 0;  < len() &&  < len(); ++ {
		,  := bits.Sub(uint([]), uint(), 0)
		[] = Word()
		 = Word()
	}
	return
}
subVWlarge is to subVW as addVWlarge is to addVW.
func (,  []Word,  Word) ( Word) {
The comment near the top of this file discusses this for loop condition.
	for  := 0;  < len() &&  < len(); ++ {
		if  == 0 {
			copy([:], [:])
			return
		}
		,  := bits.Sub(uint([]), uint(), 0)
		[] = Word()
		 = Word()
	}
	return
}

func (,  []Word,  uint) ( Word) {
	if  == 0 {
		copy(, )
		return
	}
	if len() == 0 {
		return
	}
	 &= _W - 1 // hint to the compiler that shifts by s don't need guard code
	 := _W - 
	 &= _W - 1 // ditto
	 = [len()-1] >> 
	for  := len() - 1;  > 0; -- {
		[] = []<< | [-1]>>
	}
	[0] = [0] << 
	return
}

func (,  []Word,  uint) ( Word) {
	if  == 0 {
		copy(, )
		return
	}
	if len() == 0 {
		return
	}
	 &= _W - 1 // hint to the compiler that shifts by s don't need guard code
	 := _W - 
	 &= _W - 1 // ditto
	 = [0] << 
	for  := 0;  < len()-1; ++ {
		[] = []>> | [+1]<<
	}
	[len()-1] = [len()-1] >> 
	return
}

func (,  []Word, ,  Word) ( Word) {
The comment near the top of this file discusses this for loop condition.
	for  := 0;  < len() &&  < len(); ++ {
		, [] = mulAddWWW_g([], , )
	}
	return
}

The comment near the top of this file discusses this for loop condition.
	for  := 0;  < len() &&  < len(); ++ {
		,  := mulAddWWW_g([], , [])
		,  := bits.Add(uint(), uint(), 0)
		, [] = Word(), Word()
		 += 
	}
	return
}
q = ( x1 << _W + x0 - r)/y. m = floor(( _B^2 - 1 ) / d - _B). Requiring x1<y. An approximate reciprocal with a reference to "Improved Division by Invariant Integers (IEEE Transactions on Computers, 11 Jun. 2010)"
func (, , ,  Word) (,  Word) {
	 := nlz()
	if  != 0 {
		 = << | >>(_W-)
		 <<= 
		 <<= 
	}
We know that m = ⎣(B^2-1)/d⎦-B ⎣(B^2-1)/d⎦ = m+B (B^2-1)/d = m+B+delta1 0 <= delta1 <= (d-1)/d B^2/d = m+B+delta2 0 <= delta2 <= 1 The quotient we're trying to compute is quotient = ⎣(x1*B+x0)/d⎦ = ⎣(x1*B*(B^2/d)+x0*(B^2/d))/B^2⎦ = ⎣(x1*B*(m+B+delta2)+x0*(m+B+delta2))/B^2⎦ = ⎣(x1*m+x1*B+x0)/B + x0*m/B^2 + delta2*(x1*B+x0)/B^2⎦ The latter two terms of this three-term sum are between 0 and 1. So we can compute just the first term, and we will be low by at most 2.
	,  := bits.Mul(uint(), uint())
	,  := bits.Add(, uint(), 0)
The quotient is either t1, t1+1, or t1+2. We'll try t1 and adjust if needed.
compute remainder r=x-d*q.
	,  := bits.Mul(, )
	,  := bits.Sub(uint(), , 0)
The remainder we just computed is bounded above by B+d: r = x1*B + x0 - d*q. = x1*B + x0 - d*⎣(x1*m+x1*B+x0)/B⎦ = x1*B + x0 - d*((x1*m+x1*B+x0)/B-alpha) 0 <= alpha < 1 = x1*B + x0 - x1*d/B*m - x1*d - x0*d/B + d*alpha = x1*B + x0 - x1*d/B*⎣(B^2-1)/d-B⎦ - x1*d - x0*d/B + d*alpha = x1*B + x0 - x1*d/B*⎣(B^2-1)/d-B⎦ - x1*d - x0*d/B + d*alpha = x1*B + x0 - x1*d/B*((B^2-1)/d-B-beta) - x1*d - x0*d/B + d*alpha 0 <= beta < 1 = x1*B + x0 - x1*B + x1/B + x1*d + x1*d/B*beta - x1*d - x0*d/B + d*alpha = x0 + x1/B + x1*d/B*beta - x0*d/B + d*alpha = x0*(1-d/B) + x1*(1+d*beta)/B + d*alpha < B*(1-d/B) + d*B/B + d because x0<B (and 1-d/B>0), x1<d, 1+d*beta<=B, alpha<1 = B - d + d + d = B+d So r1 can only be 0 or 1. If r1 is 1, then we know q was too small. Add 1 to q and subtract d from r. That guarantees that r is <B, so we no longer need to keep track of r1.
	if  != 0 {
		++
		 -= 
If the remainder is still too large, increment q one more time.
	if  >=  {
		++
		 -= 
	}
	return Word(), Word( >> )
}

func ( []Word,  Word,  []Word,  Word) ( Word) {
	 = 
	if len() == 1 {
		,  := bits.Div(uint(), uint([0]), uint())
		[0] = Word()
		return Word()
	}
	 := reciprocalWord()
	for  := len() - 1;  >= 0; -- {
		[],  = divWW(, [], , )
	}
	return 
}
reciprocalWord return the reciprocal of the divisor. rec = floor(( _B^2 - 1 ) / u - _B). u = d1 << nlz(d1).
func ( Word) Word {
	 := uint( << nlz())
	 := ^
	 := uint(_M)
	,  := bits.Div(, , ) // (_B^2-1)/U-_B = (_B*(_M-C)+_M)/U
	return Word()