Copyright 2013 The Go Authors. All rights reserved. Use of this source code is governed by a BSD-style license that can be found in the LICENSE file.

package curve25519

import 
This code is a port of the public domain, "ref10" implementation of curve25519 from SUPERCOP 20130419 by D. J. Bernstein.
fieldElement represents an element of the field GF(2^255 - 19). An element t, entries t[0]...t[9], represents the integer t[0]+2^26 t[1]+2^51 t[2]+2^77 t[3]+2^102 t[4]+...+2^230 t[9]. Bounds on each t[i] vary depending on context.
type fieldElement [10]int32

func ( *fieldElement) {
	for  := range  {
		[] = 0
	}
}

func ( *fieldElement) {
	feZero()
	[0] = 1
}

func (, ,  *fieldElement) {
	for  := range  {
		[] = [] + []
	}
}

func (, ,  *fieldElement) {
	for  := range  {
		[] = [] - []
	}
}

func (,  *fieldElement) {
	for  := range  {
		[] = []
	}
}
feCSwap replaces (f,g) with (g,f) if b == 1; replaces (f,g) with (f,g) if b == 0. Preconditions: b in {0,1}.
func (,  *fieldElement,  int32) {
	 = -
	for  := range  {
		 :=  & ([] ^ [])
		[] ^= 
		[] ^= 
	}
}
load3 reads a 24-bit, little-endian value from in.
func ( []byte) int64 {
	var  int64
	 = int64([0])
	 |= int64([1]) << 8
	 |= int64([2]) << 16
	return 
}
load4 reads a 32-bit, little-endian value from in.
func ( []byte) int64 {
	return int64(binary.LittleEndian.Uint32())
}

func ( *fieldElement,  *[32]byte) {
	 := load4([:])
	 := load3([4:]) << 6
	 := load3([7:]) << 5
	 := load3([10:]) << 3
	 := load3([13:]) << 2
	 := load4([16:])
	 := load3([20:]) << 7
	 := load3([23:]) << 5
	 := load3([26:]) << 4
	 := (load3([29:]) & 0x7fffff) << 2

	var  [10]int64
	[9] = ( + 1<<24) >> 25
	 += [9] * 19
	 -= [9] << 25
	[1] = ( + 1<<24) >> 25
	 += [1]
	 -= [1] << 25
	[3] = ( + 1<<24) >> 25
	 += [3]
	 -= [3] << 25
	[5] = ( + 1<<24) >> 25
	 += [5]
	 -= [5] << 25
	[7] = ( + 1<<24) >> 25
	 += [7]
	 -= [7] << 25

	[0] = ( + 1<<25) >> 26
	 += [0]
	 -= [0] << 26
	[2] = ( + 1<<25) >> 26
	 += [2]
	 -= [2] << 26
	[4] = ( + 1<<25) >> 26
	 += [4]
	 -= [4] << 26
	[6] = ( + 1<<25) >> 26
	 += [6]
	 -= [6] << 26
	[8] = ( + 1<<25) >> 26
	 += [8]
	 -= [8] << 26

	[0] = int32()
	[1] = int32()
	[2] = int32()
	[3] = int32()
	[4] = int32()
	[5] = int32()
	[6] = int32()
	[7] = int32()
	[8] = int32()
	[9] = int32()
}
feToBytes marshals h to s. Preconditions: |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc. Write p=2^255-19; q=floor(h/p). Basic claim: q = floor(2^(-255)(h + 19 2^(-25)h9 + 2^(-1))). Proof: Have |h|<=p so |q|<=1 so |19^2 2^(-255) q|<1/4. Also have |h-2^230 h9|<2^230 so |19 2^(-255)(h-2^230 h9)|<1/4. Write y=2^(-1)-19^2 2^(-255)q-19 2^(-255)(h-2^230 h9). Then 0<y<1. Write r=h-pq. Have 0<=r<=p-1=2^255-20. Thus 0<=r+19(2^-255)r<r+19(2^-255)2^255<=2^255-1. Write x=r+19(2^-255)r+y. Then 0<x<2^255 so floor(2^(-255)x) = 0 so floor(q+2^(-255)x) = q. Have q+2^(-255)x = 2^(-255)(h + 19 2^(-25) h9 + 2^(-1)) so floor(2^(-255)(h + 19 2^(-25) h9 + 2^(-1))) = q.
func ( *[32]byte,  *fieldElement) {
	var  [10]int32

	 := (19*[9] + (1 << 24)) >> 25
	 = ([0] + ) >> 26
	 = ([1] + ) >> 25
	 = ([2] + ) >> 26
	 = ([3] + ) >> 25
	 = ([4] + ) >> 26
	 = ([5] + ) >> 25
	 = ([6] + ) >> 26
	 = ([7] + ) >> 25
	 = ([8] + ) >> 26
	 = ([9] + ) >> 25
Goal: Output h-(2^255-19)q, which is between 0 and 2^255-20.
Goal: Output h-2^255 q, which is between 0 and 2^255-20.

	[0] = [0] >> 26
	[1] += [0]
	[0] -= [0] << 26
	[1] = [1] >> 25
	[2] += [1]
	[1] -= [1] << 25
	[2] = [2] >> 26
	[3] += [2]
	[2] -= [2] << 26
	[3] = [3] >> 25
	[4] += [3]
	[3] -= [3] << 25
	[4] = [4] >> 26
	[5] += [4]
	[4] -= [4] << 26
	[5] = [5] >> 25
	[6] += [5]
	[5] -= [5] << 25
	[6] = [6] >> 26
	[7] += [6]
	[6] -= [6] << 26
	[7] = [7] >> 25
	[8] += [7]
	[7] -= [7] << 25
	[8] = [8] >> 26
	[9] += [8]
	[8] -= [8] << 26
	[9] = [9] >> 25
h10 = carry9
Goal: Output h[0]+...+2^255 h10-2^255 q, which is between 0 and 2^255-20. Have h[0]+...+2^230 h[9] between 0 and 2^255-1; evidently 2^255 h10-2^255 q = 0. Goal: Output h[0]+...+2^230 h[9].

	[0] = byte([0] >> 0)
	[1] = byte([0] >> 8)
	[2] = byte([0] >> 16)
	[3] = byte(([0] >> 24) | ([1] << 2))
	[4] = byte([1] >> 6)
	[5] = byte([1] >> 14)
	[6] = byte(([1] >> 22) | ([2] << 3))
	[7] = byte([2] >> 5)
	[8] = byte([2] >> 13)
	[9] = byte(([2] >> 21) | ([3] << 5))
	[10] = byte([3] >> 3)
	[11] = byte([3] >> 11)
	[12] = byte(([3] >> 19) | ([4] << 6))
	[13] = byte([4] >> 2)
	[14] = byte([4] >> 10)
	[15] = byte([4] >> 18)
	[16] = byte([5] >> 0)
	[17] = byte([5] >> 8)
	[18] = byte([5] >> 16)
	[19] = byte(([5] >> 24) | ([6] << 1))
	[20] = byte([6] >> 7)
	[21] = byte([6] >> 15)
	[22] = byte(([6] >> 23) | ([7] << 3))
	[23] = byte([7] >> 5)
	[24] = byte([7] >> 13)
	[25] = byte(([7] >> 21) | ([8] << 4))
	[26] = byte([8] >> 4)
	[27] = byte([8] >> 12)
	[28] = byte(([8] >> 20) | ([9] << 6))
	[29] = byte([9] >> 2)
	[30] = byte([9] >> 10)
	[31] = byte([9] >> 18)
}
feMul calculates h = f * g Can overlap h with f or g. Preconditions: |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc. |g| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc. Postconditions: |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc. Notes on implementation strategy: Using schoolbook multiplication. Karatsuba would save a little in some cost models. Most multiplications by 2 and 19 are 32-bit precomputations; cheaper than 64-bit postcomputations. There is one remaining multiplication by 19 in the carry chain; one *19 precomputation can be merged into this, but the resulting data flow is considerably less clean. There are 12 carries below. 10 of them are 2-way parallelizable and vectorizable. Can get away with 11 carries, but then data flow is much deeper. With tighter constraints on inputs can squeeze carries into int32.
func (, ,  *fieldElement) {
	 := [0]
	 := [1]
	 := [2]
	 := [3]
	 := [4]
	 := [5]
	 := [6]
	 := [7]
	 := [8]
	 := [9]
	 := [0]
	 := [1]
	 := [2]
	 := [3]
	 := [4]
	 := [5]
	 := [6]
	 := [7]
	 := [8]
	 := [9]
	 := 19 *  // 1.4*2^29
	 := 19 *  // 1.4*2^30; still ok
	 := 19 * 
	 := 19 * 
	 := 19 * 
	 := 19 * 
	 := 19 * 
	 := 19 * 
	 := 19 * 
	 := 2 * 
	 := 2 * 
	 := 2 * 
	 := 2 * 
	 := 2 * 
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 :=  +  +  +  +  +  +  +  +  + 
	 :=  +  +  +  +  +  +  +  +  + 
	 :=  +  +  +  +  +  +  +  +  + 
	 :=  +  +  +  +  +  +  +  +  + 
	 :=  +  +  +  +  +  +  +  +  + 
	 :=  +  +  +  +  +  +  +  +  + 
	 :=  +  +  +  +  +  +  +  +  + 
	 :=  +  +  +  +  +  +  +  +  + 
	 :=  +  +  +  +  +  +  +  +  + 
	 :=  +  +  +  +  +  +  +  +  + 
	var  [10]int64
|h0| <= (1.1*1.1*2^52*(1+19+19+19+19)+1.1*1.1*2^50*(38+38+38+38+38)) i.e. |h0| <= 1.2*2^59; narrower ranges for h2, h4, h6, h8 |h1| <= (1.1*1.1*2^51*(1+1+19+19+19+19+19+19+19+19)) i.e. |h1| <= 1.5*2^58; narrower ranges for h3, h5, h7, h9

	[0] = ( + (1 << 25)) >> 26
	 += [0]
	 -= [0] << 26
	[4] = ( + (1 << 25)) >> 26
	 += [4]
|h0| <= 2^25 |h4| <= 2^25 |h1| <= 1.51*2^58 |h5| <= 1.51*2^58

	[1] = ( + (1 << 24)) >> 25
	 += [1]
	 -= [1] << 25
	[5] = ( + (1 << 24)) >> 25
	 += [5]
|h1| <= 2^24; from now on fits into int32 |h5| <= 2^24; from now on fits into int32 |h2| <= 1.21*2^59 |h6| <= 1.21*2^59

	[2] = ( + (1 << 25)) >> 26
	 += [2]
	 -= [2] << 26
	[6] = ( + (1 << 25)) >> 26
	 += [6]
|h2| <= 2^25; from now on fits into int32 unchanged |h6| <= 2^25; from now on fits into int32 unchanged |h3| <= 1.51*2^58 |h7| <= 1.51*2^58

	[3] = ( + (1 << 24)) >> 25
	 += [3]
	 -= [3] << 25
	[7] = ( + (1 << 24)) >> 25
	 += [7]
|h3| <= 2^24; from now on fits into int32 unchanged |h7| <= 2^24; from now on fits into int32 unchanged |h4| <= 1.52*2^33 |h8| <= 1.52*2^33

	[4] = ( + (1 << 25)) >> 26
	 += [4]
	 -= [4] << 26
	[8] = ( + (1 << 25)) >> 26
	 += [8]
|h4| <= 2^25; from now on fits into int32 unchanged |h8| <= 2^25; from now on fits into int32 unchanged |h5| <= 1.01*2^24 |h9| <= 1.51*2^58

	[9] = ( + (1 << 24)) >> 25
	 += [9] * 19
|h9| <= 2^24; from now on fits into int32 unchanged |h0| <= 1.8*2^37

	[0] = ( + (1 << 25)) >> 26
	 += [0]
|h0| <= 2^25; from now on fits into int32 unchanged |h1| <= 1.01*2^24

	[0] = int32()
	[1] = int32()
	[2] = int32()
	[3] = int32()
	[4] = int32()
	[5] = int32()
	[6] = int32()
	[7] = int32()
	[8] = int32()
	[9] = int32()
}
feSquare calculates h = f*f. Can overlap h with f. Preconditions: |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc. Postconditions: |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
func (,  *fieldElement) {
	 := [0]
	 := [1]
	 := [2]
	 := [3]
	 := [4]
	 := [5]
	 := [6]
	 := [7]
	 := [8]
	 := [9]
	 := 2 * 
	 := 2 * 
	 := 2 * 
	 := 2 * 
	 := 2 * 
	 := 2 * 
	 := 2 * 
	 := 2 * 
	 := 38 *  // 1.31*2^30
	 := 19 *  // 1.31*2^30
	 := 38 *  // 1.31*2^30
	 := 19 *  // 1.31*2^30
	 := 38 *  // 1.31*2^30
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 := int64() * int64()
	 :=  +  +  +  +  + 
	 :=  +  +  +  + 
	 :=  +  +  +  +  + 
	 :=  +  +  +  + 
	 :=  +  +  +  +  + 
	 :=  +  +  +  + 
	 :=  +  +  +  +  + 
	 :=  +  +  +  + 
	 :=  +  +  +  +  + 
	 :=  +  +  +  + 
	var  [10]int64

	[0] = ( + (1 << 25)) >> 26
	 += [0]
	 -= [0] << 26
	[4] = ( + (1 << 25)) >> 26
	 += [4]
	 -= [4] << 26

	[1] = ( + (1 << 24)) >> 25
	 += [1]
	 -= [1] << 25
	[5] = ( + (1 << 24)) >> 25
	 += [5]
	 -= [5] << 25

	[2] = ( + (1 << 25)) >> 26
	 += [2]
	 -= [2] << 26
	[6] = ( + (1 << 25)) >> 26
	 += [6]
	 -= [6] << 26

	[3] = ( + (1 << 24)) >> 25
	 += [3]
	 -= [3] << 25
	[7] = ( + (1 << 24)) >> 25
	 += [7]
	 -= [7] << 25

	[4] = ( + (1 << 25)) >> 26
	 += [4]
	 -= [4] << 26
	[8] = ( + (1 << 25)) >> 26
	 += [8]
	 -= [8] << 26

	[9] = ( + (1 << 24)) >> 25
	 += [9] * 19
	 -= [9] << 25

	[0] = ( + (1 << 25)) >> 26
	 += [0]
	 -= [0] << 26

	[0] = int32()
	[1] = int32()
	[2] = int32()
	[3] = int32()
	[4] = int32()
	[5] = int32()
	[6] = int32()
	[7] = int32()
	[8] = int32()
	[9] = int32()
}
feMul121666 calculates h = f * 121666. Can overlap h with f. Preconditions: |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc. Postconditions: |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
func (,  *fieldElement) {
	 := int64([0]) * 121666
	 := int64([1]) * 121666
	 := int64([2]) * 121666
	 := int64([3]) * 121666
	 := int64([4]) * 121666
	 := int64([5]) * 121666
	 := int64([6]) * 121666
	 := int64([7]) * 121666
	 := int64([8]) * 121666
	 := int64([9]) * 121666
	var  [10]int64

	[9] = ( + (1 << 24)) >> 25
	 += [9] * 19
	 -= [9] << 25
	[1] = ( + (1 << 24)) >> 25
	 += [1]
	 -= [1] << 25
	[3] = ( + (1 << 24)) >> 25
	 += [3]
	 -= [3] << 25
	[5] = ( + (1 << 24)) >> 25
	 += [5]
	 -= [5] << 25
	[7] = ( + (1 << 24)) >> 25
	 += [7]
	 -= [7] << 25

	[0] = ( + (1 << 25)) >> 26
	 += [0]
	 -= [0] << 26
	[2] = ( + (1 << 25)) >> 26
	 += [2]
	 -= [2] << 26
	[4] = ( + (1 << 25)) >> 26
	 += [4]
	 -= [4] << 26
	[6] = ( + (1 << 25)) >> 26
	 += [6]
	 -= [6] << 26
	[8] = ( + (1 << 25)) >> 26
	 += [8]
	 -= [8] << 26

	[0] = int32()
	[1] = int32()
	[2] = int32()
	[3] = int32()
	[4] = int32()
	[5] = int32()
	[6] = int32()
	[7] = int32()
	[8] = int32()
	[9] = int32()
}
feInvert sets out = z^-1.
func (,  *fieldElement) {
	var , , ,  fieldElement
	var  int

	feSquare(&, )
	for  = 1;  < 1; ++ {
		feSquare(&, &)
	}
	feSquare(&, &)
	for  = 1;  < 2; ++ {
		feSquare(&, &)
	}
	feMul(&, , &)
	feMul(&, &, &)
	feSquare(&, &)
	for  = 1;  < 1; ++ {
		feSquare(&, &)
	}
	feMul(&, &, &)
	feSquare(&, &)
	for  = 1;  < 5; ++ {
		feSquare(&, &)
	}
	feMul(&, &, &)
	feSquare(&, &)
	for  = 1;  < 10; ++ {
		feSquare(&, &)
	}
	feMul(&, &, &)
	feSquare(&, &)
	for  = 1;  < 20; ++ {
		feSquare(&, &)
	}
	feMul(&, &, &)
	feSquare(&, &)
	for  = 1;  < 10; ++ {
		feSquare(&, &)
	}
	feMul(&, &, &)
	feSquare(&, &)
	for  = 1;  < 50; ++ {
		feSquare(&, &)
	}
	feMul(&, &, &)
	feSquare(&, &)
	for  = 1;  < 100; ++ {
		feSquare(&, &)
	}
	feMul(&, &, &)
	feSquare(&, &)
	for  = 1;  < 50; ++ {
		feSquare(&, &)
	}
	feMul(&, &, &)
	feSquare(&, &)
	for  = 1;  < 5; ++ {
		feSquare(&, &)
	}
	feMul(, &, &)
}

func (, ,  *[32]byte) {
	var  [32]byte

	copy([:], [:])
	[0] &= 248
	[31] &= 127
	[31] |= 64

	var , , , , , ,  fieldElement
	feFromBytes(&, )
	feOne(&)
	feCopy(&, &)
	feOne(&)

	 := int32(0)
	for  := 254;  >= 0; -- {
		 := [/8] >> uint(&7)
		 &= 1
		 ^= int32()
		feCSwap(&, &, )
		feCSwap(&, &, )
		 = int32()

		feSub(&, &, &)
		feSub(&, &, &)
		feAdd(&, &, &)
		feAdd(&, &, &)
		feMul(&, &, &)
		feMul(&, &, &)
		feSquare(&, &)
		feSquare(&, &)
		feAdd(&, &, &)
		feSub(&, &, &)
		feMul(&, &, &)
		feSub(&, &, &)
		feSquare(&, &)
		feMul121666(&, &)
		feSquare(&, &)
		feAdd(&, &, &)
		feMul(&, &, &)
		feMul(&, &, &)
	}

	feCSwap(&, &, )
	feCSwap(&, &, )

	feInvert(&, &)
	feMul(&, &, &)
	feToBytes(, &)