Curve25519.java example

Explorer
RipplePower-master
- eclipse
  - RipplePower
    - src
    - tests
      - org
        ripple
        power
        chart
        test
        DialogTest.java
        OnionKitHelper.java
        TestNames.java
        TextEditorDemo.java
        TextEditorDemo2.java
  - jcoinlibs
    - src
- loon-shell
  - apktest
    - gen
      - com
        shell
        apktest
        BuildConfig.java
        R.java
    - src
      - com
        shell
        apktest
        Main.java
  - src
    - loon
      - apk
        shell
        APKActivityCallback.java
        APKActivityControl.java
        APKCallbacks.java
        APKCreateFailedException.java
        APKDexLoader.java
        APKInstrumentObject.java
        APKManager.java
        APKNotFoundException.java
        APKObject.java
        APKProxy.java
        APKReflect.java
        APKServiceProxy.java
        APKSetting.java
        APKShellActivity.java
        APKTools.java
package org.ripple.power;

public final class Curve25519 {

	/* key size */
	public static final int KEY_SIZE = 32;

	/* 0 */
	public static final byte[] ZERO = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
			0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };

	/* the prime 2^255-19 */
	public static final byte[] PRIME = { (byte) 237, (byte) 255, (byte) 255,
			(byte) 255, (byte) 255, (byte) 255, (byte) 255, (byte) 255,
			(byte) 255, (byte) 255, (byte) 255, (byte) 255, (byte) 255,
			(byte) 255, (byte) 255, (byte) 255, (byte) 255, (byte) 255,
			(byte) 255, (byte) 255, (byte) 255, (byte) 255, (byte) 255,
			(byte) 255, (byte) 255, (byte) 255, (byte) 255, (byte) 255,
			(byte) 255, (byte) 255, (byte) 255, (byte) 127 };

	/* group order (a prime near 2^252+2^124) */
	public static final byte[] ORDER = { (byte) 237, (byte) 211, (byte) 245,
			(byte) 92, (byte) 26, (byte) 99, (byte) 18, (byte) 88, (byte) 214,
			(byte) 156, (byte) 247, (byte) 162, (byte) 222, (byte) 249,
			(byte) 222, (byte) 20, (byte) 0, (byte) 0, (byte) 0, (byte) 0,
			(byte) 0, (byte) 0, (byte) 0, (byte) 0, (byte) 0, (byte) 0,
			(byte) 0, (byte) 0, (byte) 0, (byte) 0, (byte) 0, (byte) 16 };

	/********* KEY AGREEMENT *********/

	/*
	 * Private key clamping k [out] your private key for key agreement k [in] 32
	 * random bytes
	 */
	public static void clamp(byte[] k) {
		k[31] &= 0x7F;
		k[31] |= 0x40;
		k[0] &= 0xF8;
	}

	/*
	 * Key-pair generation P [out] your public key s [out] your private key for
	 * signing k [out] your private key for key agreement k [in] 32 random bytes
	 * s may be NULL if you don't care
	 * 
	 * WARNING: if s is not NULL, this function has data-dependent timing
	 */
	public static void keygen(byte[] P, byte[] k) {
		clamp(k);
		core(P, k, null);
	}

	/*
	 * Key agreement Z [out] shared secret (needs hashing before use) k [in]
	 * your private key for key agreement P [in] peer's public key
	 */
	public static void curve(byte[] Z, byte[] k, byte[] P) {
		core(Z, null, P);
	}

	/********* DIGITAL SIGNATURES *********/

	/*
	 * deterministic EC-KCDSA
	 * 
	 * s is the private key for signing P is the corresponding public key Z is
	 * the context data (signer public key or certificate, etc)
	 * 
	 * signing:
	 * 
	 * m = hash(Z, message) x = hash(m, s) keygen25519(Y, NULL, x); r = hash(Y);
	 * h = m XOR r sign25519(v, h, x, s);
	 * 
	 * output (v,r) as the signature
	 * 
	 * verification:
	 * 
	 * m = hash(Z, message); h = m XOR r verify25519(Y, v, h, P)
	 * 
	 * confirm r == hash(Y)
	 * 
	 * It would seem to me that it would be simpler to have the signer directly
	 * do h = hash(m, Y) and send that to the recipient instead of r, who can
	 * verify the signature by checking h == hash(m, Y). If there are any
	 * problems with such a scheme, please let me know.
	 * 
	 * Also, EC-KCDSA (like most DS algorithms) picks x random, which is a waste
	 * of perfectly good entropy, but does allow Y to be calculated in advance
	 * of (or parallel to) hashing the message.
	 */

	/*
	 * Signature generation primitive, calculates (x-h)s mod q v [out] signature
	 * value h [in] signature hash (of message, signature pub key, and context
	 * data) x [in] signature private key s [in] private key for signing returns
	 * true on success, false on failure (use different x or h)
	 */
	public static boolean sign(byte[] v, byte[] h, byte[] x, byte[] s) {
		// v = (x - h) s mod q
		int w, i;
		byte[] h1 = new byte[32], x1 = new byte[32];
		byte[] tmp1 = new byte[64];
		byte[] tmp2 = new byte[64];

		// Don't clobber the arguments, be nice!
		cpy32(h1, h);
		cpy32(x1, x);

		// Reduce modulo group order
		byte[] tmp3 = new byte[32];
		divmod(tmp3, h1, 32, ORDER, 32);
		divmod(tmp3, x1, 32, ORDER, 32);

		// v = x1 - h1
		// If v is negative, add the group order to it to become positive.
		// If v was already positive we don't have to worry about overflow
		// when adding the order because v < ORDER and 2*ORDER < 2^256
		mula_small(v, x1, 0, h1, 32, -1);
		mula_small(v, v, 0, ORDER, 32, 1);

		// tmp1 = (x-h)*s mod q
		mula32(tmp1, v, s, 32, 1);
		divmod(tmp2, tmp1, 64, ORDER, 32);

		for (w = 0, i = 0; i < 32; i++)
			w |= v[i] = tmp1[i];
		return w != 0;
	}

	/*
	 * Signature verification primitive, calculates Y = vP + hG Y [out]
	 * signature public key v [in] signature value h [in] signature hash P [in]
	 * public key
	 */
	public static void verify(byte[] Y, byte[] v, byte[] h, byte[] P) {
		/* Y = v abs(P) + h G */
		byte[] d = new byte[32];
		long10[] p = new long10[] { new long10(), new long10() }, s = new long10[] {
				new long10(), new long10() }, yx = new long10[] { new long10(),
				new long10(), new long10() }, yz = new long10[] { new long10(),
				new long10(), new long10() }, t1 = new long10[] { new long10(),
				new long10(), new long10() }, t2 = new long10[] { new long10(),
				new long10(), new long10() };

		int vi = 0, hi = 0, di = 0, nvh = 0, i, j, k;

		/* set p[0] to G and p[1] to P */

		set(p[0], 9);
		unpack(p[1], P);

		/* set s[0] to P+G and s[1] to P-G */

		/* s[0] = (Py^2 + Gy^2 - 2 Py Gy)/(Px - Gx)^2 - Px - Gx - 486662 */
		/* s[1] = (Py^2 + Gy^2 + 2 Py Gy)/(Px - Gx)^2 - Px - Gx - 486662 */

		x_to_y2(t1[0], t2[0], p[1]); /* t2[0] = Py^2 */
		sqrt(t1[0], t2[0]); /* t1[0] = Py or -Py */
		j = is_negative(t1[0]); /* ... check which */
		t2[0]._0 += 39420360; /* t2[0] = Py^2 + Gy^2 */
		mul(t2[1], BASE_2Y, t1[0]);/* t2[1] = 2 Py Gy or -2 Py Gy */
		sub(t1[j], t2[0], t2[1]); /* t1[0] = Py^2 + Gy^2 - 2 Py Gy */
		add(t1[1 - j], t2[0], t2[1]);/* t1[1] = Py^2 + Gy^2 + 2 Py Gy */
		cpy(t2[0], p[1]); /* t2[0] = Px */
		t2[0]._0 -= 9; /* t2[0] = Px - Gx */
		sqr(t2[1], t2[0]); /* t2[1] = (Px - Gx)^2 */
		recip(t2[0], t2[1], 0); /* t2[0] = 1/(Px - Gx)^2 */
		mul(s[0], t1[0], t2[0]); /* s[0] = t1[0]/(Px - Gx)^2 */
		sub(s[0], s[0], p[1]); /* s[0] = t1[0]/(Px - Gx)^2 - Px */
		s[0]._0 -= 9 + 486662; /* s[0] = X(P+G) */
		mul(s[1], t1[1], t2[0]); /* s[1] = t1[1]/(Px - Gx)^2 */
		sub(s[1], s[1], p[1]); /* s[1] = t1[1]/(Px - Gx)^2 - Px */
		s[1]._0 -= 9 + 486662; /* s[1] = X(P-G) */
		mul_small(s[0], s[0], 1); /* reduce s[0] */
		mul_small(s[1], s[1], 1); /* reduce s[1] */

		/* prepare the chain */
		for (i = 0; i < 32; i++) {
			vi = (vi >> 8) ^ (v[i] & 0xFF) ^ ((v[i] & 0xFF) << 1);
			hi = (hi >> 8) ^ (h[i] & 0xFF) ^ ((h[i] & 0xFF) << 1);
			nvh = ~(vi ^ hi);
			di = (nvh & (di & 0x80) >> 7) ^ vi;
			di ^= nvh & (di & 0x01) << 1;
			di ^= nvh & (di & 0x02) << 1;
			di ^= nvh & (di & 0x04) << 1;
			di ^= nvh & (di & 0x08) << 1;
			di ^= nvh & (di & 0x10) << 1;
			di ^= nvh & (di & 0x20) << 1;
			di ^= nvh & (di & 0x40) << 1;
			d[i] = (byte) di;
		}

		di = ((nvh & (di & 0x80) << 1) ^ vi) >> 8;

		/* initialize state */
		set(yx[0], 1);
		cpy(yx[1], p[di]);
		cpy(yx[2], s[0]);
		set(yz[0], 0);
		set(yz[1], 1);
		set(yz[2], 1);

		/*
		 * y[0] is (even)P + (even)G y[1] is (even)P + (odd)G if current d-bit
		 * is 0 y[1] is (odd)P + (even)G if current d-bit is 1 y[2] is (odd)P +
		 * (odd)G
		 */

		vi = 0;
		hi = 0;

		/* and go for it! */
		for (i = 32; i-- != 0;) {
			vi = (vi << 8) | (v[i] & 0xFF);
			hi = (hi << 8) | (h[i] & 0xFF);
			di = (di << 8) | (d[i] & 0xFF);

			for (j = 8; j-- != 0;) {
				mont_prep(t1[0], t2[0], yx[0], yz[0]);
				mont_prep(t1[1], t2[1], yx[1], yz[1]);
				mont_prep(t1[2], t2[2], yx[2], yz[2]);

				k = ((vi ^ vi >> 1) >> j & 1) + ((hi ^ hi >> 1) >> j & 1);
				mont_dbl(yx[2], yz[2], t1[k], t2[k], yx[0], yz[0]);

				k = (di >> j & 2) ^ ((di >> j & 1) << 1);
				mont_add(t1[1], t2[1], t1[k], t2[k], yx[1], yz[1],
						p[di >> j & 1]);

				mont_add(t1[2], t2[2], t1[0], t2[0], yx[2], yz[2],
						s[((vi ^ hi) >> j & 2) >> 1]);
			}
		}

		k = (vi & 1) + (hi & 1);
		recip(t1[0], yz[k], 0);
		mul(t1[1], yx[k], t1[0]);

		pack(t1[1], Y);
	}

	public static boolean isCanonicalSignature(byte[] v) {
		byte[] vCopy = java.util.Arrays.copyOfRange(v, 0, 32);
		byte[] tmp = new byte[32];
		divmod(tmp, vCopy, 32, ORDER, 32);
		for (int i = 0; i < 32; i++) {
			if (v[i] != vCopy[i])
				return false;
		}
		return true;
	}

	public static boolean isCanonicalPublicKey(byte[] publicKey) {
		if (publicKey.length != 32) {
			return false;
		}
		long10 publicKeyUnpacked = new long10();
		unpack(publicKeyUnpacked, publicKey);
		byte[] publicKeyCopy = new byte[32];
		pack(publicKeyUnpacked, publicKeyCopy);
		for (int i = 0; i < 32; i++) {
			if (publicKeyCopy[i] != publicKey[i]) {
				return false;
			}
		}
		return true;
	}

	// /////////////////////////////////////////////////////////////////////////

	/*
	 * sahn0: Using this class instead of long[10] to avoid bounds checks.
	 */
	private static final class long10 {
		public long10() {
		}

		public long10(long _0, long _1, long _2, long _3, long _4, long _5,
				long _6, long _7, long _8, long _9) {
			this._0 = _0;
			this._1 = _1;
			this._2 = _2;
			this._3 = _3;
			this._4 = _4;
			this._5 = _5;
			this._6 = _6;
			this._7 = _7;
			this._8 = _8;
			this._9 = _9;
		}

		public long _0, _1, _2, _3, _4, _5, _6, _7, _8, _9;
	}

	/********************* radix 2^8 math *********************/

	private static void cpy32(byte[] d, byte[] s) {
		int i;
		for (i = 0; i < 32; i++)
			d[i] = s[i];
	}

	/* p[m..n+m-1] = q[m..n+m-1] + z * x */
	/* n is the size of x */
	/* n+m is the size of p and q */
	private static int mula_small(byte[] p, byte[] q, int m, byte[] x, int n,
			int z) {
		int v = 0;
		for (int i = 0; i < n; ++i) {
			v += (q[i + m] & 0xFF) + z * (x[i] & 0xFF);
			p[i + m] = (byte) v;
			v >>= 8;
		}
		return v;
	}

	/*
	 * p += x * y * z where z is a small integer x is size 32, y is size t, p is
	 * size 32+t y is allowed to overlap with p+32 if you don't care about the
	 * upper half
	 */
	private static int mula32(byte[] p, byte[] x, byte[] y, int t, int z) {
		final int n = 31;
		int w = 0;
		int i = 0;
		for (; i < t; i++) {
			int zy = z * (y[i] & 0xFF);
			w += mula_small(p, p, i, x, n, zy) + (p[i + n] & 0xFF) + zy
					* (x[n] & 0xFF);
			p[i + n] = (byte) w;
			w >>= 8;
		}
		p[i + n] = (byte) (w + (p[i + n] & 0xFF));
		return w >> 8;
	}

	/*
	 * divide r (size n) by d (size t), returning quotient q and remainder r
	 * quotient is size n-t+1, remainder is size t requires t > 0 && d[t-1] != 0
	 * requires that r[-1] and d[-1] are valid memory locations q may overlap
	 * with r+t
	 */
	private static void divmod(byte[] q, byte[] r, int n, byte[] d, int t) {
		int rn = 0;
		int dt = ((d[t - 1] & 0xFF) << 8);
		if (t > 1) {
			dt |= (d[t - 2] & 0xFF);
		}
		while (n-- >= t) {
			int z = (rn << 16) | ((r[n] & 0xFF) << 8);
			if (n > 0) {
				z |= (r[n - 1] & 0xFF);
			}
			z /= dt;
			rn += mula_small(r, r, n - t + 1, d, t, -z);
			q[n - t + 1] = (byte) ((z + rn) & 0xFF); /* rn is 0 or -1 (underflow) */
			mula_small(r, r, n - t + 1, d, t, -rn);
			rn = (r[n] & 0xFF);
			r[n] = 0;
		}
		r[t - 1] = (byte) rn;
	}

	/********************* radix 2^25.5 GF(2^255-19) math *********************/

	private static final int P25 = 33554431; /* (1 << 25) - 1 */
	private static final int P26 = 67108863; /* (1 << 26) - 1 */

	/* Convert to internal format from little-endian byte format */
	private static void unpack(long10 x, byte[] m) {
		x._0 = ((m[0] & 0xFF)) | ((m[1] & 0xFF)) << 8 | (m[2] & 0xFF) << 16
				| ((m[3] & 0xFF) & 3) << 24;
		x._1 = ((m[3] & 0xFF) & ~3) >> 2 | (m[4] & 0xFF) << 6
				| (m[5] & 0xFF) << 14 | ((m[6] & 0xFF) & 7) << 22;
		x._2 = ((m[6] & 0xFF) & ~7) >> 3 | (m[7] & 0xFF) << 5
				| (m[8] & 0xFF) << 13 | ((m[9] & 0xFF) & 31) << 21;
		x._3 = ((m[9] & 0xFF) & ~31) >> 5 | (m[10] & 0xFF) << 3
				| (m[11] & 0xFF) << 11 | ((m[12] & 0xFF) & 63) << 19;
		x._4 = ((m[12] & 0xFF) & ~63) >> 6 | (m[13] & 0xFF) << 2
				| (m[14] & 0xFF) << 10 | (m[15] & 0xFF) << 18;
		x._5 = (m[16] & 0xFF) | (m[17] & 0xFF) << 8 | (m[18] & 0xFF) << 16
				| ((m[19] & 0xFF) & 1) << 24;
		x._6 = ((m[19] & 0xFF) & ~1) >> 1 | (m[20] & 0xFF) << 7
				| (m[21] & 0xFF) << 15 | ((m[22] & 0xFF) & 7) << 23;
		x._7 = ((m[22] & 0xFF) & ~7) >> 3 | (m[23] & 0xFF) << 5
				| (m[24] & 0xFF) << 13 | ((m[25] & 0xFF) & 15) << 21;
		x._8 = ((m[25] & 0xFF) & ~15) >> 4 | (m[26] & 0xFF) << 4
				| (m[27] & 0xFF) << 12 | ((m[28] & 0xFF) & 63) << 20;
		x._9 = ((m[28] & 0xFF) & ~63) >> 6 | (m[29] & 0xFF) << 2
				| (m[30] & 0xFF) << 10 | (m[31] & 0xFF) << 18;
	}

	/* Check if reduced-form input >= 2^255-19 */
	private static boolean is_overflow(long10 x) {
		return (((x._0 > P26 - 19))
				&& ((x._1 & x._3 & x._5 & x._7 & x._9) == P25) && ((x._2 & x._4
				& x._6 & x._8) == P26))
				|| (x._9 > P25);
	}

	/*
	 * Convert from internal format to little-endian byte format. The number
	 * must be in a reduced form which is output by the following ops: unpack,
	 * mul, sqr set -- if input in range 0 .. P25 If you're unsure if the number
	 * is reduced, first multiply it by 1.
	 */
	private static void pack(long10 x, byte[] m) {
		int ld = 0, ud = 0;
		long t;
		ld = (is_overflow(x) ? 1 : 0) - ((x._9 < 0) ? 1 : 0);
		ud = ld * -(P25 + 1);
		ld *= 19;
		t = ld + x._0 + (x._1 << 26);
		m[0] = (byte) t;
		m[1] = (byte) (t >> 8);
		m[2] = (byte) (t >> 16);
		m[3] = (byte) (t >> 24);
		t = (t >> 32) + (x._2 << 19);
		m[4] = (byte) t;
		m[5] = (byte) (t >> 8);
		m[6] = (byte) (t >> 16);
		m[7] = (byte) (t >> 24);
		t = (t >> 32) + (x._3 << 13);
		m[8] = (byte) t;
		m[9] = (byte) (t >> 8);
		m[10] = (byte) (t >> 16);
		m[11] = (byte) (t >> 24);
		t = (t >> 32) + (x._4 << 6);
		m[12] = (byte) t;
		m[13] = (byte) (t >> 8);
		m[14] = (byte) (t >> 16);
		m[15] = (byte) (t >> 24);
		t = (t >> 32) + x._5 + (x._6 << 25);
		m[16] = (byte) t;
		m[17] = (byte) (t >> 8);
		m[18] = (byte) (t >> 16);
		m[19] = (byte) (t >> 24);
		t = (t >> 32) + (x._7 << 19);
		m[20] = (byte) t;
		m[21] = (byte) (t >> 8);
		m[22] = (byte) (t >> 16);
		m[23] = (byte) (t >> 24);
		t = (t >> 32) + (x._8 << 12);
		m[24] = (byte) t;
		m[25] = (byte) (t >> 8);
		m[26] = (byte) (t >> 16);
		m[27] = (byte) (t >> 24);
		t = (t >> 32) + ((x._9 + ud) << 6);
		m[28] = (byte) t;
		m[29] = (byte) (t >> 8);
		m[30] = (byte) (t >> 16);
		m[31] = (byte) (t >> 24);
	}

	/* Copy a number */
	private static void cpy(long10 out, long10 in) {
		out._0 = in._0;
		out._1 = in._1;
		out._2 = in._2;
		out._3 = in._3;
		out._4 = in._4;
		out._5 = in._5;
		out._6 = in._6;
		out._7 = in._7;
		out._8 = in._8;
		out._9 = in._9;
	}

	/* Set a number to value, which must be in range -185861411 .. 185861411 */
	private static void set(long10 out, int in) {
		out._0 = in;
		out._1 = 0;
		out._2 = 0;
		out._3 = 0;
		out._4 = 0;
		out._5 = 0;
		out._6 = 0;
		out._7 = 0;
		out._8 = 0;
		out._9 = 0;
	}

	/*
	 * Add/subtract two numbers. The inputs must be in reduced form, and the
	 * output isn't, so to do another addition or subtraction on the output,
	 * first multiply it by one to reduce it.
	 */
	private static void add(long10 xy, long10 x, long10 y) {
		xy._0 = x._0 + y._0;
		xy._1 = x._1 + y._1;
		xy._2 = x._2 + y._2;
		xy._3 = x._3 + y._3;
		xy._4 = x._4 + y._4;
		xy._5 = x._5 + y._5;
		xy._6 = x._6 + y._6;
		xy._7 = x._7 + y._7;
		xy._8 = x._8 + y._8;
		xy._9 = x._9 + y._9;
	}

	private static void sub(long10 xy, long10 x, long10 y) {
		xy._0 = x._0 - y._0;
		xy._1 = x._1 - y._1;
		xy._2 = x._2 - y._2;
		xy._3 = x._3 - y._3;
		xy._4 = x._4 - y._4;
		xy._5 = x._5 - y._5;
		xy._6 = x._6 - y._6;
		xy._7 = x._7 - y._7;
		xy._8 = x._8 - y._8;
		xy._9 = x._9 - y._9;
	}

	/*
	 * Multiply a number by a small integer in range -185861411 .. 185861411.
	 * The output is in reduced form, the input x need not be. x and xy may
	 * point to the same buffer.
	 */
	private static long10 mul_small(long10 xy, long10 x, long y) {
		long t;
		t = (x._8 * y);
		xy._8 = (t & ((1 << 26) - 1));
		t = (t >> 26) + (x._9 * y);
		xy._9 = (t & ((1 << 25) - 1));
		t = 19 * (t >> 25) + (x._0 * y);
		xy._0 = (t & ((1 << 26) - 1));
		t = (t >> 26) + (x._1 * y);
		xy._1 = (t & ((1 << 25) - 1));
		t = (t >> 25) + (x._2 * y);
		xy._2 = (t & ((1 << 26) - 1));
		t = (t >> 26) + (x._3 * y);
		xy._3 = (t & ((1 << 25) - 1));
		t = (t >> 25) + (x._4 * y);
		xy._4 = (t & ((1 << 26) - 1));
		t = (t >> 26) + (x._5 * y);
		xy._5 = (t & ((1 << 25) - 1));
		t = (t >> 25) + (x._6 * y);
		xy._6 = (t & ((1 << 26) - 1));
		t = (t >> 26) + (x._7 * y);
		xy._7 = (t & ((1 << 25) - 1));
		t = (t >> 25) + xy._8;
		xy._8 = (t & ((1 << 26) - 1));
		xy._9 += (t >> 26);
		return xy;
	}

	/*
	 * Multiply two numbers. The output is in reduced form, the inputs need not
	 * be.
	 */
	private static long10 mul(long10 xy, long10 x, long10 y) {
		/*
		 * sahn0: Using local variables to avoid class access. This seem to
		 * improve performance a bit...
		 */
		long x_0 = x._0, x_1 = x._1, x_2 = x._2, x_3 = x._3, x_4 = x._4, x_5 = x._5, x_6 = x._6, x_7 = x._7, x_8 = x._8, x_9 = x._9;
		long y_0 = y._0, y_1 = y._1, y_2 = y._2, y_3 = y._3, y_4 = y._4, y_5 = y._5, y_6 = y._6, y_7 = y._7, y_8 = y._8, y_9 = y._9;
		long t;
		t = (x_0 * y_8) + (x_2 * y_6) + (x_4 * y_4) + (x_6 * y_2) + (x_8 * y_0)
				+ 2 * ((x_1 * y_7) + (x_3 * y_5) + (x_5 * y_3) + (x_7 * y_1))
				+ 38 * (x_9 * y_9);
		xy._8 = (t & ((1 << 26) - 1));
		t = (t >> 26) + (x_0 * y_9) + (x_1 * y_8) + (x_2 * y_7) + (x_3 * y_6)
				+ (x_4 * y_5) + (x_5 * y_4) + (x_6 * y_3) + (x_7 * y_2)
				+ (x_8 * y_1) + (x_9 * y_0);
		xy._9 = (t & ((1 << 25) - 1));
		t = (x_0 * y_0)
				+ 19
				* ((t >> 25) + (x_2 * y_8) + (x_4 * y_6) + (x_6 * y_4) + (x_8 * y_2))
				+ 38
				* ((x_1 * y_9) + (x_3 * y_7) + (x_5 * y_5) + (x_7 * y_3) + (x_9 * y_1));
		xy._0 = (t & ((1 << 26) - 1));
		t = (t >> 26)
				+ (x_0 * y_1)
				+ (x_1 * y_0)
				+ 19
				* ((x_2 * y_9) + (x_3 * y_8) + (x_4 * y_7) + (x_5 * y_6)
						+ (x_6 * y_5) + (x_7 * y_4) + (x_8 * y_3) + (x_9 * y_2));
		xy._1 = (t & ((1 << 25) - 1));
		t = (t >> 25) + (x_0 * y_2) + (x_2 * y_0) + 19
				* ((x_4 * y_8) + (x_6 * y_6) + (x_8 * y_4)) + 2 * (x_1 * y_1)
				+ 38 * ((x_3 * y_9) + (x_5 * y_7) + (x_7 * y_5) + (x_9 * y_3));
		xy._2 = (t & ((1 << 26) - 1));
		t = (t >> 26)
				+ (x_0 * y_3)
				+ (x_1 * y_2)
				+ (x_2 * y_1)
				+ (x_3 * y_0)
				+ 19
				* ((x_4 * y_9) + (x_5 * y_8) + (x_6 * y_7) + (x_7 * y_6)
						+ (x_8 * y_5) + (x_9 * y_4));
		xy._3 = (t & ((1 << 25) - 1));
		t = (t >> 25) + (x_0 * y_4) + (x_2 * y_2) + (x_4 * y_0) + 19
				* ((x_6 * y_8) + (x_8 * y_6)) + 2 * ((x_1 * y_3) + (x_3 * y_1))
				+ 38 * ((x_5 * y_9) + (x_7 * y_7) + (x_9 * y_5));
		xy._4 = (t & ((1 << 26) - 1));
		t = (t >> 26) + (x_0 * y_5) + (x_1 * y_4) + (x_2 * y_3) + (x_3 * y_2)
				+ (x_4 * y_1) + (x_5 * y_0) + 19
				* ((x_6 * y_9) + (x_7 * y_8) + (x_8 * y_7) + (x_9 * y_6));
		xy._5 = (t & ((1 << 25) - 1));
		t = (t >> 25) + (x_0 * y_6) + (x_2 * y_4) + (x_4 * y_2) + (x_6 * y_0)
				+ 19 * (x_8 * y_8) + 2
				* ((x_1 * y_5) + (x_3 * y_3) + (x_5 * y_1)) + 38
				* ((x_7 * y_9) + (x_9 * y_7));
		xy._6 = (t & ((1 << 26) - 1));
		t = (t >> 26) + (x_0 * y_7) + (x_1 * y_6) + (x_2 * y_5) + (x_3 * y_4)
				+ (x_4 * y_3) + (x_5 * y_2) + (x_6 * y_1) + (x_7 * y_0) + 19
				* ((x_8 * y_9) + (x_9 * y_8));
		xy._7 = (t & ((1 << 25) - 1));
		t = (t >> 25) + xy._8;
		xy._8 = (t & ((1 << 26) - 1));
		xy._9 += (t >> 26);
		return xy;
	}

	/* Square a number. Optimization of mul25519(x2, x, x) */
	private static long10 sqr(long10 x2, long10 x) {
		long x_0 = x._0, x_1 = x._1, x_2 = x._2, x_3 = x._3, x_4 = x._4, x_5 = x._5, x_6 = x._6, x_7 = x._7, x_8 = x._8, x_9 = x._9;
		long t;
		t = (x_4 * x_4) + 2 * ((x_0 * x_8) + (x_2 * x_6)) + 38 * (x_9 * x_9)
				+ 4 * ((x_1 * x_7) + (x_3 * x_5));
		x2._8 = (t & ((1 << 26) - 1));
		t = (t >> 26)
				+ 2
				* ((x_0 * x_9) + (x_1 * x_8) + (x_2 * x_7) + (x_3 * x_6) + (x_4 * x_5));
		x2._9 = (t & ((1 << 25) - 1));
		t = 19 * (t >> 25) + (x_0 * x_0) + 38
				* ((x_2 * x_8) + (x_4 * x_6) + (x_5 * x_5)) + 76
				* ((x_1 * x_9) + (x_3 * x_7));
		x2._0 = (t & ((1 << 26) - 1));
		t = (t >> 26) + 2 * (x_0 * x_1) + 38
				* ((x_2 * x_9) + (x_3 * x_8) + (x_4 * x_7) + (x_5 * x_6));
		x2._1 = (t & ((1 << 25) - 1));
		t = (t >> 25) + 19 * (x_6 * x_6) + 2 * ((x_0 * x_2) + (x_1 * x_1)) + 38
				* (x_4 * x_8) + 76 * ((x_3 * x_9) + (x_5 * x_7));
		x2._2 = (t & ((1 << 26) - 1));
		t = (t >> 26) + 2 * ((x_0 * x_3) + (x_1 * x_2)) + 38
				* ((x_4 * x_9) + (x_5 * x_8) + (x_6 * x_7));
		x2._3 = (t & ((1 << 25) - 1));
		t = (t >> 25) + (x_2 * x_2) + 2 * (x_0 * x_4) + 38
				* ((x_6 * x_8) + (x_7 * x_7)) + 4 * (x_1 * x_3) + 76
				* (x_5 * x_9);
		x2._4 = (t & ((1 << 26) - 1));
		t = (t >> 26) + 2 * ((x_0 * x_5) + (x_1 * x_4) + (x_2 * x_3)) + 38
				* ((x_6 * x_9) + (x_7 * x_8));
		x2._5 = (t & ((1 << 25) - 1));
		t = (t >> 25) + 19 * (x_8 * x_8) + 2
				* ((x_0 * x_6) + (x_2 * x_4) + (x_3 * x_3)) + 4 * (x_1 * x_5)
				+ 76 * (x_7 * x_9);
		x2._6 = (t & ((1 << 26) - 1));
		t = (t >> 26) + 2
				* ((x_0 * x_7) + (x_1 * x_6) + (x_2 * x_5) + (x_3 * x_4)) + 38
				* (x_8 * x_9);
		x2._7 = (t & ((1 << 25) - 1));
		t = (t >> 25) + x2._8;
		x2._8 = (t & ((1 << 26) - 1));
		x2._9 += (t >> 26);
		return x2;
	}

	/*
	 * Calculates a reciprocal. The output is in reduced form, the inputs need
	 * not be. Simply calculates y = x^(p-2) so it's not too fast.
	 */
	/* When sqrtassist is true, it instead calculates y = x^((p-5)/8) */
	private static void recip(long10 y, long10 x, int sqrtassist) {
		long10 t0 = new long10(), t1 = new long10(), t2 = new long10(), t3 = new long10(), t4 = new long10();
		int i;
		/* the chain for x^(2^255-21) is straight from djb's implementation */
		sqr(t1, x); /* 2 == 2 * 1 */
		sqr(t2, t1); /* 4 == 2 * 2 */
		sqr(t0, t2); /* 8 == 2 * 4 */
		mul(t2, t0, x); /* 9 == 8 + 1 */
		mul(t0, t2, t1); /* 11 == 9 + 2 */
		sqr(t1, t0); /* 22 == 2 * 11 */
		mul(t3, t1, t2); /*
						 * 31 == 22 + 9 == 2^5 - 2^0
						 */
		sqr(t1, t3); /* 2^6 - 2^1 */
		sqr(t2, t1); /* 2^7 - 2^2 */
		sqr(t1, t2); /* 2^8 - 2^3 */
		sqr(t2, t1); /* 2^9 - 2^4 */
		sqr(t1, t2); /* 2^10 - 2^5 */
		mul(t2, t1, t3); /* 2^10 - 2^0 */
		sqr(t1, t2); /* 2^11 - 2^1 */
		sqr(t3, t1); /* 2^12 - 2^2 */
		for (i = 1; i < 5; i++) {
			sqr(t1, t3);
			sqr(t3, t1);
		} /* t3 *//* 2^20 - 2^10 */
		mul(t1, t3, t2); /* 2^20 - 2^0 */
		sqr(t3, t1); /* 2^21 - 2^1 */
		sqr(t4, t3); /* 2^22 - 2^2 */
		for (i = 1; i < 10; i++) {
			sqr(t3, t4);
			sqr(t4, t3);
		} /* t4 *//* 2^40 - 2^20 */
		mul(t3, t4, t1); /* 2^40 - 2^0 */
		for (i = 0; i < 5; i++) {
			sqr(t1, t3);
			sqr(t3, t1);
		} /* t3 *//* 2^50 - 2^10 */
		mul(t1, t3, t2); /* 2^50 - 2^0 */
		sqr(t2, t1); /* 2^51 - 2^1 */
		sqr(t3, t2); /* 2^52 - 2^2 */
		for (i = 1; i < 25; i++) {
			sqr(t2, t3);
			sqr(t3, t2);
		} /* t3 *//* 2^100 - 2^50 */
		mul(t2, t3, t1); /* 2^100 - 2^0 */
		sqr(t3, t2); /* 2^101 - 2^1 */
		sqr(t4, t3); /* 2^102 - 2^2 */
		for (i = 1; i < 50; i++) {
			sqr(t3, t4);
			sqr(t4, t3);
		} /* t4 *//* 2^200 - 2^100 */
		mul(t3, t4, t2); /* 2^200 - 2^0 */
		for (i = 0; i < 25; i++) {
			sqr(t4, t3);
			sqr(t3, t4);
		} /* t3 *//* 2^250 - 2^50 */
		mul(t2, t3, t1); /* 2^250 - 2^0 */
		sqr(t1, t2); /* 2^251 - 2^1 */
		sqr(t2, t1); /* 2^252 - 2^2 */
		if (sqrtassist != 0) {
			mul(y, x, t2); /* 2^252 - 3 */
		} else {
			sqr(t1, t2); /* 2^253 - 2^3 */
			sqr(t2, t1); /* 2^254 - 2^4 */
			sqr(t1, t2); /* 2^255 - 2^5 */
			mul(y, t1, t0); /* 2^255 - 21 */
		}
	}

	/* checks if x is "negative", requires reduced input */
	private static int is_negative(long10 x) {
		return (int) (((is_overflow(x) || (x._9 < 0)) ? 1 : 0) ^ (x._0 & 1));
	}

	/* a square root */
	private static void sqrt(long10 x, long10 u) {
		long10 v = new long10(), t1 = new long10(), t2 = new long10();
		add(t1, u, u); /* t1 = 2u */
		recip(v, t1, 1); /* v = (2u)^((p-5)/8) */
		sqr(x, v); /* x = v^2 */
		mul(t2, t1, x); /* t2 = 2uv^2 */
		t2._0--; /* t2 = 2uv^2-1 */
		mul(t1, v, t2); /* t1 = v(2uv^2-1) */
		mul(x, u, t1); /* x = uv(2uv^2-1) */
	}

	/********************* Elliptic curve *********************/

	/* y^2 = x^3 + 486662 x^2 + x over GF(2^255-19) */

	/*
	 * t1 = ax + az t2 = ax - az
	 */
	private static void mont_prep(long10 t1, long10 t2, long10 ax, long10 az) {
		add(t1, ax, az);
		sub(t2, ax, az);
	}

	/*
	 * A = P + Q where X(A) = ax/az X(P) = (t1+t2)/(t1-t2) X(Q) =
	 * (t3+t4)/(t3-t4) X(P-Q) = dx clobbers t1 and t2, preserves t3 and t4
	 */
	private static void mont_add(long10 t1, long10 t2, long10 t3, long10 t4,
			long10 ax, long10 az, long10 dx) {
		mul(ax, t2, t3);
		mul(az, t1, t4);
		add(t1, ax, az);
		sub(t2, ax, az);
		sqr(ax, t1);
		sqr(t1, t2);
		mul(az, t1, dx);
	}

	/*
	 * B = 2 * Q where X(B) = bx/bz X(Q) = (t3+t4)/(t3-t4) clobbers t1 and t2,
	 * preserves t3 and t4
	 */
	private static void mont_dbl(long10 t1, long10 t2, long10 t3, long10 t4,
			long10 bx, long10 bz) {
		sqr(t1, t3);
		sqr(t2, t4);
		mul(bx, t1, t2);
		sub(t2, t1, t2);
		mul_small(bz, t2, 121665);
		add(t1, t1, bz);
		mul(bz, t1, t2);
	}

	/*
	 * Y^2 = X^3 + 486662 X^2 + X t is a temporary
	 */
	private static void x_to_y2(long10 t, long10 y2, long10 x) {
		sqr(t, x);
		mul_small(y2, x, 486662);
		add(t, t, y2);
		t._0++;
		mul(y2, t, x);
	}

	/* P = kG and s = sign(P)/k */
	private static void core(byte[] Px, byte[] k, byte[] Gx) {
		long10 dx = new long10(), t1 = new long10(), t2 = new long10(), t3 = new long10(), t4 = new long10();
		long10[] x = new long10[] { new long10(), new long10() }, z = new long10[] {
				new long10(), new long10() };
		int i, j;

		/* unpack the base */
		if (Gx != null)
			unpack(dx, Gx);
		else
			set(dx, 9);

		/* 0G = point-at-infinity */
		set(x[0], 1);
		set(z[0], 0);

		/* 1G = G */
		cpy(x[1], dx);
		set(z[1], 1);

		for (i = 32; i-- != 0;) {
			if (i == 0) {
				i = 0;
			}
			for (j = 8; j-- != 0;) {
				/* swap arguments depending on bit */
				int bit1 = (k[i] & 0xFF) >> j & 1;
				int bit0 = ~(k[i] & 0xFF) >> j & 1;
				long10 ax = x[bit0];
				long10 az = z[bit0];
				long10 bx = x[bit1];
				long10 bz = z[bit1];

				/* a' = a + b */
				/* b' = 2 b */
				mont_prep(t1, t2, ax, az);
				mont_prep(t3, t4, bx, bz);
				mont_add(t1, t2, t3, t4, ax, az, dx);
				mont_dbl(t1, t2, t3, t4, bx, bz);
			}
		}

		recip(t1, z[0], 0);
		mul(dx, x[0], t1);
		pack(dx, Px);

	}

	/* constants 2Gy and 1/(2Gy) */
	private static final long10 BASE_2Y = new long10(39999547, 18689728,
			59995525, 1648697, 57546132, 24010086, 19059592, 5425144, 63499247,
			16420658);

}