25 files changed, 5498 insertions, 1042 deletions
diff --git a/bcprov/src/main/java/org/bouncycastle/math/ec/AbstractECMultiplier.java b/bcprov/src/main/java/org/bouncycastle/math/ec/AbstractECMultiplier.java
new file mode 100644
index 0000000..69ab797
--- /dev/null
+++ b/bcprov/src/main/java/org/bouncycastle/math/ec/AbstractECMultiplier.java
@@ -0,0 +1,20 @@
+package org.bouncycastle.math.ec;
+
+import java.math.BigInteger;
+
+public abstract class AbstractECMultiplier implements ECMultiplier
+{
+    public ECPoint multiply(ECPoint p, BigInteger k)
+    {
+        int sign = k.signum();
+        if (sign == 0 || p.isInfinity())
+        {
+            return p.getCurve().getInfinity();
+        }
+
+        ECPoint positive = multiplyPositive(p, k.abs());
+        return sign > 0 ? positive : positive.negate();
+    }
+
+    protected abstract ECPoint multiplyPositive(ECPoint p, BigInteger k);
+}
diff --git a/bcprov/src/main/java/org/bouncycastle/math/ec/DoubleAddMultiplier.java b/bcprov/src/main/java/org/bouncycastle/math/ec/DoubleAddMultiplier.java
new file mode 100644
index 0000000..aae2e00
--- /dev/null
+++ b/bcprov/src/main/java/org/bouncycastle/math/ec/DoubleAddMultiplier.java
@@ -0,0 +1,24 @@
+package org.bouncycastle.math.ec;
+
+import java.math.BigInteger;
+
+public class DoubleAddMultiplier extends AbstractECMultiplier
+{
+    /**
+     * Joye's double-add algorithm.
+     */
+    protected ECPoint multiplyPositive(ECPoint p, BigInteger k)
+    {
+        ECPoint[] R = new ECPoint[]{ p.getCurve().getInfinity(), p };
+
+        int n = k.bitLength();
+        for (int i = 0; i < n; ++i)
+        {
+            int b = k.testBit(i) ? 1 : 0;
+            int bp = 1 - b;
+            R[bp] = R[bp].twicePlus(R[b]);
+        }
+
+        return R[0];
+    }
+}
diff --git a/bcprov/src/main/java/org/bouncycastle/math/ec/ECAlgorithms.java b/bcprov/src/main/java/org/bouncycastle/math/ec/ECAlgorithms.java
index 78a7a8f..d640b5f 100644
--- a/bcprov/src/main/java/org/bouncycastle/math/ec/ECAlgorithms.java
+++ b/bcprov/src/main/java/org/bouncycastle/math/ec/ECAlgorithms.java
@@ -7,16 +7,13 @@ public class ECAlgorithms
     public static ECPoint sumOfTwoMultiplies(ECPoint P, BigInteger a,
         ECPoint Q, BigInteger b)
     {
-        ECCurve c = P.getCurve();
-        if (!c.equals(Q.getCurve()))
-        {
-            throw new IllegalArgumentException("P and Q must be on same curve");
-        }
+        ECCurve cp = P.getCurve();
+        Q = importPoint(cp, Q);
 
         // Point multiplication for Koblitz curves (using WTNAF) beats Shamir's trick
-        if (c instanceof ECCurve.F2m)
+        if (cp instanceof ECCurve.F2m)
         {
-            ECCurve.F2m f2mCurve = (ECCurve.F2m)c;
+            ECCurve.F2m f2mCurve = (ECCurve.F2m)cp;
             if (f2mCurve.isKoblitz())
             {
                 return P.multiply(a).add(Q.multiply(b));
@@ -48,43 +45,83 @@ public class ECAlgorithms
     public static ECPoint shamirsTrick(ECPoint P, BigInteger k,
         ECPoint Q, BigInteger l)
     {
-        if (!P.getCurve().equals(Q.getCurve()))
+        ECCurve cp = P.getCurve();
+        Q = importPoint(cp, Q);
+
+        return implShamirsTrick(P, k, Q, l);
+    }
+
+    public static ECPoint importPoint(ECCurve c, ECPoint p)
+    {
+        ECCurve cp = p.getCurve();
+        if (!c.equals(cp))
         {
-            throw new IllegalArgumentException("P and Q must be on same curve");
+            throw new IllegalArgumentException("Point must be on the same curve");
         }
+        return c.importPoint(p);
+    }
 
-        return implShamirsTrick(P, k, Q, l);
+    static void implMontgomeryTrick(ECFieldElement[] zs, int off, int len)
+    {
+        /*
+         * Uses the "Montgomery Trick" to invert many field elements, with only a single actual
+         * field inversion. See e.g. the paper:
+         * "Fast Multi-scalar Multiplication Methods on Elliptic Curves with Precomputation Strategy Using Montgomery Trick"
+         * by Katsuyuki Okeya, Kouichi Sakurai.
+         */
+
+        ECFieldElement[] c = new ECFieldElement[len];
+        c[0] = zs[off];
+
+        int i = 0;
+        while (++i < len)
+        {
+            c[i] = c[i - 1].multiply(zs[off + i]);
+        }
+
+        ECFieldElement u = c[--i].invert();
+
+        while (i > 0)
+        {
+            int j = off + i--;
+            ECFieldElement tmp = zs[j];
+            zs[j] = c[i].multiply(u);
+            u = u.multiply(tmp);
+        }
+
+        zs[off] = u;
     }
 
-    private static ECPoint implShamirsTrick(ECPoint P, BigInteger k,
+    static ECPoint implShamirsTrick(ECPoint P, BigInteger k,
         ECPoint Q, BigInteger l)
     {
-        int m = Math.max(k.bitLength(), l.bitLength());
-        ECPoint Z = P.add(Q);
-        ECPoint R = P.getCurve().getInfinity();
+        ECCurve curve = P.getCurve();
+        ECPoint infinity = curve.getInfinity();
+
+        // TODO conjugate co-Z addition (ZADDC) can return both of these
+        ECPoint PaddQ = P.add(Q);
+        ECPoint PsubQ = P.subtract(Q);
+
+        ECPoint[] points = new ECPoint[]{ Q, PsubQ, P, PaddQ };
+        curve.normalizeAll(points);
 
-        for (int i = m - 1; i >= 0; --i)
+        ECPoint[] table = new ECPoint[] {
+            points[3].negate(), points[2].negate(), points[1].negate(),
+            points[0].negate(), infinity, points[0],
+            points[1], points[2], points[3] };
+
+        byte[] jsf = WNafUtil.generateJSF(k, l);
+
+        ECPoint R = infinity;
+
+        int i = jsf.length;
+        while (--i >= 0)
         {
-            R = R.twice();
+            int jsfi = jsf[i];
+            int kDigit = (jsfi >> 4), lDigit = ((jsfi << 28) >> 28);
 
-            if (k.testBit(i))
-            {
-                if (l.testBit(i))
-                {
-                    R = R.add(Z);
-                }
-                else
-                {
-                    R = R.add(P);
-                }
-            }
-            else
-            {
-                if (l.testBit(i))
-                {
-                    R = R.add(Q);
-                }
-            }
+            int index = 4 + (kDigit * 3) + lDigit;
+            R = R.twicePlus(table[index]);
         }
 
         return R;
diff --git a/bcprov/src/main/java/org/bouncycastle/math/ec/ECCurve.java b/bcprov/src/main/java/org/bouncycastle/math/ec/ECCurve.java
index 58281af..19f0062 100644
--- a/bcprov/src/main/java/org/bouncycastle/math/ec/ECCurve.java
+++ b/bcprov/src/main/java/org/bouncycastle/math/ec/ECCurve.java
@@ -3,18 +3,199 @@ package org.bouncycastle.math.ec;
 import java.math.BigInteger;
 import java.util.Random;
 
+import org.bouncycastle.util.BigIntegers;
+
 /**
  * base class for an elliptic curve
  */
 public abstract class ECCurve
 {
-    ECFieldElement a, b;
+    public static final int COORD_AFFINE = 0;
+    public static final int COORD_HOMOGENEOUS = 1;
+    public static final int COORD_JACOBIAN = 2;
+    public static final int COORD_JACOBIAN_CHUDNOVSKY = 3;
+    public static final int COORD_JACOBIAN_MODIFIED = 4;
+    public static final int COORD_LAMBDA_AFFINE = 5;
+    public static final int COORD_LAMBDA_PROJECTIVE = 6;
+    public static final int COORD_SKEWED = 7;
+
+    public static int[] getAllCoordinateSystems()
+    {
+        return new int[]{ COORD_AFFINE, COORD_HOMOGENEOUS, COORD_JACOBIAN, COORD_JACOBIAN_CHUDNOVSKY,
+            COORD_JACOBIAN_MODIFIED, COORD_LAMBDA_AFFINE, COORD_LAMBDA_PROJECTIVE, COORD_SKEWED };
+    }
+
+    public class Config
+    {
+        protected int coord;
+        protected ECMultiplier multiplier;
+
+        Config(int coord, ECMultiplier multiplier)
+        {
+            this.coord = coord;
+            this.multiplier = multiplier;
+        }
+
+        public Config setCoordinateSystem(int coord)
+        {
+            this.coord = coord;
+            return this;
+        }
+
+        public Config setMultiplier(ECMultiplier multiplier)
+        {
+            this.multiplier = multiplier;
+            return this;
+        }
+
+        public ECCurve create()
+        {
+            if (!supportsCoordinateSystem(coord))
+            {
+                throw new IllegalStateException("unsupported coordinate system");
+            }
+
+            ECCurve c = cloneCurve();
+            if (c == ECCurve.this)
+            {
+                throw new IllegalStateException("implementation returned current curve");
+            }
+
+            c.coord = coord;
+            c.multiplier = multiplier;
+
+            return c;
+        }
+    }
+
+    protected ECFieldElement a, b;
+    protected int coord = COORD_AFFINE;
+    protected ECMultiplier multiplier = null;
 
     public abstract int getFieldSize();
 
     public abstract ECFieldElement fromBigInteger(BigInteger x);
 
-    public abstract ECPoint createPoint(BigInteger x, BigInteger y, boolean withCompression);
+    public Config configure()
+    {
+        return new Config(this.coord, this.multiplier);
+    }
+
+    public ECPoint createPoint(BigInteger x, BigInteger y)
+    {
+        return createPoint(x, y, false);
+    }
+
+    /**
+     * @deprecated per-point compression property will be removed, use {@link #createPoint(BigInteger, BigInteger)}
+     * and refer {@link ECPoint#getEncoded(boolean)}
+     */
+    public ECPoint createPoint(BigInteger x, BigInteger y, boolean withCompression)
+    {
+        return createRawPoint(fromBigInteger(x), fromBigInteger(y), withCompression);
+    }
+
+    protected abstract ECCurve cloneCurve();
+
+    protected abstract ECPoint createRawPoint(ECFieldElement x, ECFieldElement y, boolean withCompression);
+
+    protected ECMultiplier createDefaultMultiplier()
+    {
+        return new WNafL2RMultiplier();
+    }
+
+    public boolean supportsCoordinateSystem(int coord)
+    {
+        return coord == COORD_AFFINE;
+    }
+
+    public PreCompInfo getPreCompInfo(ECPoint p)
+    {
+        checkPoint(p);
+        return p.preCompInfo;
+    }
+
+    /**
+     * Sets the <code>PreCompInfo</code> for a point on this curve. Used by
+     * <code>ECMultiplier</code>s to save the precomputation for this <code>ECPoint</code> for use
+     * by subsequent multiplication.
+     * 
+     * @param point
+     *            The <code>ECPoint</code> to store precomputations for.
+     * @param preCompInfo
+     *            The values precomputed by the <code>ECMultiplier</code>.
+     */
+    public void setPreCompInfo(ECPoint point, PreCompInfo preCompInfo)
+    {
+        checkPoint(point);
+        point.preCompInfo = preCompInfo;
+    }
+
+    public ECPoint importPoint(ECPoint p)
+    {
+        if (this == p.getCurve())
+        {
+            return p;
+        }
+        if (p.isInfinity())
+        {
+            return getInfinity();
+        }
+
+        // TODO Default behaviour could be improved if the two curves have the same coordinate system by copying any Z coordinates.
+        p = p.normalize();
+
+        return createPoint(p.getXCoord().toBigInteger(), p.getYCoord().toBigInteger(), p.withCompression);
+    }
+
+    /**
+     * Normalization ensures that any projective coordinate is 1, and therefore that the x, y
+     * coordinates reflect those of the equivalent point in an affine coordinate system. Where more
+     * than one point is to be normalized, this method will generally be more efficient than
+     * normalizing each point separately.
+     * 
+     * @param points
+     *            An array of points that will be updated in place with their normalized versions,
+     *            where necessary
+     */
+    public void normalizeAll(ECPoint[] points)
+    {
+        checkPoints(points);
+
+        if (this.getCoordinateSystem() == ECCurve.COORD_AFFINE)
+        {
+            return;
+        }
+
+        /*
+         * Figure out which of the points actually need to be normalized
+         */
+        ECFieldElement[] zs = new ECFieldElement[points.length];
+        int[] indices = new int[points.length];
+        int count = 0;
+        for (int i = 0; i < points.length; ++i)
+        {
+            ECPoint p = points[i];
+            if (null != p && !p.isNormalized())
+            {
+                zs[count] = p.getZCoord(0);
+                indices[count++] = i;
+            }
+        }
+
+        if (count == 0)
+        {
+            return;
+        }
+
+        ECAlgorithms.implMontgomeryTrick(zs, 0, count);
+
+        for (int j = 0; j < count; ++j)
+        {
+            int index = indices[j];
+            points[index] = points[index].normalize(zs[j]);
+        }
+    }
 
     public abstract ECPoint getInfinity();
 
@@ -28,9 +209,26 @@ public abstract class ECCurve
         return b;
     }
 
+    public int getCoordinateSystem()
+    {
+        return coord;
+    }
+
     protected abstract ECPoint decompressPoint(int yTilde, BigInteger X1);
 
     /**
+     * Sets the default <code>ECMultiplier</code>, unless already set. 
+     */
+    public ECMultiplier getMultiplier()
+    {
+        if (this.multiplier == null)
+        {
+            this.multiplier = createDefaultMultiplier();
+        }
+        return this.multiplier;
+    }
+
+    /**
      * Decode a point on this curve from its ASN.1 encoding. The different
      * encodings are taken account of, including point compression for
      * <code>F<sub>p</sub></code> (X9.62 s 4.2.1 pg 17).
@@ -62,9 +260,9 @@ public abstract class ECCurve
             }
 
             int yTilde = encoded[0] & 1;
-            BigInteger X1 = fromArray(encoded, 1, expectedLength);
+            BigInteger X = BigIntegers.fromUnsignedByteArray(encoded, 1, expectedLength);
 
-            p = decompressPoint(yTilde, X1);
+            p = decompressPoint(yTilde, X);
             break;
         }
         case 0x04: // uncompressed
@@ -76,10 +274,10 @@ public abstract class ECCurve
                 throw new IllegalArgumentException("Incorrect length for uncompressed/hybrid encoding");
             }
 
-            BigInteger X1 = fromArray(encoded, 1, expectedLength);
-            BigInteger Y1 = fromArray(encoded, 1 + expectedLength, expectedLength);
+            BigInteger X = BigIntegers.fromUnsignedByteArray(encoded, 1, expectedLength);
+            BigInteger Y = BigIntegers.fromUnsignedByteArray(encoded, 1 + expectedLength, expectedLength);
 
-            p = createPoint(X1, Y1, false);
+            p = createPoint(X, Y);
             break;
         }
         default:
@@ -89,11 +287,29 @@ public abstract class ECCurve
         return p;
     }
 
-    private static BigInteger fromArray(byte[] buf, int off, int length)
+    protected void checkPoint(ECPoint point)
     {
-        byte[] mag = new byte[length];
-        System.arraycopy(buf, off, mag, 0, length);
-        return new BigInteger(1, mag);
+        if (null == point || (this != point.getCurve()))
+        {
+            throw new IllegalArgumentException("'point' must be non-null and on this curve");
+        }
+    }
+
+    protected void checkPoints(ECPoint[] points)
+    {
+        if (points == null)
+        {
+            throw new IllegalArgumentException("'points' cannot be null");
+        }
+
+        for (int i = 0; i < points.length; ++i)
+        {
+            ECPoint point = points[i];
+            if (null != point && this != point.getCurve())
+            {
+                throw new IllegalArgumentException("'points' entries must be null or on this curve");
+            }
+        }
     }
 
     /**
@@ -101,15 +317,50 @@ public abstract class ECCurve
      */
     public static class Fp extends ECCurve
     {
-        BigInteger q;
+        private static final int FP_DEFAULT_COORDS = COORD_JACOBIAN_MODIFIED;
+
+        BigInteger q, r;
         ECPoint.Fp infinity;
 
         public Fp(BigInteger q, BigInteger a, BigInteger b)
         {
             this.q = q;
+            this.r = ECFieldElement.Fp.calculateResidue(q);
+            this.infinity = new ECPoint.Fp(this, null, null);
+
             this.a = fromBigInteger(a);
             this.b = fromBigInteger(b);
+            this.coord = FP_DEFAULT_COORDS;
+        }
+
+        protected Fp(BigInteger q, BigInteger r, ECFieldElement a, ECFieldElement b)
+        {
+            this.q = q;
+            this.r = r;
             this.infinity = new ECPoint.Fp(this, null, null);
+
+            this.a = a;
+            this.b = b;
+            this.coord = FP_DEFAULT_COORDS;
+        }
+
+        protected ECCurve cloneCurve()
+        {
+            return new Fp(q, r, a, b);
+        }
+
+        public boolean supportsCoordinateSystem(int coord)
+        {
+            switch (coord)
+            {
+            case COORD_AFFINE:
+            case COORD_HOMOGENEOUS:
+            case COORD_JACOBIAN:
+            case COORD_JACOBIAN_MODIFIED:
+                return true;
+            default:
+                return false;
+            }
         }
 
         public BigInteger getQ()
@@ -124,12 +375,34 @@ public abstract class ECCurve
 
         public ECFieldElement fromBigInteger(BigInteger x)
         {
-            return new ECFieldElement.Fp(this.q, x);
+            return new ECFieldElement.Fp(this.q, this.r, x);
         }
 
-        public ECPoint createPoint(BigInteger x, BigInteger y, boolean withCompression)
+        protected ECPoint createRawPoint(ECFieldElement x, ECFieldElement y, boolean withCompression)
         {
-            return new ECPoint.Fp(this, fromBigInteger(x), fromBigInteger(y), withCompression);
+            return new ECPoint.Fp(this, x, y, withCompression);
+        }
+
+        public ECPoint importPoint(ECPoint p)
+        {
+            if (this != p.getCurve() && this.getCoordinateSystem() == COORD_JACOBIAN && !p.isInfinity())
+            {
+                switch (p.getCurve().getCoordinateSystem())
+                {
+                case COORD_JACOBIAN:
+                case COORD_JACOBIAN_CHUDNOVSKY:
+                case COORD_JACOBIAN_MODIFIED:
+                    return new ECPoint.Fp(this,
+                        fromBigInteger(p.x.toBigInteger()),
+                        fromBigInteger(p.y.toBigInteger()),
+                        new ECFieldElement[]{ fromBigInteger(p.zs[0].toBigInteger()) },
+                        p.withCompression);
+                default:
+                    break;
+                }
+            }
+
+            return super.importPoint(p);
         }
 
         protected ECPoint decompressPoint(int yTilde, BigInteger X1)
@@ -148,9 +421,7 @@ public abstract class ECCurve
             }
 
             BigInteger betaValue = beta.toBigInteger();
-            int bit0 = betaValue.testBit(0) ? 1 : 0;
-
-            if (bit0 != yTilde)
+            if (betaValue.testBit(0) != (yTilde == 1))
             {
                 // Use the other root
                 beta = fromBigInteger(q.subtract(betaValue));
@@ -195,6 +466,8 @@ public abstract class ECCurve
      */
     public static class F2m extends ECCurve
     {
+        private static final int F2M_DEFAULT_COORDS = COORD_AFFINE;
+
         /**
          * The exponent <code>m</code> of <code>F<sub>2<sup>m</sup></sub></code>.
          */
@@ -401,9 +674,53 @@ public abstract class ECCurve
                 }
             }
 
+            this.infinity = new ECPoint.F2m(this, null, null);
             this.a = fromBigInteger(a);
             this.b = fromBigInteger(b);
+            this.coord = F2M_DEFAULT_COORDS;
+        }
+
+        protected F2m(int m, int k1, int k2, int k3, ECFieldElement a, ECFieldElement b, BigInteger n, BigInteger h)
+        {
+            this.m = m;
+            this.k1 = k1;
+            this.k2 = k2;
+            this.k3 = k3;
+            this.n = n;
+            this.h = h;
+
             this.infinity = new ECPoint.F2m(this, null, null);
+            this.a = a;
+            this.b = b;
+            this.coord = F2M_DEFAULT_COORDS;
+        }
+
+        protected ECCurve cloneCurve()
+        {
+            return new F2m(m, k1, k2, k3, a, b, n, h);
+        }
+
+        public boolean supportsCoordinateSystem(int coord)
+        {
+            switch (coord)
+            {
+            case COORD_AFFINE:
+            case COORD_HOMOGENEOUS:
+            case COORD_LAMBDA_PROJECTIVE:
+                return true;
+            default:
+                return false;
+            }
+        }
+
+        protected ECMultiplier createDefaultMultiplier()
+        {
+            if (isKoblitz())
+            {
+                return new WTauNafMultiplier();
+            }
+
+            return super.createDefaultMultiplier();
         }
 
         public int getFieldSize()
@@ -418,7 +735,32 @@ public abstract class ECCurve
 
         public ECPoint createPoint(BigInteger x, BigInteger y, boolean withCompression)
         {
-            return new ECPoint.F2m(this, fromBigInteger(x), fromBigInteger(y), withCompression);
+            ECFieldElement X = fromBigInteger(x), Y = fromBigInteger(y);
+
+            switch (this.getCoordinateSystem())
+            {
+            case COORD_LAMBDA_AFFINE:
+            case COORD_LAMBDA_PROJECTIVE:
+            {
+                if (!X.isZero())
+                {
+                    // Y becomes Lambda (X + Y/X) here
+                    Y = Y.divide(X).add(X);
+                }
+                break;
+            }
+            default:
+            {
+                break;
+            }
+            }
+
+            return createRawPoint(X, Y, withCompression);
+        }
+
+        protected ECPoint createRawPoint(ECFieldElement x, ECFieldElement y, boolean withCompression)
+        {
+            return new ECPoint.F2m(this, x, y, withCompression);
         }
 
         public ECPoint getInfinity()
@@ -432,10 +774,7 @@ public abstract class ECCurve
          */
         public boolean isKoblitz()
         {
-            return ((n != null) && (h != null) &&
-                    ((a.toBigInteger().equals(ECConstants.ZERO)) ||
-                    (a.toBigInteger().equals(ECConstants.ONE))) &&
-                    (b.toBigInteger().equals(ECConstants.ONE)));
+            return n != null && h != null && a.bitLength() <= 1 && b.bitLength() == 1;
         }
 
         /**
@@ -480,7 +819,7 @@ public abstract class ECCurve
         {
             ECFieldElement xp = fromBigInteger(X1);
             ECFieldElement yp = null;
-            if (xp.toBigInteger().equals(ECConstants.ZERO))
+            if (xp.isZero())
             {
                 yp = (ECFieldElement.F2m)b;
                 for (int i = 0; i < m - 1; i++)
@@ -491,17 +830,31 @@ public abstract class ECCurve
             else
             {
                 ECFieldElement beta = xp.add(a).add(b.multiply(xp.square().invert()));
-                ECFieldElement z = solveQuadradicEquation(beta);
+                ECFieldElement z = solveQuadraticEquation(beta);
                 if (z == null)
                 {
                     throw new IllegalArgumentException("Invalid point compression");
                 }
-                int zBit = z.toBigInteger().testBit(0) ? 1 : 0;
-                if (zBit != yTilde)
+                if (z.testBitZero() != (yTilde == 1))
                 {
-                    z = z.add(fromBigInteger(ECConstants.ONE));
+                    z = z.addOne();
                 }
+
                 yp = xp.multiply(z);
+
+                switch (this.getCoordinateSystem())
+                {
+                case COORD_LAMBDA_AFFINE:
+                case COORD_LAMBDA_PROJECTIVE:
+                {
+                    yp = yp.divide(xp).add(xp);
+                    break;
+                }
+                default:
+                {
+                    break;
+                }
+                }
             }
 
             return new ECPoint.F2m(this, xp, yp, true);
@@ -512,28 +865,26 @@ public abstract class ECCurve
          * D.1.6) The other solution is <code>z + 1</code>.
          * 
          * @param beta
-         *            The value to solve the qradratic equation for.
+         *            The value to solve the quadratic equation for.
          * @return the solution for <code>z<sup>2</sup> + z = beta</code> or
          *         <code>null</code> if no solution exists.
          */
-        private ECFieldElement solveQuadradicEquation(ECFieldElement beta)
+        private ECFieldElement solveQuadraticEquation(ECFieldElement beta)
         {
-            ECFieldElement zeroElement = new ECFieldElement.F2m(
-                    this.m, this.k1, this.k2, this.k3, ECConstants.ZERO);
-
-            if (beta.toBigInteger().equals(ECConstants.ZERO))
+            if (beta.isZero())
             {
-                return zeroElement;
+                return beta;
             }
 
+            ECFieldElement zeroElement = fromBigInteger(ECConstants.ZERO);
+
             ECFieldElement z = null;
-            ECFieldElement gamma = zeroElement;
+            ECFieldElement gamma = null;
 
             Random rand = new Random();
             do
             {
-                ECFieldElement t = new ECFieldElement.F2m(this.m, this.k1,
-                        this.k2, this.k3, new BigInteger(m, rand));
+                ECFieldElement t = fromBigInteger(new BigInteger(m, rand));
                 z = zeroElement;
                 ECFieldElement w = beta;
                 for (int i = 1; i <= m - 1; i++)
@@ -542,13 +893,13 @@ public abstract class ECCurve
                     z = z.square().add(w2.multiply(t));
                     w = w2.add(beta);
                 }
-                if (!w.toBigInteger().equals(ECConstants.ZERO))
+                if (!w.isZero())
                 {
                     return null;
                 }
                 gamma = z.square().add(z);
             }
-            while (gamma.toBigInteger().equals(ECConstants.ZERO));
+            while (gamma.isZero());
 
             return z;
         }
@@ -567,7 +918,7 @@ public abstract class ECCurve
             }
 
             ECCurve.F2m other = (ECCurve.F2m)anObject;
-            
+
             return (this.m == other.m) && (this.k1 == other.k1)
                 && (this.k2 == other.k2) && (this.k3 == other.k3)
                 && a.equals(other.a) && b.equals(other.b);
diff --git a/bcprov/src/main/java/org/bouncycastle/math/ec/ECFieldElement.java b/bcprov/src/main/java/org/bouncycastle/math/ec/ECFieldElement.java
index b5e9aa5..87608eb 100644
--- a/bcprov/src/main/java/org/bouncycastle/math/ec/ECFieldElement.java
+++ b/bcprov/src/main/java/org/bouncycastle/math/ec/ECFieldElement.java
@@ -3,14 +3,17 @@ package org.bouncycastle.math.ec;
 import java.math.BigInteger;
 import java.util.Random;
 
+import org.bouncycastle.util.Arrays;
+import org.bouncycastle.util.BigIntegers;
+
 public abstract class ECFieldElement
     implements ECConstants
 {
-
     public abstract BigInteger     toBigInteger();
     public abstract String         getFieldName();
     public abstract int            getFieldSize();
     public abstract ECFieldElement add(ECFieldElement b);
+    public abstract ECFieldElement addOne();
     public abstract ECFieldElement subtract(ECFieldElement b);
     public abstract ECFieldElement multiply(ECFieldElement b);
     public abstract ECFieldElement divide(ECFieldElement b);
@@ -19,27 +22,99 @@ public abstract class ECFieldElement
     public abstract ECFieldElement invert();
     public abstract ECFieldElement sqrt();
 
+    public int bitLength()
+    {
+        return toBigInteger().bitLength();
+    }
+
+    public boolean isZero()
+    {
+        return 0 == toBigInteger().signum();
+    }
+
+    public boolean testBitZero()
+    {
+        return toBigInteger().testBit(0);
+    }
+
     public String toString()
     {
-        return this.toBigInteger().toString(2);
+        return this.toBigInteger().toString(16);
+    }
+
+    public byte[] getEncoded()
+    {
+        return BigIntegers.asUnsignedByteArray((getFieldSize() + 7) / 8, toBigInteger());
     }
 
     public static class Fp extends ECFieldElement
     {
-        BigInteger x;
+        BigInteger q, r, x;
 
-        BigInteger q;
-        
+//        static int[] calculateNaf(BigInteger p)
+//        {
+//            int[] naf = WNafUtil.generateCompactNaf(p);
+//
+//            int bit = 0;
+//            for (int i = 0; i < naf.length; ++i)
+//            {
+//                int ni = naf[i];
+//                int digit = ni >> 16, zeroes = ni & 0xFFFF;
+//
+//                bit += zeroes;
+//                naf[i] = digit < 0 ? ~bit : bit;
+//                ++bit;
+//            }
+//
+//            int last = naf.length - 1;
+//            if (last > 0 && last <= 16)
+//            {
+//                int top = naf[last], top2 = naf[last - 1];
+//                if (top2 < 0)
+//                {
+//                    top2 = ~top2;
+//                }
+//                if (top - top2 >= 64)
+//                {
+//                    return naf;
+//                }
+//            }
+//
+//            return null;
+//        }
+
+        static BigInteger calculateResidue(BigInteger p)
+        {
+            int bitLength = p.bitLength();
+            if (bitLength > 128)
+            {
+                BigInteger firstWord = p.shiftRight(bitLength - 64);
+                if (firstWord.longValue() == -1L)
+                {
+                    return ONE.shiftLeft(bitLength).subtract(p);
+                }
+            }
+            return null;
+        }
+
+        /**
+         * @deprecated Use ECCurve.fromBigInteger to construct field elements
+         */
         public Fp(BigInteger q, BigInteger x)
         {
-            this.x = x;
-            
-            if (x.compareTo(q) >= 0)
+            this(q, calculateResidue(q), x);
+        }
+
+        Fp(BigInteger q, BigInteger r, BigInteger x)
+        {
+            if (x == null || x.signum() < 0 || x.compareTo(q) >= 0)
             {
-                throw new IllegalArgumentException("x value too large in field element");
+                throw new IllegalArgumentException("x value invalid in Fp field element");
             }
 
             this.q = q;
+            this.r = r;
+            this.x = x;
         }
 
         public BigInteger toBigInteger()
@@ -66,40 +141,70 @@ public abstract class ECFieldElement
         {
             return q;
         }
-        
+
         public ECFieldElement add(ECFieldElement b)
         {
-            return new Fp(q, x.add(b.toBigInteger()).mod(q));
+            return new Fp(q, r, modAdd(x, b.toBigInteger()));
+        }
+
+        public ECFieldElement addOne()
+        {
+            BigInteger x2 = x.add(ECConstants.ONE);
+            if (x2.compareTo(q) == 0)
+            {
+                x2 = ECConstants.ZERO;
+            }
+            return new Fp(q, r, x2);
         }
 
         public ECFieldElement subtract(ECFieldElement b)
         {
-            return new Fp(q, x.subtract(b.toBigInteger()).mod(q));
+            BigInteger x2 = b.toBigInteger();
+            BigInteger x3 = x.subtract(x2);
+            if (x3.signum() < 0)
+            {
+                x3 = x3.add(q);
+            }
+            return new Fp(q, r, x3);
         }
 
         public ECFieldElement multiply(ECFieldElement b)
         {
-            return new Fp(q, x.multiply(b.toBigInteger()).mod(q));
+            return new Fp(q, r, modMult(x, b.toBigInteger()));
         }
 
         public ECFieldElement divide(ECFieldElement b)
         {
-            return new Fp(q, x.multiply(b.toBigInteger().modInverse(q)).mod(q));
+            return new Fp(q, modMult(x, b.toBigInteger().modInverse(q)));
         }
 
         public ECFieldElement negate()
         {
-            return new Fp(q, x.negate().mod(q));
+            BigInteger x2;
+            if (x.signum() == 0)
+            {
+                x2 = x;
+            }
+            else if (ONE.equals(r))
+            {
+                x2 = q.xor(x);
+            }
+            else
+            {
+                x2 = q.subtract(x);
+            }
+            return new Fp(q, r, x2);
         }
 
         public ECFieldElement square()
         {
-            return new Fp(q, x.multiply(x).mod(q));
+            return new Fp(q, r, modMult(x, x));
         }
 
         public ECFieldElement invert()
         {
-            return new Fp(q, x.modInverse(q));
+            // TODO Modular inversion can be faster for a (Generalized) Mersenne Prime.
+            return new Fp(q, r, x.modInverse(q));
         }
 
         // D.1.4 91
@@ -120,7 +225,7 @@ public abstract class ECFieldElement
             if (q.testBit(1))
             {
                 // z = g^(u+1) + p, p = 4u + 3
-                ECFieldElement z = new Fp(q, x.modPow(q.shiftRight(2).add(ECConstants.ONE), q));
+                ECFieldElement z = new Fp(q, r, x.modPow(q.shiftRight(2).add(ECConstants.ONE), q));
 
                 return z.square().equals(this) ? z : null;
             }
@@ -138,7 +243,7 @@ public abstract class ECFieldElement
             BigInteger k = u.shiftLeft(1).add(ECConstants.ONE);
 
             BigInteger Q = this.x;
-            BigInteger fourQ = Q.shiftLeft(2).mod(q);
+            BigInteger fourQ = modDouble(modDouble(Q));
 
             BigInteger U, V;
             Random rand = new Random();
@@ -152,11 +257,11 @@ public abstract class ECFieldElement
                 while (P.compareTo(q) >= 0
                     || !(P.multiply(P).subtract(fourQ).modPow(legendreExponent, q).equals(qMinusOne)));
 
-                BigInteger[] result = lucasSequence(q, P, Q, k);
+                BigInteger[] result = lucasSequence(P, Q, k);
                 U = result[0];
                 V = result[1];
 
-                if (V.multiply(V).mod(q).equals(fourQ))
+                if (modMult(V, V).equals(fourQ))
                 {
                     // Integer division by 2, mod q
                     if (V.testBit(0))
@@ -168,7 +273,7 @@ public abstract class ECFieldElement
 
                     //assert V.multiply(V).mod(q).equals(x);
 
-                    return new ECFieldElement.Fp(q, V);
+                    return new ECFieldElement.Fp(q, r, V);
                 }
             }
             while (U.equals(ECConstants.ONE) || U.equals(qMinusOne));
@@ -230,8 +335,7 @@ public abstract class ECFieldElement
 //            return r.multiply(r).multiply(x.modPow(q.subtract(ECConstants.TWO), q)).subtract(ECConstants.TWO).mod(p);
 //        }
 
-        private static BigInteger[] lucasSequence(
-            BigInteger  p,
+        private BigInteger[] lucasSequence(
             BigInteger  P,
             BigInteger  Q,
             BigInteger  k)
@@ -247,40 +351,122 @@ public abstract class ECFieldElement
 
             for (int j = n - 1; j >= s + 1; --j)
             {
-                Ql = Ql.multiply(Qh).mod(p);
+                Ql = modMult(Ql, Qh);
 
                 if (k.testBit(j))
                 {
-                    Qh = Ql.multiply(Q).mod(p);
-                    Uh = Uh.multiply(Vh).mod(p);
-                    Vl = Vh.multiply(Vl).subtract(P.multiply(Ql)).mod(p);
-                    Vh = Vh.multiply(Vh).subtract(Qh.shiftLeft(1)).mod(p);
+                    Qh = modMult(Ql, Q);
+                    Uh = modMult(Uh, Vh);
+                    Vl = modReduce(Vh.multiply(Vl).subtract(P.multiply(Ql)));
+                    Vh = modReduce(Vh.multiply(Vh).subtract(Qh.shiftLeft(1)));
                 }
                 else
                 {
                     Qh = Ql;
-                    Uh = Uh.multiply(Vl).subtract(Ql).mod(p);
-                    Vh = Vh.multiply(Vl).subtract(P.multiply(Ql)).mod(p);
-                    Vl = Vl.multiply(Vl).subtract(Ql.shiftLeft(1)).mod(p);
+                    Uh = modReduce(Uh.multiply(Vl).subtract(Ql));
+                    Vh = modReduce(Vh.multiply(Vl).subtract(P.multiply(Ql)));
+                    Vl = modReduce(Vl.multiply(Vl).subtract(Ql.shiftLeft(1)));
                 }
             }
 
-            Ql = Ql.multiply(Qh).mod(p);
-            Qh = Ql.multiply(Q).mod(p);
-            Uh = Uh.multiply(Vl).subtract(Ql).mod(p);
-            Vl = Vh.multiply(Vl).subtract(P.multiply(Ql)).mod(p);
-            Ql = Ql.multiply(Qh).mod(p);
+            Ql = modMult(Ql, Qh);
+            Qh = modMult(Ql, Q);
+            Uh = modReduce(Uh.multiply(Vl).subtract(Ql));
+            Vl = modReduce(Vh.multiply(Vl).subtract(P.multiply(Ql)));
+            Ql = modMult(Ql, Qh);
 
             for (int j = 1; j <= s; ++j)
             {
-                Uh = Uh.multiply(Vl).mod(p);
-                Vl = Vl.multiply(Vl).subtract(Ql.shiftLeft(1)).mod(p);
-                Ql = Ql.multiply(Ql).mod(p);
+                Uh = modMult(Uh, Vl);
+                Vl = modReduce(Vl.multiply(Vl).subtract(Ql.shiftLeft(1)));
+                Ql = modMult(Ql, Ql);
             }
 
             return new BigInteger[]{ Uh, Vl };
         }
-        
+
+        protected BigInteger modAdd(BigInteger x1, BigInteger x2)
+        {
+            BigInteger x3 = x1.add(x2);
+            if (x3.compareTo(q) >= 0)
+            {
+                x3 = x3.subtract(q);
+            }
+            return x3;
+        }
+
+        protected BigInteger modDouble(BigInteger x)
+        {
+            BigInteger _2x = x.shiftLeft(1);
+            if (_2x.compareTo(q) >= 0)
+            {
+                _2x = _2x.subtract(q);
+            }
+            return _2x;
+        }
+
+        protected BigInteger modMult(BigInteger x1, BigInteger x2)
+        {
+            return modReduce(x1.multiply(x2));
+        }
+
+        protected BigInteger modReduce(BigInteger x)
+        {
+//            if (naf != null)
+//            {
+//                int last = naf.length - 1;
+//                int bits = naf[last];
+//                while (x.bitLength() > (bits + 1))
+//                {
+//                    BigInteger u = x.shiftRight(bits);
+//                    BigInteger v = x.subtract(u.shiftLeft(bits));
+//
+//                    x = v;
+//
+//                    for (int i = 0; i < last; ++i)
+//                    {
+//                        int ni = naf[i];
+//                        if (ni < 0)
+//                        {
+//                            x = x.add(u.shiftLeft(~ni));
+//                        }
+//                        else
+//                        {
+//                            x = x.subtract(u.shiftLeft(ni));
+//                        }
+//                    }
+//                }
+//                while (x.compareTo(q) >= 0)
+//                {
+//                    x = x.subtract(q);
+//                }
+//            }
+//            else
+            if (r != null)
+            {
+                int qLen = q.bitLength();
+                while (x.bitLength() > (qLen + 1))
+                {
+                    BigInteger u = x.shiftRight(qLen);
+                    BigInteger v = x.subtract(u.shiftLeft(qLen));
+                    if (!r.equals(ONE))
+                    {
+                        u = u.multiply(r);
+                    }
+                    x = u.add(v); 
+                }
+                while (x.compareTo(q) >= 0)
+                {
+                    x = x.subtract(q);
+                }
+            }
+            else
+            {
+                x = x.mod(q);
+            }
+            return x;
+        }
+
         public boolean equals(Object other)
         {
             if (other == this)
@@ -669,7 +855,7 @@ public abstract class ECFieldElement
 //                g1z = g1z.xor(g2z.shiftLeft(j));
 ////                if (g1z.bitLength() > this.m) {
 ////                    throw new ArithmeticException(
-////                            "deg(g1z) >= m, g1z = " + g1z.toString(2));
+////                            "deg(g1z) >= m, g1z = " + g1z.toString(16));
 ////                }
 //            }
 //            return new ECFieldElement.F2m(
@@ -801,41 +987,38 @@ public abstract class ECFieldElement
          */
         private int m;
 
-        /**
-         * TPB: The integer <code>k</code> where <code>x<sup>m</sup> +
-         * x<sup>k</sup> + 1</code> represents the reduction polynomial
-         * <code>f(z)</code>.<br>
-         * PPB: The integer <code>k1</code> where <code>x<sup>m</sup> +
-         * x<sup>k3</sup> + x<sup>k2</sup> + x<sup>k1</sup> + 1</code>
-         * represents the reduction polynomial <code>f(z)</code>.<br>
-         */
-        private int k1;
-
-        /**
-         * TPB: Always set to <code>0</code><br>
-         * PPB: The integer <code>k2</code> where <code>x<sup>m</sup> +
-         * x<sup>k3</sup> + x<sup>k2</sup> + x<sup>k1</sup> + 1</code>
-         * represents the reduction polynomial <code>f(z)</code>.<br>
-         */
-        private int k2;
-
-        /**
-         * TPB: Always set to <code>0</code><br>
-         * PPB: The integer <code>k3</code> where <code>x<sup>m</sup> +
-         * x<sup>k3</sup> + x<sup>k2</sup> + x<sup>k1</sup> + 1</code>
-         * represents the reduction polynomial <code>f(z)</code>.<br>
-         */
-        private int k3;
+//        /**
+//         * TPB: The integer <code>k</code> where <code>x<sup>m</sup> +
+//         * x<sup>k</sup> + 1</code> represents the reduction polynomial
+//         * <code>f(z)</code>.<br>
+//         * PPB: The integer <code>k1</code> where <code>x<sup>m</sup> +
+//         * x<sup>k3</sup> + x<sup>k2</sup> + x<sup>k1</sup> + 1</code>
+//         * represents the reduction polynomial <code>f(z)</code>.<br>
+//         */
+//        private int k1;
+//
+//        /**
+//         * TPB: Always set to <code>0</code><br>
+//         * PPB: The integer <code>k2</code> where <code>x<sup>m</sup> +
+//         * x<sup>k3</sup> + x<sup>k2</sup> + x<sup>k1</sup> + 1</code>
+//         * represents the reduction polynomial <code>f(z)</code>.<br>
+//         */
+//        private int k2;
+//
+//        /**
+//         * TPB: Always set to <code>0</code><br>
+//         * PPB: The integer <code>k3</code> where <code>x<sup>m</sup> +
+//         * x<sup>k3</sup> + x<sup>k2</sup> + x<sup>k1</sup> + 1</code>
+//         * represents the reduction polynomial <code>f(z)</code>.<br>
+//         */
+//        private int k3;
 
-        /**
-         * The <code>IntArray</code> holding the bits.
-         */
-        private IntArray x;
+        private int[] ks;
 
         /**
-         * The number of <code>int</code>s required to hold <code>m</code> bits.
+         * The <code>LongArray</code> holding the bits.
          */
-        private int t;
+        private LongArray x;
 
         /**
          * Constructor for PPB.
@@ -851,6 +1034,7 @@ public abstract class ECFieldElement
          * x<sup>k3</sup> + x<sup>k2</sup> + x<sup>k1</sup> + 1</code>
          * represents the reduction polynomial <code>f(z)</code>.
          * @param x The BigInteger representing the value of the field element.
+         * @deprecated Use ECCurve.fromBigInteger to construct field elements
          */
         public F2m(
             int m, 
@@ -859,13 +1043,10 @@ public abstract class ECFieldElement
             int k3,
             BigInteger x)
         {
-            // t = m / 32 rounded up to the next integer
-            t = (m + 31) >> 5;
-            this.x = new IntArray(x, t);
-
             if ((k2 == 0) && (k3 == 0))
             {
                 this.representation = TPB;
+                this.ks = new int[]{ k1 }; 
             }
             else
             {
@@ -880,17 +1061,11 @@ public abstract class ECFieldElement
                             "k2 must be larger than 0");
                 }
                 this.representation = PPB;
-            }
-
-            if (x.signum() < 0)
-            {
-                throw new IllegalArgumentException("x value cannot be negative");
+                this.ks = new int[]{ k1, k2, k3 }; 
             }
 
             this.m = m;
-            this.k1 = k1;
-            this.k2 = k2;
-            this.k3 = k3;
+            this.x = new LongArray(x);
         }
 
         /**
@@ -901,6 +1076,7 @@ public abstract class ECFieldElement
          * x<sup>k</sup> + 1</code> represents the reduction
          * polynomial <code>f(z)</code>.
          * @param x The BigInteger representing the value of the field element.
+         * @deprecated Use ECCurve.fromBigInteger to construct field elements
          */
         public F2m(int m, int k, BigInteger x)
         {
@@ -908,24 +1084,27 @@ public abstract class ECFieldElement
             this(m, k, 0, 0, x);
         }
 
-        private F2m(int m, int k1, int k2, int k3, IntArray x)
+        private F2m(int m, int[] ks, LongArray x)
         {
-            t = (m + 31) >> 5;
-            this.x = x;
             this.m = m;
-            this.k1 = k1;
-            this.k2 = k2;
-            this.k3 = k3;
+            this.representation = (ks.length == 1) ? TPB : PPB;
+            this.ks = ks;
+            this.x = x;
+        }
 
-            if ((k2 == 0) && (k3 == 0))
-            {
-                this.representation = TPB;
-            }
-            else
-            {
-                this.representation = PPB;
-            }
+        public int bitLength()
+        {
+            return x.degree();
+        }
+
+        public boolean isZero()
+        {
+            return x.isZero();
+        }
 
+        public boolean testBitZero()
+        {
+            return x.testBitZero();
         }
 
         public BigInteger toBigInteger()
@@ -967,19 +1146,15 @@ public abstract class ECFieldElement
             ECFieldElement.F2m aF2m = (ECFieldElement.F2m)a;
             ECFieldElement.F2m bF2m = (ECFieldElement.F2m)b;
 
-            if ((aF2m.m != bF2m.m) || (aF2m.k1 != bF2m.k1)
-                    || (aF2m.k2 != bF2m.k2) || (aF2m.k3 != bF2m.k3))
+            if (aF2m.representation != bF2m.representation)
             {
-                throw new IllegalArgumentException("Field elements are not "
-                        + "elements of the same field F2m");
+                // Should never occur
+                throw new IllegalArgumentException("One of the F2m field elements has incorrect representation");
             }
 
-            if (aF2m.representation != bF2m.representation)
+            if ((aF2m.m != bF2m.m) || !Arrays.areEqual(aF2m.ks, bF2m.ks))
             {
-                // Should never occur
-                throw new IllegalArgumentException(
-                        "One of the field "
-                                + "elements are not elements has incorrect representation");
+                throw new IllegalArgumentException("Field elements are not elements of the same field F2m");
             }
         }
 
@@ -988,10 +1163,15 @@ public abstract class ECFieldElement
             // No check performed here for performance reasons. Instead the
             // elements involved are checked in ECPoint.F2m
             // checkFieldElements(this, b);
-            IntArray iarrClone = (IntArray)this.x.clone();
+            LongArray iarrClone = (LongArray)this.x.clone();
             F2m bF2m = (F2m)b;
-            iarrClone.addShifted(bF2m.x, 0);
-            return new F2m(m, k1, k2, k3, iarrClone);
+            iarrClone.addShiftedByWords(bF2m.x, 0);
+            return new F2m(m, ks, iarrClone);
+        }
+
+        public ECFieldElement addOne()
+        {
+            return new F2m(m, ks, x.addOne());
         }
 
         public ECFieldElement subtract(final ECFieldElement b)
@@ -1002,17 +1182,14 @@ public abstract class ECFieldElement
 
         public ECFieldElement multiply(final ECFieldElement b)
         {
-            // Right-to-left comb multiplication in the IntArray
+            // Right-to-left comb multiplication in the LongArray
             // Input: Binary polynomials a(z) and b(z) of degree at most m-1
             // Output: c(z) = a(z) * b(z) mod f(z)
 
             // No check performed here for performance reasons. Instead the
             // elements involved are checked in ECPoint.F2m
             // checkFieldElements(this, b);
-            F2m bF2m = (F2m)b;
-            IntArray mult = x.multiply(bF2m.x, m);
-            mult.reduce(m, new int[]{k1, k2, k3});
-            return new F2m(m, k1, k2, k3, mult);
+            return new F2m(m, ks, x.modMultiply(((F2m)b).x, m, ks));
         }
 
         public ECFieldElement divide(final ECFieldElement b)
@@ -1030,80 +1207,12 @@ public abstract class ECFieldElement
 
         public ECFieldElement square()
         {
-            IntArray squared = x.square(m);
-            squared.reduce(m, new int[]{k1, k2, k3});
-            return new F2m(m, k1, k2, k3, squared);
+            return new F2m(m, ks, x.modSquare(m, ks));
         }
 
-
         public ECFieldElement invert()
         {
-            // Inversion in F2m using the extended Euclidean algorithm
-            // Input: A nonzero polynomial a(z) of degree at most m-1
-            // Output: a(z)^(-1) mod f(z)
-
-            // u(z) := a(z)
-            IntArray uz = (IntArray)this.x.clone();
-
-            // v(z) := f(z)
-            IntArray vz = new IntArray(t);
-            vz.setBit(m);
-            vz.setBit(0);
-            vz.setBit(this.k1);
-            if (this.representation == PPB) 
-            {
-                vz.setBit(this.k2);
-                vz.setBit(this.k3);
-            }
-
-            // g1(z) := 1, g2(z) := 0
-            IntArray g1z = new IntArray(t);
-            g1z.setBit(0);
-            IntArray g2z = new IntArray(t);
-
-            // while u != 0
-            while (!uz.isZero())
-//            while (uz.getUsedLength() > 0)
-//            while (uz.bitLength() > 1)
-            {
-                // j := deg(u(z)) - deg(v(z))
-                int j = uz.bitLength() - vz.bitLength();
-
-                // If j < 0 then: u(z) <-> v(z), g1(z) <-> g2(z), j := -j
-                if (j < 0) 
-                {
-                    final IntArray uzCopy = uz;
-                    uz = vz;
-                    vz = uzCopy;
-
-                    final IntArray g1zCopy = g1z;
-                    g1z = g2z;
-                    g2z = g1zCopy;
-
-                    j = -j;
-                }
-
-                // u(z) := u(z) + z^j * v(z)
-                // Note, that no reduction modulo f(z) is required, because
-                // deg(u(z) + z^j * v(z)) <= max(deg(u(z)), j + deg(v(z)))
-                // = max(deg(u(z)), deg(u(z)) - deg(v(z)) + deg(v(z))
-                // = deg(u(z))
-                // uz = uz.xor(vz.shiftLeft(j));
-                // jInt = n / 32
-                int jInt = j >> 5;
-                // jInt = n % 32
-                int jBit = j & 0x1F;
-                IntArray vzShift = vz.shiftLeft(jBit);
-                uz.addShifted(vzShift, jInt);
-
-                // g1(z) := g1(z) + z^j * g2(z)
-//                g1z = g1z.xor(g2z.shiftLeft(j));
-                IntArray g2zShift = g2z.shiftLeft(jBit);
-                g1z.addShifted(g2zShift, jInt);
-                
-            }
-            return new ECFieldElement.F2m(
-                    this.m, this.k1, this.k2, this.k3, g2z);
+            return new ECFieldElement.F2m(this.m, this.ks, this.x.modInverse(m, ks));
         }
 
         public ECFieldElement sqrt()
@@ -1143,7 +1252,7 @@ public abstract class ECFieldElement
          */
         public int getK1()
         {
-            return this.k1;
+            return this.ks[0];
         }
 
         /**
@@ -1154,7 +1263,7 @@ public abstract class ECFieldElement
          */
         public int getK2()
         {
-            return this.k2;
+            return this.ks.length >= 2 ? this.ks[1] : 0;
         }
 
         /**
@@ -1165,7 +1274,7 @@ public abstract class ECFieldElement
          */
         public int getK3()
         {
-            return this.k3;
+            return this.ks.length >= 3 ? this.ks[2] : 0;
         }
 
         public boolean equals(Object anObject)
@@ -1182,15 +1291,15 @@ public abstract class ECFieldElement
 
             ECFieldElement.F2m b = (ECFieldElement.F2m)anObject;
             
-            return ((this.m == b.m) && (this.k1 == b.k1) && (this.k2 == b.k2)
-                && (this.k3 == b.k3)
+            return ((this.m == b.m)
                 && (this.representation == b.representation)
+                && Arrays.areEqual(this.ks, b.ks)
                 && (this.x.equals(b.x)));
         }
 
         public int hashCode()
         {
-            return x.hashCode() ^ m ^ k1 ^ k2 ^ k3;
+            return x.hashCode() ^ m ^ Arrays.hashCode(ks);
         }
     }
 }
diff --git a/bcprov/src/main/java/org/bouncycastle/math/ec/ECMultiplier.java b/bcprov/src/main/java/org/bouncycastle/math/ec/ECMultiplier.java
index 4d72e33..da1013a 100644
--- a/bcprov/src/main/java/org/bouncycastle/math/ec/ECMultiplier.java
+++ b/bcprov/src/main/java/org/bouncycastle/math/ec/ECMultiplier.java
@@ -6,14 +6,14 @@ import java.math.BigInteger;
  * Interface for classes encapsulating a point multiplication algorithm
  * for <code>ECPoint</code>s.
  */
-interface ECMultiplier
+public interface ECMultiplier
 {
     /**
      * Multiplies the <code>ECPoint p</code> by <code>k</code>, i.e.
      * <code>p</code> is added <code>k</code> times to itself.
      * @param p The <code>ECPoint</code> to be multiplied.
-     * @param k The factor by which <code>p</code> i multiplied.
+     * @param k The factor by which <code>p</code> is multiplied.
      * @return <code>p</code> multiplied by <code>k</code>.
      */
-    ECPoint multiply(ECPoint p, BigInteger k, PreCompInfo preCompInfo);
+    ECPoint multiply(ECPoint p, BigInteger k);
 }
diff --git a/bcprov/src/main/java/org/bouncycastle/math/ec/ECPoint.java b/bcprov/src/main/java/org/bouncycastle/math/ec/ECPoint.java
index cbc5aaf..7f740e4 100644
--- a/bcprov/src/main/java/org/bouncycastle/math/ec/ECPoint.java
+++ b/bcprov/src/main/java/org/bouncycastle/math/ec/ECPoint.java
@@ -2,59 +2,324 @@ package org.bouncycastle.math.ec;
 
 import java.math.BigInteger;
 
-import org.bouncycastle.asn1.x9.X9IntegerConverter;
-
 /**
  * base class for points on elliptic curves.
  */
 public abstract class ECPoint
 {
-    ECCurve        curve;
-    ECFieldElement x;
-    ECFieldElement y;
+    protected static ECFieldElement[] EMPTY_ZS = new ECFieldElement[0];
 
-    protected boolean withCompression;
+    protected static ECFieldElement[] getInitialZCoords(ECCurve curve)
+    {
+        // Cope with null curve, most commonly used by implicitlyCa
+        int coord = null == curve ? ECCurve.COORD_AFFINE : curve.getCoordinateSystem();
 
-    protected ECMultiplier multiplier = null;
+        switch (coord)
+        {
+        case ECCurve.COORD_AFFINE:
+        case ECCurve.COORD_LAMBDA_AFFINE:
+            return EMPTY_ZS;
+        default:
+            break;
+        }
 
-    protected PreCompInfo preCompInfo = null;
+        ECFieldElement one = curve.fromBigInteger(ECConstants.ONE);
 
-    private static X9IntegerConverter converter = new X9IntegerConverter();
+        switch (coord)
+        {
+        case ECCurve.COORD_HOMOGENEOUS:
+        case ECCurve.COORD_JACOBIAN:
+        case ECCurve.COORD_LAMBDA_PROJECTIVE:
+            return new ECFieldElement[]{ one };
+        case ECCurve.COORD_JACOBIAN_CHUDNOVSKY:
+            return new ECFieldElement[]{ one, one, one };
+        case ECCurve.COORD_JACOBIAN_MODIFIED:
+            return new ECFieldElement[]{ one, curve.getA() };
+        default:
+            throw new IllegalArgumentException("unknown coordinate system");
+        }
+    }
+
+    protected ECCurve curve;
+    protected ECFieldElement x;
+    protected ECFieldElement y;
+    protected ECFieldElement[] zs;
+
+    protected boolean withCompression;
+
+    protected PreCompInfo preCompInfo = null;
 
     protected ECPoint(ECCurve curve, ECFieldElement x, ECFieldElement y)
     {
+        this(curve, x, y, getInitialZCoords(curve));
+    }
+
+    protected ECPoint(ECCurve curve, ECFieldElement x, ECFieldElement y, ECFieldElement[] zs)
+    {
         this.curve = curve;
         this.x = x;
         this.y = y;
+        this.zs = zs;
     }
-    
+
     public ECCurve getCurve()
     {
         return curve;
     }
-    
+
+    protected int getCurveCoordinateSystem()
+    {
+        // Cope with null curve, most commonly used by implicitlyCa
+        return null == curve ? ECCurve.COORD_AFFINE : curve.getCoordinateSystem();
+    }
+
+    /**
+     * Normalizes this point, and then returns the affine x-coordinate.
+     * 
+     * Note: normalization can be expensive, this method is deprecated in favour
+     * of caller-controlled normalization.
+     * 
+     * @deprecated Use getAffineXCoord, or normalize() and getXCoord(), instead
+     */
     public ECFieldElement getX()
     {
-        return x;
+        return normalize().getXCoord();
     }
 
+
+    /**
+     * Normalizes this point, and then returns the affine y-coordinate.
+     * 
+     * Note: normalization can be expensive, this method is deprecated in favour
+     * of caller-controlled normalization.
+     * 
+     * @deprecated Use getAffineYCoord, or normalize() and getYCoord(), instead
+     */
     public ECFieldElement getY()
     {
+        return normalize().getYCoord();
+    }
+
+    /**
+     * Returns the affine x-coordinate after checking that this point is normalized.
+     * 
+     * @return The affine x-coordinate of this point
+     * @throws IllegalStateException if the point is not normalized
+     */
+    public ECFieldElement getAffineXCoord()
+    {
+        checkNormalized();
+        return getXCoord();
+    }
+
+    /**
+     * Returns the affine y-coordinate after checking that this point is normalized
+     * 
+     * @return The affine y-coordinate of this point
+     * @throws IllegalStateException if the point is not normalized
+     */
+    public ECFieldElement getAffineYCoord()
+    {
+        checkNormalized();
+        return getYCoord();
+    }
+
+    /**
+     * Returns the x-coordinate.
+     * 
+     * Caution: depending on the curve's coordinate system, this may not be the same value as in an
+     * affine coordinate system; use normalize() to get a point where the coordinates have their
+     * affine values, or use getAffineXCoord if you expect the point to already have been
+     * normalized.
+     * 
+     * @return the x-coordinate of this point
+     */
+    public ECFieldElement getXCoord()
+    {
+        return x;
+    }
+
+    /**
+     * Returns the y-coordinate.
+     * 
+     * Caution: depending on the curve's coordinate system, this may not be the same value as in an
+     * affine coordinate system; use normalize() to get a point where the coordinates have their
+     * affine values, or use getAffineYCoord if you expect the point to already have been
+     * normalized.
+     * 
+     * @return the y-coordinate of this point
+     */
+    public ECFieldElement getYCoord()
+    {
         return y;
     }
 
+    public ECFieldElement getZCoord(int index)
+    {
+        return (index < 0 || index >= zs.length) ? null : zs[index];
+    }
+
+    public ECFieldElement[] getZCoords()
+    {
+        int zsLen = zs.length;
+        if (zsLen == 0)
+        {
+            return zs;
+        }
+        ECFieldElement[] copy = new ECFieldElement[zsLen];
+        System.arraycopy(zs, 0, copy, 0, zsLen);
+        return copy;
+    }
+
+    protected ECFieldElement getRawXCoord()
+    {
+        return x;
+    }
+
+    protected ECFieldElement getRawYCoord()
+    {
+        return y;
+    }
+
+    protected void checkNormalized()
+    {
+        if (!isNormalized())
+        {
+            throw new IllegalStateException("point not in normal form");
+        }
+    }
+
+    public boolean isNormalized()
+    {
+        int coord = this.getCurveCoordinateSystem();
+
+        return coord == ECCurve.COORD_AFFINE
+            || coord == ECCurve.COORD_LAMBDA_AFFINE
+            || isInfinity()
+            || zs[0].bitLength() == 1;
+    }
+
+    /**
+     * Normalization ensures that any projective coordinate is 1, and therefore that the x, y
+     * coordinates reflect those of the equivalent point in an affine coordinate system.
+     * 
+     * @return a new ECPoint instance representing the same point, but with normalized coordinates
+     */
+    public ECPoint normalize()
+    {
+        if (this.isInfinity())
+        {
+            return this;
+        }
+
+        switch (this.getCurveCoordinateSystem())
+        {
+        case ECCurve.COORD_AFFINE:
+        case ECCurve.COORD_LAMBDA_AFFINE:
+        {
+            return this;
+        }
+        default:
+        {
+            ECFieldElement Z1 = getZCoord(0);
+            if (Z1.bitLength() == 1)
+            {
+                return this;
+            }
+
+            return normalize(Z1.invert());
+        }
+        }
+    }
+
+    ECPoint normalize(ECFieldElement zInv)
+    {
+        switch (this.getCurveCoordinateSystem())
+        {
+        case ECCurve.COORD_HOMOGENEOUS:
+        case ECCurve.COORD_LAMBDA_PROJECTIVE:
+        {
+            return createScaledPoint(zInv, zInv);
+        }
+        case ECCurve.COORD_JACOBIAN:
+        case ECCurve.COORD_JACOBIAN_CHUDNOVSKY:
+        case ECCurve.COORD_JACOBIAN_MODIFIED:
+        {
+            ECFieldElement zInv2 = zInv.square(), zInv3 = zInv2.multiply(zInv);
+            return createScaledPoint(zInv2, zInv3);
+        }
+        default:
+        {
+            throw new IllegalStateException("not a projective coordinate system");
+        }
+        }
+    }
+
+    protected ECPoint createScaledPoint(ECFieldElement sx, ECFieldElement sy)
+    {
+        return this.getCurve().createRawPoint(getRawXCoord().multiply(sx), getRawYCoord().multiply(sy), this.withCompression);
+    }
+
     public boolean isInfinity()
     {
-        return x == null && y == null;
+        return x == null || y == null || (zs.length > 0 && zs[0].isZero());
     }
 
     public boolean isCompressed()
     {
-        return withCompression;
+        return this.withCompression;
+    }
+
+    public boolean equals(ECPoint other)
+    {
+        if (null == other)
+        {
+            return false;
+        }
+
+        ECCurve c1 = this.getCurve(), c2 = other.getCurve();
+        boolean n1 = (null == c1), n2 = (null == c2);
+        boolean i1 = isInfinity(), i2 = other.isInfinity();
+
+        if (i1 || i2)
+        {
+            return (i1 && i2) && (n1 || n2 || c1.equals(c2));
+        }
+
+        ECPoint p1 = this, p2 = other;
+        if (n1 && n2)
+        {
+            // Points with null curve are in affine form, so already normalized
+        }
+        else if (n1)
+        {
+            p2 = p2.normalize();
+        }
+        else if (n2)
+        {
+            p1 = p1.normalize();
+        }
+        else if (!c1.equals(c2))
+        {
+            return false;
+        }
+        else
+        {
+            // TODO Consider just requiring already normalized, to avoid silent performance degradation
+
+            ECPoint[] points = new ECPoint[]{ this, c1.importPoint(p2) };
+
+            // TODO This is a little strong, really only requires coZNormalizeAll to get Zs equal
+            c1.normalizeAll(points);
+
+            p1 = points[0];
+            p2 = points[1];
+        }
+
+        return p1.getXCoord().equals(p2.getXCoord()) && p1.getYCoord().equals(p2.getYCoord());
     }
 
-    public boolean equals(
-        Object  other)
+    public boolean equals(Object other)
     {
         if (other == this)
         {
@@ -66,69 +331,117 @@ public abstract class ECPoint
             return false;
         }
 
-        ECPoint o = (ECPoint)other;
+        return equals((ECPoint)other);
+    }
 
-        if (this.isInfinity())
+    public int hashCode()
+    {
+        ECCurve c = this.getCurve();
+        int hc = (null == c) ? 0 : ~c.hashCode();
+
+        if (!this.isInfinity())
         {
-            return o.isInfinity();
+            // TODO Consider just requiring already normalized, to avoid silent performance degradation
+
+            ECPoint p = normalize();
+
+            hc ^= p.getXCoord().hashCode() * 17;
+            hc ^= p.getYCoord().hashCode() * 257;
         }
 
-        return x.equals(o.x) && y.equals(o.y);
+        return hc;
     }
 
-    public int hashCode()
+    public String toString()
     {
         if (this.isInfinity())
         {
-            return 0;
+            return "INF";
         }
-        
-        return x.hashCode() ^ y.hashCode();
+
+        StringBuffer sb = new StringBuffer();
+        sb.append('(');
+        sb.append(getRawXCoord());
+        sb.append(',');
+        sb.append(getRawYCoord());
+        for (int i = 0; i < zs.length; ++i)
+        {
+            sb.append(',');
+            sb.append(zs[i]);
+        }
+        sb.append(')');
+        return sb.toString();
     }
 
-//    /**
-//     * Mainly for testing. Explicitly set the <code>ECMultiplier</code>.
-//     * @param multiplier The <code>ECMultiplier</code> to be used to multiply
-//     * this <code>ECPoint</code>.
-//     */
-//    public void setECMultiplier(ECMultiplier multiplier)
-//    {
-//        this.multiplier = multiplier;
-//    }
+    public byte[] getEncoded()
+    {
+        return getEncoded(this.withCompression);
+    }
 
     /**
-     * Sets the <code>PreCompInfo</code>. Used by <code>ECMultiplier</code>s
-     * to save the precomputation for this <code>ECPoint</code> to store the
-     * precomputation result for use by subsequent multiplication.
-     * @param preCompInfo The values precomputed by the
-     * <code>ECMultiplier</code>.
+     * return the field element encoded with point compression. (S 4.3.6)
      */
-    void setPreCompInfo(PreCompInfo preCompInfo)
+    public byte[] getEncoded(boolean compressed)
     {
-        this.preCompInfo = preCompInfo;
-    }
+        if (this.isInfinity())
+        {
+            return new byte[1];
+        }
 
-    public byte[] getEncoded()
-    {
-        return getEncoded(withCompression);
+        ECPoint normed = normalize();
+
+        byte[] X = normed.getXCoord().getEncoded();
+
+        if (compressed)
+        {
+            byte[] PO = new byte[X.length + 1];
+            PO[0] = (byte)(normed.getCompressionYTilde() ? 0x03 : 0x02);
+            System.arraycopy(X, 0, PO, 1, X.length);
+            return PO;
+        }
+
+        byte[] Y = normed.getYCoord().getEncoded();
+
+        byte[] PO = new byte[X.length + Y.length + 1];
+        PO[0] = 0x04;
+        System.arraycopy(X, 0, PO, 1, X.length);
+        System.arraycopy(Y, 0, PO, X.length + 1, Y.length);
+        return PO;
     }
 
-    public abstract byte[] getEncoded(boolean compressed);
+    protected abstract boolean getCompressionYTilde();
 
     public abstract ECPoint add(ECPoint b);
-    public abstract ECPoint subtract(ECPoint b);
+
     public abstract ECPoint negate();
-    public abstract ECPoint twice();
 
-    /**
-     * Sets the default <code>ECMultiplier</code>, unless already set. 
-     */
-    synchronized void assertECMultiplier()
+    public abstract ECPoint subtract(ECPoint b);
+
+    public ECPoint timesPow2(int e)
     {
-        if (this.multiplier == null)
+        if (e < 0)
+        {
+            throw new IllegalArgumentException("'e' cannot be negative");
+        }
+
+        ECPoint p = this;
+        while (--e >= 0)
         {
-            this.multiplier = new FpNafMultiplier();
+            p = p.twice();
         }
+        return p;
+    }
+
+    public abstract ECPoint twice();
+
+    public ECPoint twicePlus(ECPoint b)
+    {
+        return twice().add(b);
+    }
+
+    public ECPoint threeTimes()
+    {
+        return twicePlus(this);
     }
 
     /**
@@ -138,23 +451,7 @@ public abstract class ECPoint
      */
     public ECPoint multiply(BigInteger k)
     {
-        if (k.signum() < 0)
-        {
-            throw new IllegalArgumentException("The multiplicator cannot be negative");
-        }
-
-        if (this.isInfinity())
-        {
-            return this;
-        }
-
-        if (k.signum() == 0)
-        {
-            return this.curve.getInfinity();
-        }
-
-        assertECMultiplier();
-        return this.multiplier.multiply(this, k, preCompInfo);
+        return this.getCurve().getMultiplier().multiply(this, k);
     }
 
     /**
@@ -162,13 +459,14 @@ public abstract class ECPoint
      */
     public static class Fp extends ECPoint
     {
-        
         /**
          * Create a point which encodes with point compression.
          * 
          * @param curve the curve to use
          * @param x affine x co-ordinate
          * @param y affine y co-ordinate
+         * 
+         * @deprecated Use ECCurve.createPoint to construct points
          */
         public Fp(ECCurve curve, ECFieldElement x, ECFieldElement y)
         {
@@ -182,6 +480,8 @@ public abstract class ECPoint
          * @param x affine x co-ordinate
          * @param y affine y co-ordinate
          * @param withCompression if true encode with point compression
+         * 
+         * @deprecated per-point compression property will be removed, refer {@link #getEncoded(boolean)}
          */
         public Fp(ECCurve curve, ECFieldElement x, ECFieldElement y, boolean withCompression)
         {
@@ -194,112 +494,537 @@ public abstract class ECPoint
 
             this.withCompression = withCompression;
         }
-         
-        /**
-         * return the field element encoded with point compression. (S 4.3.6)
-         */
-        public byte[] getEncoded(boolean compressed)
+
+        Fp(ECCurve curve, ECFieldElement x, ECFieldElement y, ECFieldElement[] zs, boolean withCompression)
         {
-            if (this.isInfinity()) 
+            super(curve, x, y, zs);
+
+            this.withCompression = withCompression;
+        }
+
+        protected boolean getCompressionYTilde()
+        {
+            return this.getAffineYCoord().testBitZero();
+        }
+
+        public ECFieldElement getZCoord(int index)
+        {
+            if (index == 1 && ECCurve.COORD_JACOBIAN_MODIFIED == this.getCurveCoordinateSystem())
             {
-                return new byte[1];
+                return getJacobianModifiedW();
             }
 
-            int qLength = converter.getByteLength(x);
-            
-            if (compressed)
+            return super.getZCoord(index);
+        }
+
+        // B.3 pg 62
+        public ECPoint add(ECPoint b)
+        {
+            if (this.isInfinity())
             {
-                byte    PC;
-    
-                if (this.getY().toBigInteger().testBit(0))
+                return b;
+            }
+            if (b.isInfinity())
+            {
+                return this;
+            }
+            if (this == b)
+            {
+                return twice();
+            }
+
+            ECCurve curve = this.getCurve();
+            int coord = curve.getCoordinateSystem();
+
+            ECFieldElement X1 = this.x, Y1 = this.y;
+            ECFieldElement X2 = b.x, Y2 = b.y;
+
+            switch (coord)
+            {
+            case ECCurve.COORD_AFFINE:
+            {
+                ECFieldElement dx = X2.subtract(X1), dy = Y2.subtract(Y1);
+
+                if (dx.isZero())
                 {
-                    PC = 0x03;
+                    if (dy.isZero())
+                    {
+                        // this == b, i.e. this must be doubled
+                        return twice();
+                    }
+
+                    // this == -b, i.e. the result is the point at infinity
+                    return curve.getInfinity();
                 }
-                else
+
+                ECFieldElement gamma = dy.divide(dx);
+                ECFieldElement X3 = gamma.square().subtract(X1).subtract(X2);
+                ECFieldElement Y3 = gamma.multiply(X1.subtract(X3)).subtract(Y1);
+
+                return new ECPoint.Fp(curve, X3, Y3, this.withCompression);
+            }
+
+            case ECCurve.COORD_HOMOGENEOUS:
+            {
+                ECFieldElement Z1 = this.zs[0];
+                ECFieldElement Z2 = b.zs[0];
+
+                boolean Z1IsOne = Z1.bitLength() == 1;
+                boolean Z2IsOne = Z2.bitLength() == 1;
+
+                ECFieldElement u1 = Z1IsOne ? Y2 : Y2.multiply(Z1);
+                ECFieldElement u2 = Z2IsOne ? Y1 : Y1.multiply(Z2);
+                ECFieldElement u = u1.subtract(u2);
+                ECFieldElement v1 = Z1IsOne ? X2 : X2.multiply(Z1);
+                ECFieldElement v2 = Z2IsOne ? X1 : X1.multiply(Z2);
+                ECFieldElement v = v1.subtract(v2);
+
+                // Check if b == this or b == -this
+                if (v.isZero())
                 {
-                    PC = 0x02;
+                    if (u.isZero())
+                    {
+                        // this == b, i.e. this must be doubled
+                        return this.twice();
+                    }
+
+                    // this == -b, i.e. the result is the point at infinity
+                    return curve.getInfinity();
                 }
+
+                // TODO Optimize for when w == 1
+                ECFieldElement w = Z1IsOne ? Z2 : Z2IsOne ? Z1 : Z1.multiply(Z2);
+                ECFieldElement vSquared = v.square();
+                ECFieldElement vCubed = vSquared.multiply(v);
+                ECFieldElement vSquaredV2 = vSquared.multiply(v2);
+                ECFieldElement A = u.square().multiply(w).subtract(vCubed).subtract(two(vSquaredV2));
+
+                ECFieldElement X3 = v.multiply(A);
+                ECFieldElement Y3 = vSquaredV2.subtract(A).multiply(u).subtract(vCubed.multiply(u2));
+                ECFieldElement Z3 = vCubed.multiply(w);
+
+                return new ECPoint.Fp(curve, X3, Y3, new ECFieldElement[]{ Z3 }, this.withCompression);
+            }
+
+            case ECCurve.COORD_JACOBIAN:
+            case ECCurve.COORD_JACOBIAN_MODIFIED:
+            {
+                ECFieldElement Z1 = this.zs[0];
+                ECFieldElement Z2 = b.zs[0];
+
+                boolean Z1IsOne = Z1.bitLength() == 1;
+
+                ECFieldElement X3, Y3, Z3, Z3Squared = null;
+
+                if (!Z1IsOne && Z1.equals(Z2))
+                {
+                    // TODO Make this available as public method coZAdd?
+
+                    ECFieldElement dx = X1.subtract(X2), dy = Y1.subtract(Y2);
+                    if (dx.isZero())
+                    {
+                        if (dy.isZero())
+                        {
+                            return twice();
+                        }
+                        return curve.getInfinity();
+                    }
+
+                    ECFieldElement C = dx.square();
+                    ECFieldElement W1 = X1.multiply(C), W2 = X2.multiply(C);
+                    ECFieldElement A1 = W1.subtract(W2).multiply(Y1);
+
+                    X3 = dy.square().subtract(W1).subtract(W2);
+                    Y3 = W1.subtract(X3).multiply(dy).subtract(A1);
+                    Z3 = dx;
+
+                    if (Z1IsOne)
+                    {
+                        Z3Squared = C;
+                    }
+                    else
+                    {
+                        Z3 = Z3.multiply(Z1);
+                    }
+                }
+                else
+                {
+                    ECFieldElement Z1Squared, U2, S2;
+                    if (Z1IsOne)
+                    {
+                        Z1Squared = Z1; U2 = X2; S2 = Y2;
+                    }
+                    else
+                    {
+                        Z1Squared = Z1.square();
+                        U2 = Z1Squared.multiply(X2);
+                        ECFieldElement Z1Cubed = Z1Squared.multiply(Z1);
+                        S2 = Z1Cubed.multiply(Y2);
+                    }
+
+                    boolean Z2IsOne = Z2.bitLength() == 1;
+                    ECFieldElement Z2Squared, U1, S1;
+                    if (Z2IsOne)
+                    {
+                        Z2Squared = Z2; U1 = X1; S1 = Y1;
+                    }
+                    else
+                    {
+                        Z2Squared = Z2.square();
+                        U1 = Z2Squared.multiply(X1); 
+                        ECFieldElement Z2Cubed = Z2Squared.multiply(Z2);
+                        S1 = Z2Cubed.multiply(Y1);
+                    }
+
+                    ECFieldElement H = U1.subtract(U2);
+                    ECFieldElement R = S1.subtract(S2);
+    
+                    // Check if b == this or b == -this
+                    if (H.isZero())
+                    {
+                        if (R.isZero())
+                        {
+                            // this == b, i.e. this must be doubled
+                            return this.twice();
+                        }
+    
+                        // this == -b, i.e. the result is the point at infinity
+                        return curve.getInfinity();
+                    }
     
-                byte[]  X = converter.integerToBytes(this.getX().toBigInteger(), qLength);
-                byte[]  PO = new byte[X.length + 1];
+                    ECFieldElement HSquared = H.square();
+                    ECFieldElement G = HSquared.multiply(H);
+                    ECFieldElement V = HSquared.multiply(U1);
     
-                PO[0] = PC;
-                System.arraycopy(X, 0, PO, 1, X.length);
+                    X3 = R.square().add(G).subtract(two(V));
+                    Y3 = V.subtract(X3).multiply(R).subtract(S1.multiply(G));
     
-                return PO;
+                    Z3 = H;
+                    if (!Z1IsOne)
+                    {
+                        Z3 = Z3.multiply(Z1);
+                    }
+                    if (!Z2IsOne)
+                    {
+                        Z3 = Z3.multiply(Z2);
+                    }
+    
+                    // Alternative calculation of Z3 using fast square
+    //                X3 = four(X3);
+    //                Y3 = eight(Y3);
+    //                Z3 = doubleProductFromSquares(Z1, Z2, Z1Squared, Z2Squared).multiply(H);
+                    
+                    if (Z3 == H)
+                    {
+                        Z3Squared = HSquared;
+                    }
+                }
+
+                ECFieldElement[] zs;
+                if (coord == ECCurve.COORD_JACOBIAN_MODIFIED)
+                {
+                    // TODO If the result will only be used in a subsequent addition, we don't need W3
+                    ECFieldElement W3 = calculateJacobianModifiedW(Z3, Z3Squared);
+
+                    zs = new ECFieldElement[]{ Z3, W3 };
+                }
+                else
+                {
+                    zs = new ECFieldElement[]{ Z3 };
+                }
+
+                return new ECPoint.Fp(curve, X3, Y3, zs, this.withCompression);
             }
-            else
+            default:
+            {
+                throw new IllegalStateException("unsupported coordinate system");
+            }
+            }
+        }
+
+        // B.3 pg 62
+        public ECPoint twice()
+        {
+            if (this.isInfinity())
+            {
+                return this;
+            }
+
+            ECCurve curve = this.getCurve();
+
+            ECFieldElement Y1 = this.y;
+            if (Y1.isZero()) 
+            {
+                return curve.getInfinity();
+            }
+
+            int coord = curve.getCoordinateSystem();
+
+            ECFieldElement X1 = this.x;
+
+            switch (coord)
+            {
+            case ECCurve.COORD_AFFINE:
+            {
+                ECFieldElement X1Squared = X1.square();
+                ECFieldElement gamma = three(X1Squared).add(this.getCurve().getA()).divide(two(Y1));
+                ECFieldElement X3 = gamma.square().subtract(two(X1));
+                ECFieldElement Y3 = gamma.multiply(X1.subtract(X3)).subtract(Y1);
+    
+                return new ECPoint.Fp(curve, X3, Y3, this.withCompression);
+            }
+
+            case ECCurve.COORD_HOMOGENEOUS:
             {
-                byte[]  X = converter.integerToBytes(this.getX().toBigInteger(), qLength);
-                byte[]  Y = converter.integerToBytes(this.getY().toBigInteger(), qLength);
-                byte[]  PO = new byte[X.length + Y.length + 1];
+                ECFieldElement Z1 = this.zs[0];
+
+                boolean Z1IsOne = Z1.bitLength() == 1;
+                ECFieldElement Z1Squared = Z1IsOne ? Z1 : Z1.square();
+
+                // TODO Optimize for small negative a4 and -3
+                ECFieldElement w = curve.getA();
+                if (!Z1IsOne)
+                {
+                    w = w.multiply(Z1Squared);
+                }
+                w = w.add(three(X1.square()));
                 
-                PO[0] = 0x04;
-                System.arraycopy(X, 0, PO, 1, X.length);
-                System.arraycopy(Y, 0, PO, X.length + 1, Y.length);
+                ECFieldElement s = Z1IsOne ? Y1 : Y1.multiply(Z1);
+                ECFieldElement t = Z1IsOne ? Y1.square() : s.multiply(Y1);
+                ECFieldElement B = X1.multiply(t);
+                ECFieldElement _4B = four(B);
+                ECFieldElement h = w.square().subtract(two(_4B));
+
+                ECFieldElement X3 = two(h.multiply(s));
+                ECFieldElement Y3 = w.multiply(_4B.subtract(h)).subtract(two(two(t).square()));
+                ECFieldElement _4sSquared = Z1IsOne ? four(t) : two(s).square();
+                ECFieldElement Z3 = two(_4sSquared).multiply(s);
+
+                return new ECPoint.Fp(curve, X3, Y3, new ECFieldElement[]{ Z3 }, this.withCompression);
+            }
+
+            case ECCurve.COORD_JACOBIAN:
+            {
+                ECFieldElement Z1 = this.zs[0];
+
+                boolean Z1IsOne = Z1.bitLength() == 1;
+                ECFieldElement Z1Squared = Z1IsOne ? Z1 : Z1.square();
+
+                ECFieldElement Y1Squared = Y1.square();
+                ECFieldElement T = Y1Squared.square();
+
+                ECFieldElement a4 = curve.getA();
+                ECFieldElement a4Neg = a4.negate();
+
+                ECFieldElement M, S;
+                if (a4Neg.toBigInteger().equals(BigInteger.valueOf(3)))
+                {
+                    M = three(X1.add(Z1Squared).multiply(X1.subtract(Z1Squared)));
+                    S = four(Y1Squared.multiply(X1));
+                }
+                else
+                {
+                    ECFieldElement X1Squared = X1.square();
+                    M = three(X1Squared);
+                    if (Z1IsOne)
+                    {
+                        M = M.add(a4);
+                    }
+                    else
+                    {
+                        ECFieldElement Z1Pow4 = Z1Squared.square();
+                        if (a4Neg.bitLength() < a4.bitLength())
+                        {
+                            M = M.subtract(Z1Pow4.multiply(a4Neg));
+                        }
+                        else
+                        {
+                            M = M.add(Z1Pow4.multiply(a4));
+                        }
+                    }
+                    S = two(doubleProductFromSquares(X1, Y1Squared, X1Squared, T));
+                }
+
+                ECFieldElement X3 = M.square().subtract(two(S));
+                ECFieldElement Y3 = S.subtract(X3).multiply(M).subtract(eight(T));
+
+                ECFieldElement Z3 = two(Y1);
+                if (!Z1IsOne)
+                {
+                    Z3 = Z3.multiply(Z1);
+                }
 
-                return PO;
+                // Alternative calculation of Z3 using fast square
+//                ECFieldElement Z3 = doubleProductFromSquares(Y1, Z1, Y1Squared, Z1Squared);
+
+                return new ECPoint.Fp(curve, X3, Y3, new ECFieldElement[]{ Z3 }, this.withCompression);
+            }
+
+            case ECCurve.COORD_JACOBIAN_MODIFIED:
+            {
+                return twiceJacobianModified(true);
+            }
+
+            default:
+            {
+                throw new IllegalStateException("unsupported coordinate system");
+            }
             }
         }
 
-        // B.3 pg 62
-        public ECPoint add(ECPoint b)
+        public ECPoint twicePlus(ECPoint b)
         {
+            if (this == b)
+            {
+                return threeTimes();
+            }
             if (this.isInfinity())
             {
                 return b;
             }
-
             if (b.isInfinity())
             {
-                return this;
+                return twice();
+            }
+
+            ECFieldElement Y1 = this.y;
+            if (Y1.isZero()) 
+            {
+                return b;
             }
 
-            // Check if b = this or b = -this
-            if (this.x.equals(b.x))
+            ECCurve curve = this.getCurve();
+            int coord = curve.getCoordinateSystem();
+
+            switch (coord)
+            {
+            case ECCurve.COORD_AFFINE:
             {
-                if (this.y.equals(b.y))
+                ECFieldElement X1 = this.x;
+                ECFieldElement X2 = b.x, Y2 = b.y;
+
+                ECFieldElement dx = X2.subtract(X1), dy = Y2.subtract(Y1);
+
+                if (dx.isZero())
                 {
-                    // this = b, i.e. this must be doubled
-                    return this.twice();
+                    if (dy.isZero())
+                    {
+                        // this == b i.e. the result is 3P
+                        return threeTimes();
+                    }
+
+                    // this == -b, i.e. the result is P
+                    return this;
                 }
 
-                // this = -b, i.e. the result is the point at infinity
-                return this.curve.getInfinity();
-            }
+                /*
+                 * Optimized calculation of 2P + Q, as described in "Trading Inversions for
+                 * Multiplications in Elliptic Curve Cryptography", by Ciet, Joye, Lauter, Montgomery.
+                 */
 
-            ECFieldElement gamma = b.y.subtract(this.y).divide(b.x.subtract(this.x));
+                ECFieldElement X = dx.square(), Y = dy.square();
+                ECFieldElement d = X.multiply(two(X1).add(X2)).subtract(Y);
+                if (d.isZero())
+                {
+                    return curve.getInfinity();
+                }
 
-            ECFieldElement x3 = gamma.square().subtract(this.x).subtract(b.x);
-            ECFieldElement y3 = gamma.multiply(this.x.subtract(x3)).subtract(this.y);
+                ECFieldElement D = d.multiply(dx);
+                ECFieldElement I = D.invert();
+                ECFieldElement L1 = d.multiply(I).multiply(dy);
+                ECFieldElement L2 = two(Y1).multiply(X).multiply(dx).multiply(I).subtract(L1);
+                ECFieldElement X4 = (L2.subtract(L1)).multiply(L1.add(L2)).add(X2);
+                ECFieldElement Y4 = (X1.subtract(X4)).multiply(L2).subtract(Y1);
 
-            return new ECPoint.Fp(curve, x3, y3, withCompression);
+                return new ECPoint.Fp(curve, X4, Y4, this.withCompression);
+            }
+            case ECCurve.COORD_JACOBIAN_MODIFIED:
+            {
+                return twiceJacobianModified(false).add(b);
+            }
+            default:
+            {
+                return twice().add(b);
+            }
+            }
         }
 
-        // B.3 pg 62
-        public ECPoint twice()
+        public ECPoint threeTimes()
         {
-            if (this.isInfinity())
+            if (this.isInfinity() || this.y.isZero())
             {
-                // Twice identity element (point at infinity) is identity
                 return this;
             }
 
-            if (this.y.toBigInteger().signum() == 0) 
+            ECCurve curve = this.getCurve();
+            int coord = curve.getCoordinateSystem();
+
+            switch (coord)
+            {
+            case ECCurve.COORD_AFFINE:
             {
-                // if y1 == 0, then (x1, y1) == (x1, -y1)
-                // and hence this = -this and thus 2(x1, y1) == infinity
-                return this.curve.getInfinity();
+                ECFieldElement X1 = this.x, Y1 = this.y;
+
+                ECFieldElement _2Y1 = two(Y1); 
+                ECFieldElement X = _2Y1.square();
+                ECFieldElement Z = three(X1.square()).add(this.getCurve().getA());
+                ECFieldElement Y = Z.square();
+
+                ECFieldElement d = three(X1).multiply(X).subtract(Y);
+                if (d.isZero())
+                {
+                    return this.getCurve().getInfinity();
+                }
+
+                ECFieldElement D = d.multiply(_2Y1); 
+                ECFieldElement I = D.invert();
+                ECFieldElement L1 = d.multiply(I).multiply(Z);
+                ECFieldElement L2 = X.square().multiply(I).subtract(L1);
+
+                ECFieldElement X4 = (L2.subtract(L1)).multiply(L1.add(L2)).add(X1);
+                ECFieldElement Y4 = (X1.subtract(X4)).multiply(L2).subtract(Y1); 
+                return new ECPoint.Fp(curve, X4, Y4, this.withCompression);
+            }
+            case ECCurve.COORD_JACOBIAN_MODIFIED:
+            {
+                return twiceJacobianModified(false).add(this);
+            }
+            default:
+            {
+                // NOTE: Be careful about recursions between twicePlus and threeTimes
+                return twice().add(this);
             }
+            }
+        }
 
-            ECFieldElement TWO = this.curve.fromBigInteger(BigInteger.valueOf(2));
-            ECFieldElement THREE = this.curve.fromBigInteger(BigInteger.valueOf(3));
-            ECFieldElement gamma = this.x.square().multiply(THREE).add(curve.a).divide(y.multiply(TWO));
+        protected ECFieldElement two(ECFieldElement x)
+        {
+            return x.add(x);
+        }
 
-            ECFieldElement x3 = gamma.square().subtract(this.x.multiply(TWO));
-            ECFieldElement y3 = gamma.multiply(this.x.subtract(x3)).subtract(this.y);
-                
-            return new ECPoint.Fp(curve, x3, y3, this.withCompression);
+        protected ECFieldElement three(ECFieldElement x)
+        {
+            return two(x).add(x);
+        }
+
+        protected ECFieldElement four(ECFieldElement x)
+        {
+            return two(two(x));
+        }
+
+        protected ECFieldElement eight(ECFieldElement x)
+        {
+            return four(two(x));
+        }
+
+        protected ECFieldElement doubleProductFromSquares(ECFieldElement a, ECFieldElement b,
+            ECFieldElement aSquared, ECFieldElement bSquared)
+        {
+            /*
+             * NOTE: If squaring in the field is faster than multiplication, then this is a quicker
+             * way to calculate 2.A.B, if A^2 and B^2 are already known.
+             */
+            return a.add(b).square().subtract(aSquared).subtract(bSquared);
         }
 
         // D.3.2 pg 102 (see Note:)
@@ -316,18 +1041,70 @@ public abstract class ECPoint
 
         public ECPoint negate()
         {
+            if (this.isInfinity())
+            {
+                return this;
+            }
+
+            ECCurve curve = this.getCurve();
+            int coord = curve.getCoordinateSystem();
+
+            if (ECCurve.COORD_AFFINE != coord)
+            {
+                return new ECPoint.Fp(curve, this.x, this.y.negate(), this.zs, this.withCompression);
+            }
+
             return new ECPoint.Fp(curve, this.x, this.y.negate(), this.withCompression);
         }
 
-        /**
-         * Sets the default <code>ECMultiplier</code>, unless already set. 
-         */
-        synchronized void assertECMultiplier()
+        protected ECFieldElement calculateJacobianModifiedW(ECFieldElement Z, ECFieldElement ZSquared)
         {
-            if (this.multiplier == null)
+            if (ZSquared == null)
+            {
+                ZSquared = Z.square();
+            }
+
+            ECFieldElement W = ZSquared.square();
+            ECFieldElement a4 = this.getCurve().getA();
+            ECFieldElement a4Neg = a4.negate();
+            if (a4Neg.bitLength() < a4.bitLength())
             {
-                this.multiplier = new WNafMultiplier();
+                W = W.multiply(a4Neg).negate();
             }
+            else
+            {
+                W = W.multiply(a4);
+            }
+            return W;
+        }
+
+        protected ECFieldElement getJacobianModifiedW()
+        {
+            ECFieldElement W = this.zs[1];
+            if (W == null)
+            {
+                // NOTE: Rarely, twicePlus will result in the need for a lazy W1 calculation here
+                this.zs[1] = W = calculateJacobianModifiedW(this.zs[0], null);
+            }
+            return W;
+        }
+
+        protected ECPoint.Fp twiceJacobianModified(boolean calculateW)
+        {
+            ECFieldElement X1 = this.x, Y1 = this.y, Z1 = this.zs[0], W1 = getJacobianModifiedW();
+
+            ECFieldElement X1Squared = X1.square();
+            ECFieldElement M = three(X1Squared).add(W1);
+            ECFieldElement Y1Squared = Y1.square();
+            ECFieldElement T = Y1Squared.square();
+            ECFieldElement S = two(doubleProductFromSquares(X1, Y1Squared, X1Squared, T));
+            ECFieldElement X3 = M.square().subtract(two(S));
+            ECFieldElement _8T = eight(T);
+            ECFieldElement Y3 = M.multiply(S.subtract(X3)).subtract(_8T);
+            ECFieldElement W3 = calculateW ? two(_8T.multiply(W1)) : null;
+            ECFieldElement Z3 = two(Z1.bitLength() == 1 ? Y1 : Y1.multiply(Z1));
+
+            return new ECPoint.Fp(this.getCurve(), X3, Y3, new ECFieldElement[]{ Z3, W3 }, this.withCompression);
         }
     }
 
@@ -340,6 +1117,8 @@ public abstract class ECPoint
          * @param curve base curve
          * @param x x point
          * @param y y point
+         * 
+         * @deprecated Use ECCurve.createPoint to construct points
          */
         public F2m(ECCurve curve, ECFieldElement x, ECFieldElement y)
         {
@@ -351,6 +1130,8 @@ public abstract class ECPoint
          * @param x x point
          * @param y y point
          * @param withCompression true if encode with point compression.
+         * 
+         * @deprecated per-point compression property will be removed, refer {@link #getEncoded(boolean)}
          */
         public F2m(ECCurve curve, ECFieldElement x, ECFieldElement y, boolean withCompression)
         {
@@ -360,71 +1141,91 @@ public abstract class ECPoint
             {
                 throw new IllegalArgumentException("Exactly one of the field elements is null");
             }
-            
+
             if (x != null)
             {
                 // Check if x and y are elements of the same field
                 ECFieldElement.F2m.checkFieldElements(this.x, this.y);
-    
+
                 // Check if x and a are elements of the same field
                 if (curve != null)
                 {
                     ECFieldElement.F2m.checkFieldElements(this.x, this.curve.getA());
                 }
             }
-            
+
             this.withCompression = withCompression;
+
+//            checkCurveEquation();
         }
 
-        /* (non-Javadoc)
-         * @see org.bouncycastle.math.ec.ECPoint#getEncoded()
-         */
-        public byte[] getEncoded(boolean compressed)
+        F2m(ECCurve curve, ECFieldElement x, ECFieldElement y, ECFieldElement[] zs, boolean withCompression)
         {
-            if (this.isInfinity()) 
-            {
-                return new byte[1];
-            }
+            super(curve, x, y, zs);
+
+            this.withCompression = withCompression;
+
+//            checkCurveEquation();
+        }
 
-            int byteCount = converter.getByteLength(this.x);
-            byte[] X = converter.integerToBytes(this.getX().toBigInteger(), byteCount);
-            byte[] PO;
+        public ECFieldElement getYCoord()
+        {
+            int coord = this.getCurveCoordinateSystem();
 
-            if (compressed)
+            switch (coord)
+            {
+            case ECCurve.COORD_LAMBDA_AFFINE:
+            case ECCurve.COORD_LAMBDA_PROJECTIVE:
             {
-                // See X9.62 4.3.6 and 4.2.2
-                PO = new byte[byteCount + 1];
+                // TODO The X == 0 stuff needs further thought
+                if (this.isInfinity() || x.isZero())
+                {
+                    return y;
+                }
 
-                PO[0] = 0x02;
-                // X9.62 4.2.2 and 4.3.6:
-                // if x = 0 then ypTilde := 0, else ypTilde is the rightmost
-                // bit of y * x^(-1)
-                // if ypTilde = 0, then PC := 02, else PC := 03
-                // Note: PC === PO[0]
-                if (!(this.getX().toBigInteger().equals(ECConstants.ZERO)))
+                // Y is actually Lambda (X + Y/X) here; convert to affine value on the fly
+                ECFieldElement X = x, L = y;
+                ECFieldElement Y = L.subtract(X).multiply(X);
+                if (ECCurve.COORD_LAMBDA_PROJECTIVE == coord)
                 {
-                    if (this.getY().multiply(this.getX().invert())
-                            .toBigInteger().testBit(0))
+                    ECFieldElement Z = zs[0];
+                    if (Z.bitLength() != 1)
                     {
-                        // ypTilde = 1, hence PC = 03
-                        PO[0] = 0x03;
+                        Y = Y.divide(Z);
                     }
                 }
-
-                System.arraycopy(X, 0, PO, 1, byteCount);
+                return Y;
             }
-            else
+            default:
             {
-                byte[] Y = converter.integerToBytes(this.getY().toBigInteger(), byteCount);
-    
-                PO = new byte[byteCount + byteCount + 1];
-    
-                PO[0] = 0x04;
-                System.arraycopy(X, 0, PO, 1, byteCount);
-                System.arraycopy(Y, 0, PO, byteCount + 1, byteCount);    
+                return y;
+            }
+            }
+        }
+
+        protected boolean getCompressionYTilde()
+        {
+            ECFieldElement X = this.getRawXCoord();
+            if (X.isZero())
+            {
+                return false;
             }
 
-            return PO;
+            ECFieldElement Y = this.getRawYCoord();
+
+            switch (this.getCurveCoordinateSystem())
+            {
+            case ECCurve.COORD_LAMBDA_AFFINE:
+            case ECCurve.COORD_LAMBDA_PROJECTIVE:
+            {
+                // Y is actually Lambda (X + Y/X) here
+                return Y.subtract(X).testBitZero();
+            }
+            default:
+            {
+                return Y.divide(X).testBitZero();
+            }
+            }
         }
 
         /**
@@ -437,7 +1238,7 @@ public abstract class ECPoint
         private static void checkPoints(ECPoint a, ECPoint b)
         {
             // Check, if points are on the same curve
-            if (!(a.curve.equals(b.curve)))
+            if (a.curve != b.curve)
             {
                 throw new IllegalArgumentException("Only points on the same "
                         + "curve can be added or subtracted");
@@ -466,43 +1267,162 @@ public abstract class ECPoint
          */
         public ECPoint.F2m addSimple(ECPoint.F2m b)
         {
-            ECPoint.F2m other = b;
             if (this.isInfinity())
             {
-                return other;
+                return b;
             }
-
-            if (other.isInfinity())
+            if (b.isInfinity())
             {
                 return this;
             }
 
-            ECFieldElement.F2m x2 = (ECFieldElement.F2m)other.getX();
-            ECFieldElement.F2m y2 = (ECFieldElement.F2m)other.getY();
+            ECCurve curve = this.getCurve();
+            int coord = curve.getCoordinateSystem();
+
+            ECFieldElement X1 = this.x;
+            ECFieldElement X2 = b.x;
+
+            switch (coord)
+            {
+            case ECCurve.COORD_AFFINE:
+            {
+                ECFieldElement Y1 = this.y;
+                ECFieldElement Y2 = b.y;
+
+                if (X1.equals(X2))
+                {
+                    if (Y1.equals(Y2))
+                    {
+                        return (ECPoint.F2m)twice();
+                    }
+
+                    return (ECPoint.F2m)curve.getInfinity();
+                }
+
+                ECFieldElement sumX = X1.add(X2);
+                ECFieldElement L = Y1.add(Y2).divide(sumX);
+
+                ECFieldElement X3 = L.square().add(L).add(sumX).add(curve.getA());
+                ECFieldElement Y3 = L.multiply(X1.add(X3)).add(X3).add(Y1);
 
-            // Check if other = this or other = -this
-            if (this.x.equals(x2))
+                return new ECPoint.F2m(curve, X3, Y3, this.withCompression);
+            }
+            case ECCurve.COORD_HOMOGENEOUS:
             {
-                if (this.y.equals(y2))
+                ECFieldElement Y1 = this.y, Z1 = this.zs[0];
+                ECFieldElement Y2 = b.y, Z2 = b.zs[0];
+
+                boolean Z2IsOne = Z2.bitLength() == 1;
+
+                ECFieldElement U1 = Z1.multiply(Y2); 
+                ECFieldElement U2 = Z2IsOne ? Y1 : Y1.multiply(Z2);
+                ECFieldElement U = U1.subtract(U2);
+                ECFieldElement V1 = Z1.multiply(X2);
+                ECFieldElement V2 = Z2IsOne ? X1 : X1.multiply(Z2);
+                ECFieldElement V = V1.subtract(V2);
+
+                if (V1.equals(V2))
                 {
-                    // this = other, i.e. this must be doubled
-                    return (ECPoint.F2m)this.twice();
+                    if (U1.equals(U2))
+                    {
+                        return (ECPoint.F2m)twice();
+                    }
+
+                    return (ECPoint.F2m)curve.getInfinity();
                 }
 
-                // this = -other, i.e. the result is the point at infinity
-                return (ECPoint.F2m)this.curve.getInfinity();
+                ECFieldElement VSq =  V.square();
+                ECFieldElement W = Z2IsOne ? Z1 : Z1.multiply(Z2);
+                ECFieldElement A = U.square().add(U.multiply(V).add(VSq.multiply(curve.getA()))).multiply(W).add(V.multiply(VSq));
+
+                ECFieldElement X3 = V.multiply(A);
+                ECFieldElement VSqZ2 = Z2IsOne ? VSq : VSq.multiply(Z2);
+                ECFieldElement Y3 = VSqZ2.multiply(U.multiply(X1).add(Y1.multiply(V))).add(A.multiply(U.add(V)));
+                ECFieldElement Z3 = VSq.multiply(V).multiply(W);
+
+                return new ECPoint.F2m(curve, X3, Y3, new ECFieldElement[]{ Z3 }, this.withCompression);
             }
+            case ECCurve.COORD_LAMBDA_PROJECTIVE:
+            {
+                if (X1.isZero())
+                {
+                    return b.addSimple(this);
+                }
 
-            ECFieldElement.F2m lambda
-                = (ECFieldElement.F2m)(this.y.add(y2)).divide(this.x.add(x2));
+                ECFieldElement L1 = this.y, Z1 = this.zs[0];
+                ECFieldElement L2 = b.y, Z2 = b.zs[0];
+
+                boolean Z1IsOne = Z1.bitLength() == 1;
+                ECFieldElement U2 = X2, S2 = L2;
+                if (!Z1IsOne)
+                {
+                    U2 = U2.multiply(Z1);
+                    S2 = S2.multiply(Z1);
+                }
 
-            ECFieldElement.F2m x3
-                = (ECFieldElement.F2m)lambda.square().add(lambda).add(this.x).add(x2).add(this.curve.getA());
+                boolean Z2IsOne = Z2.bitLength() == 1;
+                ECFieldElement U1 = X1, S1 = L1;
+                if (!Z2IsOne)
+                {
+                    U1 = U1.multiply(Z2);
+                    S1 = S1.multiply(Z2);
+                }
 
-            ECFieldElement.F2m y3
-                = (ECFieldElement.F2m)lambda.multiply(this.x.add(x3)).add(x3).add(this.y);
+                ECFieldElement A = S1.add(S2);
+                ECFieldElement B = U1.add(U2);
 
-            return new ECPoint.F2m(curve, x3, y3, withCompression);
+                if (B.isZero())
+                {
+                    if (A.isZero())
+                    {
+                        return (ECPoint.F2m)twice();
+                    }
+
+                    return (ECPoint.F2m)curve.getInfinity();
+                }
+
+                ECFieldElement X3, L3, Z3;
+                if (X2.isZero())
+                {
+                    // TODO This can probably be optimized quite a bit
+
+                    ECFieldElement Y1 = getYCoord(), Y2 = L2;
+                    ECFieldElement L = Y1.add(Y2).divide(X1);
+
+                    X3 = L.square().add(L).add(X1).add(curve.getA());
+                    ECFieldElement Y3 = L.multiply(X1.add(X3)).add(X3).add(Y1);
+                    L3 = X3.isZero() ? Y3 : Y3.divide(X3).add(X3);
+                    Z3 = curve.fromBigInteger(ECConstants.ONE);
+                }
+                else
+                {
+                    B = B.square();
+    
+                    ECFieldElement AU1 = A.multiply(U1);
+                    ECFieldElement AU2 = A.multiply(U2);
+                    ECFieldElement ABZ2 = A.multiply(B);
+                    if (!Z2IsOne)
+                    {
+                        ABZ2 = ABZ2.multiply(Z2);
+                    }
+
+                    X3 = AU1.multiply(AU2);
+                    L3 = AU2.add(B).square().add(ABZ2.multiply(L1.add(Z1)));
+
+                    Z3 = ABZ2;
+                    if (!Z1IsOne)
+                    {
+                        Z3 = Z3.multiply(Z1);
+                    }
+                }
+
+                return new ECPoint.F2m(curve, X3, L3, new ECFieldElement[]{ Z3 }, this.withCompression);
+            }
+            default:
+            {
+                throw new IllegalStateException("unsupported coordinate system");
+            }
+            }
         }
 
         /* (non-Javadoc)
@@ -534,59 +1454,279 @@ public abstract class ECPoint
             return addSimple((ECPoint.F2m)b.negate());
         }
 
-        /* (non-Javadoc)
-         * @see org.bouncycastle.math.ec.ECPoint#twice()
-         */
+        public ECPoint.F2m tau()
+        {
+            if (this.isInfinity())
+            {
+                return this;
+            }
+
+            ECCurve curve = this.getCurve();
+            int coord = curve.getCoordinateSystem();
+
+            ECFieldElement X1 = this.x;
+
+            switch (coord)
+            {
+            case ECCurve.COORD_AFFINE:
+            case ECCurve.COORD_LAMBDA_AFFINE:
+            {
+                ECFieldElement Y1 = this.y;
+                return new ECPoint.F2m(curve, X1.square(), Y1.square(), this.withCompression);
+            }
+            case ECCurve.COORD_HOMOGENEOUS:
+            case ECCurve.COORD_LAMBDA_PROJECTIVE:
+            {
+                ECFieldElement Y1 = this.y, Z1 = this.zs[0];
+                return new ECPoint.F2m(curve, X1.square(), Y1.square(), new ECFieldElement[]{ Z1.square() }, this.withCompression);
+            }
+            default:
+            {
+                throw new IllegalStateException("unsupported coordinate system");
+            }
+            }
+        }
+
         public ECPoint twice()
         {
             if (this.isInfinity()) 
             {
-                // Twice identity element (point at infinity) is identity
                 return this;
             }
 
-            if (this.x.toBigInteger().signum() == 0) 
+            ECCurve curve = this.getCurve();
+
+            ECFieldElement X1 = this.x;
+            if (X1.isZero()) 
             {
-                // if x1 == 0, then (x1, y1) == (x1, x1 + y1)
-                // and hence this = -this and thus 2(x1, y1) == infinity
-                return this.curve.getInfinity();
+                // A point with X == 0 is it's own additive inverse
+                return curve.getInfinity();
             }
 
-            ECFieldElement.F2m lambda
-                = (ECFieldElement.F2m)this.x.add(this.y.divide(this.x));
+            int coord = curve.getCoordinateSystem();
 
-            ECFieldElement.F2m x3
-                = (ECFieldElement.F2m)lambda.square().add(lambda).
-                    add(this.curve.getA());
+            switch (coord)
+            {
+            case ECCurve.COORD_AFFINE:
+            {
+                ECFieldElement Y1 = this.y;
 
-            ECFieldElement ONE = this.curve.fromBigInteger(ECConstants.ONE);
-            ECFieldElement.F2m y3
-                = (ECFieldElement.F2m)this.x.square().add(
-                    x3.multiply(lambda.add(ONE)));
+                ECFieldElement L1 = Y1.divide(X1).add(X1);
 
-            return new ECPoint.F2m(this.curve, x3, y3, withCompression);
-        }
+                ECFieldElement X3 = L1.square().add(L1).add(curve.getA());
+                ECFieldElement Y3 = X1.square().add(X3.multiply(L1.addOne()));
 
-        public ECPoint negate()
-        {
-            return new ECPoint.F2m(curve, this.getX(), this.getY().add(this.getX()), withCompression);
+                return new ECPoint.F2m(curve, X3, Y3, this.withCompression);
+            }
+            case ECCurve.COORD_HOMOGENEOUS:
+            {
+                ECFieldElement Y1 = this.y, Z1 = this.zs[0];
+
+                boolean Z1IsOne = Z1.bitLength() == 1;
+                ECFieldElement X1Z1 = Z1IsOne ? X1 : X1.multiply(Z1);
+                ECFieldElement Y1Z1 = Z1IsOne ? Y1 : Y1.multiply(Z1);
+
+                ECFieldElement X1Sq = X1.square();
+                ECFieldElement S = X1Sq.add(Y1Z1);
+                ECFieldElement V = X1Z1;
+                ECFieldElement vSquared = V.square();
+                ECFieldElement h = S.square().add(S.multiply(V)).add(curve.getA().multiply(vSquared));
+
+                ECFieldElement X3 = V.multiply(h);
+                ECFieldElement Y3 = h.multiply(S.add(V)).add(X1Sq.square().multiply(V));
+                ECFieldElement Z3 = V.multiply(vSquared);    
+
+                return new ECPoint.F2m(curve, X3, Y3, new ECFieldElement[]{ Z3 }, this.withCompression);
+            }
+            case ECCurve.COORD_LAMBDA_PROJECTIVE:
+            {
+                ECFieldElement L1 = this.y, Z1 = this.zs[0];
+
+                boolean Z1IsOne = Z1.bitLength() == 1;
+                ECFieldElement L1Z1 = Z1IsOne ? L1 : L1.multiply(Z1);
+                ECFieldElement Z1Sq = Z1IsOne ? Z1 : Z1.square();
+                ECFieldElement a = curve.getA();
+                ECFieldElement aZ1Sq = Z1IsOne ? a : a.multiply(Z1Sq);
+                ECFieldElement T = L1.square().add(L1Z1).add(aZ1Sq);
+
+                ECFieldElement X3 = T.square();
+                ECFieldElement Z3 = Z1IsOne ? T : T.multiply(Z1Sq);
+
+                ECFieldElement b = curve.getB();
+                ECFieldElement L3;
+                if (b.bitLength() < (curve.getFieldSize() >> 1))
+                {
+                    ECFieldElement t1 = L1.add(X1).square();
+                    ECFieldElement t2 = aZ1Sq.square();
+                    ECFieldElement t3 = curve.getB().multiply(Z1Sq.square());
+                    L3 = t1.add(T).add(Z1Sq).multiply(t1).add(t2.add(t3)).add(X3).add(a.addOne().multiply(Z3));
+                }
+                else
+                {
+                    ECFieldElement X1Z1 = Z1IsOne ? X1 : X1.multiply(Z1);
+                    L3 = X1Z1.square().add(X3).add(T.multiply(L1Z1)).add(Z3);
+                }
+
+                return new ECPoint.F2m(curve, X3, L3, new ECFieldElement[]{ Z3 }, this.withCompression);
+            }
+            default:
+            {
+                throw new IllegalStateException("unsupported coordinate system");
+            }
+            }
         }
 
-        /**
-         * Sets the appropriate <code>ECMultiplier</code>, unless already set. 
-         */
-        synchronized void assertECMultiplier()
+        public ECPoint twicePlus(ECPoint b)
         {
-            if (this.multiplier == null)
+            if (this.isInfinity()) 
+            {
+                return b;
+            }
+            if (b.isInfinity())
             {
-                if (((ECCurve.F2m)this.curve).isKoblitz())
+                return twice();
+            }
+
+            ECCurve curve = this.getCurve();
+
+            ECFieldElement X1 = this.x;
+            if (X1.isZero()) 
+            {
+                // A point with X == 0 is it's own additive inverse
+                return b;
+            }
+
+            int coord = curve.getCoordinateSystem();
+
+            switch (coord)
+            {
+            case ECCurve.COORD_LAMBDA_PROJECTIVE:
+            {
+                // NOTE: twicePlus() only optimized for lambda-affine argument
+                ECFieldElement X2 = b.x, Z2 = b.zs[0];
+                if (X2.isZero() || Z2.bitLength() != 1)
                 {
-                    this.multiplier = new WTauNafMultiplier();
+                    return twice().add(b);
                 }
-                else
+
+                ECFieldElement L1 = this.y, Z1 = this.zs[0];
+                ECFieldElement L2 = b.y;
+
+                ECFieldElement X1Sq = X1.square();
+                ECFieldElement L1Sq = L1.square();
+                ECFieldElement Z1Sq = Z1.square();
+                ECFieldElement L1Z1 = L1.multiply(Z1);
+
+                ECFieldElement T = curve.getA().multiply(Z1Sq).add(L1Sq).add(L1Z1);
+                ECFieldElement L2plus1 = L2.addOne();
+                ECFieldElement A = curve.getA().add(L2plus1).multiply(Z1Sq).add(L1Sq).multiply(T).add(X1Sq.multiply(Z1Sq));
+                ECFieldElement X2Z1Sq = X2.multiply(Z1Sq);
+                ECFieldElement B = X2Z1Sq.add(T).square();
+
+                ECFieldElement X3 = A.square().multiply(X2Z1Sq);
+                ECFieldElement Z3 = A.multiply(B).multiply(Z1Sq);
+                ECFieldElement L3 = A.add(B).square().multiply(T).add(L2plus1.multiply(Z3));
+
+                return new ECPoint.F2m(curve, X3, L3, new ECFieldElement[]{ Z3 }, this.withCompression);
+            }
+            default:
+            {
+                return twice().add(b);
+            }
+            }
+        }
+
+        protected void checkCurveEquation()
+        {
+            if (this.isInfinity())
+            {
+                return;
+            }
+
+            ECFieldElement Z;
+            switch (this.getCurveCoordinateSystem())
+            {
+            case ECCurve.COORD_LAMBDA_AFFINE:
+                Z = curve.fromBigInteger(ECConstants.ONE);
+                break;
+            case ECCurve.COORD_LAMBDA_PROJECTIVE:
+                Z = this.zs[0];
+                break;
+            default:
+                return;
+            }
+
+            if (Z.isZero())
+            {
+                throw new IllegalStateException();
+            }
+
+            ECFieldElement X = this.x;
+            if (X.isZero())
+            {
+                // NOTE: For x == 0, we expect the affine-y instead of the lambda-y 
+                ECFieldElement Y = this.y;
+                if (!Y.square().equals(curve.getB().multiply(Z)))
                 {
-                    this.multiplier = new WNafMultiplier();
+                    throw new IllegalStateException();
                 }
+
+                return;
+            }
+
+            ECFieldElement L = this.y;
+            ECFieldElement XSq = X.square();
+            ECFieldElement ZSq = Z.square();
+
+            ECFieldElement lhs = L.square().add(L.multiply(Z)).add(this.getCurve().getA().multiply(ZSq)).multiply(XSq);
+            ECFieldElement rhs = ZSq.square().multiply(this.getCurve().getB()).add(XSq.square());
+            
+            if (!lhs.equals(rhs))
+            {
+                throw new IllegalStateException("F2m Lambda-Projective invariant broken");
+            }
+        }
+
+        public ECPoint negate()
+        {
+            if (this.isInfinity())
+            {
+                return this;
+            }
+
+            ECFieldElement X = this.x;
+            if (X.isZero())
+            {
+                return this;
+            }
+
+            switch (this.getCurveCoordinateSystem())
+            {
+            case ECCurve.COORD_AFFINE:
+            {
+                ECFieldElement Y = this.y;
+                return new ECPoint.F2m(curve, X, Y.add(X), this.withCompression);
+            }
+            case ECCurve.COORD_HOMOGENEOUS:
+            {
+                ECFieldElement Y = this.y, Z = this.zs[0];
+                return new ECPoint.F2m(curve, X, Y.add(X), new ECFieldElement[]{ Z }, this.withCompression);
+            }
+            case ECCurve.COORD_LAMBDA_AFFINE:
+            {
+                ECFieldElement L = this.y;
+                return new ECPoint.F2m(curve, X, L.addOne(), this.withCompression);
+            }
+            case ECCurve.COORD_LAMBDA_PROJECTIVE:
+            {
+                // L is actually Lambda (X + Y/X) here
+                ECFieldElement L = this.y, Z = this.zs[0];
+                return new ECPoint.F2m(curve, X, L.add(Z), new ECFieldElement[]{ Z }, this.withCompression);
+            }
+            default:
+            {
+                throw new IllegalStateException("unsupported coordinate system");
+            }
             }
         }
     }
diff --git a/bcprov/src/main/java/org/bouncycastle/math/ec/FpNafMultiplier.java b/bcprov/src/main/java/org/bouncycastle/math/ec/FpNafMultiplier.java
deleted file mode 100644
index 35e601d..0000000
--- a/bcprov/src/main/java/org/bouncycastle/math/ec/FpNafMultiplier.java
+++ /dev/null
@@ -1,39 +0,0 @@
-package org.bouncycastle.math.ec;
-
-import java.math.BigInteger;
-
-/**
- * Class implementing the NAF (Non-Adjacent Form) multiplication algorithm.
- */
-class FpNafMultiplier implements ECMultiplier
-{
-    /**
-     * D.3.2 pg 101
-     * @see org.bouncycastle.math.ec.ECMultiplier#multiply(org.bouncycastle.math.ec.ECPoint, java.math.BigInteger)
-     */
-    public ECPoint multiply(ECPoint p, BigInteger k, PreCompInfo preCompInfo)
-    {
-        // TODO Probably should try to add this
-        // BigInteger e = k.mod(n); // n == order of p
-        BigInteger e = k;
-        BigInteger h = e.multiply(BigInteger.valueOf(3));
-
-        ECPoint neg = p.negate();
-        ECPoint R = p;
-
-        for (int i = h.bitLength() - 2; i > 0; --i)
-        {             
-            R = R.twice();
-
-            boolean hBit = h.testBit(i);
-            boolean eBit = e.testBit(i);
-
-            if (hBit != eBit)
-            {
-                R = R.add(hBit ? p : neg);
-            }
-        }
-
-        return R;
-    }
-}
diff --git a/bcprov/src/main/java/org/bouncycastle/math/ec/IntArray.java b/bcprov/src/main/java/org/bouncycastle/math/ec/IntArray.java
index ead38c4..34395a5 100644
--- a/bcprov/src/main/java/org/bouncycastle/math/ec/IntArray.java
+++ b/bcprov/src/main/java/org/bouncycastle/math/ec/IntArray.java
@@ -6,6 +6,60 @@ import java.math.BigInteger;
 
 class IntArray
 {
+//    private static int DEINTERLEAVE_MASK = 0x55555555;
+
+    /*
+     * This expands 8 bit indices into 16 bit contents, by inserting 0s between bits.
+     * In a binary field, this operation is the same as squaring an 8 bit number.
+     */
+    private static final int[] INTERLEAVE_TABLE = new int[] { 0x0000, 0x0001, 0x0004, 0x0005, 0x0010, 0x0011, 0x0014,
+        0x0015, 0x0040, 0x0041, 0x0044, 0x0045, 0x0050, 0x0051, 0x0054, 0x0055, 0x0100, 0x0101, 0x0104, 0x0105, 0x0110,
+        0x0111, 0x0114, 0x0115, 0x0140, 0x0141, 0x0144, 0x0145, 0x0150, 0x0151, 0x0154, 0x0155, 0x0400, 0x0401, 0x0404,
+        0x0405, 0x0410, 0x0411, 0x0414, 0x0415, 0x0440, 0x0441, 0x0444, 0x0445, 0x0450, 0x0451, 0x0454, 0x0455, 0x0500,
+        0x0501, 0x0504, 0x0505, 0x0510, 0x0511, 0x0514, 0x0515, 0x0540, 0x0541, 0x0544, 0x0545, 0x0550, 0x0551, 0x0554,
+        0x0555, 0x1000, 0x1001, 0x1004, 0x1005, 0x1010, 0x1011, 0x1014, 0x1015, 0x1040, 0x1041, 0x1044, 0x1045, 0x1050,
+        0x1051, 0x1054, 0x1055, 0x1100, 0x1101, 0x1104, 0x1105, 0x1110, 0x1111, 0x1114, 0x1115, 0x1140, 0x1141, 0x1144,
+        0x1145, 0x1150, 0x1151, 0x1154, 0x1155, 0x1400, 0x1401, 0x1404, 0x1405, 0x1410, 0x1411, 0x1414, 0x1415, 0x1440,
+        0x1441, 0x1444, 0x1445, 0x1450, 0x1451, 0x1454, 0x1455, 0x1500, 0x1501, 0x1504, 0x1505, 0x1510, 0x1511, 0x1514,
+        0x1515, 0x1540, 0x1541, 0x1544, 0x1545, 0x1550, 0x1551, 0x1554, 0x1555, 0x4000, 0x4001, 0x4004, 0x4005, 0x4010,
+        0x4011, 0x4014, 0x4015, 0x4040, 0x4041, 0x4044, 0x4045, 0x4050, 0x4051, 0x4054, 0x4055, 0x4100, 0x4101, 0x4104,
+        0x4105, 0x4110, 0x4111, 0x4114, 0x4115, 0x4140, 0x4141, 0x4144, 0x4145, 0x4150, 0x4151, 0x4154, 0x4155, 0x4400,
+        0x4401, 0x4404, 0x4405, 0x4410, 0x4411, 0x4414, 0x4415, 0x4440, 0x4441, 0x4444, 0x4445, 0x4450, 0x4451, 0x4454,
+        0x4455, 0x4500, 0x4501, 0x4504, 0x4505, 0x4510, 0x4511, 0x4514, 0x4515, 0x4540, 0x4541, 0x4544, 0x4545, 0x4550,
+        0x4551, 0x4554, 0x4555, 0x5000, 0x5001, 0x5004, 0x5005, 0x5010, 0x5011, 0x5014, 0x5015, 0x5040, 0x5041, 0x5044,
+        0x5045, 0x5050, 0x5051, 0x5054, 0x5055, 0x5100, 0x5101, 0x5104, 0x5105, 0x5110, 0x5111, 0x5114, 0x5115, 0x5140,
+        0x5141, 0x5144, 0x5145, 0x5150, 0x5151, 0x5154, 0x5155, 0x5400, 0x5401, 0x5404, 0x5405, 0x5410, 0x5411, 0x5414,
+        0x5415, 0x5440, 0x5441, 0x5444, 0x5445, 0x5450, 0x5451, 0x5454, 0x5455, 0x5500, 0x5501, 0x5504, 0x5505, 0x5510,
+        0x5511, 0x5514, 0x5515, 0x5540, 0x5541, 0x5544, 0x5545, 0x5550, 0x5551, 0x5554, 0x5555 };
+
+    // For toString(); must have length 32
+    private static final String ZEROES = "00000000000000000000000000000000";
+
+    private final static byte[] bitLengths =
+    {
+        0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
+        5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+        6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+        6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+        7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+        7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+        7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+        7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+        8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+        8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+        8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+        8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+        8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+        8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+        8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+        8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8
+    };
+
+    public static int getWordLength(int bits)
+    {
+        return (bits + 31) >>> 5;
+    }
+
     // TODO make m fixed for the IntArray, and hence compute T once and for all
 
     private int[] m_ints;
@@ -22,16 +76,12 @@ class IntArray
 
     public IntArray(BigInteger bigInt)
     {
-        this(bigInt, 0);
-    }
-
-    public IntArray(BigInteger bigInt, int minIntLen)
-    {
-        if (bigInt.signum() == -1)
+        if (bigInt == null || bigInt.signum() < 0)
         {
-            throw new IllegalArgumentException("Only positive Integers allowed");
+            throw new IllegalArgumentException("invalid F2m field value");
         }
-        if (bigInt.equals(ECConstants.ZERO))
+
+        if (bigInt.signum() == 0)
         {
             m_ints = new int[] { 0 };
             return;
@@ -48,14 +98,7 @@ class IntArray
             barrStart = 1;
         }
         int intLen = (barrLen + 3) / 4;
-        if (intLen < minIntLen)
-        {
-            m_ints = new int[minIntLen];
-        }
-        else
-        {
-            m_ints = new int[intLen];
-        }
+        m_ints = new int[intLen];
 
         int iarrJ = intLen - 1;
         int rem = barrLen % 4 + barrStart;
@@ -66,11 +109,7 @@ class IntArray
             for (; barrI < rem; barrI++)
             {
                 temp <<= 8;
-                int barrBarrI = barr[barrI];
-                if (barrBarrI < 0)
-                {
-                    barrBarrI += 256;
-                }
+                int barrBarrI = barr[barrI] & 0xFF;
                 temp |= barrBarrI;
             }
             m_ints[iarrJ--] = temp;
@@ -82,11 +121,7 @@ class IntArray
             for (int i = 0; i < 4; i++)
             {
                 temp <<= 8;
-                int barrBarrI = barr[barrI++];
-                if (barrBarrI < 0)
-                {
-                    barrBarrI += 256;
-                }
+                int barrBarrI = barr[barrI++] & 0xFF;
                 temp |= barrBarrI;
             }
             m_ints[iarrJ] = temp;
@@ -95,88 +130,86 @@ class IntArray
 
     public boolean isZero()
     {
-        return m_ints.length == 0
-            || (m_ints[0] == 0 && getUsedLength() == 0);
+        int[] a = m_ints;
+        for (int i = 0; i < a.length; ++i)
+        {
+            if (a[i] != 0)
+            {
+                return false;
+            }
+        }
+        return true;
     }
 
     public int getUsedLength()
     {
-        int highestIntPos = m_ints.length;
+        return getUsedLengthFrom(m_ints.length);
+    }
 
-        if (highestIntPos < 1)
+    public int getUsedLengthFrom(int from)
+    {
+        int[] a = m_ints;
+        from = Math.min(from, a.length);
+
+        if (from < 1)
         {
             return 0;
         }
 
         // Check if first element will act as sentinel
-        if (m_ints[0] != 0)
+        if (a[0] != 0)
         {
-            while (m_ints[--highestIntPos] == 0)
+            while (a[--from] == 0)
             {
             }
-            return highestIntPos + 1;
+            return from + 1;
         }
 
         do
         {
-            if (m_ints[--highestIntPos] != 0)
+            if (a[--from] != 0)
             {
-                return highestIntPos + 1;
+                return from + 1;
             }
         }
-        while (highestIntPos > 0);
+        while (from > 0);
 
         return 0;
     }
 
-    public int bitLength()
+    public int degree()
     {
-        // JDK 1.5: see Integer.numberOfLeadingZeros()
-        int intLen = getUsedLength();
-        if (intLen == 0)
-        {
-            return 0;
-        }
-
-        int last = intLen - 1;
-        int highest = m_ints[last];
-        int bits = (last << 5) + 1;
-
-        // A couple of binary search steps
-        if ((highest & 0xffff0000) != 0)
+        int i = m_ints.length, w;
+        do
         {
-            if ((highest & 0xff000000) != 0)
-            {
-                bits += 24;
-                highest >>>= 24;
-            }
-            else
+            if (i == 0)
             {
-                bits += 16;
-                highest >>>= 16;
+                return 0;
             }
+            w = m_ints[--i];
         }
-        else if (highest > 0x000000ff)
-        {
-            bits += 8;
-            highest >>>= 8;
-        }
+        while (w == 0);
 
-        while (highest != 1)
+        return (i << 5) + bitLength(w);
+    }
+
+    private static int bitLength(int w)
+    {
+        int t = w >>> 16;
+        if (t == 0)
         {
-            ++bits;
-            highest >>>= 1;
+            t = w >>> 8;
+            return (t == 0) ? bitLengths[w] : 8 + bitLengths[t];
         }
 
-        return bits;
+        int u = t >>> 8;
+        return (u == 0) ? 16 + bitLengths[t] : 24 + bitLengths[u];
     }
 
     private int[] resizedInts(int newLen)
     {
         int[] newInts = new int[newLen];
-        int oldLen = m_ints.length;
-        int copyLen = oldLen < newLen ? oldLen : newLen;
-        System.arraycopy(m_ints, 0, newInts, 0, copyLen);
+        System.arraycopy(m_ints, 0, newInts, 0, Math.min(m_ints.length, newLen));
         return newInts;
     }
 
@@ -220,86 +253,128 @@ class IntArray
         return new BigInteger(1, barr);
     }
 
-    public void shiftLeft()
+    private static int shiftLeft(int[] x, int count)
     {
-        int usedLen = getUsedLength();
-        if (usedLen == 0)
+        int prev = 0;
+        for (int i = 0; i < count; ++i)
+        {
+            int next = x[i];
+            x[i] = (next << 1) | prev;
+            prev = next >>> 31;
+        }
+        return prev;
+    }
+
+    public void addOneShifted(int shift)
+    {
+        if (shift >= m_ints.length)
+        {
+            m_ints = resizedInts(shift + 1);
+        }
+
+        m_ints[shift] ^= 1;
+    }
+
+    private void addShiftedByBits(IntArray other, int bits)
+    {
+        int words = bits >>> 5;
+        int shift = bits & 0x1F;
+
+        if (shift == 0)
+        {
+            addShiftedByWords(other, words);
+            return;
+        }
+
+        int otherUsedLen = other.getUsedLength();
+        if (otherUsedLen == 0)
         {
             return;
         }
-        if (m_ints[usedLen - 1] < 0)
+
+        int minLen = otherUsedLen + words + 1;
+        if (minLen > m_ints.length)
         {
-            // highest bit of highest used byte is set, so shifting left will
-            // make the IntArray one byte longer
-            usedLen++;
-            if (usedLen > m_ints.length)
-            {
-                // make the m_ints one byte longer, because we need one more
-                // byte which is not available in m_ints
-                m_ints = resizedInts(m_ints.length + 1);
-            }
+            m_ints = resizedInts(minLen);
         }
 
-        boolean carry = false;
-        for (int i = 0; i < usedLen; i++)
+        int shiftInv = 32 - shift, prev = 0;
+        for (int i = 0; i < otherUsedLen; ++i)
         {
-            // nextCarry is true if highest bit is set
-            boolean nextCarry = m_ints[i] < 0;
-            m_ints[i] <<= 1;
-            if (carry)
-            {
-                // set lowest bit
-                m_ints[i] |= 1;
-            }
-            carry = nextCarry;
+            int next = other.m_ints[i];
+            m_ints[i + words] ^= (next << shift) | prev;
+            prev = next >>> shiftInv;
         }
+        m_ints[otherUsedLen + words] ^= prev;
     }
 
-    public IntArray shiftLeft(int n)
+    private static int addShiftedByBits(int[] x, int[] y, int count, int shift)
     {
-        int usedLen = getUsedLength();
-        if (usedLen == 0)
+        int shiftInv = 32 - shift, prev = 0;
+        for (int i = 0; i < count; ++i)
         {
-            return this;
+            int next = y[i];
+            x[i] ^= (next << shift) | prev;
+            prev = next >>> shiftInv;
         }
+        return prev;
+    }
 
-        if (n == 0)
+    private static int addShiftedByBits(int[] x, int xOff, int[] y, int yOff, int count, int shift)
+    {
+        int shiftInv = 32 - shift, prev = 0;
+        for (int i = 0; i < count; ++i)
         {
-            return this;
+            int next = y[yOff + i];
+            x[xOff + i] ^= (next << shift) | prev;
+            prev = next >>> shiftInv;
         }
+        return prev;
+    }
 
-        if (n > 31)
+    public void addShiftedByWords(IntArray other, int words)
+    {
+        int otherUsedLen = other.getUsedLength();
+        if (otherUsedLen == 0)
         {
-            throw new IllegalArgumentException("shiftLeft() for max 31 bits "
-                + ", " + n + "bit shift is not possible");
+            return;
         }
 
-        int[] newInts = new int[usedLen + 1];
+        int minLen = otherUsedLen + words;
+        if (minLen > m_ints.length)
+        {
+            m_ints = resizedInts(minLen);
+        }
 
-        int nm32 = 32 - n;
-        newInts[0] = m_ints[0] << n;
-        for (int i = 1; i < usedLen; i++)
+        for (int i = 0; i < otherUsedLen; i++)
         {
-            newInts[i] = (m_ints[i] << n) | (m_ints[i - 1] >>> nm32);
+            m_ints[words + i] ^= other.m_ints[i];
         }
-        newInts[usedLen] = m_ints[usedLen - 1] >>> nm32;
+    }
 
-        return new IntArray(newInts);
+    private static void addShiftedByWords(int[] x, int xOff, int[] y, int count)
+    {
+        for (int i = 0; i < count; ++i)
+        {
+            x[xOff + i] ^= y[i];
+        }
     }
 
-    public void addShifted(IntArray other, int shift)
+    private static void add(int[] x, int[] y, int count)
     {
-        int usedLenOther = other.getUsedLength();
-        int newMinUsedLen = usedLenOther + shift;
-        if (newMinUsedLen > m_ints.length)
+        for (int i = 0; i < count; ++i)
         {
-            m_ints = resizedInts(newMinUsedLen);
-            //System.out.println("Resize required");
+            x[i] ^= y[i];
         }
+    }
 
-        for (int i = 0; i < usedLenOther; i++)
+    private static void distribute(int[] x, int dst1, int dst2, int src, int count)
+    {
+        for (int i = 0; i < count; ++i)
         {
-            m_ints[i + shift] ^= other.m_ints[i];
+            int v = x[src + i];
+            x[dst1 + i] ^= v;
+            x[dst2 + i] ^= v;
         }
     }
 
@@ -308,10 +383,58 @@ class IntArray
         return m_ints.length;
     }
 
+    public void flipWord(int bit, int word)
+    {
+        int len = m_ints.length;
+        int n = bit >>> 5;
+        if (n < len)
+        {
+            int shift = bit & 0x1F;
+            if (shift == 0)
+            {
+                m_ints[n] ^= word;
+            }
+            else
+            {
+                m_ints[n] ^= word << shift;
+                if (++n < len)
+                {
+                    m_ints[n] ^= word >>> (32 - shift);
+                }
+            }
+        }
+    }
+
+    public int getWord(int bit)
+    {
+        int len = m_ints.length;
+        int n = bit >>> 5;
+        if (n >= len)
+        {
+            return 0;
+        }
+        int shift = bit & 0x1F;
+        if (shift == 0)
+        {
+            return m_ints[n];
+        }
+        int result = m_ints[n] >>> shift;
+        if (++n < len)
+        {
+            result |= m_ints[n] << (32 - shift);
+        }
+        return result;
+    }
+
+    public boolean testBitZero()
+    {
+        return m_ints.length > 0 && (m_ints[0] & 1) != 0;
+    }
+
     public boolean testBit(int n)
     {
         // theInt = n / 32
-        int theInt = n >> 5;
+        int theInt = n >>> 5;
         // theBit = n % 32
         int theBit = n & 0x1F;
         int tester = 1 << theBit;
@@ -321,7 +444,7 @@ class IntArray
     public void flipBit(int n)
     {
         // theInt = n / 32
-        int theInt = n >> 5;
+        int theInt = n >>> 5;
         // theBit = n % 32
         int theBit = n & 0x1F;
         int flipper = 1 << theBit;
@@ -331,127 +454,344 @@ class IntArray
     public void setBit(int n)
     {
         // theInt = n / 32
-        int theInt = n >> 5;
+        int theInt = n >>> 5;
         // theBit = n % 32
         int theBit = n & 0x1F;
         int setter = 1 << theBit;
         m_ints[theInt] |= setter;
     }
 
+    public void clearBit(int n)
+    {
+        // theInt = n / 32
+        int theInt = n >>> 5;
+        // theBit = n % 32
+        int theBit = n & 0x1F;
+        int setter = 1 << theBit;
+        m_ints[theInt] &= ~setter;
+    }
+
     public IntArray multiply(IntArray other, int m)
     {
-        // Lenght of c is 2m bits rounded up to the next int (32 bit)
-        int t = (m + 31) >> 5;
-        if (m_ints.length < t)
+        int aLen = getUsedLength();
+        if (aLen == 0)
+        {
+            return new IntArray(1);
+        }
+
+        int bLen = other.getUsedLength();
+        if (bLen == 0)
+        {
+            return new IntArray(1);
+        }
+
+        IntArray A = this, B = other;
+        if (aLen > bLen)
+        {
+            A = other; B = this;
+            int tmp = aLen; aLen = bLen; bLen = tmp;
+        }
+
+        if (aLen == 1)
+        {
+            int a = A.m_ints[0];
+            int[] b = B.m_ints;
+            int[] c = new int[aLen + bLen];
+            if ((a & 1) != 0)
+            {
+                add(c, b, bLen);
+            }
+            int k = 1;
+            while ((a >>>= 1) != 0)
+            {
+                if ((a & 1) != 0)
+                {
+                    addShiftedByBits(c, b, bLen, k);
+                }
+                ++k;
+            }
+            return new IntArray(c);
+        }
+
+        // TODO It'd be better to be able to tune the width directly (need support for interleaving arbitrary widths)
+        int complexity = aLen <= 8 ? 1 : 2;
+
+        int width = 1 << complexity;
+        int shifts = (32 >>> complexity);
+
+        int bExt = bLen;
+        if ((B.m_ints[bLen - 1] >>> (33 - shifts)) != 0)
         {
-            m_ints = resizedInts(t);
+            ++bExt;
         }
 
-        IntArray b = new IntArray(other.resizedInts(other.getLength() + 1));
-        IntArray c = new IntArray((m + m + 31) >> 5);
-        // IntArray c = new IntArray(t + t);
-        int testBit = 1;
-        for (int k = 0; k < 32; k++)
+        int cLen = bExt + aLen;
+
+        int[] c = new int[cLen << width];
+        System.arraycopy(B.m_ints, 0, c, 0, bLen);
+        interleave(A.m_ints, 0, c, bExt, aLen, complexity);
+
+        int[] ci = new int[1 << width];
+        for (int i = 1; i < ci.length; ++i)
+        {
+            ci[i] = ci[i - 1] + cLen;
+        }
+
+        int MASK = (1 << width) - 1;
+
+        int k = 0;
+        for (;;)
         {
-            for (int j = 0; j < t; j++)
+            for (int aPos = 0; aPos < aLen; ++aPos)
             {
-                if ((m_ints[j] & testBit) != 0)
+                int index = (c[bExt + aPos] >>> k) & MASK;
+                if (index != 0)
                 {
-                    // The kth bit of m_ints[j] is set
-                    c.addShifted(b, j);
+                    addShiftedByWords(c, aPos + ci[index], c, bExt);
                 }
             }
-            testBit <<= 1;
-            b.shiftLeft();
-        }
-        return c;
-    }
-
-    // public IntArray multiplyLeftToRight(IntArray other, int m) {
-    // // Lenght of c is 2m bits rounded up to the next int (32 bit)
-    // int t = (m + 31) / 32;
-    // if (m_ints.length < t) {
-    // m_ints = resizedInts(t);
-    // }
-    //
-    // IntArray b = new IntArray(other.resizedInts(other.getLength() + 1));
-    // IntArray c = new IntArray((m + m + 31) / 32);
-    // // IntArray c = new IntArray(t + t);
-    // int testBit = 1 << 31;
-    // for (int k = 31; k >= 0; k--) {
-    // for (int j = 0; j < t; j++) {
-    // if ((m_ints[j] & testBit) != 0) {
-    // // The kth bit of m_ints[j] is set
-    // c.addShifted(b, j);
-    // }
-    // }
-    // testBit >>>= 1;
-    // if (k > 0) {
-    // c.shiftLeft();
-    // }
-    // }
-    // return c;
-    // }
-
-    // TODO note, redPol.length must be 3 for TPB and 5 for PPB
-    public void reduce(int m, int[] redPol)
-    {
-        for (int i = m + m - 2; i >= m; i--)
+
+            if ((k += width) >= 32)
+            {
+                break;
+            }
+
+            shiftLeft(c, bExt);
+        }
+
+        int ciPos = ci.length, pow2 = ciPos >>> 1, offset = 32;
+        while (--ciPos > 1)
+        {
+            if (ciPos == pow2)
+            {
+                offset -= shifts;
+                addShiftedByBits(c, ci[1], c, ci[pow2], cLen, offset);
+                pow2 >>>= 1;
+            }
+            else
+            {
+                distribute(c, ci[pow2], ci[ciPos - pow2], ci[ciPos], cLen);
+            }
+        }
+
+        // TODO reduce in place to avoid extra copying
+        IntArray p = new IntArray(cLen);
+        System.arraycopy(c, ci[1], p.m_ints, 0, cLen);
+        return p;
+    }
+
+//    private static void deInterleave(int[] x, int xOff, int[] z, int zOff, int count, int rounds)
+//    {
+//        for (int i = 0; i < count; ++i)
+//        {
+//            z[zOff + i] = deInterleave(x[zOff + i], rounds);
+//        }
+//    }
+//
+//    private static int deInterleave(int x, int rounds)
+//    {
+//        while (--rounds >= 0)
+//        {
+//            x = deInterleave16(x & DEINTERLEAVE_MASK) | (deInterleave16((x >>> 1) & DEINTERLEAVE_MASK) << 16);
+//        }
+//        return x;
+//    }
+//
+//    private static int deInterleave16(int x)
+//    {
+//        x = (x | (x >>> 1)) & 0x33333333;
+//        x = (x | (x >>> 2)) & 0x0F0F0F0F;
+//        x = (x | (x >>> 4)) & 0x00FF00FF;
+//        x = (x | (x >>> 8)) & 0x0000FFFF;
+//        return x;
+//    }
+
+    public void reduce(int m, int[] ks)
+    {
+        int len = getUsedLength();
+        int mLen = (m + 31) >>> 5;
+        if (len < mLen)
+        {
+            return;
+        }
+
+        int _2m = m << 1;
+        int pos = Math.min(_2m - 2, (len << 5) - 1);
+
+        int kMax = ks[ks.length - 1];
+        if (kMax < m - 31)
+        {
+            reduceWordWise(pos, m, ks);
+        }
+        else
+        {
+            reduceBitWise(pos, m, ks);
+        }
+
+        // Instead of flipping the high bits in the loop, explicitly clear any partial word above m bits
+        int partial = m & 0x1F;
+        if (partial != 0)
+        {
+            m_ints[mLen - 1] &= (1 << partial) - 1;
+        }
+
+        if (len > mLen)
+        {
+            m_ints = resizedInts(mLen);
+        }
+    }
+
+    private void reduceBitWise(int from, int m, int[] ks)
+    {
+        for (int i = from; i >= m; --i)
         {
             if (testBit(i))
             {
+//                clearBit(i);
                 int bit = i - m;
                 flipBit(bit);
-                flipBit(i);
-                int l = redPol.length;
-                while (--l >= 0)
+                int j = ks.length;
+                while (--j >= 0)
+                {
+                    flipBit(ks[j] + bit);
+                }
+            }
+        }
+    }
+
+    private void reduceWordWise(int from, int m, int[] ks)
+    {
+        int pos = m + ((from - m) & ~0x1F);
+        for (int i = pos; i >= m; i -= 32)
+        {
+            int word = getWord(i);
+            if (word != 0)
+            {
+//                flipWord(i);
+                int bit = i - m;
+                flipWord(bit, word);
+                int j = ks.length;
+                while (--j >= 0)
                 {
-                    flipBit(redPol[l] + bit);
+                    flipWord(ks[j] + bit, word);
                 }
             }
         }
-        m_ints = resizedInts((m + 31) >> 5);
     }
 
     public IntArray square(int m)
     {
-        // TODO make the table static final
-        final int[] table = { 0x0, 0x1, 0x4, 0x5, 0x10, 0x11, 0x14, 0x15, 0x40,
-            0x41, 0x44, 0x45, 0x50, 0x51, 0x54, 0x55 };
+        int len = getUsedLength();
+        if (len == 0)
+        {
+            return this;
+        }
 
-        int t = (m + 31) >> 5;
-        if (m_ints.length < t)
+        int _2len = len << 1;
+        int[] r = new int[_2len];
+
+        int pos = 0;
+        while (pos < _2len)
         {
-            m_ints = resizedInts(t);
+            int mi = m_ints[pos >>> 1];
+            r[pos++] = interleave16(mi & 0xFFFF);
+            r[pos++] = interleave16(mi >>> 16);
         }
 
-        IntArray c = new IntArray(t + t);
+        return new IntArray(r);
+    }
 
-        // TODO twice the same code, put in separate private method
-        for (int i = 0; i < t; i++)
+    private static void interleave(int[] x, int xOff, int[] z, int zOff, int count, int rounds)
+    {
+        for (int i = 0; i < count; ++i)
         {
-            int v0 = 0;
-            for (int j = 0; j < 4; j++)
+            z[zOff + i] = interleave(x[xOff + i], rounds);
+        }
+    }
+
+    private static int interleave(int x, int rounds)
+    {
+        while (--rounds >= 0)
+        {
+            x = interleave16(x & 0xFFFF) | (interleave16(x >>> 16) << 1);
+        }
+        return x;
+    }
+
+    private static int interleave16(int n)
+    {
+        return INTERLEAVE_TABLE[n & 0xFF] | INTERLEAVE_TABLE[n >>> 8] << 16;
+    }
+
+    public IntArray modInverse(int m, int[] ks)
+    {
+        // Inversion in F2m using the extended Euclidean algorithm
+        // Input: A nonzero polynomial a(z) of degree at most m-1
+        // Output: a(z)^(-1) mod f(z)
+
+        int uzDegree = degree();
+        if (uzDegree == 1)
+        {
+            return this;
+        }
+
+        // u(z) := a(z)
+        IntArray uz = (IntArray)clone();
+
+        int t = getWordLength(m);
+
+        // v(z) := f(z)
+        IntArray vz = new IntArray(t);
+        vz.setBit(m);
+        vz.setBit(0);
+        vz.setBit(ks[0]);
+        if (ks.length > 1) 
+        {
+            vz.setBit(ks[1]);
+            vz.setBit(ks[2]);
+        }
+
+        // g1(z) := 1, g2(z) := 0
+        IntArray g1z = new IntArray(t);
+        g1z.setBit(0);
+        IntArray g2z = new IntArray(t);
+
+        while (uzDegree != 0)
+        {
+            // j := deg(u(z)) - deg(v(z))
+            int j = uzDegree - vz.degree();
+
+            // If j < 0 then: u(z) <-> v(z), g1(z) <-> g2(z), j := -j
+            if (j < 0) 
             {
-                v0 = v0 >>> 8;
-                int u = (m_ints[i] >>> (j * 4)) & 0xF;
-                int w = table[u] << 24;
-                v0 |= w;
+                final IntArray uzCopy = uz;
+                uz = vz;
+                vz = uzCopy;
+
+                final IntArray g1zCopy = g1z;
+                g1z = g2z;
+                g2z = g1zCopy;
+
+                j = -j;
             }
-            c.m_ints[i + i] = v0;
 
-            v0 = 0;
-            int upper = m_ints[i] >>> 16;
-            for (int j = 0; j < 4; j++)
+            // u(z) := u(z) + z^j * v(z)
+            // Note, that no reduction modulo f(z) is required, because
+            // deg(u(z) + z^j * v(z)) <= max(deg(u(z)), j + deg(v(z)))
+            // = max(deg(u(z)), deg(u(z)) - deg(v(z)) + deg(v(z))
+            // = deg(u(z))
+            // uz = uz.xor(vz.shiftLeft(j));
+            uz.addShiftedByBits(vz, j);
+            uzDegree = uz.degree();
+
+            // g1(z) := g1(z) + z^j * g2(z)
+//            g1z = g1z.xor(g2z.shiftLeft(j));
+            if (uzDegree != 0)
             {
-                v0 = v0 >>> 8;
-                int u = (upper >>> (j * 4)) & 0xF;
-                int w = table[u] << 24;
-                v0 |= w;
+                g1z.addShiftedByBits(g2z, j);
             }
-            c.m_ints[i + i + 1] = v0;
         }
-        return c;
+        return g2z;
     }
 
     public boolean equals(Object o)
@@ -482,7 +822,8 @@ class IntArray
         int hash = 1;
         for (int i = 0; i < usedLen; i++)
         {
-            hash = hash * 31 + m_ints[i];
+            hash *= 31;
+            hash ^= m_ints[i];
         }
         return hash;
     }
@@ -494,25 +835,26 @@ class IntArray
 
     public String toString()
     {
-        int usedLen = getUsedLength();
-        if (usedLen == 0)
+        int i = getUsedLength();
+        if (i == 0)
         {
             return "0";
         }
 
-        StringBuffer sb = new StringBuffer(Integer
-            .toBinaryString(m_ints[usedLen - 1]));
-        for (int iarrJ = usedLen - 2; iarrJ >= 0; iarrJ--)
+        StringBuffer sb = new StringBuffer(Integer.toBinaryString(m_ints[--i]));
+        while (--i >= 0)
         {
-            String hexString = Integer.toBinaryString(m_ints[iarrJ]);
+            String s = Integer.toBinaryString(m_ints[i]);
 
-            // Add leading zeroes, except for highest significant int
-            for (int i = hexString.length(); i < 8; i++)
+            // Add leading zeroes, except for highest significant word
+            int len = s.length();
+            if (len < 32)
             {
-                hexString = "0" + hexString;
+                sb.append(ZEROES.substring(len));
             }
-            sb.append(hexString);
+
+            sb.append(s);
         }
         return sb.toString();
     }
-}
+}
+\ No newline at end of file
diff --git a/bcprov/src/main/java/org/bouncycastle/math/ec/LongArray.java b/bcprov/src/main/java/org/bouncycastle/math/ec/LongArray.java
new file mode 100644
index 0000000..7e8b172
--- /dev/null
+++ b/bcprov/src/main/java/org/bouncycastle/math/ec/LongArray.java
@@ -0,0 +1,1995 @@
+package org.bouncycastle.math.ec;
+
+import org.bouncycastle.util.Arrays;
+
+import java.math.BigInteger;
+
+class LongArray
+{
+//    private static long DEINTERLEAVE_MASK = 0x5555555555555555L;
+
+    /*
+     * This expands 8 bit indices into 16 bit contents (high bit 14), by inserting 0s between bits.
+     * In a binary field, this operation is the same as squaring an 8 bit number.
+     */
+    private static final int[] INTERLEAVE2_TABLE = new int[]
+    {
+        0x0000, 0x0001, 0x0004, 0x0005, 0x0010, 0x0011, 0x0014, 0x0015,
+        0x0040, 0x0041, 0x0044, 0x0045, 0x0050, 0x0051, 0x0054, 0x0055,
+        0x0100, 0x0101, 0x0104, 0x0105, 0x0110, 0x0111, 0x0114, 0x0115,
+        0x0140, 0x0141, 0x0144, 0x0145, 0x0150, 0x0151, 0x0154, 0x0155,
+        0x0400, 0x0401, 0x0404, 0x0405, 0x0410, 0x0411, 0x0414, 0x0415,
+        0x0440, 0x0441, 0x0444, 0x0445, 0x0450, 0x0451, 0x0454, 0x0455,
+        0x0500, 0x0501, 0x0504, 0x0505, 0x0510, 0x0511, 0x0514, 0x0515,
+        0x0540, 0x0541, 0x0544, 0x0545, 0x0550, 0x0551, 0x0554, 0x0555,
+        0x1000, 0x1001, 0x1004, 0x1005, 0x1010, 0x1011, 0x1014, 0x1015,
+        0x1040, 0x1041, 0x1044, 0x1045, 0x1050, 0x1051, 0x1054, 0x1055,
+        0x1100, 0x1101, 0x1104, 0x1105, 0x1110, 0x1111, 0x1114, 0x1115,
+        0x1140, 0x1141, 0x1144, 0x1145, 0x1150, 0x1151, 0x1154, 0x1155,
+        0x1400, 0x1401, 0x1404, 0x1405, 0x1410, 0x1411, 0x1414, 0x1415,
+        0x1440, 0x1441, 0x1444, 0x1445, 0x1450, 0x1451, 0x1454, 0x1455,
+        0x1500, 0x1501, 0x1504, 0x1505, 0x1510, 0x1511, 0x1514, 0x1515,
+        0x1540, 0x1541, 0x1544, 0x1545, 0x1550, 0x1551, 0x1554, 0x1555,
+        0x4000, 0x4001, 0x4004, 0x4005, 0x4010, 0x4011, 0x4014, 0x4015,
+        0x4040, 0x4041, 0x4044, 0x4045, 0x4050, 0x4051, 0x4054, 0x4055,
+        0x4100, 0x4101, 0x4104, 0x4105, 0x4110, 0x4111, 0x4114, 0x4115,
+        0x4140, 0x4141, 0x4144, 0x4145, 0x4150, 0x4151, 0x4154, 0x4155,
+        0x4400, 0x4401, 0x4404, 0x4405, 0x4410, 0x4411, 0x4414, 0x4415,
+        0x4440, 0x4441, 0x4444, 0x4445, 0x4450, 0x4451, 0x4454, 0x4455,
+        0x4500, 0x4501, 0x4504, 0x4505, 0x4510, 0x4511, 0x4514, 0x4515,
+        0x4540, 0x4541, 0x4544, 0x4545, 0x4550, 0x4551, 0x4554, 0x4555,
+        0x5000, 0x5001, 0x5004, 0x5005, 0x5010, 0x5011, 0x5014, 0x5015,
+        0x5040, 0x5041, 0x5044, 0x5045, 0x5050, 0x5051, 0x5054, 0x5055,
+        0x5100, 0x5101, 0x5104, 0x5105, 0x5110, 0x5111, 0x5114, 0x5115,
+        0x5140, 0x5141, 0x5144, 0x5145, 0x5150, 0x5151, 0x5154, 0x5155,
+        0x5400, 0x5401, 0x5404, 0x5405, 0x5410, 0x5411, 0x5414, 0x5415,
+        0x5440, 0x5441, 0x5444, 0x5445, 0x5450, 0x5451, 0x5454, 0x5455,
+        0x5500, 0x5501, 0x5504, 0x5505, 0x5510, 0x5511, 0x5514, 0x5515,
+        0x5540, 0x5541, 0x5544, 0x5545, 0x5550, 0x5551, 0x5554, 0x5555
+    };
+
+    /*
+     * This expands 7 bit indices into 21 bit contents (high bit 18), by inserting 0s between bits.
+     */
+    private static final int[] INTERLEAVE3_TABLE = new  int[]
+    {
+        0x00000, 0x00001, 0x00008, 0x00009, 0x00040, 0x00041, 0x00048, 0x00049,
+        0x00200, 0x00201, 0x00208, 0x00209, 0x00240, 0x00241, 0x00248, 0x00249,
+        0x01000, 0x01001, 0x01008, 0x01009, 0x01040, 0x01041, 0x01048, 0x01049,
+        0x01200, 0x01201, 0x01208, 0x01209, 0x01240, 0x01241, 0x01248, 0x01249,
+        0x08000, 0x08001, 0x08008, 0x08009, 0x08040, 0x08041, 0x08048, 0x08049,
+        0x08200, 0x08201, 0x08208, 0x08209, 0x08240, 0x08241, 0x08248, 0x08249,
+        0x09000, 0x09001, 0x09008, 0x09009, 0x09040, 0x09041, 0x09048, 0x09049,
+        0x09200, 0x09201, 0x09208, 0x09209, 0x09240, 0x09241, 0x09248, 0x09249,
+        0x40000, 0x40001, 0x40008, 0x40009, 0x40040, 0x40041, 0x40048, 0x40049,
+        0x40200, 0x40201, 0x40208, 0x40209, 0x40240, 0x40241, 0x40248, 0x40249,
+        0x41000, 0x41001, 0x41008, 0x41009, 0x41040, 0x41041, 0x41048, 0x41049,
+        0x41200, 0x41201, 0x41208, 0x41209, 0x41240, 0x41241, 0x41248, 0x41249,
+        0x48000, 0x48001, 0x48008, 0x48009, 0x48040, 0x48041, 0x48048, 0x48049,
+        0x48200, 0x48201, 0x48208, 0x48209, 0x48240, 0x48241, 0x48248, 0x48249,
+        0x49000, 0x49001, 0x49008, 0x49009, 0x49040, 0x49041, 0x49048, 0x49049,
+        0x49200, 0x49201, 0x49208, 0x49209, 0x49240, 0x49241, 0x49248, 0x49249
+    };
+
+    /*
+     * This expands 8 bit indices into 32 bit contents (high bit 28), by inserting 0s between bits.
+     */
+    private static final int[] INTERLEAVE4_TABLE = new int[]
+    {
+        0x00000000, 0x00000001, 0x00000010, 0x00000011, 0x00000100, 0x00000101, 0x00000110, 0x00000111,
+        0x00001000, 0x00001001, 0x00001010, 0x00001011, 0x00001100, 0x00001101, 0x00001110, 0x00001111,
+        0x00010000, 0x00010001, 0x00010010, 0x00010011, 0x00010100, 0x00010101, 0x00010110, 0x00010111,
+        0x00011000, 0x00011001, 0x00011010, 0x00011011, 0x00011100, 0x00011101, 0x00011110, 0x00011111,
+        0x00100000, 0x00100001, 0x00100010, 0x00100011, 0x00100100, 0x00100101, 0x00100110, 0x00100111,
+        0x00101000, 0x00101001, 0x00101010, 0x00101011, 0x00101100, 0x00101101, 0x00101110, 0x00101111,
+        0x00110000, 0x00110001, 0x00110010, 0x00110011, 0x00110100, 0x00110101, 0x00110110, 0x00110111,
+        0x00111000, 0x00111001, 0x00111010, 0x00111011, 0x00111100, 0x00111101, 0x00111110, 0x00111111,
+        0x01000000, 0x01000001, 0x01000010, 0x01000011, 0x01000100, 0x01000101, 0x01000110, 0x01000111,
+        0x01001000, 0x01001001, 0x01001010, 0x01001011, 0x01001100, 0x01001101, 0x01001110, 0x01001111,
+        0x01010000, 0x01010001, 0x01010010, 0x01010011, 0x01010100, 0x01010101, 0x01010110, 0x01010111,
+        0x01011000, 0x01011001, 0x01011010, 0x01011011, 0x01011100, 0x01011101, 0x01011110, 0x01011111,
+        0x01100000, 0x01100001, 0x01100010, 0x01100011, 0x01100100, 0x01100101, 0x01100110, 0x01100111,
+        0x01101000, 0x01101001, 0x01101010, 0x01101011, 0x01101100, 0x01101101, 0x01101110, 0x01101111,
+        0x01110000, 0x01110001, 0x01110010, 0x01110011, 0x01110100, 0x01110101, 0x01110110, 0x01110111,
+        0x01111000, 0x01111001, 0x01111010, 0x01111011, 0x01111100, 0x01111101, 0x01111110, 0x01111111,
+        0x10000000, 0x10000001, 0x10000010, 0x10000011, 0x10000100, 0x10000101, 0x10000110, 0x10000111,
+        0x10001000, 0x10001001, 0x10001010, 0x10001011, 0x10001100, 0x10001101, 0x10001110, 0x10001111,
+        0x10010000, 0x10010001, 0x10010010, 0x10010011, 0x10010100, 0x10010101, 0x10010110, 0x10010111,
+        0x10011000, 0x10011001, 0x10011010, 0x10011011, 0x10011100, 0x10011101, 0x10011110, 0x10011111,
+        0x10100000, 0x10100001, 0x10100010, 0x10100011, 0x10100100, 0x10100101, 0x10100110, 0x10100111,
+        0x10101000, 0x10101001, 0x10101010, 0x10101011, 0x10101100, 0x10101101, 0x10101110, 0x10101111,
+        0x10110000, 0x10110001, 0x10110010, 0x10110011, 0x10110100, 0x10110101, 0x10110110, 0x10110111,
+        0x10111000, 0x10111001, 0x10111010, 0x10111011, 0x10111100, 0x10111101, 0x10111110, 0x10111111,
+        0x11000000, 0x11000001, 0x11000010, 0x11000011, 0x11000100, 0x11000101, 0x11000110, 0x11000111,
+        0x11001000, 0x11001001, 0x11001010, 0x11001011, 0x11001100, 0x11001101, 0x11001110, 0x11001111,
+        0x11010000, 0x11010001, 0x11010010, 0x11010011, 0x11010100, 0x11010101, 0x11010110, 0x11010111,
+        0x11011000, 0x11011001, 0x11011010, 0x11011011, 0x11011100, 0x11011101, 0x11011110, 0x11011111,
+        0x11100000, 0x11100001, 0x11100010, 0x11100011, 0x11100100, 0x11100101, 0x11100110, 0x11100111,
+        0x11101000, 0x11101001, 0x11101010, 0x11101011, 0x11101100, 0x11101101, 0x11101110, 0x11101111,
+        0x11110000, 0x11110001, 0x11110010, 0x11110011, 0x11110100, 0x11110101, 0x11110110, 0x11110111,
+        0x11111000, 0x11111001, 0x11111010, 0x11111011, 0x11111100, 0x11111101, 0x11111110, 0x11111111
+    };
+
+    /*
+     * This expands 7 bit indices into 35 bit contents (high bit 30), by inserting 0s between bits.
+     */
+    private static final int[] INTERLEAVE5_TABLE = new int[] {
+        0x00000000, 0x00000001, 0x00000020, 0x00000021, 0x00000400, 0x00000401, 0x00000420, 0x00000421,
+        0x00008000, 0x00008001, 0x00008020, 0x00008021, 0x00008400, 0x00008401, 0x00008420, 0x00008421,
+        0x00100000, 0x00100001, 0x00100020, 0x00100021, 0x00100400, 0x00100401, 0x00100420, 0x00100421,
+        0x00108000, 0x00108001, 0x00108020, 0x00108021, 0x00108400, 0x00108401, 0x00108420, 0x00108421,
+        0x02000000, 0x02000001, 0x02000020, 0x02000021, 0x02000400, 0x02000401, 0x02000420, 0x02000421,
+        0x02008000, 0x02008001, 0x02008020, 0x02008021, 0x02008400, 0x02008401, 0x02008420, 0x02008421,
+        0x02100000, 0x02100001, 0x02100020, 0x02100021, 0x02100400, 0x02100401, 0x02100420, 0x02100421,
+        0x02108000, 0x02108001, 0x02108020, 0x02108021, 0x02108400, 0x02108401, 0x02108420, 0x02108421,
+        0x40000000, 0x40000001, 0x40000020, 0x40000021, 0x40000400, 0x40000401, 0x40000420, 0x40000421,
+        0x40008000, 0x40008001, 0x40008020, 0x40008021, 0x40008400, 0x40008401, 0x40008420, 0x40008421,
+        0x40100000, 0x40100001, 0x40100020, 0x40100021, 0x40100400, 0x40100401, 0x40100420, 0x40100421,
+        0x40108000, 0x40108001, 0x40108020, 0x40108021, 0x40108400, 0x40108401, 0x40108420, 0x40108421,
+        0x42000000, 0x42000001, 0x42000020, 0x42000021, 0x42000400, 0x42000401, 0x42000420, 0x42000421,
+        0x42008000, 0x42008001, 0x42008020, 0x42008021, 0x42008400, 0x42008401, 0x42008420, 0x42008421,
+        0x42100000, 0x42100001, 0x42100020, 0x42100021, 0x42100400, 0x42100401, 0x42100420, 0x42100421,
+        0x42108000, 0x42108001, 0x42108020, 0x42108021, 0x42108400, 0x42108401, 0x42108420, 0x42108421
+    };
+
+    /*
+     * This expands 9 bit indices into 63 bit (long) contents (high bit 56), by inserting 0s between bits.
+     */
+    private static final long[] INTERLEAVE7_TABLE = new long[]
+    {
+        0x0000000000000000L, 0x0000000000000001L, 0x0000000000000080L, 0x0000000000000081L,
+        0x0000000000004000L, 0x0000000000004001L, 0x0000000000004080L, 0x0000000000004081L,
+        0x0000000000200000L, 0x0000000000200001L, 0x0000000000200080L, 0x0000000000200081L,
+        0x0000000000204000L, 0x0000000000204001L, 0x0000000000204080L, 0x0000000000204081L,
+        0x0000000010000000L, 0x0000000010000001L, 0x0000000010000080L, 0x0000000010000081L,
+        0x0000000010004000L, 0x0000000010004001L, 0x0000000010004080L, 0x0000000010004081L,
+        0x0000000010200000L, 0x0000000010200001L, 0x0000000010200080L, 0x0000000010200081L,
+        0x0000000010204000L, 0x0000000010204001L, 0x0000000010204080L, 0x0000000010204081L,
+        0x0000000800000000L, 0x0000000800000001L, 0x0000000800000080L, 0x0000000800000081L,
+        0x0000000800004000L, 0x0000000800004001L, 0x0000000800004080L, 0x0000000800004081L,
+        0x0000000800200000L, 0x0000000800200001L, 0x0000000800200080L, 0x0000000800200081L,
+        0x0000000800204000L, 0x0000000800204001L, 0x0000000800204080L, 0x0000000800204081L,
+        0x0000000810000000L, 0x0000000810000001L, 0x0000000810000080L, 0x0000000810000081L,
+        0x0000000810004000L, 0x0000000810004001L, 0x0000000810004080L, 0x0000000810004081L,
+        0x0000000810200000L, 0x0000000810200001L, 0x0000000810200080L, 0x0000000810200081L,
+        0x0000000810204000L, 0x0000000810204001L, 0x0000000810204080L, 0x0000000810204081L,
+        0x0000040000000000L, 0x0000040000000001L, 0x0000040000000080L, 0x0000040000000081L,
+        0x0000040000004000L, 0x0000040000004001L, 0x0000040000004080L, 0x0000040000004081L,
+        0x0000040000200000L, 0x0000040000200001L, 0x0000040000200080L, 0x0000040000200081L,
+        0x0000040000204000L, 0x0000040000204001L, 0x0000040000204080L, 0x0000040000204081L,
+        0x0000040010000000L, 0x0000040010000001L, 0x0000040010000080L, 0x0000040010000081L,
+        0x0000040010004000L, 0x0000040010004001L, 0x0000040010004080L, 0x0000040010004081L,
+        0x0000040010200000L, 0x0000040010200001L, 0x0000040010200080L, 0x0000040010200081L,
+        0x0000040010204000L, 0x0000040010204001L, 0x0000040010204080L, 0x0000040010204081L,
+        0x0000040800000000L, 0x0000040800000001L, 0x0000040800000080L, 0x0000040800000081L,
+        0x0000040800004000L, 0x0000040800004001L, 0x0000040800004080L, 0x0000040800004081L,
+        0x0000040800200000L, 0x0000040800200001L, 0x0000040800200080L, 0x0000040800200081L,
+        0x0000040800204000L, 0x0000040800204001L, 0x0000040800204080L, 0x0000040800204081L,
+        0x0000040810000000L, 0x0000040810000001L, 0x0000040810000080L, 0x0000040810000081L,
+        0x0000040810004000L, 0x0000040810004001L, 0x0000040810004080L, 0x0000040810004081L,
+        0x0000040810200000L, 0x0000040810200001L, 0x0000040810200080L, 0x0000040810200081L,
+        0x0000040810204000L, 0x0000040810204001L, 0x0000040810204080L, 0x0000040810204081L,
+        0x0002000000000000L, 0x0002000000000001L, 0x0002000000000080L, 0x0002000000000081L,
+        0x0002000000004000L, 0x0002000000004001L, 0x0002000000004080L, 0x0002000000004081L,
+        0x0002000000200000L, 0x0002000000200001L, 0x0002000000200080L, 0x0002000000200081L,
+        0x0002000000204000L, 0x0002000000204001L, 0x0002000000204080L, 0x0002000000204081L,
+        0x0002000010000000L, 0x0002000010000001L, 0x0002000010000080L, 0x0002000010000081L,
+        0x0002000010004000L, 0x0002000010004001L, 0x0002000010004080L, 0x0002000010004081L,
+        0x0002000010200000L, 0x0002000010200001L, 0x0002000010200080L, 0x0002000010200081L,
+        0x0002000010204000L, 0x0002000010204001L, 0x0002000010204080L, 0x0002000010204081L,
+        0x0002000800000000L, 0x0002000800000001L, 0x0002000800000080L, 0x0002000800000081L,
+        0x0002000800004000L, 0x0002000800004001L, 0x0002000800004080L, 0x0002000800004081L,
+        0x0002000800200000L, 0x0002000800200001L, 0x0002000800200080L, 0x0002000800200081L,
+        0x0002000800204000L, 0x0002000800204001L, 0x0002000800204080L, 0x0002000800204081L,
+        0x0002000810000000L, 0x0002000810000001L, 0x0002000810000080L, 0x0002000810000081L,
+        0x0002000810004000L, 0x0002000810004001L, 0x0002000810004080L, 0x0002000810004081L,
+        0x0002000810200000L, 0x0002000810200001L, 0x0002000810200080L, 0x0002000810200081L,
+        0x0002000810204000L, 0x0002000810204001L, 0x0002000810204080L, 0x0002000810204081L,
+        0x0002040000000000L, 0x0002040000000001L, 0x0002040000000080L, 0x0002040000000081L,
+        0x0002040000004000L, 0x0002040000004001L, 0x0002040000004080L, 0x0002040000004081L,
+        0x0002040000200000L, 0x0002040000200001L, 0x0002040000200080L, 0x0002040000200081L,
+        0x0002040000204000L, 0x0002040000204001L, 0x0002040000204080L, 0x0002040000204081L,
+        0x0002040010000000L, 0x0002040010000001L, 0x0002040010000080L, 0x0002040010000081L,
+        0x0002040010004000L, 0x0002040010004001L, 0x0002040010004080L, 0x0002040010004081L,
+        0x0002040010200000L, 0x0002040010200001L, 0x0002040010200080L, 0x0002040010200081L,
+        0x0002040010204000L, 0x0002040010204001L, 0x0002040010204080L, 0x0002040010204081L,
+        0x0002040800000000L, 0x0002040800000001L, 0x0002040800000080L, 0x0002040800000081L,
+        0x0002040800004000L, 0x0002040800004001L, 0x0002040800004080L, 0x0002040800004081L,
+        0x0002040800200000L, 0x0002040800200001L, 0x0002040800200080L, 0x0002040800200081L,
+        0x0002040800204000L, 0x0002040800204001L, 0x0002040800204080L, 0x0002040800204081L,
+        0x0002040810000000L, 0x0002040810000001L, 0x0002040810000080L, 0x0002040810000081L,
+        0x0002040810004000L, 0x0002040810004001L, 0x0002040810004080L, 0x0002040810004081L,
+        0x0002040810200000L, 0x0002040810200001L, 0x0002040810200080L, 0x0002040810200081L,
+        0x0002040810204000L, 0x0002040810204001L, 0x0002040810204080L, 0x0002040810204081L,
+        0x0100000000000000L, 0x0100000000000001L, 0x0100000000000080L, 0x0100000000000081L,
+        0x0100000000004000L, 0x0100000000004001L, 0x0100000000004080L, 0x0100000000004081L,
+        0x0100000000200000L, 0x0100000000200001L, 0x0100000000200080L, 0x0100000000200081L,
+        0x0100000000204000L, 0x0100000000204001L, 0x0100000000204080L, 0x0100000000204081L,
+        0x0100000010000000L, 0x0100000010000001L, 0x0100000010000080L, 0x0100000010000081L,
+        0x0100000010004000L, 0x0100000010004001L, 0x0100000010004080L, 0x0100000010004081L,
+        0x0100000010200000L, 0x0100000010200001L, 0x0100000010200080L, 0x0100000010200081L,
+        0x0100000010204000L, 0x0100000010204001L, 0x0100000010204080L, 0x0100000010204081L,
+        0x0100000800000000L, 0x0100000800000001L, 0x0100000800000080L, 0x0100000800000081L,
+        0x0100000800004000L, 0x0100000800004001L, 0x0100000800004080L, 0x0100000800004081L,
+        0x0100000800200000L, 0x0100000800200001L, 0x0100000800200080L, 0x0100000800200081L,
+        0x0100000800204000L, 0x0100000800204001L, 0x0100000800204080L, 0x0100000800204081L,
+        0x0100000810000000L, 0x0100000810000001L, 0x0100000810000080L, 0x0100000810000081L,
+        0x0100000810004000L, 0x0100000810004001L, 0x0100000810004080L, 0x0100000810004081L,
+        0x0100000810200000L, 0x0100000810200001L, 0x0100000810200080L, 0x0100000810200081L,
+        0x0100000810204000L, 0x0100000810204001L, 0x0100000810204080L, 0x0100000810204081L,
+        0x0100040000000000L, 0x0100040000000001L, 0x0100040000000080L, 0x0100040000000081L,
+        0x0100040000004000L, 0x0100040000004001L, 0x0100040000004080L, 0x0100040000004081L,
+        0x0100040000200000L, 0x0100040000200001L, 0x0100040000200080L, 0x0100040000200081L,
+        0x0100040000204000L, 0x0100040000204001L, 0x0100040000204080L, 0x0100040000204081L,
+        0x0100040010000000L, 0x0100040010000001L, 0x0100040010000080L, 0x0100040010000081L,
+        0x0100040010004000L, 0x0100040010004001L, 0x0100040010004080L, 0x0100040010004081L,
+        0x0100040010200000L, 0x0100040010200001L, 0x0100040010200080L, 0x0100040010200081L,
+        0x0100040010204000L, 0x0100040010204001L, 0x0100040010204080L, 0x0100040010204081L,
+        0x0100040800000000L, 0x0100040800000001L, 0x0100040800000080L, 0x0100040800000081L,
+        0x0100040800004000L, 0x0100040800004001L, 0x0100040800004080L, 0x0100040800004081L,
+        0x0100040800200000L, 0x0100040800200001L, 0x0100040800200080L, 0x0100040800200081L,
+        0x0100040800204000L, 0x0100040800204001L, 0x0100040800204080L, 0x0100040800204081L,
+        0x0100040810000000L, 0x0100040810000001L, 0x0100040810000080L, 0x0100040810000081L,
+        0x0100040810004000L, 0x0100040810004001L, 0x0100040810004080L, 0x0100040810004081L,
+        0x0100040810200000L, 0x0100040810200001L, 0x0100040810200080L, 0x0100040810200081L,
+        0x0100040810204000L, 0x0100040810204001L, 0x0100040810204080L, 0x0100040810204081L,
+        0x0102000000000000L, 0x0102000000000001L, 0x0102000000000080L, 0x0102000000000081L,
+        0x0102000000004000L, 0x0102000000004001L, 0x0102000000004080L, 0x0102000000004081L,
+        0x0102000000200000L, 0x0102000000200001L, 0x0102000000200080L, 0x0102000000200081L,
+        0x0102000000204000L, 0x0102000000204001L, 0x0102000000204080L, 0x0102000000204081L,
+        0x0102000010000000L, 0x0102000010000001L, 0x0102000010000080L, 0x0102000010000081L,
+        0x0102000010004000L, 0x0102000010004001L, 0x0102000010004080L, 0x0102000010004081L,
+        0x0102000010200000L, 0x0102000010200001L, 0x0102000010200080L, 0x0102000010200081L,
+        0x0102000010204000L, 0x0102000010204001L, 0x0102000010204080L, 0x0102000010204081L,
+        0x0102000800000000L, 0x0102000800000001L, 0x0102000800000080L, 0x0102000800000081L,
+        0x0102000800004000L, 0x0102000800004001L, 0x0102000800004080L, 0x0102000800004081L,
+        0x0102000800200000L, 0x0102000800200001L, 0x0102000800200080L, 0x0102000800200081L,
+        0x0102000800204000L, 0x0102000800204001L, 0x0102000800204080L, 0x0102000800204081L,
+        0x0102000810000000L, 0x0102000810000001L, 0x0102000810000080L, 0x0102000810000081L,
+        0x0102000810004000L, 0x0102000810004001L, 0x0102000810004080L, 0x0102000810004081L,
+        0x0102000810200000L, 0x0102000810200001L, 0x0102000810200080L, 0x0102000810200081L,
+        0x0102000810204000L, 0x0102000810204001L, 0x0102000810204080L, 0x0102000810204081L,
+        0x0102040000000000L, 0x0102040000000001L, 0x0102040000000080L, 0x0102040000000081L,
+        0x0102040000004000L, 0x0102040000004001L, 0x0102040000004080L, 0x0102040000004081L,
+        0x0102040000200000L, 0x0102040000200001L, 0x0102040000200080L, 0x0102040000200081L,
+        0x0102040000204000L, 0x0102040000204001L, 0x0102040000204080L, 0x0102040000204081L,
+        0x0102040010000000L, 0x0102040010000001L, 0x0102040010000080L, 0x0102040010000081L,
+        0x0102040010004000L, 0x0102040010004001L, 0x0102040010004080L, 0x0102040010004081L,
+        0x0102040010200000L, 0x0102040010200001L, 0x0102040010200080L, 0x0102040010200081L,
+        0x0102040010204000L, 0x0102040010204001L, 0x0102040010204080L, 0x0102040010204081L,
+        0x0102040800000000L, 0x0102040800000001L, 0x0102040800000080L, 0x0102040800000081L,
+        0x0102040800004000L, 0x0102040800004001L, 0x0102040800004080L, 0x0102040800004081L,
+        0x0102040800200000L, 0x0102040800200001L, 0x0102040800200080L, 0x0102040800200081L,
+        0x0102040800204000L, 0x0102040800204001L, 0x0102040800204080L, 0x0102040800204081L,
+        0x0102040810000000L, 0x0102040810000001L, 0x0102040810000080L, 0x0102040810000081L,
+        0x0102040810004000L, 0x0102040810004001L, 0x0102040810004080L, 0x0102040810004081L,
+        0x0102040810200000L, 0x0102040810200001L, 0x0102040810200080L, 0x0102040810200081L,
+        0x0102040810204000L, 0x0102040810204001L, 0x0102040810204080L, 0x0102040810204081L
+    };
+
+    // For toString(); must have length 64
+    private static final String ZEROES = "0000000000000000000000000000000000000000000000000000000000000000";
+
+    final static byte[] bitLengths =
+    {
+        0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
+        5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+        6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+        6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+        7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+        7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+        7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+        7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+        8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+        8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+        8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+        8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+        8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+        8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+        8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+        8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8
+    };
+
+    // TODO make m fixed for the LongArray, and hence compute T once and for all
+
+    private long[] m_ints;
+
+    public LongArray(int intLen)
+    {
+        m_ints = new long[intLen];
+    }
+
+    public LongArray(long[] ints)
+    {
+        m_ints = ints;
+    }
+
+    public LongArray(long[] ints, int off, int len)
+    {
+        if (off == 0 && len == ints.length)
+        {
+            m_ints = ints;
+        }
+        else
+        {
+            m_ints = new long[len];
+            System.arraycopy(ints, off, m_ints, 0, len);
+        }
+    }
+
+    public LongArray(BigInteger bigInt)
+    {
+        if (bigInt == null || bigInt.signum() < 0)
+        {
+            throw new IllegalArgumentException("invalid F2m field value");
+        }
+
+        if (bigInt.signum() == 0)
+        {
+            m_ints = new long[] { 0L };
+            return;
+        }
+
+        byte[] barr = bigInt.toByteArray();
+        int barrLen = barr.length;
+        int barrStart = 0;
+        if (barr[0] == 0)
+        {
+            // First byte is 0 to enforce highest (=sign) bit is zero.
+            // In this case ignore barr[0].
+            barrLen--;
+            barrStart = 1;
+        }
+        int intLen = (barrLen + 7) / 8;
+        m_ints = new long[intLen];
+
+        int iarrJ = intLen - 1;
+        int rem = barrLen % 8 + barrStart;
+        long temp = 0;
+        int barrI = barrStart;
+        if (barrStart < rem)
+        {
+            for (; barrI < rem; barrI++)
+            {
+                temp <<= 8;
+                int barrBarrI = barr[barrI] & 0xFF;
+                temp |= barrBarrI;
+            }
+            m_ints[iarrJ--] = temp;
+        }
+
+        for (; iarrJ >= 0; iarrJ--)
+        {
+            temp = 0;
+            for (int i = 0; i < 8; i++)
+            {
+                temp <<= 8;
+                int barrBarrI = barr[barrI++] & 0xFF;
+                temp |= barrBarrI;
+            }
+            m_ints[iarrJ] = temp;
+        }
+    }
+
+    public boolean isZero()
+    {
+        long[] a = m_ints;
+        for (int i = 0; i < a.length; ++i)
+        {
+            if (a[i] != 0L)
+            {
+                return false;
+            }
+        }
+        return true;
+    }
+
+    public int getUsedLength()
+    {
+        return getUsedLengthFrom(m_ints.length);
+    }
+
+    public int getUsedLengthFrom(int from)
+    {
+        long[] a = m_ints;
+        from = Math.min(from, a.length);
+
+        if (from < 1)
+        {
+            return 0;
+        }
+
+        // Check if first element will act as sentinel
+        if (a[0] != 0)
+        {
+            while (a[--from] == 0)
+            {
+            }
+            return from + 1;
+        }
+
+        do
+        {
+            if (a[--from] != 0)
+            {
+                return from + 1;
+            }
+        }
+        while (from > 0);
+
+        return 0;
+    }
+
+    public int degree()
+    {
+        int i = m_ints.length;
+        long w;
+        do
+        {
+            if (i == 0)
+            {
+                return 0;
+            }
+            w = m_ints[--i];
+        }
+        while (w == 0);
+
+        return (i << 6) + bitLength(w);
+    }
+
+    private int degreeFrom(int limit)
+    {
+        int i = (limit + 62) >>> 6;
+        long w;
+        do
+        {
+            if (i == 0)
+            {
+                return 0;
+            }
+            w = m_ints[--i];
+        }
+        while (w == 0);
+
+        return (i << 6) + bitLength(w);
+    }
+
+//    private int lowestCoefficient()
+//    {
+//        for (int i = 0; i < m_ints.length; ++i)
+//        {
+//            long mi = m_ints[i];
+//            if (mi != 0)
+//            {
+//                int j = 0;
+//                while ((mi & 0xFFL) == 0)
+//                {
+//                    j += 8;
+//                    mi >>>= 8;
+//                }
+//                while ((mi & 1L) == 0)
+//                {
+//                    ++j;
+//                    mi >>>= 1;
+//                }
+//                return (i << 6) + j;
+//            }
+//        }
+//        return -1;
+//    }
+
+    private static int bitLength(long w)
+    {
+        int u = (int)(w >>> 32), b;
+        if (u == 0)
+        {
+            u = (int)w;
+            b = 0;
+        }
+        else
+        {
+            b = 32;
+        }
+
+        int t = u >>> 16, k;
+        if (t == 0)
+        {
+            t = u >>> 8;
+            k = (t == 0) ? bitLengths[u] : 8 + bitLengths[t];
+        }
+        else
+        {
+            int v = t >>> 8;
+            k = (v == 0) ? 16 + bitLengths[t] : 24 + bitLengths[v];
+        }
+
+        return b + k;
+    }
+
+    private long[] resizedInts(int newLen)
+    {
+        long[] newInts = new long[newLen];
+        System.arraycopy(m_ints, 0, newInts, 0, Math.min(m_ints.length, newLen));
+        return newInts;
+    }
+
+    public BigInteger toBigInteger()
+    {
+        int usedLen = getUsedLength();
+        if (usedLen == 0)
+        {
+            return ECConstants.ZERO;
+        }
+
+        long highestInt = m_ints[usedLen - 1];
+        byte[] temp = new byte[8];
+        int barrI = 0;
+        boolean trailingZeroBytesDone = false;
+        for (int j = 7; j >= 0; j--)
+        {
+            byte thisByte = (byte)(highestInt >>> (8 * j));
+            if (trailingZeroBytesDone || (thisByte != 0))
+            {
+                trailingZeroBytesDone = true;
+                temp[barrI++] = thisByte;
+            }
+        }
+
+        int barrLen = 8 * (usedLen - 1) + barrI;
+        byte[] barr = new byte[barrLen];
+        for (int j = 0; j < barrI; j++)
+        {
+            barr[j] = temp[j];
+        }
+        // Highest value int is done now
+
+        for (int iarrJ = usedLen - 2; iarrJ >= 0; iarrJ--)
+        {
+            long mi = m_ints[iarrJ];
+            for (int j = 7; j >= 0; j--)
+            {
+                barr[barrI++] = (byte)(mi >>> (8 * j));
+            }
+        }
+        return new BigInteger(1, barr);
+    }
+
+//    private static long shiftUp(long[] x, int xOff, int count)
+//    {
+//        long prev = 0;
+//        for (int i = 0; i < count; ++i)
+//        {
+//            long next = x[xOff + i];
+//            x[xOff + i] = (next << 1) | prev;
+//            prev = next >>> 63;
+//        }
+//        return prev;
+//    }
+
+    private static long shiftUp(long[] x, int xOff, int count, int shift)
+    {
+        int shiftInv = 64 - shift;
+        long prev = 0;
+        for (int i = 0; i < count; ++i)
+        {
+            long next = x[xOff + i];
+            x[xOff + i] = (next << shift) | prev;
+            prev = next >>> shiftInv;
+        }
+        return prev;
+    }
+
+    private static long shiftUp(long[] x, int xOff, long[] z, int zOff, int count, int shift)
+    {
+        int shiftInv = 64 - shift;
+        long prev = 0;
+        for (int i = 0; i < count; ++i)
+        {
+            long next = x[xOff + i];
+            z[zOff + i] = (next << shift) | prev;
+            prev = next >>> shiftInv;
+        }
+        return prev;
+    }
+
+    public LongArray addOne()
+    {
+        if (m_ints.length == 0)
+        {
+            return new LongArray(new long[]{ 1L });
+        }
+
+        int resultLen = Math.max(1, getUsedLength());
+        long[] ints = resizedInts(resultLen);
+        ints[0] ^= 1L;
+        return new LongArray(ints);
+    }
+
+//    private void addShiftedByBits(LongArray other, int bits)
+//    {
+//        int words = bits >>> 6;
+//        int shift = bits & 0x3F;
+//
+//        if (shift == 0)
+//        {
+//            addShiftedByWords(other, words);
+//            return;
+//        }
+//
+//        int otherUsedLen = other.getUsedLength();
+//        if (otherUsedLen == 0)
+//        {
+//            return;
+//        }
+//
+//        int minLen = otherUsedLen + words + 1;
+//        if (minLen > m_ints.length)
+//        {
+//            m_ints = resizedInts(minLen);
+//        }
+//
+//        long carry = addShiftedByBits(m_ints, words, other.m_ints, 0, otherUsedLen, shift);
+//        m_ints[otherUsedLen + words] ^= carry;
+//    }
+
+    private void addShiftedByBitsSafe(LongArray other, int otherDegree, int bits)
+    {
+        int otherLen = (otherDegree + 63) >>> 6;
+
+        int words = bits >>> 6;
+        int shift = bits & 0x3F;
+
+        if (shift == 0)
+        {
+            add(m_ints, words, other.m_ints, 0, otherLen);
+            return;
+        }
+
+        long carry = addShiftedUp(m_ints, words, other.m_ints, 0, otherLen, shift);
+        if (carry != 0L)
+        {
+            m_ints[otherLen + words] ^= carry;
+        }
+    }
+
+    private static long addShiftedUp(long[] x, int xOff, long[] y, int yOff, int count, int shift)
+    {
+        int shiftInv = 64 - shift;
+        long prev = 0;
+        for (int i = 0; i < count; ++i)
+        {
+            long next = y[yOff + i];
+            x[xOff + i] ^= (next << shift) | prev;
+            prev = next >>> shiftInv;
+        }
+        return prev;
+    }
+
+    private static long addShiftedDown(long[] x, int xOff, long[] y, int yOff, int count, int shift)
+    {
+        int shiftInv = 64 - shift;
+        long prev = 0;
+        int i = count;
+        while (--i >= 0)
+        {
+            long next = y[yOff + i];
+            x[xOff + i] ^= (next >>> shift) | prev;
+            prev = next << shiftInv;
+        }
+        return prev;
+    }
+
+    public void addShiftedByWords(LongArray other, int words)
+    {
+        int otherUsedLen = other.getUsedLength();
+        if (otherUsedLen == 0)
+        {
+            return;
+        }
+
+        int minLen = otherUsedLen + words;
+        if (minLen > m_ints.length)
+        {
+            m_ints = resizedInts(minLen);
+        }
+
+        add(m_ints, words, other.m_ints, 0, otherUsedLen);
+    }
+
+    private static void add(long[] x, int xOff, long[] y, int yOff, int count)
+    {
+        for (int i = 0; i < count; ++i)
+        {
+            x[xOff + i] ^= y[yOff + i];
+        }
+    }
+
+    private static void add(long[] x, int xOff, long[] y, int yOff, long[] z, int zOff, int count)
+    {
+        for (int i = 0; i < count; ++i)
+        {
+            z[zOff + i] = x[xOff + i] ^ y[yOff + i];
+        }
+    }
+
+    private static void addBoth(long[] x, int xOff, long[] y1, int y1Off, long[] y2, int y2Off, int count)
+    {
+        for (int i = 0; i < count; ++i)
+        {
+            x[xOff + i] ^= y1[y1Off + i] ^ y2[y2Off + i];
+        }
+    }
+
+    private static void distribute(long[] x, int src, int dst1, int dst2, int count)
+    {
+        for (int i = 0; i < count; ++i)
+        {
+            long v = x[src + i];
+            x[dst1 + i] ^= v;
+            x[dst2 + i] ^= v;
+        }
+    }
+
+    public int getLength()
+    {
+        return m_ints.length;
+    }
+
+    private static void flipWord(long[] buf, int off, int bit, long word)
+    {
+        int n = off + (bit >>> 6);
+        int shift = bit & 0x3F;
+        if (shift == 0)
+        {
+            buf[n] ^= word;
+        }
+        else
+        {
+            buf[n] ^= word << shift;
+            word >>>= (64 - shift);
+            if (word != 0)
+            {
+                buf[++n] ^= word;
+            }
+        }
+    }
+
+//    private static long getWord(long[] buf, int off, int len, int bit)
+//    {
+//        int n = off + (bit >>> 6);
+//        int shift = bit & 0x3F;
+//        if (shift == 0)
+//        {
+//            return buf[n];
+//        }
+//        long result = buf[n] >>> shift;
+//        if (++n < len)
+//        {
+//            result |= buf[n] << (64 - shift);
+//        }
+//        return result;
+//    }
+
+    public boolean testBitZero()
+    {
+        return m_ints.length > 0 && (m_ints[0] & 1L) != 0;
+    }
+
+    private static boolean testBit(long[] buf, int off, int n)
+    {
+        // theInt = n / 64
+        int theInt = n >>> 6;
+        // theBit = n % 64
+        int theBit = n & 0x3F;
+        long tester = 1L << theBit;
+        return (buf[off + theInt] & tester) != 0;
+    }
+
+    private static void flipBit(long[] buf, int off, int n)
+    {
+        // theInt = n / 64
+        int theInt = n >>> 6;
+        // theBit = n % 64
+        int theBit = n & 0x3F;
+        long flipper = 1L << theBit;
+        buf[off + theInt] ^= flipper;
+    }
+
+//    private static void setBit(long[] buf, int off, int n)
+//    {
+//        // theInt = n / 64
+//        int theInt = n >>> 6;
+//        // theBit = n % 64
+//        int theBit = n & 0x3F;
+//        long setter = 1L << theBit;
+//        buf[off + theInt] |= setter;
+//    }
+//
+//    private static void clearBit(long[] buf, int off, int n)
+//    {
+//        // theInt = n / 64
+//        int theInt = n >>> 6;
+//        // theBit = n % 64
+//        int theBit = n & 0x3F;
+//        long setter = 1L << theBit;
+//        buf[off + theInt] &= ~setter;
+//    }
+
+    private static void multiplyWord(long a, long[] b, int bLen, long[] c, int cOff)
+    {
+        if ((a & 1L) != 0L)
+        {
+            add(c, cOff, b, 0, bLen);
+        }
+        int k = 1;
+        while ((a >>>= 1) != 0)
+        {
+            if ((a & 1L) != 0L)
+            {
+                long carry = addShiftedUp(c, cOff, b, 0, bLen, k);
+                if (carry != 0)
+                {
+                    c[cOff + bLen] ^= carry;
+                }
+            }
+            ++k;
+        }
+    }
+
+    public LongArray modMultiplyLD(LongArray other, int m, int[] ks)
+    {
+        /*
+         * Find out the degree of each argument and handle the zero cases
+         */
+        int aDeg = degree();
+        if (aDeg == 0)
+        {
+            return this;
+        }
+        int bDeg = other.degree();
+        if (bDeg == 0)
+        {
+            return other;
+        }
+
+        /*
+         * Swap if necessary so that A is the smaller argument
+         */
+        LongArray A = this, B = other;
+        if (aDeg > bDeg)
+        {
+            A = other; B = this;
+            int tmp = aDeg; aDeg = bDeg; bDeg = tmp;
+        }
+
+        /*
+         * Establish the word lengths of the arguments and result
+         */
+        int aLen = (aDeg + 63) >>> 6;
+        int bLen = (bDeg + 63) >>> 6;
+        int cLen = (aDeg + bDeg + 62) >>> 6;
+
+        if (aLen == 1)
+        {
+            long a = A.m_ints[0];
+            if (a == 1L)
+            {
+                return B;
+            }
+
+            /*
+             * Fast path for small A, with performance dependent only on the number of set bits
+             */
+            long[] c = new long[cLen];
+            multiplyWord(a, B.m_ints, bLen, c, 0);
+
+            /*
+             * Reduce the raw answer against the reduction coefficients
+             */
+            return reduceResult(c, 0, cLen, m, ks);
+        }
+
+        /*
+         * Determine if B will get bigger during shifting
+         */
+        int bMax = (bDeg + 7 + 63) >>> 6;
+
+        /*
+         * Lookup table for the offset of each B in the tables
+         */
+        int[] ti = new int[16];
+
+        /*
+         * Precompute table of all 4-bit products of B
+         */
+        long[] T0 = new long[bMax << 4];
+        int tOff = bMax;
+        ti[1] = tOff;
+        System.arraycopy(B.m_ints, 0, T0, tOff, bLen);
+        for (int i = 2; i < 16; ++i)
+        {
+            ti[i] = (tOff += bMax);
+            if ((i & 1) == 0)
+            {
+                shiftUp(T0, tOff >>> 1, T0, tOff, bMax, 1);
+            }
+            else
+            {
+                add(T0, bMax, T0, tOff - bMax, T0, tOff, bMax);
+            }
+        }
+
+        /*
+         * Second table with all 4-bit products of B shifted 4 bits
+         */
+        long[] T1 = new long[T0.length];
+        shiftUp(T0, 0, T1, 0, T0.length, 4);
+//        shiftUp(T0, bMax, T1, bMax, tOff, 4);
+
+        long[] a = A.m_ints;
+        long[] c = new long[cLen];
+
+        int MASK = 0xF;
+
+        /*
+         * Lopez-Dahab algorithm
+         */
+
+        for (int k = 56; k >= 0; k -= 8)
+        {
+            for (int j = 1; j < aLen; j += 2)
+            {
+                int aVal = (int)(a[j] >>> k);
+                int u = aVal & MASK;
+                int v = (aVal >>> 4) & MASK;
+                addBoth(c, j - 1, T0, ti[u], T1, ti[v], bMax);
+            }
+            shiftUp(c, 0, cLen, 8);
+        }
+
+        for (int k = 56; k >= 0; k -= 8)
+        {
+            for (int j = 0; j < aLen; j += 2)
+            {
+                int aVal = (int)(a[j] >>> k);
+                int u = aVal & MASK;
+                int v = (aVal >>> 4) & MASK;
+                addBoth(c, j, T0, ti[u], T1, ti[v], bMax);
+            }
+            if (k > 0)
+            {
+                shiftUp(c, 0, cLen, 8);
+            }
+        }
+
+        /*
+         * Finally the raw answer is collected, reduce it against the reduction coefficients
+         */
+        return reduceResult(c, 0, cLen, m, ks);
+    }
+
+    public LongArray modMultiply(LongArray other, int m, int[] ks)
+    {
+        /*
+         * Find out the degree of each argument and handle the zero cases
+         */
+        int aDeg = degree();
+        if (aDeg == 0)
+        {
+            return this;
+        }
+        int bDeg = other.degree();
+        if (bDeg == 0)
+        {
+            return other;
+        }
+
+        /*
+         * Swap if necessary so that A is the smaller argument
+         */
+        LongArray A = this, B = other;
+        if (aDeg > bDeg)
+        {
+            A = other; B = this;
+            int tmp = aDeg; aDeg = bDeg; bDeg = tmp;
+        }
+
+        /*
+         * Establish the word lengths of the arguments and result
+         */
+        int aLen = (aDeg + 63) >>> 6;
+        int bLen = (bDeg + 63) >>> 6;
+        int cLen = (aDeg + bDeg + 62) >>> 6;
+
+        if (aLen == 1)
+        {
+            long a = A.m_ints[0];
+            if (a == 1L)
+            {
+                return B;
+            }
+
+            /*
+             * Fast path for small A, with performance dependent only on the number of set bits
+             */
+            long[] c = new long[cLen];
+            multiplyWord(a, B.m_ints, bLen, c, 0);
+
+            /*
+             * Reduce the raw answer against the reduction coefficients
+             */
+            return reduceResult(c, 0, cLen, m, ks);
+        }
+
+        /*
+         * Determine if B will get bigger during shifting
+         */
+        int bMax = (bDeg + 7 + 63) >>> 6;
+
+        /*
+         * Lookup table for the offset of each B in the tables
+         */
+        int[] ti = new int[16];
+
+        /*
+         * Precompute table of all 4-bit products of B
+         */
+        long[] T0 = new long[bMax << 4];
+        int tOff = bMax;
+        ti[1] = tOff;
+        System.arraycopy(B.m_ints, 0, T0, tOff, bLen);
+        for (int i = 2; i < 16; ++i)
+        {
+            ti[i] = (tOff += bMax);
+            if ((i & 1) == 0)
+            {
+                shiftUp(T0, tOff >>> 1, T0, tOff, bMax, 1);
+            }
+            else
+            {
+                add(T0, bMax, T0, tOff - bMax, T0, tOff, bMax);
+            }
+        }
+
+        /*
+         * Second table with all 4-bit products of B shifted 4 bits
+         */
+        long[] T1 = new long[T0.length];
+        shiftUp(T0, 0, T1, 0, T0.length, 4);
+//        shiftUp(T0, bMax, T1, bMax, tOff, 4);
+
+        long[] a = A.m_ints;
+        long[] c = new long[cLen << 3];
+
+        int MASK = 0xF;
+
+        /*
+         * Lopez-Dahab (Modified) algorithm
+         */
+
+        for (int aPos = 0; aPos < aLen; ++aPos)
+        {
+            long aVal = a[aPos];
+            int cOff = aPos;
+            for (;;)
+            {
+                int u = (int)aVal & MASK;
+                aVal >>>= 4;
+                int v = (int)aVal & MASK;
+                addBoth(c, cOff, T0, ti[u], T1, ti[v], bMax);
+                if ((aVal >>>= 4) == 0L)
+                {
+                    break;
+                }
+                cOff += cLen;
+            }
+        }
+
+        int cOff = c.length;
+        while ((cOff -= cLen) != 0)
+        {
+            addShiftedUp(c, cOff - cLen, c, cOff, cLen, 8);
+        }
+
+        /*
+         * Finally the raw answer is collected, reduce it against the reduction coefficients
+         */
+        return reduceResult(c, 0, cLen, m, ks);
+    }
+
+    public LongArray modMultiplyAlt(LongArray other, int m, int[] ks)
+    {
+        /*
+         * Find out the degree of each argument and handle the zero cases
+         */
+        int aDeg = degree();
+        if (aDeg == 0)
+        {
+            return this;
+        }
+        int bDeg = other.degree();
+        if (bDeg == 0)
+        {
+            return other;
+        }
+
+        /*
+         * Swap if necessary so that A is the smaller argument
+         */
+        LongArray A = this, B = other;
+        if (aDeg > bDeg)
+        {
+            A = other; B = this;
+            int tmp = aDeg; aDeg = bDeg; bDeg = tmp;
+        }
+
+        /*
+         * Establish the word lengths of the arguments and result
+         */
+        int aLen = (aDeg + 63) >>> 6;
+        int bLen = (bDeg + 63) >>> 6;
+        int cLen = (aDeg + bDeg + 62) >>> 6;
+
+        if (aLen == 1)
+        {
+            long a = A.m_ints[0];
+            if (a == 1L)
+            {
+                return B;
+            }
+
+            /*
+             * Fast path for small A, with performance dependent only on the number of set bits
+             */
+            long[] c = new long[cLen];
+            multiplyWord(a, B.m_ints, bLen, c, 0);
+
+            /*
+             * Reduce the raw answer against the reduction coefficients
+             */
+            return reduceResult(c, 0, cLen, m, ks);
+        }
+
+        // NOTE: This works, but is slower than width 4 processing
+//        if (aLen == 2)
+//        {
+//            /*
+//             * Use common-multiplicand optimization to save ~1/4 of the adds
+//             */
+//            long a1 = A.m_ints[0], a2 = A.m_ints[1];
+//            long aa = a1 & a2; a1 ^= aa; a2 ^= aa;
+//
+//            long[] b = B.m_ints;
+//            long[] c = new long[cLen];
+//            multiplyWord(aa, b, bLen, c, 1);
+//            add(c, 0, c, 1, cLen - 1);
+//            multiplyWord(a1, b, bLen, c, 0);
+//            multiplyWord(a2, b, bLen, c, 1);
+//
+//            /*
+//             * Reduce the raw answer against the reduction coefficients
+//             */
+//            return reduceResult(c, 0, cLen, m, ks);
+//        }
+
+        /*
+         * Determine the parameters of the interleaved window algorithm: the 'width' in bits to
+         * process together, the number of evaluation 'positions' implied by that width, and the
+         * 'top' position at which the regular window algorithm stops.
+         */
+        int width, positions, top, banks;
+
+        // NOTE: width 4 is the fastest over the entire range of sizes used in current crypto 
+//        width = 1; positions = 64; top = 64; banks = 4;
+//        width = 2; positions = 32; top = 64; banks = 4;
+//        width = 3; positions = 21; top = 63; banks = 3;
+        width = 4; positions = 16; top = 64; banks = 8;
+//        width = 5; positions = 13; top = 65; banks = 7;
+//        width = 7; positions = 9; top = 63; banks = 9;
+//        width = 8; positions = 8; top = 64; banks = 8;
+
+        /*
+         * Determine if B will get bigger during shifting
+         */
+        int shifts = top < 64 ? positions : positions - 1;
+        int bMax = (bDeg + shifts + 63) >>> 6;
+
+        int bTotal = bMax * banks, stride = width * banks;
+
+        /*
+         * Create a single temporary buffer, with an offset table to find the positions of things in it 
+         */
+        int[] ci = new int[1 << width];
+        int cTotal = aLen;
+        {
+            ci[0] = cTotal;
+            cTotal += bTotal;
+            ci[1] = cTotal;
+            for (int i = 2; i < ci.length; ++i)
+            {
+                cTotal += cLen;
+                ci[i] = cTotal;
+            }
+            cTotal += cLen;
+        }
+        // NOTE: Provide a safe dump for "high zeroes" since we are adding 'bMax' and not 'bLen'
+        ++cTotal;
+
+        long[] c = new long[cTotal];
+
+        // Prepare A in interleaved form, according to the chosen width
+        interleave(A.m_ints, 0, c, 0, aLen, width);
+
+        // Make a working copy of B, since we will be shifting it
+        {
+            int bOff = aLen;
+            System.arraycopy(B.m_ints, 0, c, bOff, bLen);
+            for (int bank = 1; bank < banks; ++bank)
+            {
+                shiftUp(c, aLen, c, bOff += bMax, bMax, bank);
+            }
+        }
+
+        /*
+         * The main loop analyzes the interleaved windows in A, and for each non-zero window
+         * a single word-array XOR is performed to a carefully selected slice of 'c'. The loop is
+         * breadth-first, checking the lowest window in each word, then looping again for the
+         * next higher window position.
+         */
+        int MASK = (1 << width) - 1;
+
+        int k = 0;
+        for (;;)
+        {
+            int aPos = 0;
+            do
+            {
+                long aVal = c[aPos] >>> k;
+                int bank = 0, bOff = aLen;
+                for (;;)
+                {
+                    int index = (int)(aVal) & MASK;
+                    if (index != 0)
+                    {
+                        /*
+                         * Add to a 'c' buffer based on the bit-pattern of 'index'. Since A is in
+                         * interleaved form, the bits represent the current B shifted by 0, 'positions',
+                         * 'positions' * 2, ..., 'positions' * ('width' - 1)
+                         */
+                        add(c, aPos + ci[index], c, bOff, bMax);
+                    }
+                    if (++bank == banks)
+                    {
+                        break;
+                    }
+                    bOff += bMax;
+                    aVal >>>= width;
+                }
+            }
+            while (++aPos < aLen);
+
+            if ((k += stride) >= top)
+            {
+                if (k >= 64)
+                {
+                    break;
+                }
+
+                /*
+                 * Adjustment for window setups with top == 63, the final bit (if any) is processed
+                 * as the top-bit of a window
+                 */
+                k = 64 - width;
+                MASK &= MASK << (top - k);
+            }
+
+            /*
+             * After each position has been checked for all words of A, B is shifted up 1 place
+             */
+            shiftUp(c, aLen, bTotal, banks);
+        }
+
+        int ciPos = ci.length;
+        while (--ciPos > 1)
+        {
+            if ((ciPos & 1L) == 0L)
+            {
+                /*
+                 * For even numbers, shift contents and add to the half-position
+                 */
+                addShiftedUp(c, ci[ciPos >>> 1], c, ci[ciPos], cLen, positions);
+            }
+            else
+            {
+                /*
+                 * For odd numbers, 'distribute' contents to the result and the next-lowest position
+                 */
+                distribute(c, ci[ciPos], ci[ciPos - 1], ci[1], cLen);
+            }
+        }
+
+        /*
+         * Finally the raw answer is collected, reduce it against the reduction coefficients
+         */
+        return reduceResult(c, ci[1], cLen, m, ks);
+    }
+
+    private static LongArray reduceResult(long[] buf, int off, int len, int m, int[] ks)
+    {
+        int rLen = reduceInPlace(buf, off, len, m, ks);
+        return new LongArray(buf, off, rLen);
+    }
+
+//    private static void deInterleave(long[] x, int xOff, long[] z, int zOff, int count, int rounds)
+//    {
+//        for (int i = 0; i < count; ++i)
+//        {
+//            z[zOff + i] = deInterleave(x[zOff + i], rounds);
+//        }
+//    }
+//
+//    private static long deInterleave(long x, int rounds)
+//    {
+//        while (--rounds >= 0)
+//        {
+//            x = deInterleave32(x & DEINTERLEAVE_MASK) | (deInterleave32((x >>> 1) & DEINTERLEAVE_MASK) << 32);
+//        }
+//        return x;
+//    }
+//
+//    private static long deInterleave32(long x)
+//    {
+//        x = (x | (x >>> 1)) & 0x3333333333333333L;
+//        x = (x | (x >>> 2)) & 0x0F0F0F0F0F0F0F0FL;
+//        x = (x | (x >>> 4)) & 0x00FF00FF00FF00FFL;
+//        x = (x | (x >>> 8)) & 0x0000FFFF0000FFFFL;
+//        x = (x | (x >>> 16)) & 0x00000000FFFFFFFFL;
+//        return x;
+//    }
+
+    private static int reduceInPlace(long[] buf, int off, int len, int m, int[] ks)
+    {
+        int mLen = (m + 63) >>> 6;
+        if (len < mLen)
+        {
+            return len;
+        }
+
+        int numBits = Math.min(len << 6, (m << 1) - 1); // TODO use actual degree?
+        int excessBits = (len << 6) - numBits;
+        while (excessBits >= 64)
+        {
+            --len;
+            excessBits -= 64;
+        }
+
+        int kLen = ks.length, kMax = ks[kLen - 1], kNext = kLen > 1 ? ks[kLen - 2] : 0;
+        int wordWiseLimit = Math.max(m, kMax + 64);
+        int vectorableWords = (excessBits + Math.min(numBits - wordWiseLimit, m - kNext)) >> 6;
+        if (vectorableWords > 1)
+        {
+            int vectorWiseWords = len - vectorableWords;
+            reduceVectorWise(buf, off, len, vectorWiseWords, m, ks);
+            while (len > vectorWiseWords)
+            {
+                buf[off + --len] = 0L;
+            }
+            numBits = vectorWiseWords << 6;
+        }
+
+        if (numBits > wordWiseLimit)
+        {
+            reduceWordWise(buf, off, len, wordWiseLimit, m, ks);
+            numBits = wordWiseLimit;
+        }
+
+        if (numBits > m)
+        {
+            reduceBitWise(buf, off, numBits, m, ks);
+        }
+
+        return mLen;
+    }
+
+    private static void reduceBitWise(long[] buf, int off, int bitlength, int m, int[] ks)
+    {
+        while (--bitlength >= m)
+        {
+            if (testBit(buf, off, bitlength))
+            {
+                reduceBit(buf, off, bitlength, m, ks);
+            }
+        }
+    }
+
+    private static void reduceBit(long[] buf, int off, int bit, int m, int[] ks)
+    {
+        flipBit(buf, off, bit);
+        int base = bit - m;
+        int j = ks.length;
+        while (--j >= 0)
+        {
+            flipBit(buf, off, ks[j] + base);
+        }
+        flipBit(buf, off, base);
+    }
+
+    private static void reduceWordWise(long[] buf, int off, int len, int toBit, int m, int[] ks)
+    {
+        int toPos = toBit >>> 6;
+
+        while (--len > toPos)
+        {
+            long word = buf[off + len];
+            if (word != 0)
+            {
+                buf[off + len] = 0;
+                reduceWord(buf, off, (len << 6), word, m, ks);
+            }
+        }
+
+        int partial = toBit & 0x3F;
+        long word = buf[off + toPos] >>> partial;
+        if (word != 0)
+        {
+            buf[off + toPos] ^= word << partial;
+            reduceWord(buf, off, toBit, word, m, ks);
+        }
+    }
+
+    private static void reduceWord(long[] buf, int off, int bit, long word, int m, int[] ks)
+    {
+        int offset = bit - m;
+        int j = ks.length;
+        while (--j >= 0)
+        {
+            flipWord(buf, off, offset + ks[j], word);
+        }
+        flipWord(buf, off, offset, word);
+    }
+
+    private static void reduceVectorWise(long[] buf, int off, int len, int words, int m, int[] ks)
+    {
+        /*
+         * NOTE: It's important we go from highest coefficient to lowest, because for the highest
+         * one (only) we allow the ranges to partially overlap, and therefore any changes must take
+         * effect for the subsequent lower coefficients.
+         */
+        int baseBit = (words << 6) - m;
+        int j = ks.length;
+        while (--j >= 0)
+        {
+            flipVector(buf, off, buf, off + words, len - words, baseBit + ks[j]);
+        }
+        flipVector(buf, off, buf, off + words, len - words, baseBit);
+    }
+
+    private static void flipVector(long[] x, int xOff, long[] y, int yOff, int yLen, int bits)
+    {
+        xOff += bits >>> 6;
+        bits &= 0x3F;
+
+        if (bits == 0)
+        {
+            add(x, xOff, y, yOff, yLen);
+        }
+        else
+        {
+            long carry = addShiftedDown(x, xOff + 1, y, yOff, yLen, 64 - bits);
+            x[xOff] ^= carry;
+        }
+    }
+
+    public LongArray modSquare(int m, int[] ks)
+    {
+        int len = getUsedLength();
+        if (len == 0)
+        {
+            return this;
+        }
+
+        int _2len = len << 1;
+        long[] r = new long[_2len];
+
+        int pos = 0;
+        while (pos < _2len)
+        {
+            long mi = m_ints[pos >>> 1];
+            r[pos++] = interleave2_32to64((int)mi);
+            r[pos++] = interleave2_32to64((int)(mi >>> 32));
+        }
+
+        return new LongArray(r, 0, reduceInPlace(r, 0, r.length, m, ks));
+    }
+
+//    private LongArray modSquareN(int n, int m, int[] ks)
+//    {
+//        int len = getUsedLength();
+//        if (len == 0)
+//        {
+//            return this;
+//        }
+//
+//        int mLen = (m + 63) >>> 6;
+//        long[] r = new long[mLen << 1];
+//        System.arraycopy(m_ints, 0, r, 0, len);
+//
+//        while (--n >= 0)
+//        {
+//            squareInPlace(r, len, m, ks);
+//            len = reduceInPlace(r, 0, r.length, m, ks);
+//        }
+//
+//        return new LongArray(r, 0, len);
+//    }
+//
+//    private static void squareInPlace(long[] x, int xLen, int m, int[] ks)
+//    {
+//        int pos = xLen << 1;
+//        while (--xLen >= 0)
+//        {
+//            long xVal = x[xLen];
+//            x[--pos] = interleave2_32to64((int)(xVal >>> 32));
+//            x[--pos] = interleave2_32to64((int)xVal);
+//        }
+//    }
+
+    private static void interleave(long[] x, int xOff, long[] z, int zOff, int count, int width)
+    {
+        switch (width)
+        {
+        case 3:
+            interleave3(x, xOff, z, zOff, count);
+            break;
+        case 5:
+            interleave5(x, xOff, z, zOff, count);
+            break;
+        case 7:
+            interleave7(x, xOff, z, zOff, count);
+            break;
+        default:
+            interleave2_n(x, xOff, z, zOff, count, bitLengths[width] - 1);
+            break;
+        }
+    }
+
+    private static void interleave3(long[] x, int xOff, long[] z, int zOff, int count)
+    {
+        for (int i = 0; i < count; ++i)
+        {
+            z[zOff + i] = interleave3(x[xOff + i]);
+        }
+    }
+
+    private static long interleave3(long x)
+    {
+        long z = x & (1L << 63);
+        return z
+            | interleave3_21to63((int)x & 0x1FFFFF)
+            | interleave3_21to63((int)(x >>> 21) & 0x1FFFFF) << 1
+            | interleave3_21to63((int)(x >>> 42) & 0x1FFFFF) << 2;
+
+//        int zPos = 0, wPos = 0, xPos = 0;
+//        for (;;)
+//        {
+//            z |= ((x >>> xPos) & 1L) << zPos;
+//            if (++zPos == 63)
+//            {
+//                String sz2 = Long.toBinaryString(z);
+//                return z;
+//            }
+//            if ((xPos += 21) >= 63)
+//            {
+//                xPos = ++wPos;
+//            }
+//        }
+    }
+
+    private static long interleave3_21to63(int x)
+    {
+        int r00 = INTERLEAVE3_TABLE[x & 0x7F];
+        int r21 = INTERLEAVE3_TABLE[(x >>> 7) & 0x7F];
+        int r42 = INTERLEAVE3_TABLE[x >>> 14];
+        return (r42 & 0xFFFFFFFFL) << 42 | (r21 & 0xFFFFFFFFL) << 21 | (r00 & 0xFFFFFFFFL);
+    }
+
+    private static void interleave5(long[] x, int xOff, long[] z, int zOff, int count)
+    {
+        for (int i = 0; i < count; ++i)
+        {
+            z[zOff + i] = interleave5(x[xOff + i]);
+        }
+    }
+
+    private static long interleave5(long x)
+    {
+        return interleave3_13to65((int)x & 0x1FFF)
+            | interleave3_13to65((int)(x >>> 13) & 0x1FFF) << 1
+            | interleave3_13to65((int)(x >>> 26) & 0x1FFF) << 2
+            | interleave3_13to65((int)(x >>> 39) & 0x1FFF) << 3
+            | interleave3_13to65((int)(x >>> 52) & 0x1FFF) << 4;
+
+//        long z = 0;
+//        int zPos = 0, wPos = 0, xPos = 0;
+//        for (;;)
+//        {
+//            z |= ((x >>> xPos) & 1L) << zPos;
+//            if (++zPos == 64)
+//            {
+//                return z;
+//            }
+//            if ((xPos += 13) >= 64)
+//            {
+//                xPos = ++wPos;
+//            }
+//        }
+    }
+
+    private static long interleave3_13to65(int x)
+    {
+        int r00 = INTERLEAVE5_TABLE[x & 0x7F];
+        int r35 = INTERLEAVE5_TABLE[x >>> 7];
+        return (r35 & 0xFFFFFFFFL) << 35 | (r00 & 0xFFFFFFFFL);
+    }
+
+    private static void interleave7(long[] x, int xOff, long[] z, int zOff, int count)
+    {
+        for (int i = 0; i < count; ++i)
+        {
+            z[zOff + i] = interleave7(x[xOff + i]);
+        }
+    }
+
+    private static long interleave7(long x)
+    {
+        long z = x & (1L << 63);
+        return z
+            | INTERLEAVE7_TABLE[(int)x & 0x1FF]
+            | INTERLEAVE7_TABLE[(int)(x >>> 9) & 0x1FF] << 1
+            | INTERLEAVE7_TABLE[(int)(x >>> 18) & 0x1FF] << 2
+            | INTERLEAVE7_TABLE[(int)(x >>> 27) & 0x1FF] << 3
+            | INTERLEAVE7_TABLE[(int)(x >>> 36) & 0x1FF] << 4
+            | INTERLEAVE7_TABLE[(int)(x >>> 45) & 0x1FF] << 5
+            | INTERLEAVE7_TABLE[(int)(x >>> 54) & 0x1FF] << 6;
+
+//        int zPos = 0, wPos = 0, xPos = 0;
+//        for (;;)
+//        {
+//            z |= ((x >>> xPos) & 1L) << zPos;
+//            if (++zPos == 63)
+//            {
+//                return z;
+//            }
+//            if ((xPos += 9) >= 63)
+//            {
+//                xPos = ++wPos;
+//            }
+//        }
+    }
+
+    private static void interleave2_n(long[] x, int xOff, long[] z, int zOff, int count, int rounds)
+    {
+        for (int i = 0; i < count; ++i)
+        {
+            z[zOff + i] = interleave2_n(x[xOff + i], rounds);
+        }
+    }
+
+    private static long interleave2_n(long x, int rounds)
+    {
+        while (rounds > 1)
+        {
+            rounds -= 2;
+            x = interleave4_16to64((int)x & 0xFFFF)
+                | interleave4_16to64((int)(x >>> 16) & 0xFFFF) << 1
+                | interleave4_16to64((int)(x >>> 32) & 0xFFFF) << 2
+                | interleave4_16to64((int)(x >>> 48) & 0xFFFF) << 3;
+        }
+        if (rounds > 0)
+        {
+            x = interleave2_32to64((int)x) | interleave2_32to64((int)(x >>> 32)) << 1;
+        }
+        return x;
+    }
+
+    private static long interleave4_16to64(int x)
+    {
+        int r00 = INTERLEAVE4_TABLE[x & 0xFF];
+        int r32 = INTERLEAVE4_TABLE[x >>> 8];
+        return (r32 & 0xFFFFFFFFL) << 32 | (r00 & 0xFFFFFFFFL);
+    }
+
+    private static long interleave2_32to64(int x)
+    {
+        int r00 = INTERLEAVE2_TABLE[x & 0xFF] | INTERLEAVE2_TABLE[(x >>> 8) & 0xFF] << 16;
+        int r32 = INTERLEAVE2_TABLE[(x >>> 16) & 0xFF] | INTERLEAVE2_TABLE[x >>> 24] << 16;
+        return (r32 & 0xFFFFFFFFL) << 32 | (r00 & 0xFFFFFFFFL);
+    }
+
+//    private static LongArray expItohTsujii2(LongArray B, int n, int m, int[] ks)
+//    {
+//        LongArray t1 = B, t3 = new LongArray(new long[]{ 1L });
+//        int scale = 1;
+//
+//        int numTerms = n;
+//        while (numTerms > 1)
+//        {
+//            if ((numTerms & 1) != 0)
+//            {
+//                t3 = t3.modMultiply(t1, m, ks);
+//                t1 = t1.modSquareN(scale, m, ks);
+//            }
+//
+//            LongArray t2 = t1.modSquareN(scale, m, ks);
+//            t1 = t1.modMultiply(t2, m, ks);
+//            numTerms >>>= 1; scale <<= 1;
+//        }
+//
+//        return t3.modMultiply(t1, m, ks);
+//    }
+//
+//    private static LongArray expItohTsujii23(LongArray B, int n, int m, int[] ks)
+//    {
+//        LongArray t1 = B, t3 = new LongArray(new long[]{ 1L });
+//        int scale = 1;
+//
+//        int numTerms = n;
+//        while (numTerms > 1)
+//        {
+//            boolean m03 = numTerms % 3 == 0;
+//            boolean m14 = !m03 && (numTerms & 1) != 0;
+//
+//            if (m14)
+//            {
+//                t3 = t3.modMultiply(t1, m, ks);
+//                t1 = t1.modSquareN(scale, m, ks);
+//            }
+//
+//            LongArray t2 = t1.modSquareN(scale, m, ks);
+//            t1 = t1.modMultiply(t2, m, ks);
+//
+//            if (m03)
+//            {
+//                t2 = t2.modSquareN(scale, m, ks);
+//                t1 = t1.modMultiply(t2, m, ks);
+//                numTerms /= 3; scale *= 3;
+//            }
+//            else
+//            {
+//                numTerms >>>= 1; scale <<= 1;
+//            }
+//        }
+//
+//        return t3.modMultiply(t1, m, ks);
+//    }
+//
+//    private static LongArray expItohTsujii235(LongArray B, int n, int m, int[] ks)
+//    {
+//        LongArray t1 = B, t4 = new LongArray(new long[]{ 1L });
+//        int scale = 1;
+//
+//        int numTerms = n;
+//        while (numTerms > 1)
+//        {
+//            if (numTerms % 5 == 0)
+//            {
+////                t1 = expItohTsujii23(t1, 5, m, ks);
+//
+//                LongArray t3 = t1;
+//                t1 = t1.modSquareN(scale, m, ks);
+//
+//                LongArray t2 = t1.modSquareN(scale, m, ks);
+//                t1 = t1.modMultiply(t2, m, ks);
+//                t2 = t1.modSquareN(scale << 1, m, ks);
+//                t1 = t1.modMultiply(t2, m, ks);
+//
+//                t1 = t1.modMultiply(t3, m, ks);
+//
+//                numTerms /= 5; scale *= 5;
+//                continue;
+//            }
+//
+//            boolean m03 = numTerms % 3 == 0;
+//            boolean m14 = !m03 && (numTerms & 1) != 0;
+//
+//            if (m14)
+//            {
+//                t4 = t4.modMultiply(t1, m, ks);
+//                t1 = t1.modSquareN(scale, m, ks);
+//            }
+//
+//            LongArray t2 = t1.modSquareN(scale, m, ks);
+//            t1 = t1.modMultiply(t2, m, ks);
+//
+//            if (m03)
+//            {
+//                t2 = t2.modSquareN(scale, m, ks);
+//                t1 = t1.modMultiply(t2, m, ks);
+//                numTerms /= 3; scale *= 3;
+//            }
+//            else
+//            {
+//                numTerms >>>= 1; scale <<= 1;
+//            }
+//        }
+//
+//        return t4.modMultiply(t1, m, ks);
+//    }
+
+    public LongArray modInverse(int m, int[] ks)
+    {
+        /*
+         * Fermat's Little Theorem
+         */
+//        LongArray A = this;
+//        LongArray B = A.modSquare(m, ks);
+//        LongArray R0 = B, R1 = B;
+//        for (int i = 2; i < m; ++i)
+//        {
+//            R1 = R1.modSquare(m, ks);
+//            R0 = R0.modMultiply(R1, m, ks);
+//        }
+//
+//        return R0;
+
+        /*
+         * Itoh-Tsujii
+         */
+//        LongArray B = modSquare(m, ks);
+//        switch (m)
+//        {
+//        case 409:
+//            return expItohTsujii23(B, m - 1, m, ks);
+//        case 571:
+//            return expItohTsujii235(B, m - 1, m, ks);
+//        case 163:
+//        case 233:
+//        case 283:
+//        default:
+//            return expItohTsujii2(B, m - 1, m, ks);
+//        }
+
+        /*
+         * Inversion in F2m using the extended Euclidean algorithm
+         * 
+         * Input: A nonzero polynomial a(z) of degree at most m-1
+         * Output: a(z)^(-1) mod f(z)
+         */
+        int uzDegree = degree();
+        if (uzDegree == 1)
+        {
+            return this;
+        }
+
+        // u(z) := a(z)
+        LongArray uz = (LongArray)clone();
+
+        int t = (m + 63) >>> 6;
+
+        // v(z) := f(z)
+        LongArray vz = new LongArray(t);
+        reduceBit(vz.m_ints, 0, m, m, ks);
+
+        // g1(z) := 1, g2(z) := 0
+        LongArray g1z = new LongArray(t);
+        g1z.m_ints[0] = 1L;
+        LongArray g2z = new LongArray(t);
+
+        int[] uvDeg = new int[]{ uzDegree, m + 1 };
+        LongArray[] uv = new LongArray[]{ uz, vz };
+
+        int[] ggDeg = new int[]{ 1, 0 };
+        LongArray[] gg = new LongArray[]{ g1z, g2z };
+
+        int b = 1;
+        int duv1 = uvDeg[b];
+        int dgg1 = ggDeg[b];
+        int j = duv1 - uvDeg[1 - b];
+
+        for (;;)
+        {
+            if (j < 0)
+            {
+                j = -j;
+                uvDeg[b] = duv1;
+                ggDeg[b] = dgg1;
+                b = 1 - b;
+                duv1 = uvDeg[b];
+                dgg1 = ggDeg[b];
+            }
+
+            uv[b].addShiftedByBitsSafe(uv[1 - b], uvDeg[1 - b], j);
+
+            int duv2 = uv[b].degreeFrom(duv1);
+            if (duv2 == 0)
+            {
+                return gg[1 - b];
+            }
+
+            {
+                int dgg2 = ggDeg[1 - b];
+                gg[b].addShiftedByBitsSafe(gg[1 - b], dgg2, j);
+                dgg2 += j;
+
+                if (dgg2 > dgg1)
+                {
+                    dgg1 = dgg2;
+                }
+                else if (dgg2 == dgg1)
+                {
+                    dgg1 = gg[b].degreeFrom(dgg1);
+                }
+            }
+
+            j += (duv2 - duv1);
+            duv1 = duv2;
+        }
+    }
+
+    public boolean equals(Object o)
+    {
+        if (!(o instanceof LongArray))
+        {
+            return false;
+        }
+        LongArray other = (LongArray) o;
+        int usedLen = getUsedLength();
+        if (other.getUsedLength() != usedLen)
+        {
+            return false;
+        }
+        for (int i = 0; i < usedLen; i++)
+        {
+            if (m_ints[i] != other.m_ints[i])
+            {
+                return false;
+            }
+        }
+        return true;
+    }
+
+    public int hashCode()
+    {
+        int usedLen = getUsedLength();
+        int hash = 1;
+        for (int i = 0; i < usedLen; i++)
+        {
+            long mi = m_ints[i];
+            hash *= 31;
+            hash ^= (int)mi;
+            hash *= 31;
+            hash ^= (int)(mi >>> 32);
+        }
+        return hash;
+    }
+
+    public Object clone()
+    {
+        return new LongArray(Arrays.clone(m_ints));
+    }
+
+    public String toString()
+    {
+        int i = getUsedLength();
+        if (i == 0)
+        {
+            return "0";
+        }
+
+        StringBuffer sb = new StringBuffer(Long.toBinaryString(m_ints[--i]));
+        while (--i >= 0)
+        {
+            String s = Long.toBinaryString(m_ints[i]);
+
+            // Add leading zeroes, except for highest significant word
+            int len = s.length();
+            if (len < 64)
+            {
+                sb.append(ZEROES.substring(len));
+            }
+
+            sb.append(s);
+        }
+        return sb.toString();
+    }
+}
+\ No newline at end of file
diff --git a/bcprov/src/main/java/org/bouncycastle/math/ec/MixedNafR2LMultiplier.java b/bcprov/src/main/java/org/bouncycastle/math/ec/MixedNafR2LMultiplier.java
new file mode 100644
index 0000000..6d5fe92
--- /dev/null
+++ b/bcprov/src/main/java/org/bouncycastle/math/ec/MixedNafR2LMultiplier.java
@@ -0,0 +1,77 @@
+package org.bouncycastle.math.ec;
+
+import java.math.BigInteger;
+
+/**
+ * Class implementing the NAF (Non-Adjacent Form) multiplication algorithm (right-to-left) using
+ * mixed coordinates.
+ */
+public class MixedNafR2LMultiplier extends AbstractECMultiplier
+{
+    protected int additionCoord, doublingCoord;
+
+    /**
+     * By default, addition will be done in Jacobian coordinates, and doubling will be done in
+     * Modified Jacobian coordinates (independent of the original coordinate system of each point).
+     */
+    public MixedNafR2LMultiplier()
+    {
+        this(ECCurve.COORD_JACOBIAN, ECCurve.COORD_JACOBIAN_MODIFIED);
+    }
+
+    public MixedNafR2LMultiplier(int additionCoord, int doublingCoord)
+    {
+        this.additionCoord = additionCoord;
+        this.doublingCoord = doublingCoord;
+    }
+
+    protected ECPoint multiplyPositive(ECPoint p, BigInteger k)
+    {
+        ECCurve curveOrig = p.getCurve();
+
+        ECCurve curveAdd = configureCurve(curveOrig, additionCoord);
+        ECCurve curveDouble = configureCurve(curveOrig, doublingCoord);
+
+        int[] naf = WNafUtil.generateCompactNaf(k);
+
+        ECPoint Ra = curveAdd.getInfinity();
+        ECPoint Td = curveDouble.importPoint(p);
+
+        int zeroes = 0;
+        for (int i = 0; i < naf.length; ++i)
+        {
+            int ni = naf[i];
+            int digit = ni >> 16;
+            zeroes += ni & 0xFFFF;
+
+            Td = Td.timesPow2(zeroes);
+
+            ECPoint Tj = curveAdd.importPoint(Td);
+            if (digit < 0)
+            {
+                Tj = Tj.negate();
+            }
+
+            Ra = Ra.add(Tj);
+
+            zeroes = 1;
+        }
+
+        return curveOrig.importPoint(Ra);
+    }
+
+    protected ECCurve configureCurve(ECCurve c, int coord)
+    {
+        if (c.getCoordinateSystem() == coord)
+        {
+            return c;
+        }
+
+        if (!c.supportsCoordinateSystem(coord))
+        {
+            throw new IllegalArgumentException("Coordinate system " + coord + " not supported by this curve");
+        }
+
+        return c.configure().setCoordinateSystem(coord).create();
+    }
+}
diff --git a/bcprov/src/main/java/org/bouncycastle/math/ec/MontgomeryLadderMultiplier.java b/bcprov/src/main/java/org/bouncycastle/math/ec/MontgomeryLadderMultiplier.java
new file mode 100644
index 0000000..cd969b5
--- /dev/null
+++ b/bcprov/src/main/java/org/bouncycastle/math/ec/MontgomeryLadderMultiplier.java
@@ -0,0 +1,25 @@
+package org.bouncycastle.math.ec;
+
+import java.math.BigInteger;
+
+public class MontgomeryLadderMultiplier extends AbstractECMultiplier
+{
+    /**
+     * Montgomery ladder.
+     */
+    protected ECPoint multiplyPositive(ECPoint p, BigInteger k)
+    {
+        ECPoint[] R = new ECPoint[]{ p.getCurve().getInfinity(), p };
+
+        int n = k.bitLength();
+        int i = n;
+        while (--i >= 0)
+        {
+            int b = k.testBit(i) ? 1 : 0;
+            int bp = 1 - b;
+            R[bp] = R[bp].add(R[b]);
+            R[b] = R[b].twice();
+        }
+        return R[0];
+    }
+}
diff --git a/bcprov/src/main/java/org/bouncycastle/math/ec/NafL2RMultiplier.java b/bcprov/src/main/java/org/bouncycastle/math/ec/NafL2RMultiplier.java
new file mode 100644
index 0000000..91d91d1
--- /dev/null
+++ b/bcprov/src/main/java/org/bouncycastle/math/ec/NafL2RMultiplier.java
@@ -0,0 +1,30 @@
+package org.bouncycastle.math.ec;
+
+import java.math.BigInteger;
+
+/**
+ * Class implementing the NAF (Non-Adjacent Form) multiplication algorithm (left-to-right).
+ */
+public class NafL2RMultiplier extends AbstractECMultiplier
+{
+    protected ECPoint multiplyPositive(ECPoint p, BigInteger k)
+    {
+        int[] naf = WNafUtil.generateCompactNaf(k);
+
+        ECPoint addP = p.normalize(), subP = addP.negate();
+
+        ECPoint R = p.getCurve().getInfinity();
+
+        int i = naf.length;
+        while (--i >= 0)
+        {
+            int ni = naf[i];
+            int digit = ni >> 16, zeroes = ni & 0xFFFF;
+
+            R = R.twicePlus(digit < 0 ? subP : addP);
+            R = R.timesPow2(zeroes);
+        }
+
+        return R;
+    }
+}
diff --git a/bcprov/src/main/java/org/bouncycastle/math/ec/NafR2LMultiplier.java b/bcprov/src/main/java/org/bouncycastle/math/ec/NafR2LMultiplier.java
new file mode 100644
index 0000000..aed2336
--- /dev/null
+++ b/bcprov/src/main/java/org/bouncycastle/math/ec/NafR2LMultiplier.java
@@ -0,0 +1,31 @@
+package org.bouncycastle.math.ec;
+
+import java.math.BigInteger;
+
+/**
+ * Class implementing the NAF (Non-Adjacent Form) multiplication algorithm (right-to-left).
+ */
+public class NafR2LMultiplier extends AbstractECMultiplier
+{
+    protected ECPoint multiplyPositive(ECPoint p, BigInteger k)
+    {
+        int[] naf = WNafUtil.generateCompactNaf(k);
+
+        ECPoint R0 = p.getCurve().getInfinity(), R1 = p;
+
+        int zeroes = 0;
+        for (int i = 0; i < naf.length; ++i)
+        {
+            int ni = naf[i];
+            int digit = ni >> 16;
+            zeroes += ni & 0xFFFF;
+
+            R1 = R1.timesPow2(zeroes);
+            R0 = R0.add(digit < 0 ? R1.negate() : R1);
+
+            zeroes = 1;
+        }
+
+        return R0;
+    }
+}
diff --git a/bcprov/src/main/java/org/bouncycastle/math/ec/PreCompInfo.java b/bcprov/src/main/java/org/bouncycastle/math/ec/PreCompInfo.java
index 804dcf7..3849858 100644
--- a/bcprov/src/main/java/org/bouncycastle/math/ec/PreCompInfo.java
+++ b/bcprov/src/main/java/org/bouncycastle/math/ec/PreCompInfo.java
@@ -2,9 +2,9 @@ package org.bouncycastle.math.ec;
 
 /**
  * Interface for classes storing precomputation data for multiplication
- * algorithms. Used as a Memento (see GOF patterns) for
- * <code>WNafMultiplier</code>.
+ * algorithms. Used as a Memento (see GOF patterns) by e.g. 
+ * <code>WNafL2RMultiplier</code>.
  */
-interface PreCompInfo
+public interface PreCompInfo
 {
 }
diff --git a/bcprov/src/main/java/org/bouncycastle/math/ec/ReferenceMultiplier.java b/bcprov/src/main/java/org/bouncycastle/math/ec/ReferenceMultiplier.java
index c1dd548..3601856 100644
--- a/bcprov/src/main/java/org/bouncycastle/math/ec/ReferenceMultiplier.java
+++ b/bcprov/src/main/java/org/bouncycastle/math/ec/ReferenceMultiplier.java
@@ -2,7 +2,7 @@ package org.bouncycastle.math.ec;
 
 import java.math.BigInteger;
 
-class ReferenceMultiplier implements ECMultiplier
+public class ReferenceMultiplier extends AbstractECMultiplier
 {
     /**
      * Simple shift-and-add multiplication. Serves as reference implementation
@@ -13,17 +13,24 @@ class ReferenceMultiplier implements ECMultiplier
      * @param k The factor by which to multiply.
      * @return The result of the point multiplication <code>k * p</code>.
      */
-    public ECPoint multiply(ECPoint p, BigInteger k, PreCompInfo preCompInfo)
+    protected ECPoint multiplyPositive(ECPoint p, BigInteger k)
     {
         ECPoint q = p.getCurve().getInfinity();
         int t = k.bitLength();
-        for (int i = 0; i < t; i++)
+        if (t > 0)
         {
-            if (k.testBit(i))
+            if (k.testBit(0))
             {
-                q = q.add(p);
+                q = p;
+            }
+            for (int i = 1; i < t; i++)
+            {
+                p = p.twice();
+                if (k.testBit(i))
+                {
+                    q = q.add(p);
+                }
             }
-            p = p.twice();
         }
         return q;
     }
diff --git a/bcprov/src/main/java/org/bouncycastle/math/ec/Tnaf.java b/bcprov/src/main/java/org/bouncycastle/math/ec/Tnaf.java
index af4355f..42d6738 100644
--- a/bcprov/src/main/java/org/bouncycastle/math/ec/Tnaf.java
+++ b/bcprov/src/main/java/org/bouncycastle/math/ec/Tnaf.java
@@ -392,15 +392,7 @@ class Tnaf
      */
     public static ECPoint.F2m tau(ECPoint.F2m p)
     {
-        if (p.isInfinity())
-        {
-            return p;
-        }
-
-        ECFieldElement x = p.getX();
-        ECFieldElement y = p.getY();
-
-        return new ECPoint.F2m(p.getCurve(), x.square(), y.square(), p.isCompressed());
+        return p.tau();
     }
 
     /**
@@ -415,23 +407,17 @@ class Tnaf
      */
     public static byte getMu(ECCurve.F2m curve)
     {
-        BigInteger a = curve.getA().toBigInteger();
-        byte mu;
-
-        if (a.equals(ECConstants.ZERO))
-        {
-            mu = -1;
-        }
-        else if (a.equals(ECConstants.ONE))
+        if (!curve.isKoblitz())
         {
-            mu = 1;
+            throw new IllegalArgumentException("No Koblitz curve (ABC), TNAF multiplication not possible");
         }
-        else
+
+        if (curve.getA().isZero())
         {
-            throw new IllegalArgumentException("No Koblitz curve (ABC), " +
-                    "TNAF multiplication not possible");
+            return -1;
         }
-        return mu;
+
+        return 1;
     }
 
     /**
@@ -838,7 +824,9 @@ class Tnaf
         {
             pu[i] = Tnaf.multiplyFromTnaf(p, alphaTnaf[i]);
         }
-        
+
+        p.getCurve().normalizeAll(pu);
+
         return pu;
     }
 }
diff --git a/bcprov/src/main/java/org/bouncycastle/math/ec/WNafL2RMultiplier.java b/bcprov/src/main/java/org/bouncycastle/math/ec/WNafL2RMultiplier.java
new file mode 100644
index 0000000..59a9313
--- /dev/null
+++ b/bcprov/src/main/java/org/bouncycastle/math/ec/WNafL2RMultiplier.java
@@ -0,0 +1,101 @@
+package org.bouncycastle.math.ec;
+
+import java.math.BigInteger;
+
+/**
+ * Class implementing the WNAF (Window Non-Adjacent Form) multiplication
+ * algorithm.
+ */
+public class WNafL2RMultiplier extends AbstractECMultiplier
+{
+    /**
+     * Multiplies <code>this</code> by an integer <code>k</code> using the
+     * Window NAF method.
+     * @param k The integer by which <code>this</code> is multiplied.
+     * @return A new <code>ECPoint</code> which equals <code>this</code>
+     * multiplied by <code>k</code>.
+     */
+    protected ECPoint multiplyPositive(ECPoint p, BigInteger k)
+    {
+        // Clamp the window width in the range [2, 16]
+        int width = Math.max(2, Math.min(16, getWindowSize(k.bitLength())));
+
+        WNafPreCompInfo wnafPreCompInfo = WNafUtil.precompute(p, width, true);
+        ECPoint[] preComp = wnafPreCompInfo.getPreComp();
+        ECPoint[] preCompNeg = wnafPreCompInfo.getPreCompNeg();
+
+        int[] wnaf = WNafUtil.generateCompactWindowNaf(width, k);
+
+        ECPoint R = p.getCurve().getInfinity();
+
+        int i = wnaf.length;
+
+        /*
+         * NOTE This code optimizes the first window using the precomputed points to substitute an
+         * addition for 2 or more doublings.
+         */
+        if (i > 1)
+        {
+            int wi = wnaf[--i];
+            int digit = wi >> 16, zeroes = wi & 0xFFFF;
+
+            int n = Math.abs(digit);
+            ECPoint[] table = digit < 0 ? preCompNeg : preComp;
+
+            /*
+             * NOTE: We use this optimization conservatively, since some coordinate systems have
+             * significantly cheaper doubling relative to addition.
+             * 
+             * (n << 2) selects precomputed values in the lower half of the table
+             * (n << 3) selects precomputed values in the lower quarter of the table
+             */
+            //if ((n << 2) < (1 << width))
+            if ((n << 3) < (1 << width))
+            {
+                int highest = LongArray.bitLengths[n];
+                int lowBits =  n ^ (1 << (highest - 1));
+                int scale = width - highest;
+
+                int i1 = ((1 << (width - 1)) - 1);
+                int i2 = (lowBits << scale) + 1;
+                R = table[i1 >>> 1].add(table[i2 >>> 1]);
+
+                zeroes -= scale;
+
+//              System.out.println("Optimized: 2^" + scale + " * " + n + " = " + i1 + " + " + i2);
+            }
+            else
+            {
+                R = table[n >>> 1];
+            }
+
+            R = R.timesPow2(zeroes);
+        }
+
+        while (i > 0)
+        {
+            int wi = wnaf[--i];
+            int digit = wi >> 16, zeroes = wi & 0xFFFF;
+
+            int n = Math.abs(digit);
+            ECPoint[] table = digit < 0 ? preCompNeg : preComp;
+            ECPoint r = table[n >>> 1];
+
+            R = R.twicePlus(r);
+            R = R.timesPow2(zeroes);
+        }
+
+        return R;
+    }
+
+    /**
+     * Determine window width to use for a scalar multiplication of the given size.
+     * 
+     * @param bits the bit-length of the scalar to multiply by
+     * @return the window size to use
+     */
+    protected int getWindowSize(int bits)
+    {
+        return WNafUtil.getWindowSize(bits);
+    }
+}
diff --git a/bcprov/src/main/java/org/bouncycastle/math/ec/WNafMultiplier.java b/bcprov/src/main/java/org/bouncycastle/math/ec/WNafMultiplier.java
deleted file mode 100644
index 10c8ed2..0000000
--- a/bcprov/src/main/java/org/bouncycastle/math/ec/WNafMultiplier.java
+++ /dev/null
@@ -1,240 +0,0 @@
-package org.bouncycastle.math.ec;
-
-import java.math.BigInteger;
-
-/**
- * Class implementing the WNAF (Window Non-Adjacent Form) multiplication
- * algorithm.
- */
-class WNafMultiplier implements ECMultiplier
-{
-    /**
-     * Computes the Window NAF (non-adjacent Form) of an integer.
-     * @param width The width <code>w</code> of the Window NAF. The width is
-     * defined as the minimal number <code>w</code>, such that for any
-     * <code>w</code> consecutive digits in the resulting representation, at
-     * most one is non-zero.
-     * @param k The integer of which the Window NAF is computed.
-     * @return The Window NAF of the given width, such that the following holds:
-     * <code>k = &sum;<sub>i=0</sub><sup>l-1</sup> k<sub>i</sub>2<sup>i</sup>
-     * </code>, where the <code>k<sub>i</sub></code> denote the elements of the
-     * returned <code>byte[]</code>.
-     */
-    public byte[] windowNaf(byte width, BigInteger k)
-    {
-        // The window NAF is at most 1 element longer than the binary
-        // representation of the integer k. byte can be used instead of short or
-        // int unless the window width is larger than 8. For larger width use
-        // short or int. However, a width of more than 8 is not efficient for
-        // m = log2(q) smaller than 2305 Bits. Note: Values for m larger than
-        // 1000 Bits are currently not used in practice.
-        byte[] wnaf = new byte[k.bitLength() + 1];
-
-        // 2^width as short and BigInteger
-        short pow2wB = (short)(1 << width);
-        BigInteger pow2wBI = BigInteger.valueOf(pow2wB);
-
-        int i = 0;
-
-        // The actual length of the WNAF
-        int length = 0;
-
-        // while k >= 1
-        while (k.signum() > 0)
-        {
-            // if k is odd
-            if (k.testBit(0))
-            {
-                // k mod 2^width
-                BigInteger remainder = k.mod(pow2wBI);
-
-                // if remainder > 2^(width - 1) - 1
-                if (remainder.testBit(width - 1))
-                {
-                    wnaf[i] = (byte)(remainder.intValue() - pow2wB);
-                }
-                else
-                {
-                    wnaf[i] = (byte)remainder.intValue();
-                }
-                // wnaf[i] is now in [-2^(width-1), 2^(width-1)-1]
-
-                k = k.subtract(BigInteger.valueOf(wnaf[i]));
-                length = i;
-            }
-            else
-            {
-                wnaf[i] = 0;
-            }
-
-            // k = k/2
-            k = k.shiftRight(1);
-            i++;
-        }
-
-        length++;
-
-        // Reduce the WNAF array to its actual length
-        byte[] wnafShort = new byte[length];
-        System.arraycopy(wnaf, 0, wnafShort, 0, length);
-        return wnafShort;
-    }
-
-    /**
-     * Multiplies <code>this</code> by an integer <code>k</code> using the
-     * Window NAF method.
-     * @param k The integer by which <code>this</code> is multiplied.
-     * @return A new <code>ECPoint</code> which equals <code>this</code>
-     * multiplied by <code>k</code>.
-     */
-    public ECPoint multiply(ECPoint p, BigInteger k, PreCompInfo preCompInfo)
-    {
-        WNafPreCompInfo wnafPreCompInfo;
-
-        if ((preCompInfo != null) && (preCompInfo instanceof WNafPreCompInfo))
-        {
-            wnafPreCompInfo = (WNafPreCompInfo)preCompInfo;
-        }
-        else
-        {
-            // Ignore empty PreCompInfo or PreCompInfo of incorrect type
-            wnafPreCompInfo = new WNafPreCompInfo();
-        }
-
-        // floor(log2(k))
-        int m = k.bitLength();
-
-        // width of the Window NAF
-        byte width;
-
-        // Required length of precomputation array
-        int reqPreCompLen;
-
-        // Determine optimal width and corresponding length of precomputation
-        // array based on literature values
-        if (m < 13)
-        {
-            width = 2;
-            reqPreCompLen = 1;
-        }
-        else
-        {
-            if (m < 41)
-            {
-                width = 3;
-                reqPreCompLen = 2;
-            }
-            else
-            {
-                if (m < 121)
-                {
-                    width = 4;
-                    reqPreCompLen = 4;
-                }
-                else
-                {
-                    if (m < 337)
-                    {
-                        width = 5;
-                        reqPreCompLen = 8;
-                    }
-                    else
-                    {
-                        if (m < 897)
-                        {
-                            width = 6;
-                            reqPreCompLen = 16;
-                        }
-                        else
-                        {
-                            if (m < 2305)
-                            {
-                                width = 7;
-                                reqPreCompLen = 32;
-                            }
-                            else
-                            {
-                                width = 8;
-                                reqPreCompLen = 127;
-                            }
-                        }
-                    }
-                }
-            }
-        }
-
-        // The length of the precomputation array
-        int preCompLen = 1;
-
-        ECPoint[] preComp = wnafPreCompInfo.getPreComp();
-        ECPoint twiceP = wnafPreCompInfo.getTwiceP();
-
-        // Check if the precomputed ECPoints already exist
-        if (preComp == null)
-        {
-            // Precomputation must be performed from scratch, create an empty
-            // precomputation array of desired length
-            preComp = new ECPoint[]{ p };
-        }
-        else
-        {
-            // Take the already precomputed ECPoints to start with
-            preCompLen = preComp.length;
-        }
-
-        if (twiceP == null)
-        {
-            // Compute twice(p)
-            twiceP = p.twice();
-        }
-
-        if (preCompLen < reqPreCompLen)
-        {
-            // Precomputation array must be made bigger, copy existing preComp
-            // array into the larger new preComp array
-            ECPoint[] oldPreComp = preComp;
-            preComp = new ECPoint[reqPreCompLen];
-            System.arraycopy(oldPreComp, 0, preComp, 0, preCompLen);
-
-            for (int i = preCompLen; i < reqPreCompLen; i++)
-            {
-                // Compute the new ECPoints for the precomputation array.
-                // The values 1, 3, 5, ..., 2^(width-1)-1 times p are
-                // computed
-                preComp[i] = twiceP.add(preComp[i - 1]);
-            }            
-        }
-
-        // Compute the Window NAF of the desired width
-        byte[] wnaf = windowNaf(width, k);
-        int l = wnaf.length;
-
-        // Apply the Window NAF to p using the precomputed ECPoint values.
-        ECPoint q = p.getCurve().getInfinity();
-        for (int i = l - 1; i >= 0; i--)
-        {
-            q = q.twice();
-
-            if (wnaf[i] != 0)
-            {
-                if (wnaf[i] > 0)
-                {
-                    q = q.add(preComp[(wnaf[i] - 1)/2]);
-                }
-                else
-                {
-                    // wnaf[i] < 0
-                    q = q.subtract(preComp[(-wnaf[i] - 1)/2]);
-                }
-            }
-        }
-
-        // Set PreCompInfo in ECPoint, such that it is available for next
-        // multiplication.
-        wnafPreCompInfo.setPreComp(preComp);
-        wnafPreCompInfo.setTwiceP(twiceP);
-        p.setPreCompInfo(wnafPreCompInfo);
-        return q;
-    }
-
-}
diff --git a/bcprov/src/main/java/org/bouncycastle/math/ec/WNafPreCompInfo.java b/bcprov/src/main/java/org/bouncycastle/math/ec/WNafPreCompInfo.java
index fc0d5fe..d142ab7 100644
--- a/bcprov/src/main/java/org/bouncycastle/math/ec/WNafPreCompInfo.java
+++ b/bcprov/src/main/java/org/bouncycastle/math/ec/WNafPreCompInfo.java
@@ -4,21 +4,23 @@ package org.bouncycastle.math.ec;
  * Class holding precomputation data for the WNAF (Window Non-Adjacent Form)
  * algorithm.
  */
-class WNafPreCompInfo implements PreCompInfo
+public class WNafPreCompInfo implements PreCompInfo
 {
     /**
-     * Array holding the precomputed <code>ECPoint</code>s used for the Window
-     * NAF multiplication in <code>
-     * {@link org.bouncycastle.math.ec.multiplier.WNafMultiplier.multiply()
-     * WNafMultiplier.multiply()}</code>.
+     * Array holding the precomputed <code>ECPoint</code>s used for a Window
+     * NAF multiplication.
      */
     private ECPoint[] preComp = null;
 
     /**
+     * Array holding the negations of the precomputed <code>ECPoint</code>s used
+     * for a Window NAF multiplication.
+     */
+    private ECPoint[] preCompNeg = null;
+
+    /**
      * Holds an <code>ECPoint</code> representing twice(this). Used for the
-     * Window NAF multiplication in <code>
-     * {@link org.bouncycastle.math.ec.multiplier.WNafMultiplier.multiply()
-     * WNafMultiplier.multiply()}</code>.
+     * Window NAF multiplication to create or extend the precomputed values.
      */
     private ECPoint twiceP = null;
 
@@ -27,18 +29,28 @@ class WNafPreCompInfo implements PreCompInfo
         return preComp;
     }
 
+    protected ECPoint[] getPreCompNeg()
+    {
+        return preCompNeg;
+    }
+
     protected void setPreComp(ECPoint[] preComp)
     {
         this.preComp = preComp;
     }
 
+    protected void setPreCompNeg(ECPoint[] preCompNeg)
+    {
+        this.preCompNeg = preCompNeg;
+    }
+
     protected ECPoint getTwiceP()
     {
         return twiceP;
     }
 
-    protected void setTwiceP(ECPoint twiceThis)
+    protected void setTwiceP(ECPoint twiceP)
     {
-        this.twiceP = twiceThis;
+        this.twiceP = twiceP;
     }
 }
diff --git a/bcprov/src/main/java/org/bouncycastle/math/ec/WNafUtil.java b/bcprov/src/main/java/org/bouncycastle/math/ec/WNafUtil.java
new file mode 100644
index 0000000..6465d66
--- /dev/null
+++ b/bcprov/src/main/java/org/bouncycastle/math/ec/WNafUtil.java
@@ -0,0 +1,393 @@
+package org.bouncycastle.math.ec;
+
+import java.math.BigInteger;
+
+public abstract class WNafUtil
+{
+    private static int[] DEFAULT_WINDOW_SIZE_CUTOFFS = new int[]{ 13, 41, 121, 337, 897, 2305 };
+
+    public static int[] generateCompactNaf(BigInteger k)
+    {
+        if ((k.bitLength() >>> 16) != 0)
+        {
+            throw new IllegalArgumentException("'k' must have bitlength < 2^16");
+        }
+
+        BigInteger _3k = k.shiftLeft(1).add(k);
+
+        int digits = _3k.bitLength() - 1;
+        int[] naf = new int[(digits + 1) >> 1];
+
+        int length = 0, zeroes = 0;
+        for (int i = 1; i <= digits; ++i)
+        {
+            boolean _3kBit = _3k.testBit(i);
+            boolean kBit = k.testBit(i);
+
+            if (_3kBit == kBit)
+            {
+                ++zeroes;
+            }
+            else
+            {
+                int digit  = kBit ? -1 : 1;
+                naf[length++] = (digit << 16) | zeroes;
+                zeroes = 0;
+            }
+        }
+
+        if (naf.length > length)
+        {
+            naf = trim(naf, length);
+        }
+
+        return naf;
+    }
+
+    public static int[] generateCompactWindowNaf(int width, BigInteger k)
+    {
+        if (width == 2)
+        {
+            return generateCompactNaf(k);
+        }
+
+        if (width < 2 || width > 16)
+        {
+            throw new IllegalArgumentException("'width' must be in the range [2, 16]");
+        }
+        if ((k.bitLength() >>> 16) != 0)
+        {
+            throw new IllegalArgumentException("'k' must have bitlength < 2^16");
+        }
+
+        int[] wnaf = new int[k.bitLength() / width + 1];
+
+        // 2^width and a mask and sign bit set accordingly
+        int pow2 = 1 << width;
+        int mask = pow2 - 1;
+        int sign = pow2 >>> 1;
+
+        boolean carry = false;
+        int length = 0, pos = 0;
+
+        while (pos <= k.bitLength())
+        {
+            if (k.testBit(pos) == carry)
+            {
+                ++pos;
+                continue;
+            }
+
+            k = k.shiftRight(pos);
+
+            int digit = k.intValue() & mask;
+            if (carry)
+            {
+                ++digit;
+            }
+
+            carry = (digit & sign) != 0;
+            if (carry)
+            {
+                digit -= pow2;
+            }
+
+            int zeroes = length > 0 ? pos - 1 : pos;
+            wnaf[length++] = (digit << 16) | zeroes;
+            pos = width;
+        }
+
+        // Reduce the WNAF array to its actual length
+        if (wnaf.length > length)
+        {
+            wnaf = trim(wnaf, length);
+        }
+
+        return wnaf;
+    }
+
+    public static byte[] generateJSF(BigInteger g, BigInteger h)
+    {
+        int digits = Math.max(g.bitLength(), h.bitLength()) + 1;
+        byte[] jsf = new byte[digits];
+
+        BigInteger k0 = g, k1 = h;
+        int j = 0, d0 = 0, d1 = 0;
+
+        while (k0.signum() > 0 || k1.signum() > 0 || d0 > 0 || d1 > 0)
+        {
+            int n0 = (k0.intValue() + d0) & 7, n1 = (k1.intValue() + d1) & 7;
+
+            int u0 = n0 & 1;
+            if (u0 != 0)
+            {
+                u0 -= (n0 & 2);
+                if ((n0 + u0) == 4 && (n1 & 3) == 2)
+                {
+                    u0 = -u0;
+                }
+            }
+
+            int u1 = n1 & 1;
+            if (u1 != 0)
+            {
+                u1 -= (n1 & 2);
+                if ((n1 + u1) == 4 && (n0 & 3) == 2)
+                {
+                    u1 = -u1;
+                }
+            }
+
+            if ((d0 << 1) == 1 + u0)
+            {
+                d0 = 1 - d0;
+            }
+            if ((d1 << 1) == 1 + u1)
+            {
+                d1 = 1 - d1;
+            }
+
+            k0 = k0.shiftRight(1);
+            k1 = k1.shiftRight(1);
+
+            jsf[j++] = (byte)((u0 << 4) | (u1 & 0xF));
+        }
+
+        // Reduce the JSF array to its actual length
+        if (jsf.length > j)
+        {
+            jsf = trim(jsf, j);
+        }
+
+        return jsf;
+    }
+
+    public static byte[] generateNaf(BigInteger k)
+    {
+        BigInteger _3k = k.shiftLeft(1).add(k);
+
+        int digits = _3k.bitLength() - 1;
+        byte[] naf = new byte[digits];
+
+        for (int i = 1; i <= digits; ++i)
+        {
+            boolean _3kBit = _3k.testBit(i);
+            boolean kBit = k.testBit(i);
+
+            naf[i - 1] = (byte)(_3kBit == kBit ? 0 : kBit ? -1 : 1);
+        }
+
+        return naf;
+    }
+
+    /**
+     * Computes the Window NAF (non-adjacent Form) of an integer.
+     * @param width The width <code>w</code> of the Window NAF. The width is
+     * defined as the minimal number <code>w</code>, such that for any
+     * <code>w</code> consecutive digits in the resulting representation, at
+     * most one is non-zero.
+     * @param k The integer of which the Window NAF is computed.
+     * @return The Window NAF of the given width, such that the following holds:
+     * <code>k = &sum;<sub>i=0</sub><sup>l-1</sup> k<sub>i</sub>2<sup>i</sup>
+     * </code>, where the <code>k<sub>i</sub></code> denote the elements of the
+     * returned <code>byte[]</code>.
+     */
+    public static byte[] generateWindowNaf(int width, BigInteger k)
+    {
+        if (width == 2)
+        {
+            return generateNaf(k);
+        }
+
+        if (width < 2 || width > 8)
+        {
+            throw new IllegalArgumentException("'width' must be in the range [2, 8]");
+        }
+
+        byte[] wnaf = new byte[k.bitLength() + 1];
+
+        // 2^width and a mask and sign bit set accordingly
+        int pow2 = 1 << width;
+        int mask = pow2 - 1;
+        int sign = pow2 >>> 1;
+
+        boolean carry = false;
+        int length = 0, pos = 0;
+
+        while (pos <= k.bitLength())
+        {
+            if (k.testBit(pos) == carry)
+            {
+                ++pos;
+                continue;
+            }
+
+            k = k.shiftRight(pos);
+
+            int digit = k.intValue() & mask;
+            if (carry)
+            {
+                ++digit;
+            }
+
+            carry = (digit & sign) != 0;
+            if (carry)
+            {
+                digit -= pow2;
+            }
+
+            length += (length > 0) ? pos - 1 : pos;
+            wnaf[length++] = (byte)digit;
+            pos = width;
+        }
+
+        // Reduce the WNAF array to its actual length
+        if (wnaf.length > length)
+        {
+            wnaf = trim(wnaf, length);
+        }
+        
+        return wnaf;
+    }
+
+    public static WNafPreCompInfo getWNafPreCompInfo(PreCompInfo preCompInfo)
+    {
+        if ((preCompInfo != null) && (preCompInfo instanceof WNafPreCompInfo))
+        {
+            return (WNafPreCompInfo)preCompInfo;
+        }
+
+        return new WNafPreCompInfo();
+    }
+
+    /**
+     * Determine window width to use for a scalar multiplication of the given size.
+     * 
+     * @param bits the bit-length of the scalar to multiply by
+     * @return the window size to use
+     */
+    public static int getWindowSize(int bits)
+    {
+        return getWindowSize(bits, DEFAULT_WINDOW_SIZE_CUTOFFS);
+    }
+
+    /**
+     * Determine window width to use for a scalar multiplication of the given size.
+     * 
+     * @param bits the bit-length of the scalar to multiply by
+     * @param windowSizeCutoffs a monotonically increasing list of bit sizes at which to increment the window width
+     * @return the window size to use
+     */
+    public static int getWindowSize(int bits, int[] windowSizeCutoffs)
+    {
+        int w = 0;
+        for (; w < windowSizeCutoffs.length; ++w)
+        {
+            if (bits < windowSizeCutoffs[w])
+            {
+                break;
+            }
+        }
+        return w + 2;
+    }
+
+    public static WNafPreCompInfo precompute(ECPoint p, int width, boolean includeNegated)
+    {
+        ECCurve c = p.getCurve();
+        WNafPreCompInfo wnafPreCompInfo = getWNafPreCompInfo(c.getPreCompInfo(p));
+
+        ECPoint[] preComp = wnafPreCompInfo.getPreComp();
+        if (preComp == null)
+        {
+            preComp = new ECPoint[]{ p };
+        }
+
+        int preCompLen = preComp.length;
+        int reqPreCompLen = 1 << Math.max(0, width - 2);
+
+        if (preCompLen < reqPreCompLen)
+        {
+            ECPoint twiceP = wnafPreCompInfo.getTwiceP();
+            if (twiceP == null)
+            {
+                twiceP = preComp[0].twice().normalize();
+                wnafPreCompInfo.setTwiceP(twiceP);
+            }
+
+            preComp = resizeTable(preComp, reqPreCompLen);
+
+            /*
+             * TODO Okeya/Sakurai paper has precomputation trick and  "Montgomery's Trick" to speed this up.
+             * Also, co-Z arithmetic could avoid the subsequent normalization too.
+             */
+            for (int i = preCompLen; i < reqPreCompLen; i++)
+            {
+                /*
+                 * Compute the new ECPoints for the precomputation array. The values 1, 3, 5, ...,
+                 * 2^(width-1)-1 times p are computed
+                 */
+                preComp[i] = twiceP.add(preComp[i - 1]);
+            }
+
+            /*
+             * Having oft-used operands in affine form makes operations faster.
+             */
+            c.normalizeAll(preComp);
+        }
+
+        wnafPreCompInfo.setPreComp(preComp);
+
+        if (includeNegated)
+        {
+            ECPoint[] preCompNeg = wnafPreCompInfo.getPreCompNeg();
+            
+            int pos;
+            if (preCompNeg == null)
+            {
+                pos = 0;
+                preCompNeg = new ECPoint[reqPreCompLen]; 
+            }
+            else
+            {
+                pos = preCompNeg.length;
+                if (pos < reqPreCompLen)
+                {
+                    preCompNeg = resizeTable(preCompNeg, reqPreCompLen);
+                }
+            }
+
+            while (pos < reqPreCompLen)
+            {
+                preCompNeg[pos] = preComp[pos].negate();
+                ++pos;
+            }
+
+            wnafPreCompInfo.setPreCompNeg(preCompNeg);
+        }
+
+        c.setPreCompInfo(p, wnafPreCompInfo);
+
+        return wnafPreCompInfo;
+    }
+
+    private static byte[] trim(byte[] a, int length)
+    {
+        byte[] result = new byte[length];
+        System.arraycopy(a, 0, result, 0, result.length);
+        return result;
+    }
+
+    private static int[] trim(int[] a, int length)
+    {
+        int[] result = new int[length];
+        System.arraycopy(a, 0, result, 0, result.length);
+        return result;
+    }
+
+    private static ECPoint[] resizeTable(ECPoint[] a, int length)
+    {
+        ECPoint[] result = new ECPoint[length];
+        System.arraycopy(a, 0, result, 0, a.length);
+        return result;
+    }
+}
diff --git a/bcprov/src/main/java/org/bouncycastle/math/ec/WTauNafMultiplier.java b/bcprov/src/main/java/org/bouncycastle/math/ec/WTauNafMultiplier.java
index 2353979..7bd30ec 100644
--- a/bcprov/src/main/java/org/bouncycastle/math/ec/WTauNafMultiplier.java
+++ b/bcprov/src/main/java/org/bouncycastle/math/ec/WTauNafMultiplier.java
@@ -6,7 +6,7 @@ import java.math.BigInteger;
  * Class implementing the WTNAF (Window
  * <code>&tau;</code>-adic Non-Adjacent Form) algorithm.
  */
-class WTauNafMultiplier implements ECMultiplier
+public class WTauNafMultiplier extends AbstractECMultiplier
 {
     /**
      * Multiplies a {@link org.bouncycastle.math.ec.ECPoint.F2m ECPoint.F2m}
@@ -16,7 +16,7 @@ class WTauNafMultiplier implements ECMultiplier
      * @param k The integer by which to multiply <code>k</code>.
      * @return <code>p</code> multiplied by <code>k</code>.
      */
-    public ECPoint multiply(ECPoint point, BigInteger k, PreCompInfo preCompInfo)
+    protected ECPoint multiplyPositive(ECPoint point, BigInteger k)
     {
         if (!(point instanceof ECPoint.F2m))
         {
@@ -25,8 +25,7 @@ class WTauNafMultiplier implements ECMultiplier
         }
 
         ECPoint.F2m p = (ECPoint.F2m)point;
-
-        ECCurve.F2m curve = (ECCurve.F2m) p.getCurve();
+        ECCurve.F2m curve = (ECCurve.F2m)p.getCurve();
         int m = curve.getM();
         byte a = curve.getA().toBigInteger().byteValue();
         byte mu = curve.getMu();
@@ -34,7 +33,7 @@ class WTauNafMultiplier implements ECMultiplier
 
         ZTauElement rho = Tnaf.partModReduction(k, m, a, s, mu, (byte)10);
 
-        return multiplyWTnaf(p, rho, preCompInfo, a, mu);
+        return multiplyWTnaf(p, rho, curve.getPreCompInfo(p), a, mu);
     }
 
     /**
@@ -88,7 +87,7 @@ class WTauNafMultiplier implements ECMultiplier
         if ((preCompInfo == null) || !(preCompInfo instanceof WTauNafPreCompInfo))
         {
             pu = Tnaf.getPreComp(p, a);
-            p.setPreCompInfo(new WTauNafPreCompInfo(pu));
+            curve.setPreCompInfo(p, new WTauNafPreCompInfo(pu));
         }
         else
         {
diff --git a/bcprov/src/main/java/org/bouncycastle/math/ec/ZSignedDigitL2RMultiplier.java b/bcprov/src/main/java/org/bouncycastle/math/ec/ZSignedDigitL2RMultiplier.java
new file mode 100644
index 0000000..b478dc7
--- /dev/null
+++ b/bcprov/src/main/java/org/bouncycastle/math/ec/ZSignedDigitL2RMultiplier.java
@@ -0,0 +1,29 @@
+package org.bouncycastle.math.ec;
+
+import java.math.BigInteger;
+
+public class ZSignedDigitL2RMultiplier extends AbstractECMultiplier
+{
+    /**
+     * 'Zeroless' Signed Digit Left-to-Right.
+     */
+    protected ECPoint multiplyPositive(ECPoint p, BigInteger k)
+    {
+        ECPoint addP = p.normalize(), subP = addP.negate();
+
+        ECPoint R0 = addP;
+
+        int n = k.bitLength();
+        int s = k.getLowestSetBit();
+
+        int i = n;
+        while (--i > s)
+        {
+            R0 = R0.twicePlus(k.testBit(i) ? addP : subP);
+        }
+
+        R0 = R0.timesPow2(s);
+
+        return R0;
+    }
+}
diff --git a/bcprov/src/main/java/org/bouncycastle/math/ec/ZSignedDigitR2LMultiplier.java b/bcprov/src/main/java/org/bouncycastle/math/ec/ZSignedDigitR2LMultiplier.java
new file mode 100644
index 0000000..baa702f
--- /dev/null
+++ b/bcprov/src/main/java/org/bouncycastle/math/ec/ZSignedDigitR2LMultiplier.java
@@ -0,0 +1,30 @@
+package org.bouncycastle.math.ec;
+
+import java.math.BigInteger;
+
+public class ZSignedDigitR2LMultiplier extends AbstractECMultiplier
+{
+    /**
+     * 'Zeroless' Signed Digit Right-to-Left.
+     */
+    protected ECPoint multiplyPositive(ECPoint p, BigInteger k)
+    {
+        ECPoint R0 = p.getCurve().getInfinity(), R1 = p;
+
+        int n = k.bitLength();
+        int s = k.getLowestSetBit();
+
+        R1 = R1.timesPow2(s);
+
+        int i = s;
+        while (++i < n)
+        {
+            R0 = R0.add(k.testBit(i) ? R1 : R1.negate());
+            R1 = R1.twice();
+        }
+
+        R0 = R0.add(R1);
+
+        return R0;
+    }
+}
diff --git a/bcprov/src/main/java/org/bouncycastle/math/ec/package.html b/bcprov/src/main/java/org/bouncycastle/math/ec/package.html
deleted file mode 100644
index a02605b..0000000
--- a/bcprov/src/main/java/org/bouncycastle/math/ec/package.html
+++ /dev/null
@@ -1,5 +0,0 @@
-<html>
-<body bgcolor="#ffffff">
-Math support for Elliptic Curve.
-</body>
-</html>