diff --git a/doc/ZZ_pX.txt b/doc/ZZ_pX.txt index 66ba65e..17d539b 100644 --- a/doc/ZZ_pX.txt +++ b/doc/ZZ_pX.txt @@ -426,8 +426,9 @@ All routines require n >= 0, otherwise an error is raised. \**************************************************************************/ -void trunc(ZZ_pX& x, const ZZ_pX& a, long n); // x = a % X^n +void trunc(ZZ_pX& x, const ZZ_pX& a, long n); ZZ_pX trunc(const ZZ_pX& a, long n); +// x = a % X^n void MulTrunc(ZZ_pX& x, const ZZ_pX& a, const ZZ_pX& b, long n); ZZ_pX MulTrunc(const ZZ_pX& a, const ZZ_pX& b, long n); @@ -439,7 +440,11 @@ ZZ_pX SqrTrunc(const ZZ_pX& a, long n); void InvTrunc(ZZ_pX& x, const ZZ_pX& a, long n); ZZ_pX InvTrunc(const ZZ_pX& a, long n); -// computes x = a^{-1} % X^m. Must have ConstTerm(a) invertible. +// computes x = a^{-1} % X^n. Must have ConstTerm(a) invertible. + +void ExpTrunc(ZZ_pX& x, const ZZ_pX& a, long n); +ZZ_pX ExpTrunc(const ZZ_pX& a, long n); +// x = exp(a) % X^n /**************************************************************************\ @@ -911,11 +916,11 @@ ZZ_p resultant(const ZZ_pX& a, const ZZ_pX& b); void CharPolyMod(ZZ_pX& g, const ZZ_pX& a, const ZZ_pX& f); ZZ_pX CharPolyMod(const ZZ_pX& a, const ZZ_pX& f); -// g = charcteristic polynomial of (a mod f); 0 < deg(f), deg(g) < -// deg(f); this routine works for arbitrary f; if f is irreducible, -// it is faster to use the IrredPolyMod routine, and then exponentiate -// if necessary (since in this case the CharPoly is just a power of -// the IrredPoly). +// g = characteristic polynomial of (a mod f); 0 < deg(f), deg(g) < deg(f). +// This routine works for arbitrary f. For irreducible f, +// it is often faster to use IrredPolyMod() and then exponentiate +// as necessary, since in this case the characteristic polynomial +// is a power of the minimal polynomial. /**************************************************************************\ diff --git a/doc/lzz_pX.txt b/doc/lzz_pX.txt index 0e5997f..85287cd 100644 --- a/doc/lzz_pX.txt +++ b/doc/lzz_pX.txt @@ -416,8 +416,9 @@ It is required that n >= 0, otherwise an error is raised. \**************************************************************************/ -void trunc(zz_pX& x, const zz_pX& a, long n); // x = a % X^n +void trunc(zz_pX& x, const zz_pX& a, long n); zz_pX trunc(const zz_pX& a, long n); +// x = a % X^n void MulTrunc(zz_pX& x, const zz_pX& a, const zz_pX& b, long n); zz_pX MulTrunc(const zz_pX& a, const zz_pX& b, long n); @@ -431,6 +432,10 @@ void InvTrunc(zz_pX& x, const zz_pX& a, long n); zz_pX InvTrunc(const zz_pX& a, long n); // computes x = a^{-1} % X^n. Must have ConstTerm(a) invertible. +void ExpTrunc(zz_pX& x, const zz_pX& a, long n); +zz_pX ExpTrunc(const zz_pX& a, long n) +// x = exp(a) % X^n + /**************************************************************************\ Modular Arithmetic (without pre-conditioning) @@ -855,11 +860,11 @@ void MinPolyMod(zz_pX& h, const zz_pX& g, const zz_pXModulus& F); zz_pX MinPolyMod(const zz_pX& g, const zz_pXModulus& F); // same as above, but guarantees that result is correct -void IrredPoly(zz_pX& h, const zz_pX& g, const zz_pXModulus& F, long m); -zz_pX IrredPoly(const zz_pX& g, const zz_pXModulus& F, long m); +void IrredPolyMod(zz_pX& h, const zz_pX& g, const zz_pXModulus& F, long m); +zz_pX IrredPolyMod(const zz_pX& g, const zz_pXModulus& F, long m); -void IrredPoly(zz_pX& h, const zz_pX& g, const zz_pXModulus& F); -zz_pX IrredPoly(const zz_pX& g, const zz_pXModulus& F); +void IrredPolyMod(zz_pX& h, const zz_pX& g, const zz_pXModulus& F); +zz_pX IrredPolyMod(const zz_pX& g, const zz_pXModulus& F); // same as above, but assumes that f is irreducible, or at least that // the minimal poly of g is itself irreducible. The algorithm is @@ -903,10 +908,10 @@ zz_pX resultant(zz_p& x, const zz_pX& a, const zz_pX& b); void CharPolyMod(zz_pX& g, const zz_pX& a, const zz_pX& f); zz_pX CharPolyMod(const zz_pX& a, const zz_pX& f); -// g = charcteristic polynomial of (a mod f); 0 < deg(f), deg(g) < -// deg(f). This routine works for arbitrary f. For irreducible f, -// is it faster to use IrredPolyMod, and then exponentiate as -// necessary, since in this case the characterstic polynomial +// g = characteristic polynomial of (a mod f); 0 < deg(f), deg(g) < deg(f). +// This routine works for arbitrary f. For irreducible f, +// it is often faster to use IrredPolyMod() and then exponentiate +// as necessary, since in this case the characteristic polynomial // is a power of the minimal polynomial. diff --git a/include/NTL/ZZ_pX.h b/include/NTL/ZZ_pX.h index d9bd98d..5d1c98f 100644 --- a/include/NTL/ZZ_pX.h +++ b/include/NTL/ZZ_pX.h @@ -548,6 +548,13 @@ inline ZZ_pX power(const ZZ_pX& a, long e) { ZZ_pX x; power(x, a, e); NTL_OPT_RETURN(ZZ_pX, x); } +void ExpTrunc(ZZ_pX& x, const ZZ_pX& a, long n); +inline ZZ_pX ExpTrunc(const ZZ_pX& a, long n) + { ZZ_pX x; ExpTrunc(x, a, n); NTL_OPT_RETURN(ZZ_pX, x); } + + + + // The following data structures and routines allow one // to hand-craft various algorithms, using the FFT convolution // algorithms directly. diff --git a/include/NTL/lzz_pX.h b/include/NTL/lzz_pX.h index 474d791..f864402 100644 --- a/include/NTL/lzz_pX.h +++ b/include/NTL/lzz_pX.h @@ -553,6 +553,9 @@ inline zz_pX power(const zz_pX& a, long e) { zz_pX x; power(x, a, e); NTL_OPT_RETURN(zz_pX, x); } +void ExpTrunc(zz_pX& x, const zz_pX& a, long n); +inline zz_pX ExpTrunc(const zz_pX& a, long n) + { zz_pX x; ExpTrunc(x, a, n); NTL_OPT_RETURN(zz_pX, x); } diff --git a/src/ZZ_pX1.cpp b/src/ZZ_pX1.cpp index b29f418..65da770 100644 --- a/src/ZZ_pX1.cpp +++ b/src/ZZ_pX1.cpp @@ -1753,6 +1753,78 @@ void SqrTrunc(ZZ_pX& x, const ZZ_pX& a, long n) } +// slight variant of https://homepages.loria.fr/PZimmermann/papers/fastnewton.ps.gz §5 +static void ExpTruncFast(ZZ_pX& f, ZZ_pX& g, const ZZ_pX& h, long prec) +{ + switch (prec) { + case 0: + clear(f); + clear(g); + return; + case 1: + f = 1; + clear(g); + return; + case 2: + f = 1 + trunc(h, 2); + g = 1; + return; + case 3: + f = 1 + trunc(h, 3) + MulTrunc(h, h, 3) / 2; + g = 1 - trunc(h, 2); + return; + } + + { + ZZ_pX h2 = MulTrunc(h, h, 4) / 2; + ZZ_pX h3 = MulTrunc(h, h2, 4) / 3; + f = 1 + trunc(h, 4) + h2 + h3; + g = 1 - trunc(h, 2); + } + + // invariant: f = exp(h) + O(x^n), g = exp(-h) + O(x^(n/2)) + for (long n = 4; n < prec; n *= 2) { + + // step 2 + ZZ_pX m1 = MulTrunc(f, sqr(g), n); + g.SetLength(n); + for (long i = n/2; i < n; ++i) + g[i] = -coeff(m1, i); + g.normalize(); + + // step 3 + ZZ_pX q = diff(trunc(h, n)); + + // step 4 + ZZ_pX s = MulTrunc(g, f*q >> (n-1), n); + q.SetLength(2*n-1); + for (long i = 0; i <= deg(s); ++i) + q[n-1+i] = -s[i]; + q.normalize(); + + // step 5 + unsigned long length = NTL::min(deg(q)+2, prec); + ZZ_pX intq; + intq.SetLength(length); + intq[0] = 0; + for (long i = 1; i < length; ++i) + intq[i] = q[i-1] / i; + intq.normalize(); + f = MulTrunc(f, 1 + h - intq, NTL::min(2*n, prec)); + } +} + +void ExpTrunc(ZZ_pX& x, const ZZ_pX& a, long n) +{ + if (n < 1) + LogicError("ExpTrunc: bad args"); + if (ConstTerm(a) != 0) + LogicError("ExpTrunc: nonzero constant term"); + ZZ_pX y; + ExpTruncFast(x, y, a, n); +} + + void FastTraceVec(vec_ZZ_p& S, const ZZ_pX& f) { long n = deg(f); diff --git a/src/ZZ_pXCharPoly.cpp b/src/ZZ_pXCharPoly.cpp index 964d008..991687d 100644 --- a/src/ZZ_pXCharPoly.cpp +++ b/src/ZZ_pXCharPoly.cpp @@ -29,14 +29,15 @@ void HessCharPoly(ZZ_pX& g, const ZZ_pX& a, const ZZ_pX& f) CharPoly(g, M); } +// "well-known" algorithm according to https://arxiv.org/pdf/1109.4323.pdf p.11 void CharPolyMod(ZZ_pX& g, const ZZ_pX& a, const ZZ_pX& ff) { ZZ_pX f = ff; MakeMonic(f); long n = deg(f); - if (n <= 0 || deg(a) >= n) - LogicError("CharPoly: bad args"); + if (n <= 0 || deg(a) >= n) + LogicError("CharPolyMod: bad args"); if (IsZero(a)) { clear(g); @@ -44,33 +45,38 @@ void CharPolyMod(ZZ_pX& g, const ZZ_pX& a, const ZZ_pX& ff) return; } - if (n > 25) { - ZZ_pX h; - MinPolyMod(h, a, f); - if (deg(h) == n) { - g = h; - return; - } - } - if (ZZ_p::modulus() < n+1) { - HessCharPoly(g, a, f); + MinPolyMod(g, a, f); + if (deg(g) < n) + HessCharPoly(g, a, f); return; } - vec_ZZ_p u(INIT_SIZE, n+1), v(INIT_SIZE, n+1); + // compute tr(x^0 mod f), tr(x^1 mod f), ..., tr(x^(n-1) mod f) as rev(f')/rev(f) + // see for instance https://cr.yp.to/papers/newton.pdf + vec_ZZ_p T(INIT_SIZE, n); + { + ZZ_pX rev_f = reverse(f); + ZZ_pX rev_df = reverse(diff(f)); + ZZ_pX inv_rev_f = InvTrunc(rev_f, n); + ZZ_pX quot = MulTrunc(rev_df, inv_rev_f, n); + VectorCopy(T, quot, n); + } - ZZ_pX h, h1; - negate(h, a); - long i; + // compute tr(a^0 mod f), tr(a^1 mod f), ..., tr(a^n mod f) + vec_ZZ_p tr = ProjectPowers(T, n+1, a, ff); - for (i = 0; i <= n; i++) { - u[i] = i; - add(h1, h, u[i]); - resultant(v[i], f, h1); - } + // recover characteristic polynomial using exp(∫g'/g) = g; + // "well-known" according to https://dl.acm.org/doi/pdf/10.1145/1145768.1145814 Prop. 9 + ZZ_pX trpoly; + trpoly.SetLength(n+1); + trpoly[0] = 0; + for (long i = 1; i < n+1; ++i) + trpoly[i] = -tr[i] / i; + trpoly.normalize(); - interpolate(g, u, v); + ExpTrunc(g, trpoly, n+1); + reverse(g, g, n); } NTL_END_IMPL diff --git a/src/lzz_pX1.cpp b/src/lzz_pX1.cpp index 798f629..e232125 100644 --- a/src/lzz_pX1.cpp +++ b/src/lzz_pX1.cpp @@ -1668,6 +1668,79 @@ void SqrTrunc(zz_pX& x, const zz_pX& a, long n) +// slight variant of https://homepages.loria.fr/PZimmermann/papers/fastnewton.ps.gz §5 +static void ExpTruncFast(zz_pX& f, zz_pX& g, const zz_pX& h, long prec) +{ + switch (prec) { + case 0: + clear(f); + clear(g); + return; + case 1: + f = 1; + clear(g); + return; + case 2: + f = 1 + trunc(h, 2); + g = 1; + return; + case 3: + f = 1 + trunc(h, 3) + MulTrunc(h, h, 3) / 2; + g = 1 - trunc(h, 2); + return; + } + + { + zz_pX h2 = MulTrunc(h, h, 4) / 2; + zz_pX h3 = MulTrunc(h, h2, 4) / 3; + f = 1 + trunc(h, 4) + h2 + h3; + g = 1 - trunc(h, 2); + } + + // invariant: f = exp(h) + O(x^n), g = exp(-h) + O(x^(n/2)) + for (long n = 4; n < prec; n *= 2) { + + // step 2 + zz_pX m1 = MulTrunc(f, sqr(g), n); + g.SetLength(n); + for (long i = n/2; i < n; ++i) + g[i] = -coeff(m1, i); + g.normalize(); + + // step 3 + zz_pX q = diff(trunc(h, n)); + + // step 4 + zz_pX s = MulTrunc(g, f*q >> (n-1), n); + q.SetLength(2*n-1); + for (long i = 0; i <= deg(s); ++i) + q[n-1+i] = -s[i]; + q.normalize(); + + // step 5 + unsigned long length = NTL::min(deg(q)+2, prec); + zz_pX intq; + intq.SetLength(length); + intq[0] = 0; + for (long i = 1; i < length; ++i) + intq[i] = q[i-1] / i; + intq.normalize(); + f = MulTrunc(f, 1 + h - intq, NTL::min(2*n, prec)); + } +} + +void ExpTrunc(zz_pX& x, const zz_pX& a, long n) +{ + if (n < 1) + LogicError("ExpTrunc: bad args"); + if (ConstTerm(a) != 0) + LogicError("ExpTrunc: nonzero constant term"); + zz_pX y; + ExpTruncFast(x, y, a, n); +} + + + void FastTraceVec(vec_zz_p& S, const zz_pX& f) { long n = deg(f); diff --git a/src/lzz_pXCharPoly.cpp b/src/lzz_pXCharPoly.cpp index f971981..70c14bf 100644 --- a/src/lzz_pXCharPoly.cpp +++ b/src/lzz_pXCharPoly.cpp @@ -29,14 +29,15 @@ void HessCharPoly(zz_pX& g, const zz_pX& a, const zz_pX& f) CharPoly(g, M); } +// "well-known" algorithm according to https://arxiv.org/pdf/1109.4323.pdf p.11 void CharPolyMod(zz_pX& g, const zz_pX& a, const zz_pX& ff) { zz_pX f = ff; MakeMonic(f); long n = deg(f); - if (n <= 0 || deg(a) >= n) - LogicError("CharPoly: bad args"); + if (n <= 0 || deg(a) >= n) + LogicError("CharPolyMod: bad args"); if (IsZero(a)) { clear(g); @@ -44,33 +45,38 @@ void CharPolyMod(zz_pX& g, const zz_pX& a, const zz_pX& ff) return; } - if (n > 90 || (zz_p::PrimeCnt() <= 1 && n > 45)) { - zz_pX h; - MinPolyMod(h, a, f); - if (deg(h) == n) { - g = h; - return; - } - } - if (zz_p::modulus() < n+1) { - HessCharPoly(g, a, f); + MinPolyMod(g, a, f); + if (deg(g) < n) + HessCharPoly(g, a, f); return; } - vec_zz_p u(INIT_SIZE, n+1), v(INIT_SIZE, n+1); + // compute tr(x^0 mod f), tr(x^1 mod f), ..., tr(x^(n-1) mod f) as rev(f')/rev(f) + // see for instance https://cr.yp.to/papers/newton.pdf + vec_zz_p T(INIT_SIZE, n); + { + zz_pX rev_f = reverse(f); + zz_pX rev_df = reverse(diff(f)); + zz_pX inv_rev_f = InvTrunc(rev_f, n); + zz_pX quot = MulTrunc(rev_df, inv_rev_f, n); + VectorCopy(T, quot, n); + } - zz_pX h, h1; - negate(h, a); - long i; + // compute tr(a^0 mod f), tr(a^1 mod f), ..., tr(a^n mod f) + vec_zz_p tr = ProjectPowers(T, n+1, a, ff); - for (i = 0; i <= n; i++) { - u[i] = i; - add(h1, h, u[i]); - resultant(v[i], f, h1); - } + // recover characteristic polynomial using exp(∫g'/g) = g; + // "well-known" according to https://dl.acm.org/doi/pdf/10.1145/1145768.1145814 Prop. 9 + zz_pX trpoly; + trpoly.SetLength(n+1); + trpoly[0] = 0; + for (long i = 1; i < n+1; ++i) + trpoly[i] = -tr[i] / i; + trpoly.normalize(); - interpolate(g, u, v); + ExpTrunc(g, trpoly, n+1); + reverse(g, g, n); } NTL_END_IMPL