/* hgcd_jacobi.c.

   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
   GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.

Copyright 2003-2005, 2008, 2011, 2012 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of either:

  * the GNU Lesser General Public License as published by the Free
    Software Foundation; either version 3 of the License, or (at your
    option) any later version.

or

  * the GNU General Public License as published by the Free Software
    Foundation; either version 2 of the License, or (at your option) any
    later version.

or both in parallel, as here.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received copies of the GNU General Public License and the
GNU Lesser General Public License along with the GNU MP Library.  If not,
see https://www.gnu.org/licenses/.  */

#include "gmp-impl.h"
#include "longlong.h"

/* This file is almost a copy of hgcd.c, with some added calls to
   mpn_jacobi_update */

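/* Context passed via mpn_gcd_subdiv_step to hgcd_jacobi_hook below: the
   matrix M being accumulated, and the packed Jacobi state bits maintained
   by mpn_jacobi_update.  */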
struct hgcd_jacobi_ctx
{
  struct hgcd_matrix *M;
  unsigned *bitsp;
};

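/* Hook called by mpn_gcd_subdiv_step after each division step.  Since hgcd
   stops before the gcd itself is reached, gp is always NULL here; qp, qn is
   the quotient of the step and d says which operand played the role of
   denominator.  Both the matrix M and the Jacobi state bits are updated.  */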
static void
hgcd_jacobi_hook (void *p, mp_srcptr gp, mp_size_t gn,
                  mp_srcptr qp, mp_size_t qn, int d)
{
  ASSERT (!gp);
  ASSERT (d >= 0);

  MPN_NORMALIZE (qp, qn);
  if (qn > 0)
    {
      struct hgcd_jacobi_ctx *ctx = (struct hgcd_jacobi_ctx *) p;
      /* NOTES: This is a bit ugly. A tp area is passed to
         gcd_subdiv_step, which stores q at the start of that area. We
         now use the rest. */
      mp_ptr tp = (mp_ptr) qp + qn;

      mpn_hgcd_matrix_update_q (ctx->M, qp, qn, d, tp);
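      /* Only q mod 4 matters for the Jacobi symbol bookkeeping, hence
         the qp[0] & 3 passed below.  */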
      *ctx->bitsp = mpn_jacobi_update (*ctx->bitsp, d, qp[0] & 3);
    }
}

/* Perform a few steps, using some combination of mpn_hgcd2, subtraction
   and division.  Reduces the size by almost one limb or more, but never
   below the given size s.  Returns the new size for a and b, or 0 if no
   more steps are possible.

   If hgcd2 succeeds, needs temporary space for hgcd_matrix_mul_1, M->n
   limbs, and for mpn_matrix22_mul1_inverse_vector, n limbs.  If hgcd2
   fails, needs space for the quotient, qn <= n - s + 1 limbs, and for
   hgcd_matrix_update_q, qn + (size of the appropriate column of M) <=
   resulting size of M.

   If N is the input size to the calling hgcd, then s = floor(N/2) + 1,
   M->n < N, and qn + matrix size <= n - s + 1 + n - s = 2 (n - s) + 1
   < N, so N limbs of temporary space are sufficient.
*/
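/* For example, with N = 20: s = floor(20/2) + 1 = 11, so for any n with
   s < n <= N we get qn <= n - s + 1 <= 10 and a matrix column of at most
   n - s = 9 limbs, i.e. at most 19 < 20 = N limbs in total.  */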

static mp_size_t
hgcd_jacobi_step (mp_size_t n, mp_ptr ap, mp_ptr bp, mp_size_t s,
                  struct hgcd_matrix *M, unsigned *bitsp, mp_ptr tp)
{
  struct hgcd_matrix1 M1;
  mp_limb_t mask;
  mp_limb_t ah, al, bh, bl;

  ASSERT (n > s);

  mask = ap[n-1] | bp[n-1];
  ASSERT (mask > 0);

  if (n == s + 1)
    {
      if (mask < 4)
        goto subtract;

      ah = ap[n-1]; al = ap[n-2];
      bh = bp[n-1]; bl = bp[n-2];
    }
  else if (mask & GMP_NUMB_HIGHBIT)
    {
      ah = ap[n-1]; al = ap[n-2];
      bh = bp[n-1]; bl = bp[n-2];
    }
  else
    {
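      /* Neither high limb has GMP_NUMB_HIGHBIT set: shift both operands'
         top limbs left by the same amount so that the extracted values
         are normalized, as in the branch above.  */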
      int shift;

      count_leading_zeros (shift, mask);
      ah = MPN_EXTRACT_NUMB (shift, ap[n-1], ap[n-2]);
      al = MPN_EXTRACT_NUMB (shift, ap[n-2], ap[n-3]);
      bh = MPN_EXTRACT_NUMB (shift, bp[n-1], bp[n-2]);
      bl = MPN_EXTRACT_NUMB (shift, bp[n-2], bp[n-3]);
    }

  /* Try an mpn_hgcd2 step */
  if (mpn_hgcd2_jacobi (ah, al, bh, bl, &M1, bitsp))
    {
      /* Multiply M <- M * M1 */
      mpn_hgcd_matrix_mul_1 (M, &M1, tp);

      /* Can't swap inputs, so we need to copy. */
      MPN_COPY (tp, ap, n);
      /* Multiply M1^{-1} (a;b) */
      return mpn_matrix22_mul1_inverse_vector (&M1, ap, tp, bp, n);
    }

 subtract:
  {
    struct hgcd_jacobi_ctx ctx;
    ctx.M = M;
    ctx.bitsp = bitsp;

    return mpn_gcd_subdiv_step (ap, bp, n, s, hgcd_jacobi_hook, &ctx, tp);
  }
}

/* Reduces a,b until |a-b| fits in n/2 + 1 limbs.  Constructs matrix M
   with elements of size at most (n+1)/2 - 1.  Returns new size of a,
   b, or zero if no reduction is possible. */

/* Same scratch requirements as for mpn_hgcd. */
mp_size_t
mpn_hgcd_jacobi (mp_ptr ap, mp_ptr bp, mp_size_t n,
                 struct hgcd_matrix *M, unsigned *bitsp, mp_ptr tp)
{
  mp_size_t s = n/2 + 1;

  mp_size_t nn;
  int success = 0;

  if (n <= s)
    /* Happens when n <= 2, a fairly uninteresting case but exercised
       by the random inputs of the testsuite. */
    return 0;

  ASSERT ((ap[n-1] | bp[n-1]) > 0);

  ASSERT ((n+1)/2 - 1 < M->alloc);

  if (ABOVE_THRESHOLD (n, HGCD_THRESHOLD))
    {
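      /* Divide-and-conquer: first recurse on the high n - p = ceil(n/2)
         limbs, adjust a and b by the resulting matrix, take individual
         steps until the size drops to n2, recurse once more on the high
         limbs, and let the loop at the end finish the reduction.  */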
      mp_size_t n2 = (3*n)/4 + 1;
      mp_size_t p = n/2;

      nn = mpn_hgcd_jacobi (ap + p, bp + p, n - p, M, bitsp, tp);
      if (nn > 0)
        {
          /* Needs 2*(p + M->n) <= 2*(floor(n/2) + ceil(n/2) - 1)
             = 2 (n - 1) */
          n = mpn_hgcd_matrix_adjust (M, p + nn, ap, bp, p, tp);
          success = 1;
        }
      while (n > n2)
        {
          /* Needs n + 1 storage */
          nn = hgcd_jacobi_step (n, ap, bp, s, M, bitsp, tp);
          if (!nn)
            return success ? n : 0;
          n = nn;
          success = 1;
        }

      if (n > s + 2)
        {
          struct hgcd_matrix M1;
          mp_size_t scratch;

          p = 2*s - n + 1;
          scratch = MPN_HGCD_MATRIX_INIT_ITCH (n-p);

          mpn_hgcd_matrix_init (&M1, n - p, tp);
          nn = mpn_hgcd_jacobi (ap + p, bp + p, n - p, &M1, bitsp, tp + scratch);
          if (nn > 0)
            {
              /* We always have max(M) > 2^{-(GMP_NUMB_BITS + 1)} max(M1) */
              ASSERT (M->n + 2 >= M1.n);

              /* Furthermore, assume M ends with a quotient (1, q; 0, 1),
                 then either q or q + 1 is a correct quotient, and M1 will
                 start with either (1, 0; 1, 1) or (2, 1; 1, 1).  This
                 rules out the case that the size of M * M1 is much
                 smaller than the expected M->n + M1->n. */

              ASSERT (M->n + M1.n < M->alloc);

              /* Needs 2 (p + M->n) <= 2 (2*s - n2 + 1 + n2 - s - 1)
                 = 2*s <= 2*(floor(n/2) + 1) <= n + 2. */
              n = mpn_hgcd_matrix_adjust (&M1, p + nn, ap, bp, p, tp + scratch);

              /* We need a bound for M->n + M1.n.  Let n be the original
                 input size.  Then

                   ceil(n/2) - 1 >= size of product >= M.n + M1.n - 2

                 and it follows that

                   M.n + M1.n <= ceil(n/2) + 1.

                 Then 3*(M.n + M1.n) + 5 <= 3 * ceil(n/2) + 8 is the
                 amount of needed scratch space. */
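              /* For example, if the original input size is n = 100:
                 M.n + M1.n <= ceil(100/2) + 1 = 51, so at most
                 3*51 + 5 = 158 = 3*50 + 8 scratch limbs are needed.  */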
              mpn_hgcd_matrix_mul (M, &M1, tp + scratch);
              success = 1;
            }
        }
    }

  for (;;)
    {
      /* Needs s+3 < n */
      nn = hgcd_jacobi_step (n, ap, bp, s, M, bitsp, tp);
      if (!nn)
        return success ? n : 0;

      n = nn;
      success = 1;
    }
}
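
/* The following is a hedged usage sketch, not part of the library proper and
   not the calling sequence used by the real callers (see jacobi.c): it only
   illustrates how mpn_hgcd_jacobi might be driven once.  The helpers it
   relies on (TMP_ALLOC_LIMBS, MPN_HGCD_MATRIX_INIT_ITCH, mpn_hgcd_matrix_init,
   mpn_hgcd_itch) are the usual gmp-impl.h facilities; *bitsp is assumed to
   have been set up with mpn_jacobi_init.  It is kept inside #if 0 so that it
   is never compiled.  */
#if 0
static mp_size_t
example_hgcd_jacobi_call (mp_ptr ap, mp_ptr bp, mp_size_t n, unsigned *bitsp)
{
  struct hgcd_matrix M;
  mp_size_t matrix_scratch;
  mp_size_t nn;
  mp_ptr tp;
  TMP_DECL;

  TMP_MARK;

  /* Storage for the matrix, followed by the hgcd scratch area; the comment
     above mpn_hgcd_jacobi says it has the same scratch requirements as
     mpn_hgcd, so mpn_hgcd_itch is used for the latter.  */
  matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n);
  tp = TMP_ALLOC_LIMBS (matrix_scratch + mpn_hgcd_itch (n));
  mpn_hgcd_matrix_init (&M, n, tp);

  /* On success the reduced size of a and b is returned, M holds the
     accumulated matrix, and *bitsp the updated Jacobi state; 0 means no
     reduction was possible.  */
  nn = mpn_hgcd_jacobi (ap, bp, n, &M, bitsp, tp + matrix_scratch);

  TMP_FREE;
  return nn;
}
#endif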