mpn/generic/toom_interpolate_8pts.c - gmp - Git at Google

 /* mpn_toom_interpolate_8pts -- Interpolate for toom54, 63, 72.

    Contributed to the GNU project by Marco Bodrato.

    THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
    SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
    GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.

 Copyright 2009, 2011, 2012 Free Software Foundation, Inc.

 This file is part of the GNU MP Library.

 The GNU MP Library is free software; you can redistribute it and/or modify
 it under the terms of either:

   * the GNU Lesser General Public License as published by the Free
     Software Foundation; either version 3 of the License, or (at your
     option) any later version.

 or

   * the GNU General Public License as published by the Free Software
     Foundation; either version 2 of the License, or (at your option) any
     later version.

 or both in parallel, as here.

 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 for more details.

 You should have received copies of the GNU General Public License and the
 GNU Lesser General Public License along with the GNU MP Library.  If not,
 see https://www.gnu.org/licenses/.  */

 #include "gmp-impl.h"

 #define BINVERT_3 MODLIMB_INVERSE_3

 #define BINVERT_15 \
   ((((GMP_NUMB_MAX >> (GMP_NUMB_BITS % 4)) / 15) * 14 * 16 & GMP_NUMB_MAX) + 15)

 #define BINVERT_45 ((BINVERT_15 * BINVERT_3) & GMP_NUMB_MASK)

 #ifndef mpn_divexact_by3
 #if HAVE_NATIVE_mpn_pi1_bdiv_q_1
 #define mpn_divexact_by3(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,3,BINVERT_3,0)
 #else
 #define mpn_divexact_by3(dst,src,size) mpn_divexact_1(dst,src,size,3)
 #endif
 #endif

 #ifndef mpn_divexact_by45
 #if GMP_NUMB_BITS % 12 == 0
 #define mpn_divexact_by45(dst,src,size) \
   (63 & 19 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 45)))
 #else
 #if HAVE_NATIVE_mpn_pi1_bdiv_q_1
 #define mpn_divexact_by45(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,45,BINVERT_45,0)
 #else
 #define mpn_divexact_by45(dst,src,size) mpn_divexact_1(dst,src,size,45)
 #endif
 #endif
 #endif

 #if HAVE_NATIVE_mpn_sublsh2_n_ip1
 #define DO_mpn_sublsh2_n(dst,src,n,ws) mpn_sublsh2_n_ip1(dst,src,n)
 #else
 #define DO_mpn_sublsh2_n(dst,src,n,ws) DO_mpn_sublsh_n(dst,src,n,2,ws)
 #endif

 #if HAVE_NATIVE_mpn_sublsh_n
 #define DO_mpn_sublsh_n(dst,src,n,s,ws) mpn_sublsh_n (dst,dst,src,n,s)
 #else
 static mp_limb_t
 DO_mpn_sublsh_n (mp_ptr dst, mp_srcptr src, mp_size_t n, unsigned int s, mp_ptr ws)
 {
 #if USE_MUL_1 && 0
   return mpn_submul_1(dst,src,n,CNST_LIMB(1) <<(s));
 #else
   mp_limb_t __cy;
   __cy = mpn_lshift (ws,src,n,s);
   return __cy + mpn_sub_n (dst,dst,ws,n);
 #endif
 }
 #endif


 #if HAVE_NATIVE_mpn_subrsh
 #define DO_mpn_subrsh(dst,nd,src,ns,s,ws) mpn_subrsh (dst,nd,src,ns,s)
 #else
 /* This is not a correct definition, it assumes no carry */
 #define DO_mpn_subrsh(dst,nd,src,ns,s,ws)				\
 do {									\
   mp_limb_t __cy;							\
   MPN_DECR_U (dst, nd, src[0] >> s);					\
   __cy = DO_mpn_sublsh_n (dst, src + 1, ns - 1, GMP_NUMB_BITS - s, ws);	\
   MPN_DECR_U (dst + ns - 1, nd - ns + 1, __cy);				\
 } while (0)
 #endif

 /* Interpolation for Toom-4.5 (or Toom-4), using the evaluation
    points: infinity(4.5 only), 4, -4, 2, -2, 1, -1, 0. More precisely,
    we want to compute f(2^(GMP_NUMB_BITS * n)) for a polynomial f of
    degree 7 (or 6), given the 8 (rsp. 7) values:

      r1 = limit at infinity of f(x) / x^7,
      r2 = f(4),
      r3 = f(-4),
      r4 = f(2),
      r5 = f(-2),
      r6 = f(1),
      r7 = f(-1),
      r8 = f(0).

    All couples of the form f(n),f(-n) must be already mixed with
    toom_couple_handling(f(n),...,f(-n),...)

    The result is stored in {pp, spt + 7*n (or 6*n)}.
    At entry, r8 is stored at {pp, 2n},
    r5 is stored at {pp + 3n, 3n + 1}.

    The other values are 2n+... limbs each (with most significant limbs small).

    All intermediate results are positive.
    Inputs are destroyed.
 */

 void
 mpn_toom_interpolate_8pts (mp_ptr pp, mp_size_t n,
 			   mp_ptr r3, mp_ptr r7,
 			   mp_size_t spt, mp_ptr ws)
 {
   mp_limb_signed_t cy;
   mp_ptr r5, r1;
   r5 = (pp + 3 * n);			/* 3n+1 */
   r1 = (pp + 7 * n);			/* spt */

   /******************************* interpolation *****************************/

   DO_mpn_subrsh(r3+n, 2 * n + 1, pp, 2 * n, 4, ws);
   cy = DO_mpn_sublsh_n (r3, r1, spt, 12, ws);
   MPN_DECR_U (r3 + spt, 3 * n + 1 - spt, cy);

   DO_mpn_subrsh(r5+n, 2 * n + 1, pp, 2 * n, 2, ws);
   cy = DO_mpn_sublsh_n (r5, r1, spt, 6, ws);
   MPN_DECR_U (r5 + spt, 3 * n + 1 - spt, cy);

   r7[3*n] -= mpn_sub_n (r7+n, r7+n, pp, 2 * n);
   cy = mpn_sub_n (r7, r7, r1, spt);
   MPN_DECR_U (r7 + spt, 3 * n + 1 - spt, cy);

   ASSERT_NOCARRY(mpn_sub_n (r3, r3, r5, 3 * n + 1));
   ASSERT_NOCARRY(mpn_rshift(r3, r3, 3 * n + 1, 2));

   ASSERT_NOCARRY(mpn_sub_n (r5, r5, r7, 3 * n + 1));

   ASSERT_NOCARRY(mpn_sub_n (r3, r3, r5, 3 * n + 1));

   mpn_divexact_by45 (r3, r3, 3 * n + 1);

   ASSERT_NOCARRY(mpn_divexact_by3 (r5, r5, 3 * n + 1));

   ASSERT_NOCARRY(DO_mpn_sublsh2_n (r5, r3, 3 * n + 1, ws));

   /* last interpolation steps... */
   /* ... are mixed with recomposition */

   /***************************** recomposition *******************************/
   /*
     pp[] prior to operations:
      |_H r1|_L r1|____||_H r5|_M_r5|_L r5|_____|_H r8|_L r8|pp

     summation scheme for remaining operations:
      |____8|n___7|n___6|n___5|n___4|n___3|n___2|n____|n____|pp
      |_H r1|_L r1|____||_H*r5|_M r5|_L r5|_____|_H_r8|_L r8|pp
 	  ||_H r3|_M r3|_L*r3|
 				  ||_H_r7|_M_r7|_L_r7|
 		      ||-H r3|-M r3|-L*r3|
 				  ||-H*r5|-M_r5|-L_r5|
   */

   cy = mpn_add_n (pp + n, pp + n, r7, n); /* Hr8+Lr7-Lr5 */
   cy-= mpn_sub_n (pp + n, pp + n, r5, n);
   if (cy > 0) {
     MPN_INCR_U (r7 + n, 2*n + 1, 1);
     cy = 0;
   }

   cy = mpn_sub_nc (pp + 2*n, r7 + n, r5 + n, n, -cy); /* Mr7-Mr5 */
   MPN_DECR_U (r7 + 2*n, n + 1, cy);

   cy = mpn_add_n (pp + 3*n, r5, r7+ 2*n, n+1); /* Hr7+Lr5 */
   r5[3*n]+= mpn_add_n (r5 + 2*n, r5 + 2*n, r3, n); /* Hr5+Lr3 */
   cy-= mpn_sub_n (pp + 3*n, pp + 3*n, r5 + 2*n, n+1); /* Hr7-Hr5+Lr5-Lr3 */
   if (UNLIKELY(0 > cy))
     MPN_DECR_U (r5 + n + 1, 2*n, 1);
   else
     MPN_INCR_U (r5 + n + 1, 2*n, cy);

   ASSERT_NOCARRY(mpn_sub_n(pp + 4*n, r5 + n, r3 + n, 2*n +1)); /* Mr5-Mr3,Hr5-Hr3 */

   cy = mpn_add_1 (pp + 6*n, r3 + n, n, pp[6*n]);
   MPN_INCR_U (r3 + 2*n, n + 1, cy);
   cy = mpn_add_n (pp + 7*n, pp + 7*n, r3 + 2*n, n);
   if (LIKELY(spt != n))
     MPN_INCR_U (pp + 8*n, spt - n, cy + r3[3*n]);
   else
     ASSERT (r3[3*n] + cy == 0);
 }
	/* mpn_toom_interpolate_8pts -- Interpolate for toom54, 63, 72.

	Contributed to the GNU project by Marco Bodrato.

	THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
	SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
	GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.

	Copyright 2009, 2011, 2012 Free Software Foundation, Inc.

	This file is part of the GNU MP Library.

	The GNU MP Library is free software; you can redistribute it and/or modify
	it under the terms of either:

	* the GNU Lesser General Public License as published by the Free
	Software Foundation; either version 3 of the License, or (at your
	option) any later version.

	or

	* the GNU General Public License as published by the Free Software
	Foundation; either version 2 of the License, or (at your option) any
	later version.

	or both in parallel, as here.

	The GNU MP Library is distributed in the hope that it will be useful, but
	WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
	or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
	for more details.

	You should have received copies of the GNU General Public License and the
	GNU Lesser General Public License along with the GNU MP Library. If not,
	see https://www.gnu.org/licenses/. */

	#include "gmp-impl.h"

	#define BINVERT_3 MODLIMB_INVERSE_3

	#define BINVERT_15 \
	((((GMP_NUMB_MAX >> (GMP_NUMB_BITS % 4)) / 15) * 14 * 16 & GMP_NUMB_MAX) + 15)

	#define BINVERT_45 ((BINVERT_15 * BINVERT_3) & GMP_NUMB_MASK)

	#ifndef mpn_divexact_by3
	#if HAVE_NATIVE_mpn_pi1_bdiv_q_1
	#define mpn_divexact_by3(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,3,BINVERT_3,0)
	#else
	#define mpn_divexact_by3(dst,src,size) mpn_divexact_1(dst,src,size,3)
	#endif
	#endif

	#ifndef mpn_divexact_by45
	#if GMP_NUMB_BITS % 12 == 0
	#define mpn_divexact_by45(dst,src,size) \
	(63 & 19 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 45)))
	#else
	#if HAVE_NATIVE_mpn_pi1_bdiv_q_1
	#define mpn_divexact_by45(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,45,BINVERT_45,0)
	#else
	#define mpn_divexact_by45(dst,src,size) mpn_divexact_1(dst,src,size,45)
	#endif
	#endif
	#endif

	#if HAVE_NATIVE_mpn_sublsh2_n_ip1
	#define DO_mpn_sublsh2_n(dst,src,n,ws) mpn_sublsh2_n_ip1(dst,src,n)
	#else
	#define DO_mpn_sublsh2_n(dst,src,n,ws) DO_mpn_sublsh_n(dst,src,n,2,ws)
	#endif

	#if HAVE_NATIVE_mpn_sublsh_n
	#define DO_mpn_sublsh_n(dst,src,n,s,ws) mpn_sublsh_n (dst,dst,src,n,s)
	#else
	static mp_limb_t
	DO_mpn_sublsh_n (mp_ptr dst, mp_srcptr src, mp_size_t n, unsigned int s, mp_ptr ws)
	{
	#if USE_MUL_1 && 0
	return mpn_submul_1(dst,src,n,CNST_LIMB(1) <<(s));
	#else
	mp_limb_t __cy;
	__cy = mpn_lshift (ws,src,n,s);
	return __cy + mpn_sub_n (dst,dst,ws,n);
	#endif
	}
	#endif


	#if HAVE_NATIVE_mpn_subrsh
	#define DO_mpn_subrsh(dst,nd,src,ns,s,ws) mpn_subrsh (dst,nd,src,ns,s)
	#else
	/* This is not a correct definition, it assumes no carry */
	#define DO_mpn_subrsh(dst,nd,src,ns,s,ws) \
	do { \
	mp_limb_t __cy; \
	MPN_DECR_U (dst, nd, src[0] >> s); \
	__cy = DO_mpn_sublsh_n (dst, src + 1, ns - 1, GMP_NUMB_BITS - s, ws); \
	MPN_DECR_U (dst + ns - 1, nd - ns + 1, __cy); \
	} while (0)
	#endif

	/* Interpolation for Toom-4.5 (or Toom-4), using the evaluation
	points: infinity(4.5 only), 4, -4, 2, -2, 1, -1, 0. More precisely,
	we want to compute f(2^(GMP_NUMB_BITS * n)) for a polynomial f of
	degree 7 (or 6), given the 8 (rsp. 7) values:

	r1 = limit at infinity of f(x) / x^7,
	r2 = f(4),
	r3 = f(-4),
	r4 = f(2),
	r5 = f(-2),
	r6 = f(1),
	r7 = f(-1),
	r8 = f(0).

	All couples of the form f(n),f(-n) must be already mixed with
	toom_couple_handling(f(n),...,f(-n),...)

	The result is stored in {pp, spt + 7n (or 6n)}.
	At entry, r8 is stored at {pp, 2n},
	r5 is stored at {pp + 3n, 3n + 1}.

	The other values are 2n+... limbs each (with most significant limbs small).

	All intermediate results are positive.
	Inputs are destroyed.
	*/

	void
	mpn_toom_interpolate_8pts (mp_ptr pp, mp_size_t n,
	mp_ptr r3, mp_ptr r7,
	mp_size_t spt, mp_ptr ws)
	{
	mp_limb_signed_t cy;
	mp_ptr r5, r1;
	r5 = (pp + 3 * n); /* 3n+1 */
	r1 = (pp + 7 * n); /* spt */

	/***************************** interpolation ***************************/

	DO_mpn_subrsh(r3+n, 2 * n + 1, pp, 2 * n, 4, ws);
	cy = DO_mpn_sublsh_n (r3, r1, spt, 12, ws);
	MPN_DECR_U (r3 + spt, 3 * n + 1 - spt, cy);

	DO_mpn_subrsh(r5+n, 2 * n + 1, pp, 2 * n, 2, ws);
	cy = DO_mpn_sublsh_n (r5, r1, spt, 6, ws);
	MPN_DECR_U (r5 + spt, 3 * n + 1 - spt, cy);

	r7[3n] -= mpn_sub_n (r7+n, r7+n, pp, 2 n);
	cy = mpn_sub_n (r7, r7, r1, spt);
	MPN_DECR_U (r7 + spt, 3 * n + 1 - spt, cy);

	ASSERT_NOCARRY(mpn_sub_n (r3, r3, r5, 3 * n + 1));
	ASSERT_NOCARRY(mpn_rshift(r3, r3, 3 * n + 1, 2));

	ASSERT_NOCARRY(mpn_sub_n (r5, r5, r7, 3 * n + 1));

	ASSERT_NOCARRY(mpn_sub_n (r3, r3, r5, 3 * n + 1));

	mpn_divexact_by45 (r3, r3, 3 * n + 1);

	ASSERT_NOCARRY(mpn_divexact_by3 (r5, r5, 3 * n + 1));

	ASSERT_NOCARRY(DO_mpn_sublsh2_n (r5, r3, 3 * n + 1, ws));

	/* last interpolation steps... */
	/* ... are mixed with recomposition */

	/*************************** recomposition *****************************/
	/*
	pp[] prior to operations:
	\|_H r1\|_L r1\|____\|\|_H r5\|_M_r5\|_L r5\|_____\|_H r8\|_L r8\|pp

	summation scheme for remaining operations:
	\|____8\|n___7\|n___6\|n___5\|n___4\|n___3\|n___2\|n____\|n____\|pp
	\|_H r1\|_L r1\|____\|\|_H*r5\|_M r5\|_L r5\|_____\|_H_r8\|_L r8\|pp
	\|\|_H r3\|_M r3\|_L*r3\|
	\|\|_H_r7\|_M_r7\|_L_r7\|
	\|\|-H r3\|-M r3\|-L*r3\|
	\|\|-H*r5\|-M_r5\|-L_r5\|
	*/

	cy = mpn_add_n (pp + n, pp + n, r7, n); /* Hr8+Lr7-Lr5 */
	cy-= mpn_sub_n (pp + n, pp + n, r5, n);
	if (cy > 0) {
	MPN_INCR_U (r7 + n, 2*n + 1, 1);
	cy = 0;
	}

	cy = mpn_sub_nc (pp + 2n, r7 + n, r5 + n, n, -cy); / Mr7-Mr5 */
	MPN_DECR_U (r7 + 2*n, n + 1, cy);

	cy = mpn_add_n (pp + 3n, r5, r7+ 2n, n+1); /* Hr7+Lr5 */
	r5[3n]+= mpn_add_n (r5 + 2n, r5 + 2n, r3, n); / Hr5+Lr3 */
	cy-= mpn_sub_n (pp + 3n, pp + 3n, r5 + 2n, n+1); / Hr7-Hr5+Lr5-Lr3 */
	if (UNLIKELY(0 > cy))
	MPN_DECR_U (r5 + n + 1, 2*n, 1);
	else
	MPN_INCR_U (r5 + n + 1, 2*n, cy);

	ASSERT_NOCARRY(mpn_sub_n(pp + 4n, r5 + n, r3 + n, 2n +1)); /* Mr5-Mr3,Hr5-Hr3 */

	cy = mpn_add_1 (pp + 6n, r3 + n, n, pp[6n]);
	MPN_INCR_U (r3 + 2*n, n + 1, cy);
	cy = mpn_add_n (pp + 7n, pp + 7n, r3 + 2*n, n);
	if (LIKELY(spt != n))
	MPN_INCR_U (pp + 8n, spt - n, cy + r3[3n]);
	else
	ASSERT (r3[3*n] + cy == 0);
	}