/* mpn_div_q -- division for arbitrary size operands.

   Contributed to the GNU project by Torbjorn Granlund.

   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.

Copyright 2009, 2010, 2015, 2018 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of either:

  * the GNU Lesser General Public License as published by the Free
    Software Foundation; either version 3 of the License, or (at your
    option) any later version.

or

  * the GNU General Public License as published by the Free Software
    Foundation; either version 2 of the License, or (at your option) any
    later version.

or both in parallel, as here.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received copies of the GNU General Public License and the
GNU Lesser General Public License along with the GNU MP Library.  If not,
see https://www.gnu.org/licenses/.  */

#include "gmp-impl.h"
#include "longlong.h"

/* Compute Q = floor(N/D), the truncated quotient.
   N = {np,nn}
   D = {dp,dn}
   Q = {qp,nn-dn+1}
   T = {scratch,nn+1} is scratch space
   N and D are both untouched by the computation.
   N and T may overlap; pass the same space if N is irrelevant after the call,
   but note that the scratch area needs one limb more than N.

   Operand requirements:
     N >= D > 0
     dp[dn-1] != 0
     No overlap between the N, D, and Q areas.

   This division function does not clobber its input operands, since it is
   intended to support average-O(qn) division, and for that to be effective,
   it cannot require callers to copy an O(nn) operand.

   If a caller does not care about the value of {np,nn+1} after calling this
   function, it should pass np also for the scratch argument.  This function
   will then save some time and space by avoiding allocation and copying.
   (FIXME: Is this a good design?  We only really save any copying for
   already-normalised divisors, which should be rare.  It also prevents us
   from reasonably asking for all the scratch space we need.)

   We write nn-dn+1 limbs for the quotient, but return void.  Why not return
   the most significant quotient limb?  Look at the 4 main code blocks below
   (consisting of an outer if-else where each arm contains an if-else).  It is
   tricky for the first code block, since the mpn_*_div_q calls will typically
   generate all nn-dn+1 limbs and return 0 or 1.  I don't see how to fix that
   unless we generate the most significant quotient limb here, before calling
   mpn_*_div_q, or put the quotient in a temporary area.  Since this is a
   critical division case (the SB sub-case in particular) copying is not a
   good idea.

   It might make sense to split the if-else parts of the (qn + FUDGE
   >= dn) blocks into separate functions, since we could promise quite
   different things to callers in these two cases.  The 'then' case
   benefits from np=scratch, and it could perhaps even tolerate qp=np,
   saving some headache for many callers.

   FIXME: Scratch allocation leaves a lot to be desired.  E.g., for the MU
   size operands, we do not reuse the huge scratch for adjustments.  This can
   be a serious waste of memory for the largest operands.
*/
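
/* A minimal usage sketch (hypothetical: mpn_div_q is internal, so real code
   should reach it through documented interfaces such as mpz_tdiv_q).  The
   sizes follow the interface comment above, with np/nn/dp/dn as there:

     TMP_DECL;
     TMP_MARK;
     mp_ptr qp = TMP_ALLOC_LIMBS (nn - dn + 1);
     mp_ptr scratch = TMP_ALLOC_LIMBS (nn + 1);
     mpn_div_q (qp, np, nn, dp, dn, scratch);
     TMP_FREE;

   Passing np itself as the scratch argument is allowed when {np,nn+1} may be
   clobbered.  */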

/* FUDGE determines when to try getting an approximate quotient from the upper
   parts of the dividend and divisor, then adjust.  N.B. FUDGE must be >= 2
   for the code to be correct.  */
#define FUDGE 5   /* FIXME: tune this */
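
/* For example, with nn = 100 and dn = 10 we get qn = nn - dn + 1 = 91, so
   qn + FUDGE >= dn and a full-length division is performed.  With nn = 100
   and dn = 98 we get qn = 3, so qn + FUDGE < dn and the quotient is instead
   computed from the 2*qn+1 = 7 high limbs of N and the qn+1 = 4 high limbs
   of D, then adjusted.  */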

#define DC_DIV_Q_THRESHOLD      DC_DIVAPPR_Q_THRESHOLD
#define MU_DIV_Q_THRESHOLD      MU_DIVAPPR_Q_THRESHOLD
#define MUPI_DIV_Q_THRESHOLD    MUPI_DIVAPPR_Q_THRESHOLD
#ifndef MUPI_DIVAPPR_Q_THRESHOLD
#define MUPI_DIVAPPR_Q_THRESHOLD  MUPI_DIV_QR_THRESHOLD
#endif
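
/* No thresholds are tuned specifically for div_q; the crossover points are
   assumed to coincide with those of the corresponding divappr functions,
   falling back to the div_qr value when no MUPI divappr threshold was
   tuned.  */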

void
mpn_div_q (mp_ptr qp,
           mp_srcptr np, mp_size_t nn,
           mp_srcptr dp, mp_size_t dn, mp_ptr scratch)
{
  mp_ptr new_dp, new_np, tp, rp;
  mp_limb_t cy, dh, qh;
  mp_size_t new_nn, qn;
  gmp_pi1_t dinv;
  int cnt;
  TMP_DECL;
  TMP_MARK;

  ASSERT (nn >= dn);
  ASSERT (dn > 0);
  ASSERT (dp[dn - 1] != 0);
  ASSERT (! MPN_OVERLAP_P (qp, nn - dn + 1, np, nn));
  ASSERT (! MPN_OVERLAP_P (qp, nn - dn + 1, dp, dn));
  ASSERT (MPN_SAME_OR_SEPARATE_P (np, scratch, nn));

  ASSERT_ALWAYS (FUDGE >= 2);

  dh = dp[dn - 1];
  if (dn == 1)
    {
      mpn_divrem_1 (qp, 0L, np, nn, dh);
      return;
    }

  qn = nn - dn + 1;  /* Quotient size, high limb might be zero */

  if (qn + FUDGE >= dn)
    {
      /* |________________________|
                         |_______|  */
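      /* The quotient is at least about as long as the divisor, so little
         would be saved by dividing truncated high parts; do one full-length
         division.  */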
      new_np = scratch;

      if (LIKELY ((dh & GMP_NUMB_HIGHBIT) == 0))
        {
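          /* The divisor's high limb has leading zero bits: shift both
             operands left by cnt so that the divisor becomes normalised
             (most significant bit set), as divrem_2 and the sbpi1/dcpi1
             functions require.  */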
          count_leading_zeros (cnt, dh);

          cy = mpn_lshift (new_np, np, nn, cnt);
          new_np[nn] = cy;
          new_nn = nn + (cy != 0);

          new_dp = TMP_ALLOC_LIMBS (dn);
          mpn_lshift (new_dp, dp, dn, cnt);

          if (dn == 2)
            {
              qh = mpn_divrem_2 (qp, 0L, new_np, new_nn, new_dp);
            }
          else if (BELOW_THRESHOLD (dn, DC_DIV_Q_THRESHOLD) ||
                   BELOW_THRESHOLD (new_nn - dn, DC_DIV_Q_THRESHOLD))
            {
              invert_pi1 (dinv, new_dp[dn - 1], new_dp[dn - 2]);
              qh = mpn_sbpi1_div_q (qp, new_np, new_nn, new_dp, dn, dinv.inv32);
            }
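          /* Choose between dcpi1 and mu.  The last, slow-to-evaluate
             condition interpolates the crossover linearly between the two
             tuned thresholds: dividing it through by nn gives
             dn < 2*(MU-MUPI)*(dn/nn) + MUPI, which is dn < MU_DIV_Q_THRESHOLD
             at nn == 2*dn and tends to dn < MUPI_DIV_Q_THRESHOLD as nn/dn
             grows.  */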
          else if (BELOW_THRESHOLD (dn, MUPI_DIV_Q_THRESHOLD) ||   /* fast condition */
                   BELOW_THRESHOLD (nn, 2 * MU_DIV_Q_THRESHOLD) || /* fast condition */
                   (double) (2 * (MU_DIV_Q_THRESHOLD - MUPI_DIV_Q_THRESHOLD)) * dn /* slow... */
                   + (double) MUPI_DIV_Q_THRESHOLD * nn > (double) dn * nn)   /* ...condition */
            {
              invert_pi1 (dinv, new_dp[dn - 1], new_dp[dn - 2]);
              qh = mpn_dcpi1_div_q (qp, new_np, new_nn, new_dp, dn, &dinv);
            }
          else
            {
              mp_size_t itch = mpn_mu_div_q_itch (new_nn, dn, 0);
              mp_ptr scratch = TMP_ALLOC_LIMBS (itch);
              qh = mpn_mu_div_q (qp, new_np, new_nn, new_dp, dn, scratch);
            }
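          /* If the shifted dividend did not grow an extra limb (cy == 0),
             the division above produced only qn-1 low quotient limbs plus
             the returned qh, so qh is the high quotient limb.  Otherwise
             all qn limbs were produced, and qh must be zero.  */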
          if (cy == 0)
            qp[qn - 1] = qh;
          else
            ASSERT (qh == 0);
        }
      else  /* divisor is already normalised */
        {
          if (new_np != np)
            MPN_COPY (new_np, np, nn);

          if (dn == 2)
            {
              qh = mpn_divrem_2 (qp, 0L, new_np, nn, dp);
            }
          else if (BELOW_THRESHOLD (dn, DC_DIV_Q_THRESHOLD) ||
                   BELOW_THRESHOLD (nn - dn, DC_DIV_Q_THRESHOLD))
            {
              invert_pi1 (dinv, dh, dp[dn - 2]);
              qh = mpn_sbpi1_div_q (qp, new_np, nn, dp, dn, dinv.inv32);
            }
          else if (BELOW_THRESHOLD (dn, MUPI_DIV_Q_THRESHOLD) ||   /* fast condition */
                   BELOW_THRESHOLD (nn, 2 * MU_DIV_Q_THRESHOLD) || /* fast condition */
                   (double) (2 * (MU_DIV_Q_THRESHOLD - MUPI_DIV_Q_THRESHOLD)) * dn /* slow... */
                   + (double) MUPI_DIV_Q_THRESHOLD * nn > (double) dn * nn)   /* ...condition */
            {
              invert_pi1 (dinv, dh, dp[dn - 2]);
              qh = mpn_dcpi1_div_q (qp, new_np, nn, dp, dn, &dinv);
            }
          else
            {
              mp_size_t itch = mpn_mu_div_q_itch (nn, dn, 0);
              mp_ptr scratch = TMP_ALLOC_LIMBS (itch);
              qh = mpn_mu_div_q (qp, np, nn, dp, dn, scratch);
            }
          qp[nn - dn] = qh;
        }
    }
  else
    {
      /* |________________________|
                |_________________|  */
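      /* The quotient is much shorter than the divisor.  Rather than a full
         division, divide the 2*qn+1 high limbs of N by the qn+1 high limbs
         of D with a divappr function; the extra low quotient limb absorbs
         the approximation error.  After dropping that limb, the quotient is
         either exact or one too large, and is adjusted below.  */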
      tp = TMP_ALLOC_LIMBS (qn + 1);

      new_np = scratch;
      new_nn = 2 * qn + 1;
      if (new_np == np)
        /* We need {np,nn} to remain untouched until the final adjustment, so
           we need to allocate separate space for new_np.  */
        new_np = TMP_ALLOC_LIMBS (new_nn + 1);

      if (LIKELY ((dh & GMP_NUMB_HIGHBIT) == 0))
        {
          count_leading_zeros (cnt, dh);

          cy = mpn_lshift (new_np, np + nn - new_nn, new_nn, cnt);
          new_np[new_nn] = cy;

          new_nn += (cy != 0);
          new_dp = TMP_ALLOC_LIMBS (qn + 1);
          mpn_lshift (new_dp, dp + dn - (qn + 1), qn + 1, cnt);
          new_dp[0] |= dp[dn - (qn + 1) - 1] >> (GMP_NUMB_BITS - cnt);

          if (qn + 1 == 2)
            {
              qh = mpn_divrem_2 (tp, 0L, new_np, new_nn, new_dp);
            }
          else if (BELOW_THRESHOLD (qn, DC_DIVAPPR_Q_THRESHOLD - 1))
            {
              invert_pi1 (dinv, new_dp[qn], new_dp[qn - 1]);
              qh = mpn_sbpi1_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, dinv.inv32);
            }
          else if (BELOW_THRESHOLD (qn, MU_DIVAPPR_Q_THRESHOLD - 1))
            {
              invert_pi1 (dinv, new_dp[qn], new_dp[qn - 1]);
              qh = mpn_dcpi1_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, &dinv);
            }
          else
            {
              mp_size_t itch = mpn_mu_divappr_q_itch (new_nn, qn + 1, 0);
              mp_ptr scratch = TMP_ALLOC_LIMBS (itch);
              qh = mpn_mu_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, scratch);
            }
          if (cy == 0)
            tp[qn] = qh;
          else if (UNLIKELY (qh != 0))
            {
              /* This happens only when the quotient is close to B^n (B
                 being the limb base) and mpn_*_divappr_q returned B^n.
                 Saturate tp to B^n - 1 by filling all of its limbs with
                 GMP_NUMB_MAX.  */
              mp_size_t i, n;
              n = new_nn - (qn + 1);
              for (i = 0; i < n; i++)
                tp[i] = GMP_NUMB_MAX;
              qh = 0;  /* currently ignored */
            }
        }
      else  /* divisor is already normalised */
        {
          MPN_COPY (new_np, np + nn - new_nn, new_nn); /* pointless if MU will be used */

          new_dp = (mp_ptr) dp + dn - (qn + 1);

          if (qn + 1 == 2)
            {
              qh = mpn_divrem_2 (tp, 0L, new_np, new_nn, new_dp);
            }
          else if (BELOW_THRESHOLD (qn, DC_DIVAPPR_Q_THRESHOLD - 1))
            {
              invert_pi1 (dinv, dh, new_dp[qn - 1]);
              qh = mpn_sbpi1_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, dinv.inv32);
            }
          else if (BELOW_THRESHOLD (qn, MU_DIVAPPR_Q_THRESHOLD - 1))
            {
              invert_pi1 (dinv, dh, new_dp[qn - 1]);
              qh = mpn_dcpi1_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, &dinv);
            }
          else
            {
              mp_size_t itch = mpn_mu_divappr_q_itch (new_nn, qn + 1, 0);
              mp_ptr scratch = TMP_ALLOC_LIMBS (itch);
              qh = mpn_mu_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, scratch);
            }
          tp[qn] = qh;
        }

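      /* tp holds qn+1 limbs of an approximate quotient which is never less
         than the true one.  Drop the low limb to form qp.  If the dropped
         limb exceeds the maximal divappr error (taken to be at most 4 here),
         no borrow can propagate into qp, so qp is exact; otherwise multiply
         back and compare against N to decide whether qp is one too large.  */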
      MPN_COPY (qp, tp + 1, qn);
      if (tp[0] <= 4)
        {
          mp_size_t rn;

          rp = TMP_ALLOC_LIMBS (dn + qn);
          mpn_mul (rp, dp, dn, tp + 1, qn);
          rn = dn + qn;
          rn -= rp[rn - 1] == 0;

          if (rn > nn || mpn_cmp (np, rp, nn) < 0)
            MPN_DECR_U (qp, qn, 1);
        }
    }

  TMP_FREE;
}