| dnl ARM64 mpn_divrem_1 and mpn_preinv_divrem_1. |
| |
| dnl Contributed to the GNU project by Torbjörn Granlund. |
| |
| dnl Copyright 2020 Free Software Foundation, Inc. |
| |
| dnl This file is part of the GNU MP Library. |
| dnl |
| dnl The GNU MP Library is free software; you can redistribute it and/or modify |
| dnl it under the terms of either: |
| dnl |
| dnl * the GNU Lesser General Public License as published by the Free |
| dnl Software Foundation; either version 3 of the License, or (at your |
| dnl option) any later version. |
| dnl |
| dnl or |
| dnl |
| dnl * the GNU General Public License as published by the Free Software |
| dnl Foundation; either version 2 of the License, or (at your option) any |
| dnl later version. |
| dnl |
| dnl or both in parallel, as here. |
| dnl |
| dnl The GNU MP Library is distributed in the hope that it will be useful, but |
| dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY |
| dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
| dnl for more details. |
| dnl |
| dnl You should have received copies of the GNU General Public License and the |
| dnl GNU Lesser General Public License along with the GNU MP Library. If not, |
| dnl see https://www.gnu.org/licenses/. |
| |
| include(`config.m4') |
| |
| dnl TODO |
| dnl * Handle the most significant quotient limb for the unnormalised case |
| dnl specially, just like in the C code. (It is very often 0.) |
| |
| dnl Incoming argument registers, in the order of the prototypes given below. |
| dnl dinv_arg and cnt_arg are only read by the mpn_preinv_divrem_1 entry. |
| define(`qp_arg', x0) |
| define(`fn_arg', x1) |
| define(`np_arg', x2) |
| define(`n_arg', x3) |
| define(`d_arg', x4) |
| define(`dinv_arg', x5) |
| define(`cnt_arg', x6) |
| |
| dnl Working registers. x19-x24 are stored into the stack frame by both entry |
| dnl points and reloaded before each return that follows those stores. |
| dnl dinv is x0, the same register as qp_arg, so qp has to be derived from |
| dnl qp_arg before x0 is overwritten. tnc (x8) is only set up on the |
| dnl unnormalised path. |
| define(`qp', x19) |
| define(`np', x20) |
| define(`n', x21) |
| define(`d', x22) |
| define(`fn', x24) |
| define(`dinv', x0) |
| define(`cnt', x23) |
| define(`tnc', x8) |
| |
| dnl mp_limb_t |
| dnl mpn_divrem_1 (mp_ptr qp, mp_size_t fn, |
| dnl mp_srcptr np, mp_size_t n, |
| dnl mp_limb_t d_unnorm) |
| |
| dnl mp_limb_t |
| dnl mpn_preinv_divrem_1 (mp_ptr qp, mp_size_t fn, |
| dnl mp_srcptr np, mp_size_t n, |
| dnl mp_limb_t d_unnorm, mp_limb_t dinv, int cnt) |
| |
| ASM_START() |
| |
| C Entry point taking a caller-supplied inverse (dinv_arg) and shift count |
| C (cnt_arg). It builds the same frame and working-register state as |
| C mpn_divrem_1 and then branches into the loops of that function at |
| C L(nentry) or L(uentry), i.e. just past the MPN(invert_limb) calls. |
| PROLOGUE(mpn_preinv_divrem_1) |
| C n_arg = 0: go and zero-fill the quotient; no frame has been created yet. |
| cbz n_arg, L(fz) |
| C Allocate an 80-byte frame and save x29/x30 plus x19-x24 in it. |
| stp x29, x30, [sp, #-80]! |
| mov x29, sp |
| stp x19, x20, [sp, #16] |
| stp x21, x22, [sp, #32] |
| stp x23, x24, [sp, #48] |
| |
| C n = n_arg - 1. np -> np_arg[n] and qp -> qp_arg[n + fn_arg] (8-byte |
| C limbs); the loops step both pointers downwards with post-decrement. |
| sub n, n_arg, #1 |
| add x7, n, fn_arg |
| add np, np_arg, n, lsl #3 |
| add qp, qp_arg, x7, lsl #3 |
| mov fn, fn_arg |
| mov d, d_arg |
| C dinv is x0 (= qp_arg), so it is written only now that qp has been formed. |
| mov dinv, dinv_arg |
| C Bit 63 of d set: take the normalised path, which does not read cnt. |
| tbnz d_arg, #63, L(nentry) |
| C Otherwise pick up the caller-supplied shift count for the unnormalised path. |
| mov cnt, cnt_arg |
| b L(uentry) |
| EPILOGUE() |
| |
| C mpn_divrem_1: divide the n_arg limbs at np_arg by the single limb d_arg, |
| C then develop fn_arg further quotient limbs from the remainder with zero as |
| C the incoming low limb. n_arg + fn_arg quotient limbs are stored, downwards |
| C from qp_arg[n_arg - 1 + fn_arg]. The final remainder is returned in x0. |
| C |
| C Register roles inside the loops: |
| C   x11 running remainder r |
| C   x1  current (shifted) numerator limb |
| C   x2  quotient limb being formed |
| C   x10, x17, x14, x9, x7 scratch |
| C |
| C Every quotient step has the same shape: |
| C   x2:x10 = r * dinv + ((r + 1):x1) |
| C   r      = x1 - x2 * d |
| C   if x10 < r (unsigned): r += d, x2 -= 1 |
| C   if r >= d:             r -= d, x2 += 1   (not done in the fn loop) |
| PROLOGUE(mpn_divrem_1) |
| C n_arg = 0: go and zero-fill the quotient; no frame has been created yet. |
| cbz n_arg, L(fz) |
| C Allocate an 80-byte frame and save x29/x30 plus x19-x24 in it. |
| stp x29, x30, [sp, #-80]! |
| mov x29, sp |
| stp x19, x20, [sp, #16] |
| stp x21, x22, [sp, #32] |
| stp x23, x24, [sp, #48] |
| |
| C n = n_arg - 1. np -> np_arg[n] and qp -> qp_arg[n + fn_arg] (8-byte |
| C limbs); the loops step both pointers downwards with post-decrement. |
| sub n, n_arg, #1 |
| add x7, n, fn_arg |
| add np, np_arg, n, lsl #3 |
| add qp, qp_arg, x7, lsl #3 |
| mov fn, fn_arg |
| mov d, d_arg |
| C Bit 63 of d set: divisor is already normalised. |
| tbnz d_arg, #63, L(normalised) |
| |
| C Unnormalised divisor: cnt = number of leading zero bits of d. The shifted |
| C divisor is passed to MPN(invert_limb); the value left in x0 by that call |
| C is then used as dinv. |
| L(unnorm): |
| clz cnt, d |
| lsl x0, d, cnt |
| bl GSYM_PREFIX`'MPN(invert_limb) |
| C mpn_preinv_divrem_1 joins here with dinv and cnt already set. |
| L(uentry): |
| lsl d, d, cnt |
| ldr x7, [np], #-8 |
| C tnc = -cnt, used as the complementary right-shift amount (64 - cnt, |
| C relying on register-specified shifts using the amount modulo 64). |
| sub tnc, xzr, cnt |
| lsr x11, x7, tnc C r |
| lsl x1, x7, cnt |
| C n = 0 means there was a single limb: only the final step remains. |
| cbz n, L(uend) |
| |
| C Main unnormalised loop, one quotient limb per iteration. x7 is the next |
| C lower limb; its top bits complete x1 before the step, and its remaining |
| C bits, shifted left by cnt, become x1 for the following iteration. |
| L(utop):ldr x7, [np], #-8 |
| add x2, x11, #1 |
| mul x10, x11, dinv |
| umulh x17, x11, dinv |
| lsr x9, x7, tnc |
| orr x1, x1, x9 |
| adds x10, x1, x10 |
| adc x2, x2, x17 |
| msub x11, d, x2, x1 |
| lsl x1, x7, cnt |
| C Carry clear after the cmp means x10 < x11: add d back and, through sbc |
| C with the same carry, take one off the quotient limb. |
| cmp x10, x11 |
| add x14, x11, d |
| csel x11, x14, x11, cc |
| sbc x2, x2, xzr |
| C Second adjustment (r >= d) is handled out of line at L(ufx). |
| cmp x11, d |
| bcs L(ufx) |
| L(uok): str x2, [qp], #-8 |
| sub n, n, #1 |
| cbnz n, L(utop) |
| |
| C Final unnormalised step: x1 already holds the last shifted limb and there |
| C is no further limb to load. The r >= d adjustment is done branch-free here. |
| L(uend):add x2, x11, #1 |
| mul x10, x11, dinv |
| umulh x17, x11, dinv |
| adds x10, x1, x10 |
| adc x2, x2, x17 |
| msub x11, d, x2, x1 |
| cmp x10, x11 |
| add x14, x11, d |
| csel x11, x14, x11, cc |
| sbc x2, x2, xzr |
| C Carry set from subs means r >= d: bump the quotient limb and keep r - d. |
| subs x14, x11, d |
| adc x2, x2, xzr |
| csel x11, x14, x11, cs |
| str x2, [qp], #-8 |
| |
| C With fn != 0 continue in the fn loop; cnt keeps the shift count, so the |
| C shared exit there shifts the remainder back down. Otherwise return |
| C r >> cnt and restore the saved registers. |
| cbnz fn, L(ftop) |
| lsr x0, x11, cnt |
| ldp x19, x20, [sp, #16] |
| ldp x21, x22, [sp, #32] |
| ldp x23, x24, [sp, #48] |
| ldp x29, x30, [sp], #80 |
| ret |
| |
| C Out-of-line second adjustment for L(utop): r >= d, so q += 1 and r -= d. |
| L(ufx): add x2, x2, #1 |
| sub x11, x11, d |
| b L(uok) |
| |
| |
| C Normalised divisor: no shifting is needed, d itself is passed to |
| C MPN(invert_limb) and the value left in x0 is used as dinv. |
| L(normalised): |
| mov x0, d |
| bl GSYM_PREFIX`'MPN(invert_limb) |
| C mpn_preinv_divrem_1 joins here with dinv already set. |
| L(nentry): |
| C First limb handled by comparison only: if limb >= d the quotient limb is 1 |
| C and r = limb - d, otherwise the quotient limb is 0 and r = limb. |
| ldr x7, [np], #-8 |
| subs x14, x7, d |
| adc x2, xzr, xzr C hi q limb |
| csel x11, x14, x7, cs |
| b L(nok) |
| |
| C Main normalised loop: same step as L(utop), with x1 loaded directly. |
| L(ntop):ldr x1, [np], #-8 |
| add x2, x11, #1 |
| mul x10, x11, dinv |
| umulh x17, x11, dinv |
| adds x10, x1, x10 |
| adc x2, x2, x17 |
| msub x11, d, x2, x1 |
| cmp x10, x11 |
| add x14, x11, d |
| csel x11, x14, x11, cc C remainder |
| sbc x2, x2, xzr |
| cmp x11, d |
| bcs L(nfx) |
| C Store the quotient limb, then loop while n is still non-negative after the |
| C decrement (bit 63 clear). |
| L(nok): str x2, [qp], #-8 |
| sub n, n, #1 |
| tbz n, #63, L(ntop) |
| |
| C With fn != 0 continue in the fn loop, otherwise return r as it is. |
| L(nend):cbnz fn, L(frac) |
| mov x0, x11 |
| ldp x19, x20, [sp, #16] |
| ldp x21, x22, [sp, #32] |
| ldp x23, x24, [sp, #48] |
| ldp x29, x30, [sp], #80 |
| ret |
| |
| C Out-of-line second adjustment for L(ntop): r >= d, so q += 1 and r -= d. |
| L(nfx): add x2, x2, #1 |
| sub x11, x11, d |
| b L(nok) |
| |
| C fn loop, shared by both paths. The normalised path enters at L(frac) and |
| C clears cnt so that the final shift below is by zero; the unnormalised path |
| C enters at L(ftop) with its shift count intact. The incoming low limb is |
| C zero here, hence the plain add instead of adds/adc and xzr in the msub. |
| L(frac):mov cnt, #0 |
| L(ftop):add x2, x11, #1 |
| mul x10, x11, dinv |
| umulh x17, x11, dinv |
| add x2, x2, x17 |
| msub x11, d, x2, xzr |
| cmp x10, x11 |
| add x14, x11, d |
| csel x11, x14, x11, cc C remainder |
| sbc x2, x2, xzr |
| str x2, [qp], #-8 |
| sub fn, fn, #1 |
| cbnz fn, L(ftop) |
| |
| C Return r >> cnt and restore the saved registers. |
| lsr x0, x11, cnt |
| ldp x19, x20, [sp, #16] |
| ldp x21, x22, [sp, #32] |
| ldp x23, x24, [sp, #48] |
| ldp x29, x30, [sp], #80 |
| ret |
| |
| C Block zero. We need this for the degenerate case of n = 0, fn != 0. |
| C Reached from either entry point before any frame is set up, so it works on |
| C the argument registers directly: store fn_arg zero limbs upwards from |
| C qp_arg and return 0. |
| L(fz): cbz fn_arg, L(zend) |
| L(ztop):str xzr, [qp_arg], #8 |
| sub fn_arg, fn_arg, #1 |
| cbnz fn_arg, L(ztop) |
| L(zend):mov x0, #0 |
| ret |
| EPILOGUE() |