! google3/third_party/grte/v4_src/glibc-2.19/sysdeps/sparc/sparc32/sparcv8/mul_1.S - GRTEv4 - Git at Google

 ! SPARC v8 __mpn_mul_1 -- Multiply a limb vector with a single limb and
 ! store the product in a second limb vector.

 ! Copyright (C) 1992-2014 Free Software Foundation, Inc.

 ! This file is part of the GNU MP Library.

 ! The GNU MP Library is free software; you can redistribute it and/or modify
 ! it under the terms of the GNU Lesser General Public License as published by
 ! the Free Software Foundation; either version 2.1 of the License, or (at your
 ! option) any later version.

 ! The GNU MP Library is distributed in the hope that it will be useful, but
 ! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 ! or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
 ! License for more details.

 ! You should have received a copy of the GNU Lesser General Public License
 ! along with the GNU MP Library; see the file COPYING.LIB.  If not,
 ! see <http://www.gnu.org/licenses/>.


 ! INPUT PARAMETERS
 ! res_ptr	o0
 ! s1_ptr	o1
 ! size		o2
 ! s2_limb	o3

 #include <sysdep.h>

 ! __mpn_mul_1: for each of the size limbs, walking upward from limb 0,
 ! store the low word of (s1_ptr[i] * s2_limb + carry) to res_ptr[i] and
 ! carry the high word into the next limb.  The final carry word is
 ! returned in %o0.
 !
 ! Register use inside the routine:
 !   %o4  source limb currently being multiplied
 !   %g3  dispatch base on entry, afterwards the low product word
 !   %g2  high product word read back from %y (carry into the next limb)
 !   %g1  byte offset of the entry stub selected by size mod 4
 !   %g4  copy of %o7 kept across the call that is used to find the PC
 !
 ! The main loop is unrolled four times.  The trailing numeric tags on the
 ! instructions appear to number the limb within each group of four.
 !
 ! NOTE(review): size == 0 dispatches like a multiple of four and would
 ! still process four limbs; callers presumably guarantee size >= 1.
 ENTRY(__mpn_mul_1)
 	! g1 = (size mod 4) * 16, the byte offset of one of the four entry
 	! stubs that follow label 3.
 	sll	%o2,4,%g1
 	mov	%o7,%g4			! Save return address register
 	and	%g1,(4-1)<<4,%g1
 	! The call leaves the address of label 1 in %o7; the delay slot adds
 	! the distance to label 3, so %g3 holds the stub table address without
 	! needing an absolute relocation.
 1:	call	2f
 	 add	%o7,3f-1b,%g3
 2:	mov	%g4,%o7			! Restore return address register
 	! Jump into the stub table; the delay slot preloads s1_ptr[0].
 	jmp	%g3+%g1
 	 ld	[%o1+0],%o4	! 1

 	.align	4
 3:
 	! Entry stubs, selected by size mod 4.  Each stub biases res_ptr and
 	! s1_ptr so that the fixed offsets used at its loop entry point address
 	! limb 0 (the 01 stub needs no bias), and uses orcc in the branch
 	! delay slot to zero %g2 and clear the carry flag.
 LOC(00):
 	add	%o0,-4,%o0
 	add	%o1,-4,%o1
 	b	LOC(loop00)		/* 4, 8, 12, ... */
 	 orcc	%g0,%g0,%g2
 LOC(01):
 	b	LOC(loop01)		/* 1, 5, 9, ... */
 	 orcc	%g0,%g0,%g2
 	! Padding so that this stub is four instructions long.
 	nop
 	nop
 LOC(10):
 	add	%o0,-12,%o0	/* 2, 6, 10, ... */
 	add	%o1,4,%o1
 	b	LOC(loop10)
 	 orcc	%g0,%g0,%g2
 	! NOTE(review): this stub is five instructions long, so the dispatch
 	! for size mod 4 == 3 (offset 48 from label 3) appears to land on this
 	! nop and fall through into the 11 stub -- confirm.
 	nop
 LOC(11):
 	add	%o0,-8,%o0	/* 3, 7, 11, ... */
 	add	%o1,-8,%o1
 	b	LOC(loop11)
 	 orcc	%g0,%g0,%g2

 	! Main loop, four limbs per pass.  Every step follows the same pattern:
 	! add the previous high word (and any pending carry) to the low product
 	! word, load the next source limb, store the result limb, then read the
 	! new high word from %y.  The first step uses addcc rather than addxcc
 	! because the carry was already folded into %g2 at the end of the
 	! previous pass.
 LOC(loop):
 	addcc	%g3,%g2,%g3	! 1
 	ld	[%o1+4],%o4	! 2
 	st	%g3,[%o0+0]	! 1
 	rd	%y,%g2		! 1
 LOC(loop00):
 	umul	%o4,%o3,%g3	! 2
 	addxcc	%g3,%g2,%g3	! 2
 	ld	[%o1+8],%o4	! 3
 	st	%g3,[%o0+4]	! 2
 	rd	%y,%g2		! 2
 LOC(loop11):
 	umul	%o4,%o3,%g3	! 3
 	addxcc	%g3,%g2,%g3	! 3
 	ld	[%o1+12],%o4	! 4
 	! Advance s1_ptr by four limbs.
 	add	%o1,16,%o1
 	st	%g3,[%o0+8]	! 3
 	rd	%y,%g2		! 3
 LOC(loop10):
 	umul	%o4,%o3,%g3	! 4
 	addxcc	%g3,%g2,%g3	! 4
 	ld	[%o1+0],%o4	! 1
 	st	%g3,[%o0+12]	! 4
 	! Advance res_ptr by four limbs.
 	add	%o0,16,%o0
 	rd	%y,%g2		! 4
 	! Fold the pending carry into %g2 now, because the counter update
 	! below overwrites the condition codes.
 	addx	%g0,%g2,%g2
 LOC(loop01):
 	! size -= 4; loop again while the result is positive.  The umul in the
 	! delay slot runs whether or not the branch is taken, so the limb
 	! already held in %o4 is multiplied either way.
 	addcc	%o2,-4,%o2
 	bg	LOC(loop)
 	 umul	%o4,%o3,%g3	! 1

 	! Tail: store the last result limb, then return the final high word
 	! plus carry in %o0 (computed in the retl delay slot).
 	addcc	%g3,%g2,%g3	! 4
 	st	%g3,[%o0+0]	! 4
 	rd	%y,%g2		! 4
 	retl
 	 addx	%g0,%g2,%o0

 END(__mpn_mul_1)
	! SPARC v8 __mpn_mul_1 -- Multiply a limb vector with a single limb and
	! store the product in a second limb vector.

	! Copyright (C) 1992-2014 Free Software Foundation, Inc.

	! This file is part of the GNU MP Library.

	! The GNU MP Library is free software; you can redistribute it and/or modify
	! it under the terms of the GNU Lesser General Public License as published by
	! the Free Software Foundation; either version 2.1 of the License, or (at your
	! option) any later version.

	! The GNU MP Library is distributed in the hope that it will be useful, but
	! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
	! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
	! License for more details.

	! You should have received a copy of the GNU Lesser General Public License
	! along with the GNU MP Library; see the file COPYING.LIB. If not,
	! see <http://www.gnu.org/licenses/>.


	! INPUT PARAMETERS
	! res_ptr o0
	! s1_ptr o1
	! size o2
	! s2_limb o3

	#include <sysdep.h>

	! __mpn_mul_1: for each of the size limbs, walking upward from limb 0,
	! store the low word of (s1_ptr[i] * s2_limb + carry) to res_ptr[i] and
	! carry the high word into the next limb.  The final carry word is
	! returned in %o0.
	!
	! NOTE(review): this block defines the same symbol and labels as the
	! __mpn_mul_1 definition that appears earlier in this file; assembling
	! both copies would presumably fail with duplicate definitions. Confirm
	! which copy is meant to be kept.
	!
	! Register use inside the routine:
	!   %o4  source limb currently being multiplied
	!   %g3  dispatch base on entry, afterwards the low product word
	!   %g2  high product word read back from %y (carry into the next limb)
	!   %g1  byte offset of the entry stub selected by size mod 4
	!   %g4  copy of %o7 kept across the call that is used to find the PC
	!
	! The instruction that follows each call, jmp, b, bg and retl below
	! occupies that transfer instruction delay slot; the layout here does
	! not mark delay slots by indentation.
	ENTRY(__mpn_mul_1)
	! g1 = (size mod 4) * 16, the byte offset of one of the four entry
	! stubs that follow label 3.
	sll %o2,4,%g1
	mov %o7,%g4 ! Save return address register
	and %g1,(4-1)<<4,%g1
	! The call leaves the address of label 1 in %o7; the add in its delay
	! slot turns that into the address of label 3, held in %g3.
	1: call 2f
	add %o7,3f-1b,%g3
	2: mov %g4,%o7 ! Restore return address register
	! Jump into the stub table; the delay slot preloads s1_ptr[0].
	jmp %g3+%g1
	ld [%o1+0],%o4 ! 1

	.align 4
	3:
	! Entry stubs, selected by size mod 4.  Each stub biases res_ptr and
	! s1_ptr so that the fixed offsets used at its loop entry point address
	! limb 0 (the 01 stub needs no bias), and uses orcc in the branch
	! delay slot to zero %g2 and clear the carry flag.
	LOC(00):
	add %o0,-4,%o0
	add %o1,-4,%o1
	b LOC(loop00) /* 4, 8, 12, ... */
	orcc %g0,%g0,%g2
	LOC(01):
	b LOC(loop01) /* 1, 5, 9, ... */
	orcc %g0,%g0,%g2
	! Padding so that this stub is four instructions long.
	nop
	nop
	LOC(10):
	add %o0,-12,%o0 /* 2, 6, 10, ... */
	add %o1,4,%o1
	b LOC(loop10)
	orcc %g0,%g0,%g2
	! NOTE(review): this stub is five instructions long, so the dispatch
	! for size mod 4 == 3 (offset 48 from label 3) appears to land on this
	! nop and fall through into the 11 stub -- confirm.
	nop
	LOC(11):
	add %o0,-8,%o0 /* 3, 7, 11, ... */
	add %o1,-8,%o1
	b LOC(loop11)
	orcc %g0,%g0,%g2

	! Main loop, four limbs per pass.  Every step follows the same pattern:
	! add the previous high word (and any pending carry) to the low product
	! word, load the next source limb, store the result limb, then read the
	! new high word from %y.  The first step uses addcc rather than addxcc
	! because the carry was already folded into %g2 at the end of the
	! previous pass.
	LOC(loop):
	addcc %g3,%g2,%g3 ! 1
	ld [%o1+4],%o4 ! 2
	st %g3,[%o0+0] ! 1
	rd %y,%g2 ! 1
	LOC(loop00):
	umul %o4,%o3,%g3 ! 2
	addxcc %g3,%g2,%g3 ! 2
	ld [%o1+8],%o4 ! 3
	st %g3,[%o0+4] ! 2
	rd %y,%g2 ! 2
	LOC(loop11):
	umul %o4,%o3,%g3 ! 3
	addxcc %g3,%g2,%g3 ! 3
	ld [%o1+12],%o4 ! 4
	! Advance s1_ptr by four limbs.
	add %o1,16,%o1
	st %g3,[%o0+8] ! 3
	rd %y,%g2 ! 3
	LOC(loop10):
	umul %o4,%o3,%g3 ! 4
	addxcc %g3,%g2,%g3 ! 4
	ld [%o1+0],%o4 ! 1
	st %g3,[%o0+12] ! 4
	! Advance res_ptr by four limbs.
	add %o0,16,%o0
	rd %y,%g2 ! 4
	! Fold the pending carry into %g2 now, because the counter update
	! below overwrites the condition codes.
	addx %g0,%g2,%g2
	LOC(loop01):
	! size -= 4; loop again while the result is positive.  The umul after
	! the branch sits in its delay slot and runs whether or not the branch
	! is taken, so the limb already held in %o4 is multiplied either way.
	addcc %o2,-4,%o2
	bg LOC(loop)
	umul %o4,%o3,%g3 ! 1

	! Tail: store the last result limb, then return the final high word
	! plus carry in %o0 (computed in the retl delay slot).
	addcc %g3,%g2,%g3 ! 4
	st %g3,[%o0+0] ! 4
	rd %y,%g2 ! 4
	retl
	addx %g0,%g2,%o0

	END(__mpn_mul_1)