src/gas/round.S - open64_libacml_mv - Git at Google


 #
 #  (C) 2008-2009 Advanced Micro Devices, Inc. All Rights Reserved.
 #
 #  This file is part of libacml_mv.
 #
 #  libacml_mv is free software; you can redistribute it and/or
 #  modify it under the terms of the GNU Lesser General Public
 #  License as published by the Free Software Foundation; either
 #  version 2.1 of the License, or (at your option) any later version.
 #
 #  libacml_mv is distributed in the hope that it will be useful,
 #  but WITHOUT ANY WARRANTY; without even the implied warranty of
 #  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 #  Lesser General Public License for more details.
 #
 #  You should have received a copy of the GNU Lesser General Public
 #  License along with libacml_mv.  If not, see
 #  <http://www.gnu.org/licenses/>.
 #
 #


 # round.S
 #
 # An implementation of the round libm function.
 #
 # Prototype:
 #
 #     double round(double x);
 #

 #
 #   Algorithm: First get the exponent of the input
 #              double precision number.
 #              IF exponent is greater than 51 then return the
 #              input as is.
 #              IF exponent is less than 0 then force an overflow
 #              by adding a huge number and subtracting with the
 #              same number.
 #              IF exponent is greater than 0 then add 0.5 and
 #              and shift the mantissa bits based on the exponent
 #              value to discard the fractional component.
 #
 #

 #include "fn_macros.h"
 #define fname FN_PROTOTYPE(round)
 #define fname_special _round_special


 # local variable storage offsets


 #ifdef __ELF__
 .section .note.GNU-stack,"",@progbits
 #endif

 .text
 .align 16
 .p2align 4,,15
 .globl fname
 .type fname,@function
 #-----------------------------------------------------------------------
 # Prototype: double round(double x)
 #
 # Input:     xmm0 (low 64 bits) = x
 # Output:    xmm0 (low 64 bits) = x rounded to an integral value,
 #            halfway cases rounded away from zero (this relies on
 #            the MXCSR rounding mode being round-to-nearest)
 # Clobbers:  xmm1-xmm5, r9, r10, flags
 #
 # Paths, selected by the unbiased exponent e of x:
 #   exponent field all ones : infinity is returned unchanged, a NaN
 #                             is returned with its quiet bit set
 #   e > 51                  : x is returned unchanged
 #   e < 0                   : (|x| + C) - C with C = 2^52 + 1,
 #                             then the sign of x is restored
 #   e in 0..51              : |x| + .L__zero_point_5, fraction bits
 #                             cleared, then the sign of x is restored
 #
 # Note: SSE5 has roundss/roundsd instructions.
 #-----------------------------------------------------------------------
 fname:
     movsd   .L__2p52_plus_one(%rip),%xmm4    # xmm4 = 2^52 + 1
     movsd   .L__sign_mask_64(%rip),%xmm5     # xmm5 = mask that clears the sign bit
     mov     $52,%r10                         # r10 = mantissa length in bits
     # Take 3 copies of the input xmm0
     movsd   %xmm0,%xmm1
     movsd   %xmm0,%xmm2
     movsd   %xmm0,%xmm3
     # Get the most significant 16 bits of the input in r9, with the
     # sign and mantissa bits already masked off
     pand    .L__exp_mask_64(%rip),%xmm1
     pextrw  $3,%xmm1,%r9
     cmp     $0x7FF0,%r9
     # Exponent field all ones: infinity or NaN
     jz      .L__is_infinity
     movsd   .L__sign_mask_64(%rip),%xmm1
     pandn   %xmm2,%xmm1                      # xmm1 = sign bit of the input
     # Drop the 4 mantissa bits and remove the bias;
     # r9 then holds the unbiased exponent
     shr     $0x4,%r9
     sub     $0x3FF,%r9
     # The flags of the sub are used directly. The operand range is
     # 0..0x7FE here, so the subtraction cannot overflow.
     jl      .L__number_less_than_zero        # exponent < 0, that is |x| < 1

     # Exponent is 0 or greater
 .L__number_greater_than_zero:
     cmp     $51,%r9
     jg      .L__is_greater_than_2p52         # no fraction bits left to round

     # Exponent is in 0..51
     pand    .L__sign_mask_64(%rip),%xmm0     # xmm0 = |x|
     # Add the rounding constant
     addsd   .L__zero_point_5(%rip),%xmm0
     movsd   %xmm0,%xmm5

     pand    .L__exp_mask_64(%rip),%xmm5      # xmm5 = exponent field of the sum
     pand    .L__mantissa_mask_64(%rip),%xmm0 # xmm0 = mantissa field of the sum
     # r10 = 52 (mantissa length) - r9 (input exponent)
     sub     %r9,%r10
     movd    %r10,%xmm2
     # Shift right and back left by r10 bits to clear the low r10
     # bits of the mantissa field
     psrlq   %xmm2,%xmm0
     psllq   %xmm2,%xmm0
     # OR the exponent of the sum with the input sign
     por     %xmm1,%xmm5
     # Finally OR in the truncated mantissa
     por     %xmm5,%xmm0
     ret

     # Exponent is less than 0
 .L__number_less_than_zero:
     pand    %xmm5,%xmm3                      # xmm3 = |x|
     # Adding 2^52 + 1 leaves no room for fraction bits, so the add
     # rounds |x| to an integer; the subtract removes the offset again.
     # With round-to-nearest this gives 0 for |x| < 0.5 and 1 otherwise.
     addsd   %xmm4,%xmm3
     subsd   %xmm4,%xmm3
     por     %xmm1,%xmm3                      # restore the sign of the input
     movsd   %xmm3,%xmm0
     ret

     # Input is infinity or NaN
 .L__is_infinity:
     # An unordered compare sets the parity flag, which happens only when
     # an operand is a NaN. ucomisd is used rather than comisd so that a
     # quiet NaN input does not raise the invalid-operation exception;
     # a signalling NaN still does.
     ucomisd %xmm4,%xmm0
     jnp     .L__is_zero                      # ordered: infinity, return as is
 .L__is_nan:
     por     .L__qnan_mask_64(%rip),%xmm0     # set the quiet NaN bit
 .L__is_zero:
 .L__is_greater_than_2p52:
     ret

 # Constant pool. Every entry is padded to 16 bytes and the pool starts
 # on a 16-byte boundary, so each label can be used as an aligned
 # 128-bit memory operand of pand/por as well as a movsd/addsd source.
 .align 16
 .L__sign_mask_64:          .quad 0x7FFFFFFFFFFFFFFF  # every bit except the sign bit
                            .quad 0

 .L__qnan_mask_64:          .quad 0x0008000000000000  # most significant mantissa bit (quiet NaN bit)
                            .quad 0
 .L__exp_mask_64:           .quad 0x7FF0000000000000  # exponent field
                            .quad 0
 .L__mantissa_mask_64:      .quad 0x000FFFFFFFFFFFFF  # mantissa field
                            .quad 0
 .L__zero:                  .quad 0x0000000000000000
                            .quad 0
 .L__2p52_plus_one:         .quad 0x4330000000000001  # = 4503599627370497.0
                            .quad 0
 # This value has to be exactly 0.5. With the lowest mantissa bit also
 # set (0.5 + 2^-53) the sum for x = 1.5 - 2^-52 is an exact tie that
 # rounds to 2.0, which made round return 2.0 instead of 1.0.
 .L__zero_point_5:          .quad 0x3FE0000000000000  # = 0.5
                            .quad 0

	#
	# (C) 2008-2009 Advanced Micro Devices, Inc. All Rights Reserved.
	#
	# This file is part of libacml_mv.
	#
	# libacml_mv is free software; you can redistribute it and/or
	# modify it under the terms of the GNU Lesser General Public
	# License as published by the Free Software Foundation; either
	# version 2.1 of the License, or (at your option) any later version.
	#
	# libacml_mv is distributed in the hope that it will be useful,
	# but WITHOUT ANY WARRANTY; without even the implied warranty of
	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
	# Lesser General Public License for more details.
	#
	# You should have received a copy of the GNU Lesser General Public
	# License along with libacml_mv. If not, see
	# <http://www.gnu.org/licenses/>.
	#
	#


	# round.S
	#
	# An implementation of the round libm function.
	#
	# Prototype:
	#
	# double round(double x);
	#

	#
	# Algorithm: First get the exponent of the input
	# double precision number.
	# IF exponent is greater than 51 then return the
	# input as is.
	# IF exponent is less than 0 then force an overflow
	# by adding a huge number and subtracting with the
	# same number.
	# IF exponent is greater than 0 then add 0.5 and
	# and shift the mantissa bits based on the exponent
	# value to discard the fractional component.
	#
	#

	#include "fn_macros.h"
	#define fname FN_PROTOTYPE(round)
	#define fname_special _round_special


	# local variable storage offsets


	#ifdef __ELF__
	.section .note.GNU-stack,"",@progbits
	#endif

	.text
	.align 16
	.p2align 4,,15
	.globl fname
	.type fname,@function
	#-----------------------------------------------------------------------
	# Prototype: double round(double x)
	#
	# Input:     xmm0 (low 64 bits) = x
	# Output:    xmm0 (low 64 bits) = x rounded to an integral value,
	#            halfway cases rounded away from zero (this relies on
	#            the MXCSR rounding mode being round-to-nearest)
	# Clobbers:  xmm1-xmm5, r9, r10, flags
	#
	# Paths, selected by the unbiased exponent e of x:
	#   exponent field all ones : infinity is returned unchanged, a NaN
	#                             is returned with its quiet bit set
	#   e > 51                  : x is returned unchanged
	#   e < 0                   : (|x| + C) - C with C = 2^52 + 1,
	#                             then the sign of x is restored
	#   e in 0..51              : |x| + .L__zero_point_5, fraction bits
	#                             cleared, then the sign of x is restored
	#
	# Note: SSE5 has roundss/roundsd instructions.
	#-----------------------------------------------------------------------
	fname:
	movsd   .L__2p52_plus_one(%rip),%xmm4    # xmm4 = 2^52 + 1
	movsd   .L__sign_mask_64(%rip),%xmm5     # xmm5 = mask that clears the sign bit
	mov     $52,%r10                         # r10 = mantissa length in bits
	# Take 3 copies of the input xmm0
	movsd   %xmm0,%xmm1
	movsd   %xmm0,%xmm2
	movsd   %xmm0,%xmm3
	# Get the most significant 16 bits of the input in r9, with the
	# sign and mantissa bits already masked off
	pand    .L__exp_mask_64(%rip),%xmm1
	pextrw  $3,%xmm1,%r9
	cmp     $0x7FF0,%r9
	# Exponent field all ones: infinity or NaN
	jz      .L__is_infinity
	movsd   .L__sign_mask_64(%rip),%xmm1
	pandn   %xmm2,%xmm1                      # xmm1 = sign bit of the input
	# Drop the 4 mantissa bits and remove the bias;
	# r9 then holds the unbiased exponent
	shr     $0x4,%r9
	sub     $0x3FF,%r9
	# The flags of the sub are used directly. The operand range is
	# 0..0x7FE here, so the subtraction cannot overflow.
	jl      .L__number_less_than_zero        # exponent < 0, that is |x| < 1

	# Exponent is 0 or greater
	.L__number_greater_than_zero:
	cmp     $51,%r9
	jg      .L__is_greater_than_2p52         # no fraction bits left to round

	# Exponent is in 0..51
	pand    .L__sign_mask_64(%rip),%xmm0     # xmm0 = |x|
	# Add the rounding constant
	addsd   .L__zero_point_5(%rip),%xmm0
	movsd   %xmm0,%xmm5

	pand    .L__exp_mask_64(%rip),%xmm5      # xmm5 = exponent field of the sum
	pand    .L__mantissa_mask_64(%rip),%xmm0 # xmm0 = mantissa field of the sum
	# r10 = 52 (mantissa length) - r9 (input exponent)
	sub     %r9,%r10
	movd    %r10,%xmm2
	# Shift right and back left by r10 bits to clear the low r10
	# bits of the mantissa field
	psrlq   %xmm2,%xmm0
	psllq   %xmm2,%xmm0
	# OR the exponent of the sum with the input sign
	por     %xmm1,%xmm5
	# Finally OR in the truncated mantissa
	por     %xmm5,%xmm0
	ret

	# Exponent is less than 0
	.L__number_less_than_zero:
	pand    %xmm5,%xmm3                      # xmm3 = |x|
	# Adding 2^52 + 1 leaves no room for fraction bits, so the add
	# rounds |x| to an integer; the subtract removes the offset again.
	# With round-to-nearest this gives 0 for |x| < 0.5 and 1 otherwise.
	addsd   %xmm4,%xmm3
	subsd   %xmm4,%xmm3
	por     %xmm1,%xmm3                      # restore the sign of the input
	movsd   %xmm3,%xmm0
	ret

	# Input is infinity or NaN
	.L__is_infinity:
	# An unordered compare sets the parity flag, which happens only when
	# an operand is a NaN. ucomisd is used rather than comisd so that a
	# quiet NaN input does not raise the invalid-operation exception;
	# a signalling NaN still does.
	ucomisd %xmm4,%xmm0
	jnp     .L__is_zero                      # ordered: infinity, return as is
	.L__is_nan:
	por     .L__qnan_mask_64(%rip),%xmm0     # set the quiet NaN bit
	.L__is_zero:
	.L__is_greater_than_2p52:
	ret

	# Constant pool. Every entry is padded to 16 bytes and the pool starts
	# on a 16-byte boundary, so each label can be used as an aligned
	# 128-bit memory operand of pand/por as well as a movsd/addsd source.
	.align 16
	.L__sign_mask_64: .quad 0x7FFFFFFFFFFFFFFF # every bit except the sign bit
	.quad 0

	.L__qnan_mask_64: .quad 0x0008000000000000 # most significant mantissa bit (quiet NaN bit)
	.quad 0
	.L__exp_mask_64: .quad 0x7FF0000000000000 # exponent field
	.quad 0
	.L__mantissa_mask_64: .quad 0x000FFFFFFFFFFFFF # mantissa field
	.quad 0
	.L__zero: .quad 0x0000000000000000
	.quad 0
	.L__2p52_plus_one: .quad 0x4330000000000001 # = 4503599627370497.0
	.quad 0
	# This value has to be exactly 0.5. With the lowest mantissa bit also
	# set (0.5 + 2^-53) the sum for x = 1.5 - 2^-52 is an exact tie that
	# rounds to 2.0, which made round return 2.0 instead of 1.0.
	.L__zero_point_5: .quad 0x3FE0000000000000 # = 0.5
	.quad 0