src/gas/nearbyint.S - open64_libacml_mv - Git at Google


 #
 #  (C) 2008-2009 Advanced Micro Devices, Inc. All Rights Reserved.
 #
 #  This file is part of libacml_mv.
 #
 #  libacml_mv is free software; you can redistribute it and/or
 #  modify it under the terms of the GNU Lesser General Public
 #  License as published by the Free Software Foundation; either
 #  version 2.1 of the License, or (at your option) any later version.
 #
 #  libacml_mv is distributed in the hope that it will be useful,
 #  but WITHOUT ANY WARRANTY; without even the implied warranty of
 #  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 #  Lesser General Public License for more details.
 #
 #  You should have received a copy of the GNU Lesser General Public
 #  License along with libacml_mv.  If not, see
 #  <http://www.gnu.org/licenses/>.
 #
 #


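 # nearbyint.S
 #
 # An implementation of the nearbyint libm function.
 #
 # Prototype:
 #
 #     double nearbyint(double x);
 #

 #
 #   Algorithm:
 #
 #     When |x| >= 2^52 (this includes infinities) x is already integral and
 #     is returned unchanged; a NaN is returned as x + x.  Otherwise, with
 #     s = 2^52 carrying the sign of x, the sum x + s has no fraction bits,
 #     so the addition itself rounds x.  The result is the magnitude of
 #     s - (x + s) with the sign bit of x ORed back in.
 #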

 # FN_PROTOTYPE is not defined in this file; presumably it comes from
 # fn_macros.h and produces the exported symbol name - confirm there.
 #include "fn_macros.h"
 #define fname FN_PROTOTYPE(nearbyint)
 # NOTE(review): fname_special is not referenced in the visible part of
 # this file - confirm whether it is still needed.
 #define fname_special _nearbyint_special


 # local variable storage offsets
 # (none are defined; the routine below does not access the stack)

 #ifdef __ELF__
 # Emits an empty .note.GNU-stack section on ELF targets; presumably the
 # usual marker that this object needs no executable stack - confirm.
 .section .note.GNU-stack,"",@progbits
 #endif

 .text
 .p2align 4,,15
 .globl fname
 .type fname,@function
 #
 # double nearbyint(double x)
 #
 # Rounds x to an integral value, returned as a double.
 #
 #   In:       xmm0 (low 64 bits) = x
 #   Out:      xmm0 (low 64 bits) = result; upper 64 bits are left untouched
 #   Clobbers: xmm1-xmm5, EFLAGS, MXCSR status flags
 #   Stack:    not used
 #
 # Method: for |x| < 2^52, adding s = 2^52 with the sign of x gives a sum
 # whose magnitude lies in [2^52, 2^53], where doubles have no fraction
 # bits, so the addition itself performs the rounding.  The magnitude of
 # s - (x + s) is then the rounded magnitude, and the sign bit of x is ORed
 # back in so that a result of zero keeps the sign of x.  Inputs with
 # |x| >= 2^52 (including infinities) are returned unchanged, and a NaN
 # input is returned as x + x.
 #
 # Only scalar (sd) arithmetic is used: the register-to-register movsd
 # copies leave the upper 64 bits of the work registers holding stale data,
 # and packed arithmetic on that data could set exception flags that have
 # nothing to do with x.  The range test uses ucomisd, which sets the same
 # EFLAGS bits as comisd but does not signal invalid on a quiet NaN.
 #
 # NOTE(review): the addsd below sets the inexact flag for non-integral x;
 # confirm whether that is acceptable for nearbyint in this library.
 #
 fname:
     movsd   .L__2p52_mask_64(%rip),%xmm2   # xmm2 = 2^52
     movsd   .L__sign_mask_64(%rip),%xmm4   # xmm4 = 0x7FFFFFFFFFFFFFFF
     movsd   %xmm4,%xmm3                    # xmm3 = copy of the mask (xmm4 is overwritten below)
     movsd   %xmm0,%xmm1                    # xmm1 = x
     movsd   %xmm0,%xmm5                    # xmm5 = x
     pand    %xmm4,%xmm1                    # xmm1 = |x|
     ucomisd %xmm2,%xmm1                    # flags <- compare(|x|, 2^52)
     jnc     .L__greater_than_2p52          # CF clear: |x| >= 2^52
     jp      .L__is_infinity_nan            # PF set: unordered, so x is a NaN
 .L__normal_input_case:
     pandn   %xmm5,%xmm4                    # xmm4 = sign bit of x
     por     %xmm4,%xmm2                    # xmm2 = s = 2^52 with the sign of x
     addsd   %xmm2,%xmm5                    # xmm5 = x + s (the rounding happens here)
     subsd   %xmm5,%xmm2                    # xmm2 = s - (x + s)
     pand    %xmm3,%xmm2                    # xmm2 = magnitude of the rounded value
     por     %xmm4,%xmm2                    # put the sign of x back
     movsd   %xmm2,%xmm0                    # return value goes in xmm0
     ret
 .L__greater_than_2p52:
     ret                                    # x is already integral; xmm0 unchanged
 .L__is_infinity_nan:
     addsd   %xmm0,%xmm0                    # NaN in, NaN out
     ret
 .size fname,.-fname
 # Read-only constants loaded by the routine above.  They are placed in the
 # .rodata section rather than in the instruction stream; the pushsection
 # and popsection pair returns the assembler to the section that was active
 # before.  Every value is followed by a zero quadword, so each entry is
 # 16 bytes wide and 16-byte aligned.
 .pushsection .rodata
 .align 16
 .L__sign_mask_64:          .quad 0x7FFFFFFFFFFFFFFF   # every bit except the sign bit
                            .quad 0
 .L__2p52_mask_64:          .quad 0x4330000000000000   # bit pattern of the double 2^52
                            .quad 0
 .L__exp_mask_64:           .quad 0x7FF0000000000000   # exponent-field bits; not referenced in the visible code
                            .quad 0
 .popsection

	#
	# (C) 2008-2009 Advanced Micro Devices, Inc. All Rights Reserved.
	#
	# This file is part of libacml_mv.
	#
	# libacml_mv is free software; you can redistribute it and/or
	# modify it under the terms of the GNU Lesser General Public
	# License as published by the Free Software Foundation; either
	# version 2.1 of the License, or (at your option) any later version.
	#
	# libacml_mv is distributed in the hope that it will be useful,
	# but WITHOUT ANY WARRANTY; without even the implied warranty of
	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
	# Lesser General Public License for more details.
	#
	# You should have received a copy of the GNU Lesser General Public
	# License along with libacml_mv. If not, see
	# <http://www.gnu.org/licenses/>.
	#
	#


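	# nearbyint.S
	#
	# An implementation of the nearbyint libm function.
	#
	# Prototype:
	#
	# double nearbyint(double x);
	#

	#
	# Algorithm:
	#
	# When |x| >= 2^52 (this includes infinities) x is already integral and
	# is returned unchanged; a NaN is returned as x + x.  Otherwise, with
	# s = 2^52 carrying the sign of x, the sum x + s has no fraction bits,
	# so the addition itself rounds x.  The result is the magnitude of
	# s - (x + s) with the sign bit of x ORed back in.
	#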

	# FN_PROTOTYPE is not defined in this file; presumably it comes from
	# fn_macros.h and produces the exported symbol name - confirm there.
	#include "fn_macros.h"
	#define fname FN_PROTOTYPE(nearbyint)
	# NOTE(review): fname_special is not referenced in the visible part of
	# this file - confirm whether it is still needed.
	#define fname_special _nearbyint_special


	# local variable storage offsets
	# (none are defined; the routine below does not access the stack)

	#ifdef __ELF__
	# Emits an empty .note.GNU-stack section on ELF targets; presumably the
	# usual marker that this object needs no executable stack - confirm.
	.section .note.GNU-stack,"",@progbits
	#endif

	.text
	.p2align 4,,15
	.globl fname
	.type fname,@function
	#
	# double nearbyint(double x)
	#
	# Rounds x to an integral value, returned as a double.
	#
	#   In:       xmm0 (low 64 bits) = x
	#   Out:      xmm0 (low 64 bits) = result; upper 64 bits are left untouched
	#   Clobbers: xmm1-xmm5, EFLAGS, MXCSR status flags
	#   Stack:    not used
	#
	# Method: for |x| < 2^52, adding s = 2^52 with the sign of x gives a sum
	# whose magnitude lies in [2^52, 2^53], where doubles have no fraction
	# bits, so the addition itself performs the rounding.  The magnitude of
	# s - (x + s) is then the rounded magnitude, and the sign bit of x is ORed
	# back in so that a result of zero keeps the sign of x.  Inputs with
	# |x| >= 2^52 (including infinities) are returned unchanged, and a NaN
	# input is returned as x + x.
	#
	# Only scalar (sd) arithmetic is used: the register-to-register movsd
	# copies leave the upper 64 bits of the work registers holding stale data,
	# and packed arithmetic on that data could set exception flags that have
	# nothing to do with x.  The range test uses ucomisd, which sets the same
	# EFLAGS bits as comisd but does not signal invalid on a quiet NaN.
	#
	# NOTE(review): the addsd below sets the inexact flag for non-integral x;
	# confirm whether that is acceptable for nearbyint in this library.
	#
	fname:
	    movsd   .L__2p52_mask_64(%rip),%xmm2   # xmm2 = 2^52
	    movsd   .L__sign_mask_64(%rip),%xmm4   # xmm4 = 0x7FFFFFFFFFFFFFFF
	    movsd   %xmm4,%xmm3                    # xmm3 = copy of the mask (xmm4 is overwritten below)
	    movsd   %xmm0,%xmm1                    # xmm1 = x
	    movsd   %xmm0,%xmm5                    # xmm5 = x
	    pand    %xmm4,%xmm1                    # xmm1 = |x|
	    ucomisd %xmm2,%xmm1                    # flags <- compare(|x|, 2^52)
	    jnc     .L__greater_than_2p52          # CF clear: |x| >= 2^52
	    jp      .L__is_infinity_nan            # PF set: unordered, so x is a NaN
	.L__normal_input_case:
	    pandn   %xmm5,%xmm4                    # xmm4 = sign bit of x
	    por     %xmm4,%xmm2                    # xmm2 = s = 2^52 with the sign of x
	    addsd   %xmm2,%xmm5                    # xmm5 = x + s (the rounding happens here)
	    subsd   %xmm5,%xmm2                    # xmm2 = s - (x + s)
	    pand    %xmm3,%xmm2                    # xmm2 = magnitude of the rounded value
	    por     %xmm4,%xmm2                    # put the sign of x back
	    movsd   %xmm2,%xmm0                    # return value goes in xmm0
	    ret
	.L__greater_than_2p52:
	    ret                                    # x is already integral; xmm0 unchanged
	.L__is_infinity_nan:
	    addsd   %xmm0,%xmm0                    # NaN in, NaN out
	    ret
	.size fname,.-fname
	# Read-only constants loaded by the routine above.  They are placed in the
	# .rodata section rather than in the instruction stream; the pushsection
	# and popsection pair returns the assembler to the section that was active
	# before.  Every value is followed by a zero quadword, so each entry is
	# 16 bytes wide and 16-byte aligned.
	.pushsection .rodata
	.align 16
	.L__sign_mask_64: .quad 0x7FFFFFFFFFFFFFFF   # every bit except the sign bit
	.quad 0
	.L__2p52_mask_64: .quad 0x4330000000000000   # bit pattern of the double 2^52
	.quad 0
	.L__exp_mask_64: .quad 0x7FF0000000000000   # exponent-field bits; not referenced in the visible code
	.quad 0
	.popsection