| /* Copyright (C) 2014-2018 Free Software Foundation, Inc. |
| This file is part of the GNU C Library. |
| |
| The GNU C Library is free software; you can redistribute it and/or |
| modify it under the terms of the GNU Lesser General Public |
| License as published by the Free Software Foundation; either |
| version 2.1 of the License, or (at your option) any later version. |
| |
| The GNU C Library is distributed in the hope that it will be useful, |
| but WITHOUT ANY WARRANTY; without even the implied warranty of |
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| Lesser General Public License for more details. |
| |
| You should have received a copy of the GNU Lesser General Public |
| License along with the GNU C Library; if not, see |
| <http://www.gnu.org/licenses/>. */ |
| |
| #include <sysdep.h> |
| |
| /* Implements the functions |
| |
| char * [r3] strncpy (char *dst [r3], const char *src [r4], size_t n [r5]) |
| |
| AND |
| |
| char * [r3] stpncpy (char *dst [r3], const char *src [r4], size_t n [r5]) |
| |
| The algorithm is as follows: |
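| > if src and dest are both 8 byte aligned, perform doubleword copy |
| else |
| > if n <= 16, copy byte by byte; otherwise store leading bytes until |
| dest is 8 byte aligned and then store doublewords assembled from |
| the source doublewords. |
| |
| The null-byte checks on doublewords are made using the cmpb instruction. */ |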
| |
| /* The performance optimizations focus on the following: |
| 1. data alignment [gain from aligned memory access on read/write] |
| 2. POWER7 gains performance with loop unrolling/unwinding |
| [gain by reduction of branch penalty]. |
| 3. The final pad with null bytes is done by calling an optimized |
| memset. */ |
| |
| /* Select the symbol to define: an STPNCPY/STRNCPY override supplied by the |
| including file wins, otherwise use the default name of the variant. */ |
| #if defined USE_AS_STPNCPY && defined STPNCPY |
| # define FUNC_NAME STPNCPY |
| #elif defined USE_AS_STPNCPY |
| # define FUNC_NAME __stpncpy |
| #elif defined STRNCPY |
| # define FUNC_NAME STRNCPY |
| #else |
| # define FUNC_NAME strncpy |
| #endif |
| |
| /* Size of the stack frame created around the call to MEMSET. */ |
| #define FRAMESIZE (FRAME_MIN_SIZE + 16) |
| |
| /* For builds with no IFUNC support, local calls should be made to the |
| internal GLIBC symbol (created by libc_hidden_builtin_def). */ |
| #if !defined MEMSET && defined SHARED |
| # define MEMSET_is_local |
| # define MEMSET __GI_memset |
| #elif !defined MEMSET |
| # define MEMSET memset |
| #endif |
| |
| /* Function entry: r3 = dst, r4 = src, r5 = n. Saves r18/r19, keeps the |
| original dst in r18 and dispatches on the alignment of dst and src. */ |
| .machine power7 |
| #ifdef MEMSET_is_local |
| ENTRY_TOCLESS (FUNC_NAME, 4) |
| #else |
| ENTRY (FUNC_NAME, 4) |
| #endif |
| CALL_MCOUNT 3 |
| |
| or r10, r3, r4 /* merge the address bits of dst and src */ |
| rldicl. r8, r10, 0, 61 /* r8 = (dst | src) & 7; cr0 records whether it is zero */ |
| |
| std r19, -8(r1) /* save caller's r19 below the stack pointer */ |
| std r18, -16(r1) /* save caller's r18 below the stack pointer */ |
| cfi_offset(r19, -8) |
| cfi_offset(r18, -16) |
| |
| mr r9, r3 /* r9 = working dst pointer */ |
| mr r18, r3 /* r18 = original dst, the strncpy return value */ |
| bne 0, L(unaligned) /* a low address bit is set: not both aligned */ |
| |
| /* dst (r3/r9) and src (r4) are both doubleword aligned and r8 is zero. |
| Copies whole doublewords until one contains a null byte or fewer than |
| eight bytes of n remain, then continues at L(byte_by_byte). */ |
| L(aligned): |
| srdi r11, r5, 3 /* r11 = n / 8, the number of whole doublewords */ |
| cmpldi cr7, r11, 3 /* the unrolled loop is used only when r11 > 3 */ |
| ble 7, L(update1) |
| |
| ld r10, 0(r4) /* load doubleword from src */ |
| cmpb r8, r10, r8 /* r8 = 0xff in each byte position that holds a null */ |
| cmpdi cr7, r8, 0 /* r8 == 0 means no null byte; continue */ |
| bne cr7, L(update3) |
| |
| std r10, 0(r3) /* copy doubleword at offset=0 */ |
| ld r10, 8(r4) /* load next doubleword from offset=8 */ |
| cmpb r8, r10, r8 /* check the doubleword just read for a null byte */ |
| cmpdi cr7, r8, 0 /* r8 == 0 means no null byte; continue */ |
| bne 7,L(HopBy8) |
| |
| addi r8, r11, -4 /* CTR = ((r11 - 4) >> 2) + 1 loop iterations */ |
| mr r7, r3 /* r7 = dst at the start of the current group */ |
| srdi r8, r8, 2 |
| mr r6, r4 /* r6 = src at the start of the current group */ |
| addi r8, r8, 1 |
| li r12, 0 /* r12 = all-zero comparand for cmpb */ |
| mtctr r8 |
| b L(dwordCopy) |
| |
| .p2align 4 |
| L(dWordUnroll): |
| std r8, 16(r9) /* copy dword at offset=16 */ |
| ld r8, 24(r4) /* load dword,perform loop unrolling again */ |
| cmpb r10, r8, r10 /* r10 is zero here (previous cmpb found no null) */ |
| cmpdi cr7, r10, 0 |
| bne cr7, L(HopBy24) |
| |
| std r8, 24(r7) /* copy dword at offset=24 */ |
| addi r9, r9, 32 /* advance working dst by 32 */ |
| addi r4, r4, 32 /* advance src by 32 */ |
| bdz L(leftDwords) /* CTR exhausted: handle the remaining dwords */ |
| |
| ld r3, 32(r6) /* first dword of the next group */ |
| cmpb r8, r3, r10 |
| cmpdi cr7, r8, 0 |
| bne cr7, L(update2) |
| |
| std r3, 32(r7) /* copy it; it becomes offset=0 of the next group */ |
| ld r10, 40(r6) /* second dword of the next group */ |
| cmpb r8, r10, r8 |
| cmpdi cr7, r8, 0 |
| bne cr7, L(HopBy40) |
| |
| mr r6, r4 /* update values */ |
| mr r7, r9 |
| mr r11, r0 /* r11 = dword count left (r0 was r11 - 4) */ |
| mr r5, r19 /* r5 = bytes left (r19 was r5 - 32) */ |
| |
| L(dwordCopy): |
| std r10, 8(r9) /* copy dword at offset=8 */ |
| addi r19, r5, -32 /* r19 = n once this 32-byte group is done */ |
| addi r0, r11, -4 /* r0 = dword count once this group is done */ |
| ld r8, 16(r4) |
| cmpb r10, r8, r12 |
| cmpdi cr7, r10, 0 |
| beq cr7, L(dWordUnroll) |
| |
| addi r9, r9, 16 /* increment dst by 16 */ |
| addi r4, r4, 16 /* increment src by 16 */ |
| addi r5, r5, -16 /* decrement length 'n' by 16 */ |
| addi r0, r11, -2 /* decrement loop counter */ |
| |
| /* Non-unrolled copy: r0 = doublewords left, r9 = dst, r4 = src, r5 = n. */ |
| L(dWordUnrollOFF): |
| ld r10, 0(r4) /* load first dword */ |
| li r8, 0 /* load mask */ |
| cmpb r8, r10, r8 |
| cmpdi cr7, r8, 0 |
| bne cr7, L(byte_by_byte) /* null byte present: finish byte by byte */ |
| mtctr r0 |
| li r7, 0 /* r7 = all-zero comparand for cmpb */ |
| b L(CopyDword) |
| |
| .p2align 4 |
| L(loadDWordandCompare): |
| ld r10, 0(r4) |
| cmpb r8, r10, r7 |
| cmpdi cr7, r8, 0 |
| bne cr7, L(byte_by_byte) |
| |
| L(CopyDword): |
| addi r9, r9, 8 /* advance dst, then store at the old position */ |
| std r10, -8(r9) |
| addi r4, r4, 8 |
| addi r5, r5, -8 |
| bdnz L(loadDWordandCompare) |
| |
| /* Byte copy, four bytes per CTR iteration: r9 = dst, r4 = src, r5 = n. |
| r19 is the working dst pointer; r5 is not decremented inside the loop. |
| A null byte leads to L(zeroFill) with r19 just past it and r8 = the |
| number of bytes still to pad; otherwise the last n % 4 bytes are left |
| to L(verifyByte). */ |
| L(byte_by_byte): |
| cmpldi cr7, r5, 3 |
| ble cr7, L(verifyByte) /* fewer than 4 bytes: single-byte loop */ |
| srdi r10, r5, 2 /* CTR = n / 4 */ |
| mr r19, r9 |
| mtctr r10 |
| b L(firstByteUnroll) |
| |
| .p2align 4 |
| L(bytes_unroll): |
| lbz r10, 1(r4) /* load byte from src */ |
| cmpdi cr7, r10, 0 /* compare for NULL */ |
| stb r10, 1(r19) /* store byte to dst */ |
| beq cr7, L(updtDestComputeN2ndByte) |
| |
| addi r4, r4, 4 /* advance src */ |
| |
| lbz r10, -2(r4) /* third byte of the group */ |
| cmpdi cr7, r10, 0 |
| stb r10, 2(r19) |
| beq cr7, L(updtDestComputeN3rdByte) |
| |
| lbz r10, -1(r4) /* fourth byte of the group */ |
| addi r19, r19, 4 /* advance dst */ |
| cmpdi cr7, r10, 0 |
| stb r10, -1(r19) |
| beq cr7, L(ComputeNByte) |
| |
| bdz L(update0) /* all groups of four copied without a null */ |
| |
| L(firstByteUnroll): |
| lbz r10, 0(r4) /* first byte of the group */ |
| cmpdi cr7, r10, 0 /* compare for NULL */ |
| stb r10, 0(r19) |
| bne cr7, L(bytes_unroll) |
| addi r19, r19, 1 /* step past the null just stored */ |
| |
| L(ComputeNByte): |
| subf r9, r19, r9 /* r9 = -(bytes written since L(byte_by_byte)) */ |
| add r8, r9, r5 /* r8 = n - bytes written = bytes to zero fill */ |
| |
| /* Pad the rest of dst: calls MEMSET (r19, 0, r8) when r8 != 0, then |
| returns. r18/r19 were stored below the entry stack pointer, so a |
| frame of FRAMESIZE bytes is pushed around the call. */ |
| L(zeroFill): |
| cmpdi cr7, r8, 0 /* compare if length is zero */ |
| beq cr7, L(update3return) |
| |
| mflr r0 /* load link register LR to r0 */ |
| std r0, 16(r1) /* store the link register */ |
| stdu r1, -FRAMESIZE(r1) /* create the stack frame */ |
| cfi_adjust_cfa_offset(FRAMESIZE) |
| cfi_offset(lr, 16) |
| mr r3, r19 /* first argument: address to start filling at */ |
| li r4, 0 /* second argument: fill byte is zero */ |
| mr r5, r8 /* how many bytes to fill buffer with */ |
| bl MEMSET /* call optimized memset */ |
| #ifndef MEMSET_is_local |
| nop /* NOTE(review): presumably the slot the linker patches after a non-local call -- confirm */ |
| #endif |
| ld r0, FRAMESIZE+16(r1) /* read the saved link register */ |
| addi r1, r1, FRAMESIZE /* restore stack pointer */ |
| cfi_adjust_cfa_offset(-FRAMESIZE) |
| mtlr r0 |
| cfi_restore(lr) |
| |
| L(update3return): |
| #ifdef USE_AS_STPNCPY |
| addi r3, r19, -1 /* stpncpy returns r19 - 1 */ |
| #endif |
| |
| L(hop2return): |
| #ifndef USE_AS_STPNCPY |
| mr r3, r18 /* strncpy returns the original dst */ |
| #endif |
| ld r18, -16(r1) /* restore callers save register, r18 */ |
| ld r19, -8(r1) /* restore callers save register, r19 */ |
| blr /* return */ |
| |
| .p2align 4 |
| /* Reached when the four-byte loop ran out without finding a null byte; |
| r19 is the dst pointer it advanced. */ |
| L(update0): |
| mr r9, r19 |
| |
| .p2align 4 |
| /* Copy the last n % 4 bytes one at a time: r9 = dst, r4 = src, r5 = n. */ |
| L(verifyByte): |
| rldicl. r8, r5, 0, 62 /* r8 = n & 3; cr0 records whether it is zero */ |
| #ifdef USE_AS_STPNCPY |
| mr r3, r9 /* stpncpy result if nothing is left to copy */ |
| #endif |
| beq cr0, L(hop2return) |
| mtctr r8 |
| addi r4, r4, -1 /* bias src for the pre-incrementing lbzu */ |
| mr r19, r9 |
| b L(oneBYone) |
| |
| .p2align 4 |
| L(proceed): |
| bdz L(done) /* count exhausted without a null byte */ |
| |
| L(oneBYone): |
| lbzu r10, 1(r4) /* copy byte */ |
| addi r19, r19, 1 |
| addi r8, r8, -1 /* r8 = bytes left, the pad length for L(zeroFill) */ |
| cmpdi cr7, r10, 0 |
| stb r10, -1(r19) |
| bne cr7, L(proceed) |
| b L(zeroFill) /* null byte stored: pad the remaining r8 bytes */ |
| |
| .p2align 4 |
| /* All n bytes were copied without meeting a null byte: return. */ |
| L(done): |
| #ifdef USE_AS_STPNCPY |
| mr r3, r19 /* set the return value */ |
| #else |
| mr r3, r18 /* set the return value */ |
| #endif |
| ld r18, -16(r1) /* restore callers save register, r18 */ |
| ld r19, -8(r1) /* restore callers save register, r19 */ |
| blr /* return */ |
| |
| /* Fix-up stubs for the aligned path. Each one brings r9 (dst), r4 (src), |
| r5 (n) and r0 (doublewords left) up to date before re-entering |
| L(dWordUnrollOFF), L(byte_by_byte) or L(zeroFill). */ |
| |
| /* Too few doublewords for the unrolled loop. */ |
| L(update1): |
| mr r0, r11 |
| mr r19, r5 |
| |
| .p2align 4 |
| L(leftDwords): |
| cmpdi cr7, r0, 0 /* any whole doublewords left? */ |
| mr r5, r19 |
| bne cr7, L(dWordUnrollOFF) |
| b L(byte_by_byte) |
| |
| .p2align 4 |
| /* The null byte was the second byte of a four-byte group. */ |
| L(updtDestComputeN2ndByte): |
| addi r19, r19, 2 /* update dst by 2 */ |
| subf r9, r19, r9 /* compute distance covered */ |
| add r8, r9, r5 /* r8 = bytes left to zero fill */ |
| b L(zeroFill) |
| |
| .p2align 4 |
| /* The null byte was the third byte of a four-byte group. */ |
| L(updtDestComputeN3rdByte): |
| addi r19, r19, 3 /* update dst by 3 */ |
| subf r9, r19, r9 /* compute distance covered */ |
| add r8, r9, r5 /* r8 = bytes left to zero fill */ |
| b L(zeroFill) |
| |
| .p2align 4 |
| /* Null byte found in the doubleword at offset 24 of the current group. */ |
| L(HopBy24): |
| addi r9, r9, 24 /* increment dst by 24 */ |
| addi r4, r4, 24 /* increment src by 24 */ |
| addi r5, r5, -24 /* decrement length 'n' by 24 */ |
| addi r0, r11, -3 /* decrement loop counter */ |
| b L(dWordUnrollOFF) |
| |
| .p2align 4 |
| /* Null byte found in the first doubleword after a completed group. */ |
| L(update2): |
| mr r5, r19 |
| b L(dWordUnrollOFF) |
| |
| .p2align 4 |
| /* Null byte found in the second doubleword after a completed group. */ |
| L(HopBy40): |
| addi r9, r7, 40 /* increment dst by 40 */ |
| addi r4, r6, 40 /* increment src by 40 */ |
| addi r5, r5, -40 /* decrement length 'n' by 40 */ |
| addi r0, r11, -5 /* decrement loop counter */ |
| b L(dWordUnrollOFF) |
| |
| /* Null byte found in the very first doubleword; nothing copied yet. */ |
| L(update3): |
| mr r0, r11 |
| b L(dWordUnrollOFF) |
| |
| /* Null byte found in the second doubleword; the first one was copied. */ |
| L(HopBy8): |
| addi r9, r3, 8 /* increment dst by 8 */ |
| addi r4, r4, 8 /* increment src by 8 */ |
| addi r5, r5, -8 /* decrement length 'n' by 8 */ |
| addi r0, r11, -1 /* decrement loop counter */ |
| b L(dWordUnrollOFF) |
| |
| /* dst and src are not both doubleword aligned. Short copies go byte by |
| byte. Otherwise the first source doubleword is loaded from the aligned |
| address below src and, unless it holds a null byte, its string bytes are |
| stored singly (or as a word) until dst or src reaches a doubleword |
| boundary. */ |
| L(unaligned): |
| cmpdi r5, 16 /* Proceed byte by byte when n <= 16 */ |
| ble L(byte_by_byte) |
| rldicl r7, r3, 0, 61 /* r7 = dst & 7 */ |
| rldicl r6, r4, 0, 61 /* r6 = src & 7 */ |
| cmpdi r6, 0 /* Check src alignment */ |
| beq L(srcaligndstunalign) |
| /* src is unaligned */ |
| rlwinm r10, r4, 3,26,28 /* r10 = (src & 7) * 8, the padding in bits */ |
| clrrdi r4, r4, 3 /* Align the addr to dw boundary */ |
| ld r8, 0(r4) /* Load doubleword from memory. */ |
| li r0, 0 |
| /* Discard bits not part of the string */ |
| #ifdef __LITTLE_ENDIAN__ |
| srd r7, r8, r10 |
| #else |
| sld r7, r8, r10 |
| #endif |
| cmpb r0, r7, r0 /* Compare each byte against null */ |
| /* Discard the matches produced by the zero bits shifted in above */ |
| #ifdef __LITTLE_ENDIAN__ |
| sld r0, r0, r10 |
| #else |
| srd r0, r0, r10 |
| #endif |
| cmpdi r0, 0 |
| bne L(bytebybyte) /* if it has null, copy byte by byte */ |
| subfic r6, r6, 8 /* r6 = 8 - (src & 7), string bytes in this dw */ |
| rlwinm r12, r3, 3,26,28 /* Calculate padding in bits. */ |
| rldicl r9, r3, 0, 61 /* Calculate padding in bytes. */ |
| addi r3, r3, -1 /* bias dst for the pre-incrementing stbu */ |
| |
| cmpdi r12, 0 /* check dest alignment */ |
| beq L(srcunaligndstalign) |
| |
| /* both src and dst unaligned */ |
| #ifdef __LITTLE_ENDIAN__ |
| sld r8, r7, r10 |
| mr r11, r10 |
| addi r11, r11, -8 /* Adjust byte pointer on loaded dw */ |
| #else |
| srd r8, r7, r10 |
| subfic r11, r10, 64 |
| #endif |
| /* Is the dst padding greater than the src padding? */ |
| cmpd cr7, r12, r10 |
| ble cr7, L(dst_align_small) |
| /* src padding is less than dst padding */ |
| |
| /* Calculate the dst alignment difference */ |
| subfic r7, r9, 8 /* r7 = bytes needed to align dst */ |
| mtctr r7 |
| |
| /* Write until dst is aligned */ |
| cmpdi cr0, r7, 4 |
| blt L(storebyte1) /* less than 4, store byte by byte */ |
| beq L(equal1) /* if its 4, store word */ |
| addi r0, r7, -4 /* greater than 4, so stb and stw */ |
| mtctr r0 |
| L(storebyte1): |
| #ifdef __LITTLE_ENDIAN__ |
| addi r11, r11, 8 /* Adjust byte pointer on loaded dw */ |
| #else |
| addi r11, r11, -8 |
| #endif |
| srd r7, r8, r11 /* bring the next string byte to the low end */ |
| stbu r7, 1(r3) |
| addi r5, r5, -1 |
| bdnz L(storebyte1) |
| |
| subfic r7, r9, 8 /* Check the remaining bytes */ |
| cmpdi cr0, r7, 4 |
| blt L(proceed1) /* fewer than 4 were needed: dst is aligned now */ |
| |
| .align 4 |
| L(equal1): |
| #ifdef __LITTLE_ENDIAN__ |
| addi r11, r11, 8 /* Adjust byte pointer on loaded dw */ |
| srd r7, r8, r11 |
| #else |
| subfic r11, r11, 64 |
| sld r7, r8, r11 |
| srdi r7, r7, 32 |
| #endif |
| stw r7, 1(r3) /* store the last four alignment bytes as a word */ |
| addi r3, r3, 4 |
| addi r5, r5, -4 |
| |
| L(proceed1): |
| mr r7, r8 |
| /* calculate the Left over bytes to be written */ |
| subfic r11, r10, 64 |
| subfic r12, r12, 64 |
| subf r12, r12, r11 /* remaining bytes on second dw */ |
| subfic r10, r12, 64 /* remaining bytes on first dw */ |
| subfic r9, r9, 8 |
| subf r6, r9, r6 /* recalculate padding */ |
| L(srcunaligndstalign): |
| addi r3, r3, 1 /* undo the stbu bias on dst */ |
| subfic r12, r10, 64 /* remaining bytes on second dw */ |
| addi r4, r4, 8 /* src now points at the next doubleword */ |
| li r0,0 |
| b L(storedouble) |
| |
| .align 4 |
| L(dst_align_small): |
| mtctr r6 /* store the r6 string bytes of the first src dw */ |
| /* Write until src is aligned */ |
| L(storebyte2): |
| #ifdef __LITTLE_ENDIAN__ |
| addi r11, r11, 8 /* Adjust byte pointer on dw */ |
| #else |
| addi r11, r11, -8 |
| #endif |
| srd r7, r8, r11 |
| stbu r7, 1(r3) |
| addi r5, r5, -1 |
| bdnz L(storebyte2) |
| |
| addi r4, r4, 8 /* Increment src pointer */ |
| addi r3, r3, 1 /* Increment dst pointer */ |
| mr r9, r3 |
| li r8, 0 /* L(aligned) expects r8 to be zero */ |
| cmpd cr7, r12, r10 |
| beq cr7, L(aligned) /* equal padding: both pointers are aligned now */ |
| rldicl r6, r3, 0, 61 /* Recalculate padding */ |
| mr r7, r6 |
| /* src is aligned, dst is not: r7 = dst & 7 on entry. Stores the first |
| 8 - r7 bytes of the source doubleword so that dst becomes aligned; the |
| remaining r6 bytes of that doubleword stay in r8 for L(storedouble). */ |
| L(srcaligndstunalign): |
| mr r9, r3 |
| mr r6, r7 |
| ld r8, 0(r4) |
| subfic r10, r7, 8 /* r10 = bytes needed to align dst */ |
| mr r7, r8 |
| li r0, 0 /* Check null */ |
| cmpb r0, r8, r0 |
| cmpdi r0, 0 |
| bne L(byte_by_byte) /* Do byte by byte if there is NULL */ |
| rlwinm r12, r3, 3,26,28 /* Calculate padding */ |
| addi r3, r3, -1 /* bias dst for the pre-incrementing stbu */ |
| /* write byte by byte until aligned */ |
| #ifdef __LITTLE_ENDIAN__ |
| li r11, -8 |
| #else |
| li r11, 64 |
| #endif |
| mtctr r10 |
| cmpdi cr0, r10, 4 |
| blt L(storebyte) /* less than 4, store byte by byte */ |
| beq L(equal) /* exactly 4, store a word */ |
| addi r0, r10, -4 /* more than 4: r10 - 4 bytes, then a word */ |
| mtctr r0 |
| L(storebyte): |
| #ifdef __LITTLE_ENDIAN__ |
| addi r11, r11, 8 /* Adjust byte pointer on dw */ |
| #else |
| addi r11, r11, -8 |
| #endif |
| srd r7, r8, r11 /* bring the next string byte to the low end */ |
| stbu r7, 1(r3) |
| addi r5, r5, -1 |
| bdnz L(storebyte) |
| |
| cmpdi cr0, r10, 4 |
| blt L(align) /* fewer than 4 were needed: dst is aligned now */ |
| |
| .align 4 |
| L(equal): |
| #ifdef __LITTLE_ENDIAN__ |
| addi r11, r11, 8 |
| srd r7, r8, r11 |
| #else |
| subfic r11, r11, 64 |
| sld r7, r8, r11 |
| srdi r7, r7, 32 |
| #endif |
| stw r7, 1(r3) /* store the last four alignment bytes as a word */ |
| addi r5, r5, -4 |
| addi r3, r3, 4 |
| L(align): |
| addi r3, r3, 1 /* undo the stbu bias on dst */ |
| addi r4, r4, 8 /* Increment src pointer */ |
| subfic r10, r12, 64 /* r10 = 64 - r12, the complementary shift count */ |
| li r0, 0 |
| /* dst addr aligned to 8. r8 = previous source doubleword, r4 = next |
| source doubleword, r10/r12 = shift counts for the two halves, r0 = 0. |
| Each pass stores one doubleword merged from r8 and the newly loaded r7. */ |
| L(storedouble): |
| cmpdi r5, 8 |
| ble L(null1) /* n <= 8: finish in the byte loops */ |
| ld r7, 0(r4) /* load next dw */ |
| cmpb r0, r7, r0 |
| cmpdi r0, 0 /* check for null on each new dw */ |
| bne L(null) |
| #ifdef __LITTLE_ENDIAN__ |
| srd r9, r8, r10 /* bytes from first dw */ |
| sld r11, r7, r12 /* bytes from second dw */ |
| #else |
| sld r9, r8, r10 |
| srd r11, r7, r12 |
| #endif |
| or r11, r9, r11 /* make as a single dw */ |
| std r11, 0(r3) /* store as std on aligned addr */ |
| mr r8, r7 /* still few bytes left to be written */ |
| addi r3, r3, 8 /* increment dst addr */ |
| addi r4, r4, 8 /* increment src addr */ |
| addi r5, r5, -8 |
| b L(storedouble) /* Loop until NULL */ |
| |
| .align 4 |
| |
| /* We've hit the end of the string. Do the rest byte-by-byte. |
| r7 = the doubleword that holds the null byte, r8 = the previous |
| doubleword, of which r6 bytes are still to be stored, r5 = n. */ |
| L(null): |
| addi r3, r3, -1 /* bias dst for the pre-incrementing stbu */ |
| mr r10, r12 |
| mtctr r6 |
| #ifdef __LITTLE_ENDIAN__ |
| subfic r10, r10, 64 |
| addi r10, r10, -8 |
| #endif |
| cmpdi cr0, r5, 4 |
| blt L(loop) /* fewer than 4 bytes of n left */ |
| cmpdi cr0, r6, 4 |
| blt L(loop) /* fewer than 4 bytes of r8 left */ |
| |
| /* we can still use stw if leftover >= 4 */ |
| #ifdef __LITTLE_ENDIAN__ |
| addi r10, r10, 8 |
| srd r11, r8, r10 |
| #else |
| subfic r10, r10, 64 |
| sld r11, r8, r10 |
| srdi r11, r11, 32 |
| #endif |
| stw r11, 1(r3) |
| addi r5, r5, -4 |
| addi r3, r3, 4 |
| cmpdi cr0, r5, 0 |
| beq L(g1) /* n exhausted */ |
| cmpdi cr0, r6, 4 |
| beq L(bytebybyte1) /* r8 fully stored: continue with r7 */ |
| addi r10, r10, 32 |
| #ifdef __LITTLE_ENDIAN__ |
| addi r10, r10, -8 |
| #else |
| subfic r10, r10, 64 |
| #endif |
| addi r0, r6, -4 /* CTR = bytes of r8 left after the word store */ |
| mtctr r0 |
| /* remaining byte by byte part of first dw */ |
| L(loop): |
| #ifdef __LITTLE_ENDIAN__ |
| addi r10, r10, 8 |
| #else |
| addi r10, r10, -8 |
| #endif |
| srd r0, r8, r10 /* bring the next byte of r8 to the low end */ |
| stbu r0, 1(r3) |
| addi r5, r5, -1 |
| cmpdi cr0, r5, 0 |
| beq L(g1) /* n exhausted */ |
| bdnz L(loop) |
| /* Entered after the leftover bytes of the previous doubleword were stored; |
| removes the stbu bias from r3. */ |
| L(bytebybyte1): |
| addi r3, r3, 1 |
| /* remaining byte by byte part of second dw. |
| Stores the bytes of r7 one at a time in memory order. After each store |
| it branches to L(g2) if the byte was null, or to L(g1) if n (r5) has |
| reached zero. */ |
| L(bytebybyte): |
| addi r3, r3, -8 /* bias dst for the first stbu, which adds 8 */ |
| addi r4, r4, -1 |
| |
| #ifdef __LITTLE_ENDIAN__ |
| extrdi. r0, r7, 8, 56 /* lowest byte of r7 comes first; cr0 = is it null? */ |
| stbu r7, 8(r3) |
| addi r5, r5, -1 |
| beq L(g2) |
| cmpdi r5, 0 |
| beq L(g1) |
| extrdi. r0, r7, 8, 48 |
| stbu r0, 1(r3) |
| addi r5, r5, -1 |
| beq L(g2) |
| cmpdi r5, 0 |
| beq L(g1) |
| extrdi. r0, r7, 8, 40 |
| stbu r0, 1(r3) |
| addi r5, r5, -1 |
| beq L(g2) |
| cmpdi r5, 0 |
| beq L(g1) |
| extrdi. r0, r7, 8, 32 |
| stbu r0, 1(r3) |
| addi r5, r5, -1 |
| beq L(g2) |
| cmpdi r5, 0 |
| beq L(g1) |
| extrdi. r0, r7, 8, 24 |
| stbu r0, 1(r3) |
| addi r5, r5, -1 |
| beq L(g2) |
| cmpdi r5, 0 |
| beq L(g1) |
| extrdi. r0, r7, 8, 16 |
| stbu r0, 1(r3) |
| addi r5, r5, -1 |
| beq L(g2) |
| cmpdi r5, 0 |
| beq L(g1) |
| extrdi. r0, r7, 8, 8 |
| stbu r0, 1(r3) |
| addi r5, r5, -1 |
| beq L(g2) |
| cmpdi r5, 0 |
| beq L(g1) |
| extrdi r0, r7, 8, 0 /* last byte: no test, goes to L(g2) unconditionally */ |
| stbu r0, 1(r3) |
| addi r5, r5, -1 |
| b L(g2) |
| #else |
| extrdi. r0, r7, 8, 0 /* highest byte of r7 comes first; cr0 = is it null? */ |
| stbu r0, 8(r3) |
| addi r5, r5, -1 |
| beq L(g2) |
| cmpdi r5, 0 |
| beq L(g1) |
| extrdi. r0, r7, 8, 8 |
| stbu r0, 1(r3) |
| addi r5, r5, -1 |
| beq L(g2) |
| cmpdi r5, 0 |
| beq L(g1) |
| extrdi. r0, r7, 8, 16 |
| stbu r0, 1(r3) |
| addi r5, r5, -1 |
| beq L(g2) |
| cmpdi r5, 0 |
| beq L(g1) |
| extrdi. r0, r7, 8, 24 |
| stbu r0, 1(r3) |
| addi r5, r5, -1 |
| beq L(g2) |
| cmpdi r5, 0 |
| beq L(g1) |
| extrdi. r0, r7, 8, 32 |
| stbu r0, 1(r3) |
| addi r5, r5, -1 |
| beq L(g2) |
| cmpdi r5, 0 |
| beq L(g1) |
| extrdi. r0, r7, 8, 40 |
| stbu r0, 1(r3) |
| addi r5, r5, -1 |
| beq L(g2) |
| cmpdi r5, 0 |
| beq L(g1) |
| extrdi. r0, r7, 8, 48 |
| stbu r0, 1(r3) |
| addi r5, r5, -1 |
| beq L(g2) |
| cmpdi r5, 0 |
| beq L(g1) |
| stbu r7, 1(r3) /* last byte: no test, goes to L(g2) unconditionally */ |
| addi r5, r5, -1 |
| b L(g2) |
| #endif |
| /* n reached zero: for stpncpy, advance r3 one extra byte so that the |
| r19 - 1 computed at L(update3return) is one past the last byte stored. */ |
| L(g1): |
| #ifdef USE_AS_STPNCPY |
| addi r3, r3, 1 |
| #endif |
| /* Hand over to L(zeroFill): r19 = r3 + 1, r8 = bytes of n left. */ |
| L(g2): |
| addi r3, r3, 1 |
| mr r19, r3 |
| mr r8, r5 |
| b L(zeroFill) |
| /* At most 8 bytes of n remain in L(storedouble): step src back by r6, the |
| bytes of the previous doubleword not stored yet, and finish in the |
| generic byte loops with r9 = dst. */ |
| L(null1): |
| mr r9, r3 |
| subf r4, r6, r4 /* r4 = r4 - r6 */ |
| b L(byte_by_byte) |
| END(FUNC_NAME) |
| #ifndef USE_AS_STPNCPY |
| libc_hidden_builtin_def (strncpy) |
| #endif |