| /* Optimized version of the standard strlen() function. |
| This file is part of the GNU C Library. |
| Copyright (C) 2000-2014 Free Software Foundation, Inc. |
| Contributed by Dan Pop <Dan.Pop@cern.ch>. |
| |
| The GNU C Library is free software; you can redistribute it and/or |
| modify it under the terms of the GNU Lesser General Public |
| License as published by the Free Software Foundation; either |
| version 2.1 of the License, or (at your option) any later version. |
| |
| The GNU C Library is distributed in the hope that it will be useful, |
| but WITHOUT ANY WARRANTY; without even the implied warranty of |
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| Lesser General Public License for more details. |
| |
| You should have received a copy of the GNU Lesser General Public |
| License along with the GNU C Library; if not, see |
| <http://www.gnu.org/licenses/>. */ |
| |
| /* Return: the length of the input string |
| |
| Input: |
| in0: str |
| |
| Look for the null character byte by byte, until we reach an 8-byte |
| aligned address, then search one 8-byte word at a time, using the czx1 |
| instruction. We also read one word ahead, which could cause problems |
| if the null character is in the last word of a page and the next page |
| is not mapped in the process address space. Hence the use of the |
| speculative load (ld8.s) for the read-ahead word. |
| |
| This implementation assumes little endian mode. For big endian mode, |
| the instruction czx1.r should be replaced by czx1.l. */ |
| |
| #include <sysdep.h> |
| #undef ret /* NOTE(review): presumably <sysdep.h> defines a `ret' macro that would otherwise be expanded inside `br.ret.sptk.many' below -- confirm against sysdep.h */ |
| |
| #define saved_lc r18 /* caller's ar.lc, held from entry until .restore_and_exit */ |
| #define str r19 /* read pointer; post-incremented by every load */ |
| #define pos0 r20 /* czx1.r result for the word in val1 */ |
| #define val1 r21 /* 8-byte word currently being tested for a zero byte */ |
| #define val2 r22 /* byte read in the byte loop; read-ahead word in the word loop */ |
| #define origadd r23 /* value of str when the word loop is entered */ |
| #define tmp r24 /* scratch: str % 8 at entry, str - origadd at .foundit */ |
| #define loopcnt r30 /* byte-loop count, copied into ar.lc */ |
| #define len ret0 /* running length and return value; ret0 is not defined in this file -- presumably the IA-64 return-value register alias, confirm */ |
| |
| ENTRY(strlen) /* strlen: in0 = string address; returns in len (ret0) the number of bytes before the first zero byte */ |
| .prologue |
| alloc r2 = ar.pfs, 1, 0, 0, 0 /* frame with one input (in0) and nothing else; r2 is not read again below */ |
| .save ar.lc, saved_lc |
| mov saved_lc = ar.lc // save the caller's loop counter; ar.lc is overwritten if the byte loop runs |
| .body |
| mov str = in0 /* str = read pointer, starts at the first byte of the string */ |
| mov len = r0 // len = 0 |
| and tmp = 7, in0 // tmp = str % 8 |
| ;; |
| sub loopcnt = 8, tmp // loopcnt = 8 - tmp = bytes up to the next 8-byte boundary |
| cmp.eq p6, p0 = tmp, r0 /* p6 = (tmp == 0): the string already starts 8-byte aligned */ |
| (p6) br.cond.sptk .str_aligned;; /* aligned: skip the byte loop, len stays 0 */ |
| adds loopcnt = -1, loopcnt;; /* br.cloop branches while ar.lc != 0 and decrements it, so load iterations - 1 */ |
| mov ar.lc = loopcnt |
| .l1: /* byte loop: runs 8 - tmp times unless a zero byte is met first */ |
| ld1 val2 = [str], 1 /* val2 = next byte; str += 1 */ |
| ;; |
| cmp.eq p6, p0 = val2, r0 /* p6 = (val2 == 0) */ |
| (p6) br.cond.spnt .restore_and_exit /* zero byte found: len already holds the result */ |
| adds len = 1, len /* count the non-zero byte */ |
| br.cloop.dptk .l1 |
| .str_aligned: /* str is 8-byte aligned here; len is 0 (direct entry) or 8 - tmp (after the byte loop) */ |
| mov origadd = str // origadd = first aligned address scanned by the word loop |
| ld8 val1 = [str], 8;; /* val1 = first aligned word; str += 8 */ |
| nop.b 0 /* NOTE(review): the two nop.b look like padding ahead of .l2 -- confirm the intended bundle layout */ |
| nop.b 0 |
| .l2: ld8.s val2 = [str], 8 // read one word ahead; speculative, so a fault is deferred (don't bomb out here) |
| czx1.r pos0 = val1 /* pos0 = index of the first zero byte in val1, 8 if there is none */ |
| ;; |
| cmp.ne p6, p0 = 8, pos0 /* p6 = val1 contains a zero byte */ |
| (p6) br.cond.spnt .foundit |
| chk.s val2, .recovery /* read-ahead load was deferred: redo it non-speculatively at .recovery */ |
| .back: |
| mov val1 = val2 /* the read-ahead word becomes the word to test */ |
| br.cond.dptk .l2 |
| .foundit: /* str is 16 bytes past the word in val1: one post-increment for val1, one for the read-ahead */ |
| sub tmp = str, origadd // tmp = crt address - orig |
| add len = len, pos0;; /* bytes before the zero byte inside the final word */ |
| add len = len, tmp;; |
| adds len = -16, len /* remove the 16-byte overshoot of str */ |
| .restore_and_exit: |
| mov ar.lc = saved_lc // restore the loop counter |
| br.ret.sptk.many b0 /* return with the result in len (ret0) */ |
| .recovery: /* reached only from chk.s above */ |
| adds str = -8, str;; /* step back to the address of the deferred read-ahead load */ |
| ld8 val2 = [str], 8 // reload non-speculatively; a genuine fault is taken now (bomb out here) |
| br.cond.sptk .back |
| END(strlen) |
| libc_hidden_builtin_def (strlen) |