strlen.s revision b1593d50e783f7d66722dde093752b74ffa95176
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
/*
* strlen(s)
*
* Given string s, return length (not including the terminating null).
*
* Fast assembler language version of the following C-program strlen
* which represents the `standard' for the C-library.
*
* size_t
* strlen(s)
* register const char *s;
* {
* register const char *s0 = s + 1;
*
* while (*s++ != '\0')
* ;
* return (s - s0);
* }
*/
#include <sys/asm_linkage.h>
/*
* There are two key optimizations in the routine below.
* First, all memory accesses are 8 bytes wide. The time
* for long strings is dominated by the latency of load
* instructions in the inner loop, and going 8 bytes at
* a time means 1/8th as much latency.
*
* Scanning an 8 byte word for a '\0' is made fast by
* this formula (due to Alan Mycroft):
* ~x & 0x8080808080808080 & (x - 0x0101010101010101)
* The result of this formula is non-zero iff there's
* a '\0' somewhere in x.
*
* Second, the cost of short strings is dominated by the
* cost of figuring out which byte out of the last 8
* contained the '\0' that terminated the string. We use
* properties of the formula above to convert scanning the
* word for '\0' into a single LZD instruction.
*/
.align 64
/*
* The result of Mycroft's formula is a pattern of 0x80 and
* 0x00 bytes. There's a 0x80 at every byte position where
* there was a '\0' character, but a string of 0x01 bytes
* immediately preceding a '\0' becomes a corresponding
* string of 0x80 bytes. (e.g. 0x0101010101010100 becomes
* 0x8080808080808080). We need one final step to discount
* any leading 0x01 bytes, and then LZD can tell us how many
* characters there were before the terminating '\0'.
*/