strlen.s revision 2
25cf1a301a396c38e8adf52c15f537b80d2483f7jl/*
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * CDDL HEADER START
25cf1a301a396c38e8adf52c15f537b80d2483f7jl *
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * The contents of this file are subject to the terms of the
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * Common Development and Distribution License (the "License").
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * You may not use this file except in compliance with the License.
25cf1a301a396c38e8adf52c15f537b80d2483f7jl *
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * or http://www.opensolaris.org/os/licensing.
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * See the License for the specific language governing permissions
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * and limitations under the License.
25cf1a301a396c38e8adf52c15f537b80d2483f7jl *
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * When distributing Covered Code, include this CDDL HEADER in each
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * If applicable, add the following below this CDDL HEADER, with the
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * fields enclosed by brackets "[]" replaced with your own identifying
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * information: Portions Copyright [yyyy] [name of copyright owner]
25cf1a301a396c38e8adf52c15f537b80d2483f7jl *
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * CDDL HEADER END
25cf1a301a396c38e8adf52c15f537b80d2483f7jl */
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl/*
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * Use is subject to license terms.
25cf1a301a396c38e8adf52c15f537b80d2483f7jl */
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl .file "strlen.s"
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl/*
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * strlen(s)
25cf1a301a396c38e8adf52c15f537b80d2483f7jl *
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * Given string s, return length (not including the terminating null).
25cf1a301a396c38e8adf52c15f537b80d2483f7jl *
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * Fast assembler language version of the following C-program strlen
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * which represents the `standard' for the C-library.
25cf1a301a396c38e8adf52c15f537b80d2483f7jl *
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * size_t
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * strlen(s)
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * register const char *s;
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * {
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * register const char *s0 = s + 1;
25cf1a301a396c38e8adf52c15f537b80d2483f7jl *
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * while (*s++ != '\0')
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * ;
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * return (s - s0);
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * }
25cf1a301a396c38e8adf52c15f537b80d2483f7jl */
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl#include <sys/asm_linkage.h>
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ! The object of strlen is to, as quickly as possible, find the
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ! null byte. To this end, we attempt to get our string aligned
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ! and then blast across it using Alan Mycroft's algorithm for
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ! finding null bytes. If we are not aligned, the string is
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ! checked a byte at a time until it is. Once this occurs,
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ! we can proceed word-wise across it. Once a word with a
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ! zero byte has been found, we then check the word a byte
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ! at a time until we've located the zero byte, and return
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ! the proper length.
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl .align 32 ! strlen: %o0 = s on entry, returns the length of s in %o0
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ENTRY(strlen)
25cf1a301a396c38e8adf52c15f537b80d2483f7jl andcc %o0, 3, %o4 ! %o4 = s & 3, is src word aligned
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pt %icc, .nowalgnd ! yes, go straight to the word scan
25cf1a301a396c38e8adf52c15f537b80d2483f7jl mov %o0, %o2 ! (delay slot) %o2 = scan pointer, starts at s
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl cmp %o4, 2 ! is src half-word aligned
25cf1a301a396c38e8adf52c15f537b80d2483f7jl be,a,pn %icc, .s2algn ! yes, only one halfword to check before the word scan
25cf1a301a396c38e8adf52c15f537b80d2483f7jl lduh [%o2], %o1 ! (delay slot, annulled unless taken) halfword at s
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldub [%o2], %o1 ! odd address, load the first byte
25cf1a301a396c38e8adf52c15f537b80d2483f7jl tst %o1 ! byte zero?
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pn %icc, .done ! yes, %o2 still equals s so the length is 0
25cf1a301a396c38e8adf52c15f537b80d2483f7jl cmp %o4, 3 ! (delay slot) is s & 3 equal to 3
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl be,pn %icc, .nowalgnd ! yes, s + 1 is word aligned
25cf1a301a396c38e8adf52c15f537b80d2483f7jl inc 1, %o2 ! (delay slot) step past the byte just checked
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl lduh [%o2], %o1 ! s & 3 was 1, so %o2 is now halfword aligned
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.s2algn:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl srl %o1, 8, %o4 ! %o4 = high-order byte, i.e. the byte at %o2
25cf1a301a396c38e8adf52c15f537b80d2483f7jl tst %o4
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pn %icc, .done ! first byte of the halfword is zero
25cf1a301a396c38e8adf52c15f537b80d2483f7jl andcc %o1, 0xff, %g0 ! (delay slot) test the low-order byte
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pn %icc, .done ! second byte of the halfword is zero
25cf1a301a396c38e8adf52c15f537b80d2483f7jl inc 1, %o2 ! (delay slot, always executed) advance to the second byte
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl inc 1, %o2 ! neither byte zero, %o2 is now word aligned
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.nowalgnd:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ld [%o2], %o1 ! first aligned word
25cf1a301a396c38e8adf52c15f537b80d2483f7jl sethi %hi(0x01010101), %o4 ! build the two 32-bit constants for the zero-byte test
25cf1a301a396c38e8adf52c15f537b80d2483f7jl sethi %hi(0x80808080), %o5
25cf1a301a396c38e8adf52c15f537b80d2483f7jl or %o4, %lo(0x01010101), %o4 ! %o4 = 0x01010101
25cf1a301a396c38e8adf52c15f537b80d2483f7jl or %o5, %lo(0x80808080), %o5 ! %o5 = 0x80808080
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl andn %o5, %o1, %o3 ! %o3 = ~word & 0x80808080
25cf1a301a396c38e8adf52c15f537b80d2483f7jl sub %o1, %o4, %g1 ! %g1 = word - 0x01010101
25cf1a301a396c38e8adf52c15f537b80d2483f7jl andcc %o3, %g1, %g0 ! nonzero when the word holds a zero byte
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bnz,a,pn %icc, .nullfound ! first word already contains the terminator
25cf1a301a396c38e8adf52c15f537b80d2483f7jl sethi %hi(0xff000000), %o4 ! (delay slot, annulled unless taken) mask for the first byte
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ld [%o2+4], %o1 ! preload the second word for the loop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl inc 4, %o2 ! %o2 = address of the word now in %o1
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.loop: ! this should be aligned to 32
25cf1a301a396c38e8adf52c15f537b80d2483f7jl inc 4, %o2 ! %o2 = address of the next word to load
25cf1a301a396c38e8adf52c15f537b80d2483f7jl andn %o5, %o1, %o3 ! %o3 = ~word & 0x80808080
25cf1a301a396c38e8adf52c15f537b80d2483f7jl sub %o1, %o4, %g1 ! %g1 = word - 0x01010101
25cf1a301a396c38e8adf52c15f537b80d2483f7jl andcc %o3, %g1, %g0 ! nonzero when the word holds a zero byte
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,a,pt %icc, .loop ! no zero byte, keep scanning
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ld [%o2], %o1 ! (delay slot, annulled on loop exit) load the next word
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl dec 4, %o2 ! back up so %o2 = address of the word in %o1
25cf1a301a396c38e8adf52c15f537b80d2483f7jl sethi %hi(0xff000000), %o4 ! %o4 = mask for the first byte
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.nullfound:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl andcc %o1, %o4, %g0
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pn %icc, .done ! first byte zero
25cf1a301a396c38e8adf52c15f537b80d2483f7jl srl %o4, 8, %o4 ! (delay slot) %o4 = mask for the second byte
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl andcc %o1, %o4, %g0
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pn %icc, .done ! second byte zero
25cf1a301a396c38e8adf52c15f537b80d2483f7jl inc 1, %o2 ! (delay slot, always executed) %o2 = address of the second byte
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl srl %o4, 8, %o4 ! %o4 = mask for the third byte
25cf1a301a396c38e8adf52c15f537b80d2483f7jl andcc %o1, %o4, %g0
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pn %icc, .done ! third byte zero
25cf1a301a396c38e8adf52c15f537b80d2483f7jl inc 1, %o2 ! (delay slot, always executed) %o2 = address of the third byte
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl inc 1, %o2 ! fourth byte zero
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.done:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl retl ! %o2 points at the terminating zero byte here
25cf1a301a396c38e8adf52c15f537b80d2483f7jl sub %o2, %o0, %o0 ! (delay slot) return value = %o2 - s
25cf1a301a396c38e8adf52c15f537b80d2483f7jl SET_SIZE(strlen)
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl