/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
.file "strncmp.s"
/*
* strncmp(s1, s2, n)
*
* Compare strings (at most n bytes): s1>s2: >0 s1==s2: 0 s1<s2: <0
*
* Fast assembler language version of the following C-program for strncmp
* which represents the `standard' for the C-library.
*
* int
* strncmp(const char *s1, const char *s2, size_t n)
* {
* n++;
* if (s1 == s2)
* return (0);
* while (--n != 0 && *s1 == *s2++)
* if(*s1++ == '\0')
* return(0);
* return ((n == 0) ? 0 : (*s1 - s2[-1]));
* }
*/
#include <sys/asm_linkage.h>
ENTRY(strncmp)
	!
	! int strncmp(const char *s1, const char *s2, size_t n)
	!
	! Register usage after the save:
	!   %i0      s1 in the initial alignment loop; everywhere else the
	!            difference s1 - s2, so that [%i1 + %i0] addresses s1
	!            while only %i1 has to be advanced
	!   %i1      s2 cursor
	!   %i2      n, number of bytes still to compare
	!   %i3      s2 & 3 on entry to .iss2; afterwards the most recently
	!            loaded aligned word of s2 (unaligned s2 cases)
	!   %i4      current byte/word of s1
	!   %i5      current byte/word of s2 (in the unaligned cases it is
	!            merged from two consecutive aligned loads)
	!   %l0-%l2  byte masks 0xff000000, 0x00ff0000, 0x0000ff00
	!   %l3-%l5  scratch
	!   %l6,%l7  0x7efefeff and 0x81010100, used by the zero byte test
	!
	! Result (%o0 after restore): 0 when equal, otherwise the difference
	! of the first pair of differing bytes, taken as unsigned values.
	!
	save	%sp, -SA(WINDOWSIZE), %sp
	cmp	%i2, 8
	blu,a,pn %xcc, .cmp_bytes	! for small counts go do bytes
	sub	%i0, %i1, %i0		! delay slot (only if taken): %i0 = s1 - s2

	!
	! Compare single bytes until s1 is word aligned.
	!
	andcc	%i0, 3, %g0		! is s1 aligned
1:	bz,pn	%icc, .iss2		! if so go check s2
	andcc	%i1, 3, %i3		! delay slot: %i3 = s2 & 3, sets icc for .iss2

	deccc	%i2			! --n >= 0 ?
	bcs,pn	%xcc, .doneq
	nop				! delay slot

	ldub	[%i0], %i4		! else cmp one byte
	ldub	[%i1], %i5
	inc	%i0
	cmp	%i4, %i5
	bne,pn	%icc, .noteqb
	inc	%i1			! delay slot
	tst	%i4			! terminating zero
	bnz,pt	%icc, 1b
	andcc	%i0, 3, %g0		! delay slot: is s1 aligned now
	b,a	.doneq

	!
	! s1 is word aligned; dispatch on the alignment of s2.  None of
	! set, sethi, srl, ldub and inc writes the condition codes, so the
	! branches below still see the andcc/cmp results they follow.
	!
.iss2:
	set	0x7efefeff, %l6
	set	0x81010100, %l7
	sethi	%hi(0xff000000), %l0	! masks to test for terminating null
	sethi	%hi(0x00ff0000), %l1
	srl	%l1, 8, %l2		! generate 0x0000ff00 mask

	bz,pn	%icc, .w4cmp		! if s2 word aligned, compare words
	cmp	%i3, 2			! check if s2 half aligned
	be,pn	%icc, .w2cmp
	cmp	%i3, 1			! check if aligned to 1 or 3 bytes

	!
	! (s2 & 3) == 3: one byte brings s2 to a word boundary.  That byte is
	! kept in the top byte of %i5 and merged with the top three bytes of
	! the following aligned word.
	!
.w3cmp:	ldub	[%i1], %i5
	inc	1, %i1
	be,pt	%icc, .w1cmp		! icc from the cmp %i3, 1 above
	sll	%i5, 24, %i5		! delay slot: byte into bits 31..24
	sub	%i0, %i1, %i0		! %i0 = s1 - (aligned s2 cursor)
2:
	deccc	4, %i2			! n >= 4 ?
	bgeu,a,pt %xcc, 3f
	lduw	[%i1], %i3		! delay slot (only if taken): next s2 word
	dec	%i1			! reset s2
	inc	%i0			! reset s1 diff
	b	.cmp_bytes		! do a byte at a time if n < 4
	inc	4, %i2			! delay slot: undo the deccc
3:
	lduw	[%i0 + %i1], %i4	! read a word from s1
	inc	4, %i1
	srl	%i3, 8, %l4		! merge with the other half
	or	%l4, %i5, %i5
	cmp	%i4, %i5
	be,pn	%icc, 1f
	add	%i4, %l6, %l3		! delay slot: start of zero byte test
	b,a	.noteq
1:	xor	%l3, %i4, %l3
	and	%l3, %l7, %l3
	cmp	%l3, %l7		! equal means no zero byte in %i4
	be,a,pt	%icc, 2b
	sll	%i3, 24, %i5		! delay slot (only if taken): carry last byte

	!
	! For 7-bit characters, we know one of the bytes is zero, but for
	! 8-bit characters, the zero detection algorithm gives some false
	! triggers ... check every byte individually.
	!
	andcc	%i4, %l0, %g0		! check if first byte was zero
	bnz,pt	%icc, 1f
	andcc	%i4, %l1, %g0		! check if second byte was zero
	b,a	.doneq
1:	bnz,pt	%icc, 1f
	andcc	%i4, %l2, %g0		! check if third byte was zero
	b,a	.doneq
1:	bnz,pt	%icc, 1f
	andcc	%i4, 0xff, %g0		! check if last byte is zero
	b,a	.doneq
1:	bnz,pn	%icc, 2b		! no zero byte after all, keep going
	sll	%i3, 24, %i5		! delay slot: carry last byte
	b,a	.doneq

	!
	! (s2 & 3) == 1: the byte loaded at .w3cmp plus one halfword bring s2
	! to a word boundary.  These three bytes sit in the top of %i5 and
	! are merged with the top byte of the following aligned word.
	!
.w1cmp:	clr	%l4
	lduh	[%i1], %l4
	inc	2, %i1
	sll	%l4, 8, %l4
	or	%i5, %l4, %i5
	sub	%i0, %i1, %i0		! %i0 = s1 - (aligned s2 cursor)
3:
	deccc	4, %i2			! n >= 4 ?
	bgeu,a,pt %xcc, 4f
	lduw	[%i1], %i3		! delay slot (only if taken): next s2 word
	dec	3, %i1			! reset s2
	inc	3, %i0			! reset s1 diff
	b	.cmp_bytes		! do a byte at a time if n < 4
	inc	4, %i2			! delay slot: undo the deccc
4:
	lduw	[%i0 + %i1], %i4	! read a word from s1
	inc	4, %i1
	srl	%i3, 24, %l4		! merge with the other half
	or	%l4, %i5, %i5
	cmp	%i4, %i5
	be,pt	%icc, 1f
	add	%i4, %l6, %l3		! delay slot: start of zero byte test
	b,a	.noteq
1:	xor	%l3, %i4, %l3
	and	%l3, %l7, %l3
	cmp	%l3, %l7		! equal means no zero byte in %i4
	be,a,pt	%icc, 3b
	sll	%i3, 8, %i5		! delay slot (only if taken): carry 3 bytes

	andcc	%i4, %l0, %g0		! check if first byte was zero
	bnz,pt	%icc, 1f
	andcc	%i4, %l1, %g0		! check if second byte was zero
	b,a	.doneq
1:	bnz,pt	%icc, 1f
	andcc	%i4, %l2, %g0		! check if third byte was zero
	b,a	.doneq
1:	bnz,pt	%icc, 1f
	andcc	%i4, 0xff, %g0		! check if last byte is zero
	b,a	.doneq
1:	bnz,pn	%icc, 3b		! no zero byte after all, keep going
	sll	%i3, 8, %i5		! delay slot: carry 3 bytes
	b,a	.doneq

	!
	! (s2 & 3) == 2: one halfword brings s2 to a word boundary.  It is
	! kept in the top half of %i5 and merged with the top half of the
	! following aligned word.
	!
.w2cmp:
	lduh	[%i1], %i5		! read a halfword to align s2
	inc	2, %i1
	sll	%i5, 16, %i5
	sub	%i0, %i1, %i0		! %i0 = s1 - (aligned s2 cursor)
4:
	deccc	4, %i2			! n >= 4 ?
	bgeu,a,pt %xcc, 5f
	lduw	[%i1], %i3		! delay slot (only if taken): next s2 word
	dec	2, %i1			! reset s2
	inc	2, %i0			! reset s1 diff
	b	.cmp_bytes		! do a byte at a time if n < 4
	inc	4, %i2			! delay slot
5:
	lduw	[%i1 + %i0], %i4	! read a word from s1
	inc	4, %i1
	srl	%i3, 16, %l4		! merge with the other half
	or	%l4, %i5, %i5
	cmp	%i4, %i5
	be,pt	%icc, 1f
	add	%i4, %l6, %l3		! delay slot: start of zero byte test
	b,a	.noteq
1:	xor	%l3, %i4, %l3		! are any bytes 0?
	and	%l3, %l7, %l3
	cmp	%l3, %l7
	be,a,pt	%icc, 4b
	sll	%i3, 16, %i5		! delay slot (only if taken): carry halfword

	andcc	%i4, %l0, %g0		! check if first byte was zero
	bnz,pt	%icc, 1f
	andcc	%i4, %l1, %g0		! check if second byte was zero
	b,a	.doneq
1:	bnz,pt	%icc, 1f
	andcc	%i4, %l2, %g0		! check if third byte was zero
	b,a	.doneq
1:	bnz,pt	%icc, 1f
	andcc	%i4, 0xff, %g0		! check if last byte is zero
	b,a	.doneq
1:	bnz,pn	%icc, 4b		! no zero byte after all, keep going
	sll	%i3, 16, %i5		! delay slot: carry halfword
	b,a	.doneq

	!
	! s1 and s2 are both word aligned: compare whole words.
	!
	! The s2 word is loaded only after the n != 0 test at 5:, so s2 is
	! never read beyond the last word that holds a byte still to be
	! compared.  Loading it on the way back to 5: instead would touch
	! the word after the buffer whenever n runs out on a word that
	! falsely trips the zero byte test, and that word may be unmapped.
	!
.w4cmp:	sub	%i0, %i1, %i0		! %i0 = s1 - s2
5:	cmp	%i2, 0
	be,a,pn	%xcc, .doneq		! nothing left to compare
	nop
	lduw	[%i1], %i5		! read a word from s2
	deccc	4, %i2			! n >= 4 ?
	bcs,a,pn %xcc, .cmp_bytes	! do a byte at a time if n < 4
	inc	4, %i2			! delay slot (only if taken): undo the deccc

	lduw	[%i1 + %i0], %i4	! read a word from s1
	cmp	%i4, %i5
	inc	4, %i1
	be,pt	%icc, 1f
	add	%i4, %l6, %l3		! delay slot: start of zero byte test
	b,a	.noteq
1:	xor	%l3, %i4, %l3
	and	%l3, %l7, %l3
	cmp	%l3, %l7		! equal means no zero byte in %i4
	be,pt	%icc, 5b
	nop

	andcc	%i4, %l0, %g0		! check if first byte was zero
	bnz,pt	%icc, 1f
	andcc	%i4, %l1, %g0		! check if second byte was zero
	b,a	.doneq
1:	bnz,pt	%icc, 1f
	andcc	%i4, %l2, %g0		! check if third byte was zero
	b,a	.doneq
1:	bnz,pt	%icc, 1f
	andcc	%i4, 0xff, %g0		! check if last byte is zero
	b,a	.doneq
1:	bnz,pn	%icc, 5b		! no zero byte after all, keep going
	nop				! delay slot; falls into .doneq if not taken

.doneq:	ret
	restore	%g0, %g0, %o0		! equal return zero

	!
	! The words in %i4 (s1) and %i5 (s2) differ.  Walk the bytes from the
	! most significant one down: return the difference of the first pair
	! that differs, or zero if an equal zero byte comes before it.
	!
.noteq:	srl	%i4, 24, %l4		! first byte
	srl	%i5, 24, %l5
	subcc	%l4, %l5, %i0
	bne,pt	%icc, 6f
	andcc	%l4, 0xff, %g0		! delay slot: was the s1 byte zero
	bz	.doneq
	sll	%i4, 8, %l4		! delay slot: second byte
	sll	%i5, 8, %l5
	srl	%l4, 24, %l4
	srl	%l5, 24, %l5
	subcc	%l4, %l5, %i0
	bne,pt	%icc, 6f
	andcc	%l4, 0xff, %g0		! delay slot: was the s1 byte zero
	bz,pt	%icc, .doneq
	sll	%i4, 16, %l4		! delay slot: third byte
	sll	%i5, 16, %l5
	srl	%l4, 24, %l4
	srl	%l5, 24, %l5
	subcc	%l4, %l5, %i0
	bne,pt	%icc, 6f
	andcc	%l4, 0xff, %g0		! delay slot: was the s1 byte zero
	bz,pt	%icc, .doneq
	nop
.noteqb:
	and	%i4, 0xff, %l4		! last byte (or the bytes from a byte loop)
	and	%i5, 0xff, %l5
	subcc	%l4, %l5, %i0
6:	ret
	restore	%i0, %g0, %o0		! return the byte difference

	! Do a byte by byte comparison, disregarding alignments
	! (expects %i0 = s1 - s2, %i1 = s2, %i2 = n)
.cmp_bytes:
	deccc	%i2			! --n >= 0 ?
1:
	bcs,pn	%xcc, .doneq
	nop				! delay slot
	ldub	[%i1 + %i0], %i4	! read a byte from s1
	ldub	[%i1], %i5		! read a byte from s2

	inc	%i1
	cmp	%i4, %i5
	bne,pt	%icc, .noteqb
	tst	%i4			! terminating zero
	bnz,pt	%icc, 1b
	deccc	%i2			! --n >= 0
	b,a	.doneq
SET_SIZE(strncmp)