memcmp.s revision 9a70fc3be3b1e966bf78825cdb8d509963a6f0a1
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
.file "memcmp.s"
/*
* memcmp(s1, s2, len)
*
* Compare n bytes: s1>s2: >0 s1==s2: 0 s1<s2: <0
*
* Fast assembler language version of the following C-program for memcmp
* which represents the `standard' for the C-library.
*
* int
* memcmp(const void *s1, const void *s2, size_t n)
* {
* if (s1 != s2 && n != 0) {
* const char *ps1 = s1;
* const char *ps2 = s2;
* do {
* if (*ps1++ != *ps2++)
* return(ps1[-1] - ps2[-1]);
* } while (--n != 0);
* }
* return (0);
* }
*/
#include <sys/asm_linkage.h>
#include <sys/machasi.h>
#define BLOCK_SIZE 64
ANSI_PRAGMA_WEAK(memcmp,function)
ENTRY(memcmp)
cmp %o0, %o1 ! s1 == s2?
be %ncc, .cmpeq
prefetch [%o0], #one_read
prefetch [%o1], #one_read
! for small counts byte compare immediately
cmp %o2, 48
bleu,a %ncc, .bytcmp
mov %o2, %o3 ! o3 <= 48
! Count > 48. We will byte compare (8 + num of bytes to dbl align)
! bytes. We assume that most miscompares will occur in the 1st 8 bytes
prefetch [%o0 + (1 * BLOCK_SIZE)], #one_read
prefetch [%o1 + (1 * BLOCK_SIZE)], #one_read
.chkdbl:
and %o0, 7, %o4 ! is s1 aligned on a 8 byte bound
mov 8, %o3 ! o2 > 48; o3 = 8
sub %o4, 8, %o4 ! o4 = -(num of bytes to dbl align)
ba %ncc, .bytcmp
sub %o3, %o4, %o3 ! o3 = 8 + (num of bytes to dbl align)
1: ldub [%o1], %o5 ! byte compare loop
inc %o1
inc %o0
dec %o2
cmp %o4, %o5
bne %ncc, .noteq
.bytcmp:
deccc %o3
bgeu,a %ncc, 1b
ldub [%o0], %o4
! Check to see if there are more bytes to compare
cmp %o2, 0 ! is o2 > 0
bgu %ncc, .dwcmp ! we should already be dbl aligned
nop
.cmpeq:
retl ! strings compare equal
sub %g0, %g0, %o0
.noteq:
retl ! strings aren't equal
sub %o4, %o5, %o0 ! return(*s1 - *s2)
! double word compare - using ldd and faligndata. Compares upto
! 8 byte multiple count and does byte compare for the residual.
.dwcmp:
prefetch [%o0 + (2 * BLOCK_SIZE)], #one_read
prefetch [%o1 + (2 * BLOCK_SIZE)], #one_read
! if fprs.fef == 0, set it. Checking it, reqires 2 instructions.
! So set it anyway, without checking.
rd %fprs, %o3 ! o3 = fprs
wr %g0, 0x4, %fprs ! fprs.fef = 1
andn %o2, 7, %o4 ! o4 has 8 byte aligned cnt
sub %o4, 8, %o4
alignaddr %o1, %g0, %g1
ldd [%g1], %d0
4:
add %g1, 8, %g1
ldd [%g1], %d2
ldd [%o0], %d6
prefetch [%g1 + (3 * BLOCK_SIZE)], #one_read
prefetch [%o0 + (3 * BLOCK_SIZE)], #one_read
faligndata %d0, %d2, %d8
fcmpne32 %d6, %d8, %o5
fsrc1 %d6, %d6 ! 2 fsrc1's added since o5 cannot
fsrc1 %d8, %d8 ! be used for 3 cycles else we
fmovd %d2, %d0 ! create 9 bubbles in the pipeline
brnz,a,pn %o5, 6f
sub %o1, %o0, %o1 ! o1 gets the difference
subcc %o4, 8, %o4
add %o0, 8, %o0
add %o1, 8, %o1
bgu,pt %ncc, 4b
sub %o2, 8, %o2
.residcmp:
ba 6f
sub %o1, %o0, %o1 ! o1 gets the difference
5: ldub [%o0 + %o1], %o5 ! byte compare loop
inc %o0
cmp %o4, %o5
bne %ncc, .dnoteq
6:
deccc %o2
bgeu,a %ncc, 5b
ldub [%o0], %o4
and %o3, 0x4, %o3 ! fprs.du = fprs.dl = 0
wr %o3, %g0, %fprs ! fprs = o3 - restore fprs
retl
sub %g0, %g0, %o0 ! strings compare equal
.dnoteq:
and %o3, 0x4, %o3 ! fprs.du = fprs.dl = 0
wr %o3, %g0, %fprs ! fprs = o3 - restore fprs
retl
sub %o4, %o5, %o0 ! return(*s1 - *s2)
SET_SIZE(memcmp)