2N/A/*
2N/A * CDDL HEADER START
2N/A *
2N/A * The contents of this file are subject to the terms of the
2N/A * Common Development and Distribution License (the "License").
2N/A * You may not use this file except in compliance with the License.
2N/A *
2N/A * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
2N/A * or http://www.opensolaris.org/os/licensing.
2N/A * See the License for the specific language governing permissions
2N/A * and limitations under the License.
2N/A *
2N/A * When distributing Covered Code, include this CDDL HEADER in each
2N/A * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
2N/A * If applicable, add the following below this CDDL HEADER, with the
2N/A * fields enclosed by brackets "[]" replaced with your own identifying
2N/A * information: Portions Copyright [yyyy] [name of copyright owner]
2N/A *
2N/A * CDDL HEADER END
2N/A */
2N/A
2N/A/*
2N/A * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
2N/A */
2N/A
2N/A .file "memcmp.s"
2N/A
2N/A/*
2N/A * memcmp(s1, s2, n)
2N/A *
2N/A * Compare the first n bytes of s1 and s2, treating each byte as unsigned.
2N/A * Returns: s1>s2: >0 s1==s2: 0 s1<s2: <0
2N/A *
2N/A * Fast assembler language version of the following C-program for memcmp
2N/A * which represents the `standard' for the C-library.
2N/A *
2N/A * int
2N/A * memcmp(const void *s1, const void *s2, size_t n)
2N/A * {
2N/A * if (s1 != s2 && n != 0) {
2N/A * const unsigned char *ps1 = s1;
2N/A * const unsigned char *ps2 = s2;
2N/A * do {
2N/A * if (*ps1++ != *ps2++)
2N/A * return(ps1[-1] - ps2[-1]);
2N/A * } while (--n != 0);
2N/A * }
2N/A * return (0);
2N/A * }
2N/A */
2N/A
2N/A#include <sys/asm_linkage.h>
2N/A#include <sys/machasi.h>
2N/A
2N/A#define BLOCK_SIZE 64 /* byte stride between the prefetch offsets used below */
2N/A
2N/A ANSI_PRAGMA_WEAK(memcmp,function)
2N/A
/*
 * memcmp -- compare the first n bytes at s1 and s2.
 *
 * Returns 0 when the regions are equal (or s1 == s2, or n == 0); otherwise
 * returns (byte from s1) - (byte from s2) for the first differing position.
 * Both bytes are loaded with ldub, so the difference is of unsigned values.
 *
 * Register use, as read from the code in this routine:
 *   %o0      s1 on entry, advanced as bytes are consumed; result on return
 *   %o1      s2 on entry; from the final byte loop on it holds (s2 - s1),
 *            so that [%o0 + %o1] addresses the s2 byte matching [%o0]
 *   %o2      number of bytes still to compare
 *   %o3      counter for the first byte loop; from .dwcmp on, the saved %fprs
 *   %o4      s1 byte in the byte loops; 8-byte loop counter inside .dwcmp
 *   %o5      s2 byte in the byte loops; fcmpne32 result inside .dwcmp
 *   %g1      s2 read pointer rounded down to an 8-byte boundary
 *   %d0-%d8  floating point registers used as 8-byte data buffers
 *
 * Flow:
 *   1. s1 == s2: return 0 at once.
 *   2. n <= 48: compare all n bytes in the byte loop at 1:/.bytcmp.
 *   3. n > 48: byte-compare 16 - (s1 & 7) bytes (9..16), which leaves s1
 *      8-byte aligned, then fall into .dwcmp.
 *   4. .dwcmp compares 8 bytes per pass. s2 may be misaligned, so two
 *      consecutive aligned doublewords of s2 are combined with faligndata.
 *      The loop runs one pass fewer than the number of whole doublewords
 *      remaining; since each pass loads the s2 doubleword after the one it
 *      compares, this presumably keeps that load inside the buffer.
 *   5. The byte loop at 5:/6: compares whatever is left. It is also the
 *      mismatch path for .dwcmp: the pointers and count have not yet been
 *      advanced past the differing doubleword, so the byte loop rescans it
 *      and produces the return value.
 *
 * %fprs is saved before FEF is set in .dwcmp; on both exits from that path
 * only the FEF bit of the saved value is written back.
 *
 * Several branches below rely on the instruction that follows them (their
 * delay slot) being the first instruction of the next label, and on the
 * ",a" (annul) form executing the delay slot only when the branch is taken.
 * Do not reorder instructions without re-checking those pairs.
 */
2N/A ENTRY(memcmp)
2N/A cmp %o0, %o1 ! s1 == s2?
2N/A be %ncc, .cmpeq /* identical pointers: return 0 */
2N/A prefetch [%o0], #one_read /* delay slot: issued whether or not the branch is taken */
2N/A prefetch [%o1], #one_read
2N/A
2N/A ! for small counts byte compare immediately
2N/A cmp %o2, 48
2N/A bleu,a %ncc, .bytcmp /* n <= 48: byte-compare everything */
2N/A mov %o2, %o3 ! o3 <= 48
2N/A
2N/A ! Count > 48. We will byte compare (8 + num of bytes to dbl align)
2N/A ! bytes. We assume that most miscompares will occur in the 1st 8 bytes
2N/A
2N/A prefetch [%o0 + (1 * BLOCK_SIZE)], #one_read
2N/A prefetch [%o1 + (1 * BLOCK_SIZE)], #one_read
2N/A
2N/A.chkdbl:
2N/A and %o0, 7, %o4 ! is s1 aligned on a 8 byte bound
2N/A mov 8, %o3 ! o2 > 48; o3 = 8
2N/A sub %o4, 8, %o4 ! o4 = -(num of bytes to dbl align)
2N/A ba %ncc, .bytcmp /* with the delay slot, o3 = 16 - (s1 & 7), i.e. 9..16 */
2N/A sub %o3, %o4, %o3 ! o3 = 8 + (num of bytes to dbl align)
2N/A
2N/A1: ldub [%o1], %o5 ! byte compare loop
2N/A inc %o1
2N/A inc %o0
2N/A dec %o2 /* one fewer byte remaining overall */
2N/A cmp %o4, %o5
2N/A bne %ncc, .noteq /* mismatch; the deccc at .bytcmp fills this delay slot */
2N/A.bytcmp:
2N/A deccc %o3 /* borrows (carry set) once o3 was already 0 */
2N/A bgeu,a %ncc, 1b /* no borrow: compare another byte */
2N/A ldub [%o0], %o4 /* annulled slot: s1 byte is loaded only when looping */
2N/A
2N/A ! Check to see if there are more bytes to compare
2N/A cmp %o2, 0 ! is o2 > 0
2N/A bgu %ncc, .dwcmp ! we should already be dbl aligned
2N/A nop
2N/A.cmpeq:
2N/A retl ! strings compare equal
2N/A sub %g0, %g0, %o0 /* delay slot: return value 0 */
2N/A
2N/A.noteq:
2N/A retl ! strings aren't equal
2N/A sub %o4, %o5, %o0 ! return(*s1 - *s2)
2N/A
2N/A
2N/A ! double word compare - using ldd and faligndata. Compares upto
2N/A ! 8 byte multiple count and does byte compare for the residual.
2N/A
2N/A.dwcmp:
2N/A prefetch [%o0 + (2 * BLOCK_SIZE)], #one_read
2N/A prefetch [%o1 + (2 * BLOCK_SIZE)], #one_read
2N/A
2N/A ! if fprs.fef == 0, set it. Checking it, reqires 2 instructions.
2N/A ! So set it anyway, without checking.
2N/A rd %fprs, %o3 ! o3 = fprs
2N/A wr %g0, 0x4, %fprs ! fprs.fef = 1
2N/A
2N/A andn %o2, 7, %o4 ! o4 has 8 byte aligned cnt
2N/A sub %o4, 8, %o4 /* hold the last whole doubleword back for the byte loop */
2N/A alignaddr %o1, %g0, %g1 /* g1 = s2 rounded down to 8 bytes; also sets the faligndata offset */
2N/A ldd [%g1], %d0 /* prime d0 with the first aligned doubleword of s2 */
2N/A4:
2N/A add %g1, 8, %g1
2N/A ldd [%g1], %d2 /* next aligned doubleword of s2 */
2N/A ldd [%o0], %d6 /* 8 bytes of s1 (s1 is 8-byte aligned here) */
2N/A prefetch [%g1 + (3 * BLOCK_SIZE)], #one_read
2N/A prefetch [%o0 + (3 * BLOCK_SIZE)], #one_read
2N/A faligndata %d0, %d2, %d8 /* d8 = the 8 bytes of s2 that line up with d6 */
2N/A fcmpne32 %d6, %d8, %o5 /* o5 != 0 when the two doublewords differ */
2N/A fsrc1 %d6, %d6 ! 2 fsrc1's added since o5 cannot
2N/A fsrc1 %d8, %d8 ! be used for 3 cycles else we
2N/A fmovd %d2, %d0 ! create 9 bubbles in the pipeline
2N/A brnz,a,pn %o5, 6f /* differ: byte loop rescans this doubleword */
2N/A sub %o1, %o0, %o1 ! o1 gets the difference
2N/A subcc %o4, 8, %o4 /* count down the 8-byte loop */
2N/A add %o0, 8, %o0
2N/A add %o1, 8, %o1
2N/A bgu,pt %ncc, 4b /* continue while o4 is still above 0 */
2N/A sub %o2, 8, %o2 /* delay slot (not annulled): 8 fewer bytes remaining */
2N/A
2N/A.residcmp:
2N/A ba 6f /* enter the byte loop at its count test */
2N/A sub %o1, %o0, %o1 ! o1 gets the difference
2N/A
2N/A5: ldub [%o0 + %o1], %o5 ! byte compare loop
2N/A inc %o0
2N/A cmp %o4, %o5
2N/A bne %ncc, .dnoteq /* mismatch; the deccc at 6: fills this delay slot */
2N/A6:
2N/A deccc %o2 /* borrows (carry set) once o2 was already 0 */
2N/A bgeu,a %ncc, 5b /* no borrow: compare another byte */
2N/A ldub [%o0], %o4 /* annulled slot: s1 byte is loaded only when looping */
2N/A
2N/A and %o3, 0x4, %o3 ! fprs.du = fprs.dl = 0
2N/A wr %o3, %g0, %fprs ! fprs = o3 - restore fprs
2N/A retl
2N/A sub %g0, %g0, %o0 ! strings compare equal
2N/A
2N/A.dnoteq:
2N/A and %o3, 0x4, %o3 ! fprs.du = fprs.dl = 0
2N/A wr %o3, %g0, %fprs ! fprs = o3 - restore fprs
2N/A retl
2N/A sub %o4, %o5, %o0 ! return(*s1 - *s2)
2N/A
2N/A SET_SIZE(memcmp)