memcmp.s revision 2
2N/A * The contents of this file are subject to the terms of the 2N/A * Common Development and Distribution License (the "License"). 2N/A * You may not use this file except in compliance with the License. 2N/A * See the License for the specific language governing permissions 2N/A * and limitations under the License. 2N/A * When distributing Covered Code, include this CDDL HEADER in each 2N/A * If applicable, add the following below this CDDL HEADER, with the 2N/A * fields enclosed by brackets "[]" replaced with your own identifying 2N/A * information: Portions Copyright [yyyy] [name of copyright owner] 2N/A * Copyright (c) 1995, 2010, Oracle and/or its affiliates. All rights reserved. 2N/A * memcmp(s1, s2, len) 2N/A * Compare n bytes: s1>s2: >0 s1==s2: 0 s1<s2: <0 2N/A * Fast assembler language version of the following C-program for memcmp 2N/A * which represents the `standard' for the C-library. 2N/A * memcmp(const void *s1, const void *s2, size_t n) 2N/A * if (s1 != s2 && n != 0) { 2N/A * const char *ps1 = s1; 2N/A * const char *ps2 = s2; 2N/A * if (*ps1++ != *ps2++) 2N/A * return(ps1[-1] - ps2[-1]); 2N/A * } while (--n != 0); 2N/A sub %o4, %o5, %o0 ! return(*s1 - *s2) 2N/A ! Now src1 is Double word aligned 2N/A bgeu,a %ncc, blkcmp ! do block cmp 2N/A andcc %o0, 63, %o3 ! is src1 block aligned 2N/A ! double word compare - using ldd and faligndata. Compares up to 2N/A ! 8 byte multiple count and does byte compare for the residual. 2N/A rd %fprs, %o3 ! o3 = fprs 2N/A ! if fprs.fef == 0, set it. Checking it requires 2 instructions. 2N/A ! So set it anyway, without checking. 2N/A wr %g0, 0x4, %fprs ! fprs.fef = 1 2N/A andn %o2, 7, %o4 ! o4 has 8 byte aligned cnt 2N/A alignaddr %o1, %g0, %g1 2N/A faligndata %d0, %d2, %d8 2N/A fcmpne32 %d6, %d8, %o5 2N/A fsrc1 %d8, %d8 ! be used for 3 cycles else we 2N/A fmovd %d2, %d0 ! create 9 bubbles in the pipeline 2N/A brnz,a,pn %i5, .remcmp 2N/A sub %i1, %i0, %i1 ! i1 gets the difference 2N/A ! 
src1 is block aligned 2N/A srl %i1, 3, %l6 ! bits 3,4,5 are now least sig in %l6 2N/A andcc %l6, 7, %l6 ! mask everything except bits 0, 1, 2 2N/A andn %i2, 63, %i3 ! calc number of blocks 2N/A alignaddr %i1, %g0, %g0 ! gen %gsr 2N/A andn %i1, 0x3F, %l7 ! blk aligned address 2N/A andn %l2, 7, %i4 ! calc doubles left after blkcmp 2N/A be,a %ncc, 1f ! branch taken if src2 is 64-byte aligned 2N/A ldda [%l7]ASI_BLK_P, %d0 2N/A call .+8 ! get the address of this instruction in %o7 2N/A jmp %o7 + 16 ! jump to the starting ldd instruction 2N/A ldda [%l7]ASI_BLK_P, %d16 2N/A ldda [%i0]ASI_BLK_P, %d32 2N/A ! switch statement to get us to the right 8 byte blk within a 2N/A! The fsrc1 instructions are to make sure that the results of the fcmpne32 2N/A! are used 3 cycles later - else spitfire adds 9 bubbles. 2N/A#define FCMPNE32_D32_D48 \ 2N/A fcmpne32 %d48, %d32, %l0 ;\ 2N/A fcmpne32 %d50, %d34, %l1 ;\ 2N/A fcmpne32 %d52, %d36, %l2 ;\ 2N/A fcmpne32 %d54, %d38, %l3 ;\ 2N/A fcmpne32 %d56, %d40, %l0 ;\ 2N/A fcmpne32 %d58, %d42, %l1 ;\ 2N/A fcmpne32 %d60, %d44, %l2 ;\ 2N/A fcmpne32 %d62, %d46, %l3 ;\ 2N/A faligndata %d0, %d2, %d48 ;\ 2N/A faligndata %d2, %d4, %d50 ;\ 2N/A faligndata %d4, %d6, %d52 ;\ 2N/A faligndata %d6, %d8, %d54 ;\ 2N/A faligndata %d8, %d10, %d56 ;\ 2N/A faligndata %d10, %d12, %d58 ;\ 2N/A faligndata %d12, %d14, %d60 ;\ 2N/A faligndata %d14, %d16, %d62 2N/A faligndata %d16, %d18, %d48 ;\ 2N/A faligndata %d18, %d20, %d50 ;\ 2N/A faligndata %d20, %d22, %d52 ;\ 2N/A faligndata %d22, %d24, %d54 ;\ 2N/A faligndata %d24, %d26, %d56 ;\ 2N/A faligndata %d26, %d28, %d58 ;\ 2N/A faligndata %d28, %d30, %d60 ;\ 2N/A faligndata %d30, %d0, %d62 2N/A ldda [%l7]ASI_BLK_P, %d0 2N/A ldda [%i0]ASI_BLK_P, %d32 2N/A ldda [%l7]ASI_BLK_P, %d16 2N/A ldda [%i0]ASI_BLK_P, %d32 2N/A ldda [%i0]ASI_BLK_P, %d32 2N/A ldda [%i0]ASI_BLK_P, %d32 2N/A faligndata %d2, %d4, %d48 ;\ 2N/A faligndata %d4, %d6, %d50 ;\ 2N/A faligndata %d6, %d8, %d52 ;\ 2N/A faligndata %d8, %d10, %d54 ;\ 2N/A faligndata 
%d10, %d12, %d56 ;\ 2N/A faligndata %d12, %d14, %d58 ;\ 2N/A faligndata %d14, %d16, %d60 ;\ 2N/A faligndata %d16, %d18, %d62 2N/A faligndata %d18, %d20, %d48 ;\ 2N/A faligndata %d20, %d22, %d50 ;\ 2N/A faligndata %d22, %d24, %d52 ;\ 2N/A faligndata %d24, %d26, %d54 ;\ 2N/A faligndata %d26, %d28, %d56 ;\ 2N/A faligndata %d28, %d30, %d58 ;\ 2N/A faligndata %d30, %d0, %d60 ;\ 2N/A faligndata %d0, %d2, %d62 2N/A ldda [%l7]ASI_BLK_P, %d0 2N/A ldda [%i0]ASI_BLK_P, %d32 2N/A ldda [%l7]ASI_BLK_P, %d16 2N/A ldda [%i0]ASI_BLK_P, %d32 2N/A ldda [%i0]ASI_BLK_P, %d32 2N/A ldda [%i0]ASI_BLK_P, %d32 2N/A faligndata %d4, %d6, %d48 ;\ 2N/A faligndata %d6, %d8, %d50 ;\ 2N/A faligndata %d8, %d10, %d52 ;\ 2N/A faligndata %d10, %d12, %d54 ;\ 2N/A faligndata %d12, %d14, %d56 ;\ 2N/A faligndata %d14, %d16, %d58 ;\ 2N/A faligndata %d16, %d18, %d60 ;\ 2N/A faligndata %d18, %d20, %d62 2N/A faligndata %d20, %d22, %d48 ;\ 2N/A faligndata %d22, %d24, %d50 ;\ 2N/A faligndata %d24, %d26, %d52 ;\ 2N/A faligndata %d26, %d28, %d54 ;\ 2N/A faligndata %d28, %d30, %d56 ;\ 2N/A faligndata %d30, %d0, %d58 ;\ 2N/A faligndata %d0, %d2, %d60 ;\ 2N/A faligndata %d2, %d4, %d62 2N/A ldda [%l7]ASI_BLK_P, %d0 2N/A ldda [%i0]ASI_BLK_P, %d32 2N/A ldda [%l7]ASI_BLK_P, %d16 2N/A ldda [%i0]ASI_BLK_P, %d32 2N/A ldda [%i0]ASI_BLK_P, %d32 2N/A ldda [%i0]ASI_BLK_P, %d32 2N/A faligndata %d6, %d8, %d48 ;\ 2N/A faligndata %d8, %d10, %d50 ;\ 2N/A faligndata %d10, %d12, %d52 ;\ 2N/A faligndata %d12, %d14, %d54 ;\ 2N/A faligndata %d14, %d16, %d56 ;\ 2N/A faligndata %d16, %d18, %d58 ;\ 2N/A faligndata %d18, %d20, %d60 ;\ 2N/A faligndata %d20, %d22, %d62 2N/A faligndata %d22, %d24, %d48 ;\ 2N/A faligndata %d24, %d26, %d50 ;\ 2N/A faligndata %d26, %d28, %d52 ;\ 2N/A faligndata %d28, %d30, %d54 ;\ 2N/A faligndata %d30, %d0, %d56 ;\ 2N/A faligndata %d0, %d2, %d58 ;\ 2N/A faligndata %d2, %d4, %d60 ;\ 2N/A faligndata %d4, %d6, %d62 2N/A ldda [%l7]ASI_BLK_P, %d0 2N/A ldda [%i0]ASI_BLK_P, %d32 2N/A ldda [%l7]ASI_BLK_P, %d16 2N/A ldda 
[%i0]ASI_BLK_P, %d32 2N/A ldda [%i0]ASI_BLK_P, %d32 2N/A ldda [%i0]ASI_BLK_P, %d32 2N/A faligndata %d8, %d10, %d48 ;\ 2N/A faligndata %d10, %d12, %d50 ;\ 2N/A faligndata %d12, %d14, %d52 ;\ 2N/A faligndata %d14, %d16, %d54 ;\ 2N/A faligndata %d16, %d18, %d56 ;\ 2N/A faligndata %d18, %d20, %d58 ;\ 2N/A faligndata %d20, %d22, %d60 ;\ 2N/A faligndata %d22, %d24, %d62 2N/A faligndata %d24, %d26, %d48 ;\ 2N/A faligndata %d26, %d28, %d50 ;\ 2N/A faligndata %d28, %d30, %d52 ;\ 2N/A faligndata %d30, %d0, %d54 ;\ 2N/A faligndata %d0, %d2, %d56 ;\ 2N/A faligndata %d2, %d4, %d58 ;\ 2N/A faligndata %d4, %d6, %d60 ;\ 2N/A faligndata %d6, %d8, %d62 2N/A ldda [%l7]ASI_BLK_P, %d0 2N/A ldda [%i0]ASI_BLK_P, %d32 2N/A ldda [%l7]ASI_BLK_P, %d16 2N/A ldda [%i0]ASI_BLK_P, %d32 2N/A ldda [%i0]ASI_BLK_P, %d32 2N/A ldda [%i0]ASI_BLK_P, %d32 2N/A faligndata %d10, %d12, %d48 ;\ 2N/A faligndata %d12, %d14, %d50 ;\ 2N/A faligndata %d14, %d16, %d52 ;\ 2N/A faligndata %d16, %d18, %d54 ;\ 2N/A faligndata %d18, %d20, %d56 ;\ 2N/A faligndata %d20, %d22, %d58 ;\ 2N/A faligndata %d22, %d24, %d60 ;\ 2N/A faligndata %d24, %d26, %d62 2N/A faligndata %d26, %d28, %d48 ;\ 2N/A faligndata %d28, %d30, %d50 ;\ 2N/A faligndata %d30, %d0, %d52 ;\ 2N/A faligndata %d0, %d2, %d54 ;\ 2N/A faligndata %d2, %d4, %d56 ;\ 2N/A faligndata %d4, %d6, %d58 ;\ 2N/A faligndata %d6, %d8, %d60 ;\ 2N/A faligndata %d8, %d10, %d62 2N/A ldda [%l7]ASI_BLK_P, %d0 2N/A ldda [%i0]ASI_BLK_P, %d32 2N/A ldda [%l7]ASI_BLK_P, %d16 2N/A ldda [%i0]ASI_BLK_P, %d32 2N/A ldda [%i0]ASI_BLK_P, %d32 2N/A ldda [%i0]ASI_BLK_P, %d32 2N/A faligndata %d12, %d14, %d48 ;\ 2N/A faligndata %d14, %d16, %d50 ;\ 2N/A faligndata %d16, %d18, %d52 ;\ 2N/A faligndata %d18, %d20, %d54 ;\ 2N/A faligndata %d20, %d22, %d56 ;\ 2N/A faligndata %d22, %d24, %d58 ;\ 2N/A faligndata %d24, %d26, %d60 ;\ 2N/A faligndata %d26, %d28, %d62 2N/A faligndata %d28, %d30, %d48 ;\ 2N/A faligndata %d30, %d0, %d50 ;\ 2N/A faligndata %d0, %d2, %d52 ;\ 2N/A faligndata %d2, %d4, %d54 ;\ 
2N/A faligndata %d4, %d6, %d56 ;\ 2N/A faligndata %d6, %d8, %d58 ;\ 2N/A faligndata %d8, %d10, %d60 ;\ 2N/A faligndata %d10, %d12, %d62 2N/A ldda [%l7]ASI_BLK_P, %d0 2N/A ldda [%i0]ASI_BLK_P, %d32 2N/A ldda [%l7]ASI_BLK_P, %d16 2N/A ldda [%i0]ASI_BLK_P, %d32 2N/A ldda [%i0]ASI_BLK_P, %d32 2N/A ldda [%i0]ASI_BLK_P, %d32 2N/A faligndata %d14, %d16, %d48 ;\ 2N/A faligndata %d16, %d18, %d50 ;\ 2N/A faligndata %d18, %d20, %d52 ;\ 2N/A faligndata %d20, %d22, %d54 ;\ 2N/A faligndata %d22, %d24, %d56 ;\ 2N/A faligndata %d24, %d26, %d58 ;\ 2N/A faligndata %d26, %d28, %d60 ;\ 2N/A faligndata %d28, %d30, %d62 2N/A faligndata %d30, %d0, %d48 ;\ 2N/A faligndata %d0, %d2, %d50 ;\ 2N/A faligndata %d2, %d4, %d52 ;\ 2N/A faligndata %d4, %d6, %d54 ;\ 2N/A faligndata %d6, %d8, %d56 ;\ 2N/A faligndata %d8, %d10, %d58 ;\ 2N/A faligndata %d10, %d12, %d60 ;\ 2N/A faligndata %d12, %d14, %d62 2N/A ldda [%l7]ASI_BLK_P, %d0 2N/A ldda [%i0]ASI_BLK_P, %d32 2N/A ldda [%l7]ASI_BLK_P, %d16 2N/A ldda [%i0]ASI_BLK_P, %d32 2N/A ldda [%i0]ASI_BLK_P, %d32 2N/A ldda [%i0]ASI_BLK_P, %d32 2N/A blu,a,pn %ncc, .remcmp 2N/A sub %i1, %i0, %i1 ! i1 gets the difference 2N/A faligndata %d0, %d2, %d48 2N/A fcmpne32 %d32, %d48, %l1 2N/A sub %i1, %i0, %i1 ! i1 gets the difference 2N/A blu,a,pn %ncc, .remcmp 2N/A sub %i1, %i0, %i1 ! i1 gets the difference 2N/A faligndata %d2, %d4, %d48 2N/A fcmpne32 %d32, %d48, %l1 2N/A sub %i1, %i0, %i1 ! i1 gets the difference 2N/A blu,a,pn %ncc, .remcmp 2N/A sub %i1, %i0, %i1 ! i1 gets the difference 2N/A faligndata %d4, %d6, %d48 2N/A fcmpne32 %d32, %d48, %l1 2N/A sub %i1, %i0, %i1 ! i1 gets the difference 2N/A blu,a,pn %ncc, .remcmp 2N/A sub %i1, %i0, %i1 ! i1 gets the difference 2N/A faligndata %d6, %d8, %d48 2N/A fcmpne32 %d32, %d48, %l1 2N/A sub %i1, %i0, %i1 ! i1 gets the difference 2N/A blu,a,pn %ncc, .remcmp 2N/A sub %i1, %i0, %i1 ! i1 gets the difference 2N/A faligndata %d8, %d10, %d48 2N/A fcmpne32 %d32, %d48, %l1 2N/A sub %i1, %i0, %i1 ! 
i1 gets the difference 2N/A blu,a,pn %ncc, .remcmp 2N/A sub %i1, %i0, %i1 ! i1 gets the difference 2N/A faligndata %d10, %d12, %d48 2N/A fcmpne32 %d32, %d48, %l1 2N/A sub %i1, %i0, %i1 ! i1 gets the difference 2N/A blu,a,pn %ncc, .remcmp 2N/A sub %i1, %i0, %i1 ! i1 gets the difference 2N/A faligndata %d12, %d14, %d48 2N/A fcmpne32 %d32, %d48, %l1 2N/A sub %i1, %i0, %i1 ! i1 gets the difference 2N/A blu,a,pn %ncc, .remcmp 2N/A sub %i1, %i0, %i1 ! i1 gets the difference 2N/A blu,a,pn %ncc, .remcmp 2N/A sub %i1, %i0, %i1 ! i1 gets the difference 2N/A faligndata %d16, %d18, %d48 2N/A fcmpne32 %d32, %d48, %l1 2N/A sub %i1, %i0, %i1 ! i1 gets the difference 2N/A blu,a,pn %ncc, .remcmp 2N/A sub %i1, %i0, %i1 ! i1 gets the difference 2N/A faligndata %d18, %d20, %d48 2N/A fcmpne32 %d32, %d48, %l1 2N/A sub %i1, %i0, %i1 ! i1 gets the difference 2N/A blu,a,pn %ncc, .remcmp 2N/A sub %i1, %i0, %i1 ! i1 gets the difference 2N/A faligndata %d20, %d22, %d48 2N/A fcmpne32 %d32, %d48, %l1 2N/A sub %i1, %i0, %i1 ! i1 gets the difference 2N/A blu,a,pn %ncc, .remcmp 2N/A sub %i1, %i0, %i1 ! i1 gets the difference 2N/A faligndata %d22, %d24, %d48 2N/A fcmpne32 %d32, %d48, %l1 2N/A sub %i1, %i0, %i1 ! i1 gets the difference 2N/A blu,a,pn %ncc, .remcmp 2N/A sub %i1, %i0, %i1 ! i1 gets the difference 2N/A faligndata %d24, %d26, %d48 2N/A fcmpne32 %d32, %d48, %l1 2N/A sub %i1, %i0, %i1 ! i1 gets the difference 2N/A blu,a,pn %ncc, .remcmp 2N/A sub %i1, %i0, %i1 ! i1 gets the difference 2N/A faligndata %d26, %d28, %d48 2N/A fcmpne32 %d32, %d48, %l1 2N/A sub %i1, %i0, %i1 ! i1 gets the difference 2N/A blu,a,pn %ncc, .remcmp 2N/A sub %i1, %i0, %i1 ! i1 gets the difference 2N/A faligndata %d28, %d30, %d48 2N/A fcmpne32 %d32, %d48, %l1 2N/A sub %i1, %i0, %i1 ! i1 gets the difference 2N/A blu,a,pn %ncc, .remcmp 2N/A sub %i1, %i0, %i1 ! i1 gets the difference 2N/A ! This loop handles doubles remaining that were not loaded(ldda`ed) 2N/A ! 
in the Block Compare loop 2N/A faligndata %d0, %d2, %d8 2N/A fcmpne32 %d32, %d8, %l1 2N/A sub %i1, %i0, %i1 ! i1 gets the difference 2N/A bgeu,pt %ncc, blkleft 2N/A sub %i1, %i0, %i1 ! i1 gets the difference 2N/A6: ldub [%i0 + %i1], %i5 ! byte compare loop 2N/A and %l5, 0x4, %l5 ! fprs.du = fprs.dl = 0 2N/A wr %l5, %g0, %fprs ! fprs = l5 - restore fprs 2N/A membar #StoreLoad|#StoreStore 2N/A restore %g0, %g0, %o0 2N/A and %l5, 0x4, %l5 ! fprs.du = fprs.dl = 0 2N/A wr %l5, %g0, %fprs ! fprs = l5 - restore fprs 2N/A membar #StoreLoad|#StoreStore 2N/A sub %i4, %i5, %i0 ! return(*s1 - *s2)