/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2009, Intel Corporation
* All rights reserved.
*/
/*
* str[n]cmp - compare chars between two string
*/
#include "SYS.h"
#include "proc64_id.h"
#define LABEL(s) .strcmp/**/s
#ifdef USE_AS_STRNCMP
/*
* Since the counter, %r11, is unsigned, we branch to strcmp_exitz
* if the new counter > the old one or is 0.
*/
#define UPDATE_STRNCMP_COUNTER \
/* calculate left number to compare */ \
lea -16(%rcx, %r11), %r9; \
cmp %r9, %r11; \
jb LABEL(strcmp_exitz); \
test %r9, %r9; \
je LABEL(strcmp_exitz); \
mov %r9, %r11
#else
#define UPDATE_STRNCMP_COUNTER
#endif
/*
* This implementation uses SSE to compare up to 16 bytes at a time.
*/
#ifdef USE_AS_STRNCMP
ENTRY(strncmp)
test %rdx, %rdx
je LABEL(strcmp_exitz)
mov %rdx, %r11
#else
ENTRY(strcmp) /* (const char *, const char *) */
#endif
mov %esi, %ecx
mov %edi, %eax
and $0x3f, %rcx /* rsi alignment in cache line */
and $0x3f, %rax /* rdi alignment in cache line */
cmp $0x30, %ecx
ja LABEL(crosscache) /* rsi: 16-byte load will cross cache line */
cmp $0x30, %eax
ja LABEL(crosscache) /* rdi: 16-byte load will cross cache line */
movlpd (%rdi), %xmm1
movlpd (%rsi), %xmm2
movhpd 8(%rdi), %xmm1
movhpd 8(%rsi), %xmm2
pxor %xmm0, %xmm0 /* clear %xmm0 for null char checks */
pcmpeqb %xmm1, %xmm0 /* Any null chars? */
pcmpeqb %xmm2, %xmm1 /* compare first 16 bytes for equality */
psubb %xmm0, %xmm1 /* packed sub of comparison results*/
pmovmskb %xmm1, %edx
sub $0xffff, %edx /* if first 16 bytes are same, edx == 0xffff */
jnz LABEL(less16bytes) /* If not, found mismatch or null char */
#ifdef USE_AS_STRNCMP
sub $16, %r11
jbe LABEL(strcmp_exitz) /* finish comparision */
#endif
add $16, %rsi /* prepare to search next 16 bytes */
add $16, %rdi /* prepare to search next 16 bytes */
/*
* Determine rdi and rsi string offsets from 16-byte alignment.
* Use relative offset difference between the two to determine which case
* below to use.
*/
.p2align 4
LABEL(crosscache):
and $0xfffffffffffffff0, %rsi /* force %rsi to be 16 byte aligned */
and $0xfffffffffffffff0, %rdi /* force %rdi to be 16 byte aligned */
mov $0xffff, %edx /* for equivalent offset */
xor %r8d, %r8d
and $0xf, %ecx /* offset of rsi */
and $0xf, %eax /* offset of rdi */
cmp %eax, %ecx
je LABEL(ashr_0) /* both strings have the same alignment */
ja LABEL(bigger)
mov %edx, %r8d /* r8d is offset flag for exit tail */
xchg %ecx, %eax
xchg %rsi, %rdi
LABEL(bigger):
mov %rcx, %r9
sub %rax, %r9
lea LABEL(unaligned_table)(%rip), %r10
movslq (%r10, %r9, 4), %r9
lea (%r10, %r9), %r10
jmp *%r10 /* jump to corresponding case */
/*
* ashr_0 handles the following cases:
* str1 offset = str2 offset
*/
.p2align 4
LABEL(ashr_0):
movdqa (%rsi), %xmm1
pxor %xmm0, %xmm0 /* clear %xmm0 for null char check */
pcmpeqb %xmm1, %xmm0 /* Any null chars? */
pcmpeqb (%rdi), %xmm1 /* compare 16 bytes for equality */
psubb %xmm0, %xmm1 /* packed sub of comparison results*/
pmovmskb %xmm1, %r9d
shr %cl, %edx /* adjust 0xffff for offset */
shr %cl, %r9d /* adjust for 16-byte offset */
sub %r9d, %edx
/*
* edx must be the same with r9d if in left byte (16-rcx) is equal to
* the start from (16-rax) and no null char was seen.
*/
jne LABEL(less32bytes) /* mismatch or null char */
UPDATE_STRNCMP_COUNTER
mov $16, %rcx
mov $16, %r9
pxor %xmm0, %xmm0 /* clear xmm0, may have changed above */
/*
* Now both strings are aligned at 16-byte boundary. Loop over strings
* checking 32-bytes per iteration.
*/
.p2align 4
LABEL(loop_ashr_0):
movdqa (%rsi, %rcx), %xmm1
movdqa (%rdi, %rcx), %xmm2
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
psubb %xmm0, %xmm1
pmovmskb %xmm1, %edx
sub $0xffff, %edx
jnz LABEL(exit) /* mismatch or null char seen */
#ifdef USE_AS_STRNCMP
sub $16, %r11
jbe LABEL(strcmp_exitz)
#endif
add $16, %rcx
movdqa (%rsi, %rcx), %xmm1
movdqa (%rdi, %rcx), %xmm2
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
psubb %xmm0, %xmm1
pmovmskb %xmm1, %edx
sub $0xffff, %edx
jnz LABEL(exit)
#ifdef USE_AS_STRNCMP
sub $16, %r11
jbe LABEL(strcmp_exitz)
#endif
add $16, %rcx
jmp LABEL(loop_ashr_0)
/*
* ashr_1 handles the following cases:
* abs(str1 offset - str2 offset) = 15
*/
.p2align 4
LABEL(ashr_1):
pxor %xmm0, %xmm0
movdqa (%rdi), %xmm2
movdqa (%rsi), %xmm1
pcmpeqb %xmm1, %xmm0 /* Any null chars? */
pslldq $15, %xmm2 /* shift first string to align with second */
pcmpeqb %xmm1, %xmm2 /* compare 16 bytes for equality */
psubb %xmm0, %xmm2 /* packed sub of comparison results*/
pmovmskb %xmm2, %r9d
shr %cl, %edx /* adjust 0xffff for offset */
shr %cl, %r9d /* adjust for 16-byte offset */
sub %r9d, %edx
jnz LABEL(less32bytes) /* mismatch or null char seen */
movdqa (%rdi), %xmm3
UPDATE_STRNCMP_COUNTER
pxor %xmm0, %xmm0
mov $16, %rcx /* index for loads */
mov $1, %r9d /* rdi bytes already examined. Used in exit code */
/*
* Setup %r10 value allows us to detect crossing a page boundary.
* When %r10 goes positive we are crossing a page boundary and
* need to do a nibble.
*/
lea 1(%rdi), %r10
and $0xfff, %r10 /* offset into 4K page */
sub $0x1000, %r10 /* subtract 4K pagesize */
movdqa %xmm3, %xmm4
.p2align 4
LABEL(loop_ashr_1):
add $16, %r10
jg LABEL(nibble_ashr_1) /* cross page boundary */
LABEL(gobble_ashr_1):
movdqa (%rsi, %rcx), %xmm1
movdqa (%rdi, %rcx), %xmm2
movdqa %xmm2, %xmm4 /* store for next cycle */
psrldq $1, %xmm3
pslldq $15, %xmm2
por %xmm3, %xmm2 /* merge into one 16byte value */
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
psubb %xmm0, %xmm1
pmovmskb %xmm1, %edx
sub $0xffff, %edx
jnz LABEL(exit)
#ifdef USE_AS_STRNCMP
sub $16, %r11
jbe LABEL(strcmp_exitz)
#endif
add $16, %rcx
movdqa %xmm4, %xmm3
add $16, %r10
jg LABEL(nibble_ashr_1) /* cross page boundary */
movdqa (%rsi, %rcx), %xmm1
movdqa (%rdi, %rcx), %xmm2
movdqa %xmm2, %xmm4 /* store for next cycle */
psrldq $1, %xmm3
pslldq $15, %xmm2
por %xmm3, %xmm2 /* merge into one 16byte value */
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
psubb %xmm0, %xmm1
pmovmskb %xmm1, %edx
sub $0xffff, %edx
jnz LABEL(exit)
#ifdef USE_AS_STRNCMP
sub $16, %r11
jbe LABEL(strcmp_exitz)
#endif
add $16, %rcx
movdqa %xmm4, %xmm3
jmp LABEL(loop_ashr_1)
/*
* Nibble avoids loads across page boundary. This is to avoid a potential
* access into unmapped memory.
*/
.p2align 4
LABEL(nibble_ashr_1):
psrldq $1, %xmm4
movdqa (%rsi, %rcx), %xmm1
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm4, %xmm1
psubb %xmm0, %xmm1
pmovmskb %xmm1, %edx
sub $0x7fff, %edx
jnz LABEL(exit)
#ifdef USE_AS_STRNCMP
cmp $15, %r11
jbe LABEL(strcmp_exitz)
#endif
pxor %xmm0, %xmm0
sub $0x1000, %r10 /* subtract 4K from %r10 */
jmp LABEL(gobble_ashr_1)
/*
* ashr_2 handles the following cases:
* abs(str1 offset - str2 offset) = 14
*/
.p2align 4
LABEL(ashr_2):
pxor %xmm0, %xmm0
movdqa (%rdi), %xmm2
movdqa (%rsi), %xmm1
pcmpeqb %xmm1, %xmm0
pslldq $14, %xmm2
pcmpeqb %xmm1, %xmm2
psubb %xmm0, %xmm2
pmovmskb %xmm2, %r9d
shr %cl, %edx
shr %cl, %r9d
sub %r9d, %edx
jnz LABEL(less32bytes)
movdqa (%rdi), %xmm3
UPDATE_STRNCMP_COUNTER
pxor %xmm0, %xmm0
mov $16, %rcx /* index for loads */
mov $2, %r9d /* rdi bytes already examined. Used in exit code */
/*
* Setup %r10 value allows us to detect crossing a page boundary.
* When %r10 goes positive we are crossing a page boundary and
* need to do a nibble.
*/
lea 2(%rdi), %r10
and $0xfff, %r10 /* offset into 4K page */
sub $0x1000, %r10 /* subtract 4K pagesize */
movdqa %xmm3, %xmm4
.p2align 4
LABEL(loop_ashr_2):
add $16, %r10
jg LABEL(nibble_ashr_2)
LABEL(gobble_ashr_2):
movdqa (%rsi, %rcx), %xmm1
movdqa (%rdi, %rcx), %xmm2
movdqa %xmm2, %xmm4
psrldq $2, %xmm3
pslldq $14, %xmm2
por %xmm3, %xmm2
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
psubb %xmm0, %xmm1
pmovmskb %xmm1, %edx
sub $0xffff, %edx
jnz LABEL(exit)
#ifdef USE_AS_STRNCMP
sub $16, %r11
jbe LABEL(strcmp_exitz)
#endif
add $16, %rcx
movdqa %xmm4, %xmm3
add $16, %r10
jg LABEL(nibble_ashr_2) /* cross page boundary */
movdqa (%rsi, %rcx), %xmm1
movdqa (%rdi, %rcx), %xmm2
movdqa %xmm2, %xmm4
psrldq $2, %xmm3
pslldq $14, %xmm2
por %xmm3, %xmm2
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
psubb %xmm0, %xmm1
pmovmskb %xmm1, %edx
sub $0xffff, %edx
jnz LABEL(exit)
#ifdef USE_AS_STRNCMP
sub $16, %r11
jbe LABEL(strcmp_exitz)
#endif
add $16, %rcx
movdqa %xmm4, %xmm3
jmp LABEL(loop_ashr_2)
.p2align 4
LABEL(nibble_ashr_2):
psrldq $2, %xmm4
movdqa (%rsi, %rcx), %xmm1
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm4, %xmm1
psubb %xmm0, %xmm1
pmovmskb %xmm1, %edx
sub $0x3fff, %edx
jnz LABEL(exit)
#ifdef USE_AS_STRNCMP
cmp $14, %r11
jbe LABEL(strcmp_exitz)
#endif
pxor %xmm0, %xmm0
sub $0x1000, %r10 /* subtract 4K from %r10 */
jmp LABEL(gobble_ashr_2)
/*
* ashr_3 handles the following cases:
* abs(str1 offset - str2 offset) = 13
*/
.p2align 4
LABEL(ashr_3):
pxor %xmm0, %xmm0
movdqa (%rdi), %xmm2
movdqa (%rsi), %xmm1
pcmpeqb %xmm1, %xmm0
pslldq $13, %xmm2
pcmpeqb %xmm1, %xmm2
psubb %xmm0, %xmm2
pmovmskb %xmm2, %r9d
shr %cl, %edx
shr %cl, %r9d
sub %r9d, %edx
jnz LABEL(less32bytes)
movdqa (%rdi), %xmm3
UPDATE_STRNCMP_COUNTER
pxor %xmm0, %xmm0
mov $16, %rcx /* index for loads */
mov $3, %r9d /* rdi bytes already examined. Used in exit code */
/*
* Setup %r10 value allows us to detect crossing a page boundary.
* When %r10 goes positive we are crossing a page boundary and
* need to do a nibble.
*/
lea 3(%rdi), %r10
and $0xfff, %r10 /* offset into 4K page */
sub $0x1000, %r10 /* subtract 4K pagesize */
movdqa %xmm3, %xmm4
.p2align 4
LABEL(loop_ashr_3):
add $16, %r10
jg LABEL(nibble_ashr_3)
LABEL(gobble_ashr_3):
movdqa (%rsi, %rcx), %xmm1
movdqa (%rdi, %rcx), %xmm2
movdqa %xmm2, %xmm4
psrldq $3, %xmm3
pslldq $13, %xmm2
por %xmm3, %xmm2
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
psubb %xmm0, %xmm1
pmovmskb %xmm1, %edx
sub $0xffff, %edx
jnz LABEL(exit)
#ifdef USE_AS_STRNCMP
sub $16, %r11
jbe LABEL(strcmp_exitz)
#endif
add $16, %rcx
movdqa %xmm4, %xmm3
add $16, %r10
jg LABEL(nibble_ashr_3) /* cross page boundary */
movdqa (%rsi, %rcx), %xmm1
movdqa (%rdi, %rcx), %xmm2
movdqa %xmm2, %xmm4
psrldq $3, %xmm3
pslldq $13, %xmm2
por %xmm3, %xmm2
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
psubb %xmm0, %xmm1
pmovmskb %xmm1, %edx
sub $0xffff, %edx
jnz LABEL(exit)
#ifdef USE_AS_STRNCMP
sub $16, %r11
jbe LABEL(strcmp_exitz)
#endif
add $16, %rcx
movdqa %xmm4, %xmm3
jmp LABEL(loop_ashr_3)
.p2align 4
LABEL(nibble_ashr_3):
psrldq $3, %xmm4
movdqa (%rsi, %rcx), %xmm1
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm4, %xmm1
psubb %xmm0, %xmm1
pmovmskb %xmm1, %edx
sub $0x1fff, %edx
jnz LABEL(exit)
#ifdef USE_AS_STRNCMP
cmp $13, %r11
jbe LABEL(strcmp_exitz)
#endif
pxor %xmm0, %xmm0
sub $0x1000, %r10 /* subtract 4K from %r10 */
jmp LABEL(gobble_ashr_3)
/*
* ashr_4 handles the following cases:
* abs(str1 offset - str2 offset) = 12
*/
.p2align 4
LABEL(ashr_4):
pxor %xmm0, %xmm0
movdqa (%rdi), %xmm2
movdqa (%rsi), %xmm1
pcmpeqb %xmm1, %xmm0
pslldq $12, %xmm2
pcmpeqb %xmm1, %xmm2
psubb %xmm0, %xmm2
pmovmskb %xmm2, %r9d
shr %cl, %edx
shr %cl, %r9d
sub %r9d, %edx
jnz LABEL(less32bytes)
movdqa (%rdi), %xmm3
UPDATE_STRNCMP_COUNTER
pxor %xmm0, %xmm0
mov $16, %rcx /* index for loads */
mov $4, %r9d /* rdi bytes already examined. Used in exit code */
/*
* Setup %r10 value allows us to detect crossing a page boundary.
* When %r10 goes positive we are crossing a page boundary and
* need to do a nibble.
*/
lea 4(%rdi), %r10
and $0xfff, %r10 /* offset into 4K page */
sub $0x1000, %r10 /* subtract 4K pagesize */
movdqa %xmm3, %xmm4
.p2align 4
LABEL(loop_ashr_4):
add $16, %r10
jg LABEL(nibble_ashr_4)
LABEL(gobble_ashr_4):
movdqa (%rsi, %rcx), %xmm1
movdqa (%rdi, %rcx), %xmm2
movdqa %xmm2, %xmm4
psrldq $4, %xmm3
pslldq $12, %xmm2
por %xmm3, %xmm2
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
psubb %xmm0, %xmm1
pmovmskb %xmm1, %edx
sub $0xffff, %edx
jnz LABEL(exit)
#ifdef USE_AS_STRNCMP
sub $16, %r11
jbe LABEL(strcmp_exitz)
#endif
add $16, %rcx
movdqa %xmm4, %xmm3
add $16, %r10
jg LABEL(nibble_ashr_4) /* cross page boundary */
movdqa (%rsi, %rcx), %xmm1
movdqa (%rdi, %rcx), %xmm2
movdqa %xmm2, %xmm4
psrldq $4, %xmm3
pslldq $12, %xmm2
por %xmm3, %xmm2
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
psubb %xmm0, %xmm1
pmovmskb %xmm1, %edx
sub $0xffff, %edx
jnz LABEL(exit)
#ifdef USE_AS_STRNCMP
sub $16, %r11
jbe LABEL(strcmp_exitz)
#endif
add $16, %rcx
movdqa %xmm4, %xmm3
jmp LABEL(loop_ashr_4)
.p2align 4
LABEL(nibble_ashr_4):
psrldq $4, %xmm4
movdqa (%rsi, %rcx), %xmm1
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm4, %xmm1
psubb %xmm0, %xmm1
pmovmskb %xmm1, %edx
sub $0x0fff, %edx
jnz LABEL(exit)
#ifdef USE_AS_STRNCMP
cmp $12, %r11
jbe LABEL(strcmp_exitz)
#endif
pxor %xmm0, %xmm0
sub $0x1000, %r10 /* subtract 4K from %r10 */
jmp LABEL(gobble_ashr_4)
/*
* ashr_5 handles the following cases:
* abs(str1 offset - str2 offset) = 11
*/
.p2align 4
LABEL(ashr_5):
pxor %xmm0, %xmm0
movdqa (%rdi), %xmm2
movdqa (%rsi), %xmm1
pcmpeqb %xmm1, %xmm0
pslldq $11, %xmm2
pcmpeqb %xmm1, %xmm2
psubb %xmm0, %xmm2
pmovmskb %xmm2, %r9d
shr %cl, %edx
shr %cl, %r9d
sub %r9d, %edx
jnz LABEL(less32bytes)
movdqa (%rdi), %xmm3
UPDATE_STRNCMP_COUNTER
pxor %xmm0, %xmm0
mov $16, %rcx /* index for loads */
mov $5, %r9d /* rdi bytes already examined. Used in exit code */
/*
* Setup %r10 value allows us to detect crossing a page boundary.
* When %r10 goes positive we are crossing a page boundary and
* need to do a nibble.
*/
lea 5(%rdi), %r10
and $0xfff, %r10 /* offset into 4K page */
sub $0x1000, %r10 /* subtract 4K pagesize */
movdqa %xmm3, %xmm4
.p2align 4
LABEL(loop_ashr_5):
add $16, %r10
jg LABEL(nibble_ashr_5)
LABEL(gobble_ashr_5):
movdqa (%rsi, %rcx), %xmm1
movdqa (%rdi, %rcx), %xmm2
movdqa %xmm2, %xmm4
psrldq $5, %xmm3
pslldq $11, %xmm2
por %xmm3, %xmm2
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
psubb %xmm0, %xmm1
pmovmskb %xmm1, %edx
sub $0xffff, %edx
jnz LABEL(exit)
#ifdef USE_AS_STRNCMP
sub $16, %r11
jbe LABEL(strcmp_exitz)
#endif
add $16, %rcx
movdqa %xmm4, %xmm3
add $16, %r10
jg LABEL(nibble_ashr_5) /* cross page boundary */
movdqa (%rsi, %rcx), %xmm1
movdqa (%rdi, %rcx), %xmm2
movdqa %xmm2, %xmm4
psrldq $5, %xmm3
pslldq $11, %xmm2
por %xmm3, %xmm2
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
psubb %xmm0, %xmm1
pmovmskb %xmm1, %edx
sub $0xffff, %edx
jnz LABEL(exit)
#ifdef USE_AS_STRNCMP
sub $16, %r11
jbe LABEL(strcmp_exitz)
#endif
add $16, %rcx
movdqa %xmm4, %xmm3
jmp LABEL(loop_ashr_5)
.p2align 4
LABEL(nibble_ashr_5):
psrldq $5, %xmm4
movdqa (%rsi, %rcx), %xmm1
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm4, %xmm1
psubb %xmm0, %xmm1
pmovmskb %xmm1, %edx
sub $0x07ff, %edx
jnz LABEL(exit)
#ifdef USE_AS_STRNCMP
cmp $11, %r11
jbe LABEL(strcmp_exitz)
#endif
pxor %xmm0, %xmm0
sub $0x1000, %r10 /* subtract 4K from %r10 */
jmp LABEL(gobble_ashr_5)
/*
* ashr_6 handles the following cases:
* abs(str1 offset - str2 offset) = 10
*/
.p2align 4
LABEL(ashr_6):
pxor %xmm0, %xmm0
movdqa (%rdi), %xmm2
movdqa (%rsi), %xmm1
pcmpeqb %xmm1, %xmm0
pslldq $10, %xmm2
pcmpeqb %xmm1, %xmm2
psubb %xmm0, %xmm2
pmovmskb %xmm2, %r9d
shr %cl, %edx
shr %cl, %r9d
sub %r9d, %edx
jnz LABEL(less32bytes)
movdqa (%rdi), %xmm3
UPDATE_STRNCMP_COUNTER
pxor %xmm0, %xmm0
mov $16, %rcx /* index for loads */
mov $6, %r9d /* rdi bytes already examined. Used in exit code */
/*
* Setup %r10 value allows us to detect crossing a page boundary.
* When %r10 goes positive we are crossing a page boundary and
* need to do a nibble.
*/
lea 6(%rdi), %r10
and $0xfff, %r10 /* offset into 4K page */
sub $0x1000, %r10 /* subtract 4K pagesize */
movdqa %xmm3, %xmm4
.p2align 4
LABEL(loop_ashr_6):
add $16, %r10
jg LABEL(nibble_ashr_6)
LABEL(gobble_ashr_6):
movdqa (%rsi, %rcx), %xmm1
movdqa (%rdi, %rcx), %xmm2
movdqa %xmm2, %xmm4
psrldq $6, %xmm3
pslldq $10, %xmm2
por %xmm3, %xmm2
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
psubb %xmm0, %xmm1
pmovmskb %xmm1, %edx
sub $0xffff, %edx
jnz LABEL(exit)
#ifdef USE_AS_STRNCMP
sub $16, %r11
jbe LABEL(strcmp_exitz)
#endif
add $16, %rcx
movdqa %xmm4, %xmm3
add $16, %r10
jg LABEL(nibble_ashr_6) /* cross page boundary */
movdqa (%rsi, %rcx), %xmm1
movdqa (%rdi, %rcx), %xmm2
movdqa %xmm2, %xmm4
psrldq $6, %xmm3
pslldq $10, %xmm2
por %xmm3, %xmm2
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
psubb %xmm0, %xmm1
pmovmskb %xmm1, %edx
sub $0xffff, %edx
jnz LABEL(exit)
#ifdef USE_AS_STRNCMP
sub $16, %r11
jbe LABEL(strcmp_exitz)
#endif
add $16, %rcx
movdqa %xmm4, %xmm3
jmp LABEL(loop_ashr_6)
.p2align 4
LABEL(nibble_ashr_6):
psrldq $6, %xmm4
movdqa (%rsi, %rcx), %xmm1
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm4, %xmm1
psubb %xmm0, %xmm1
pmovmskb %xmm1, %edx
sub $0x03ff, %edx
jnz LABEL(exit)
#ifdef USE_AS_STRNCMP
cmp $10, %r11
jbe LABEL(strcmp_exitz)
#endif
pxor %xmm0, %xmm0
sub $0x1000, %r10 /* subtract 4K from %r10 */
jmp LABEL(gobble_ashr_6)
/*
* ashr_7 handles the following cases:
* abs(str1 offset - str2 offset) = 9
*/
.p2align 4
LABEL(ashr_7):
pxor %xmm0, %xmm0
movdqa (%rdi), %xmm2
movdqa (%rsi), %xmm1
pcmpeqb %xmm1, %xmm0
pslldq $9, %xmm2
pcmpeqb %xmm1, %xmm2
psubb %xmm0, %xmm2
pmovmskb %xmm2, %r9d
shr %cl, %edx
shr %cl, %r9d
sub %r9d, %edx
jnz LABEL(less32bytes)
movdqa (%rdi), %xmm3
UPDATE_STRNCMP_COUNTER
pxor %xmm0, %xmm0
mov $16, %rcx /* index for loads */
mov $7, %r9d /* rdi bytes already examined. Used in exit code */
/*
* Setup %r10 value allows us to detect crossing a page boundary.
* When %r10 goes positive we are crossing a page boundary and
* need to do a nibble.
*/
lea 7(%rdi), %r10
and $0xfff, %r10 /* offset into 4K page */
sub $0x1000, %r10 /* subtract 4K pagesize */
movdqa %xmm3, %xmm4
.p2align 4
LABEL(loop_ashr_7):
add $16, %r10
jg LABEL(nibble_ashr_7)
LABEL(gobble_ashr_7):
movdqa (%rsi, %rcx), %xmm1
movdqa (%rdi, %rcx), %xmm2
movdqa %xmm2, %xmm4
psrldq $7, %xmm3
pslldq $9, %xmm2
por %xmm3, %xmm2
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
psubb %xmm0, %xmm1
pmovmskb %xmm1, %edx
sub $0xffff, %edx
jnz LABEL(exit)
#ifdef USE_AS_STRNCMP
sub $16, %r11
jbe LABEL(strcmp_exitz)
#endif
add $16, %rcx
movdqa %xmm4, %xmm3
add $16, %r10
jg LABEL(nibble_ashr_7) /* cross page boundary */
movdqa (%rsi, %rcx), %xmm1
movdqa (%rdi, %rcx), %xmm2
movdqa %xmm2, %xmm4
psrldq $7, %xmm3
pslldq $9, %xmm2
por %xmm3, %xmm2
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
psubb %xmm0, %xmm1
pmovmskb %xmm1, %edx
sub $0xffff, %edx
jnz LABEL(exit)
#ifdef USE_AS_STRNCMP
sub $16, %r11
jbe LABEL(strcmp_exitz)
#endif
add $16, %rcx
movdqa %xmm4, %xmm3
jmp LABEL(loop_ashr_7)
.p2align 4
LABEL(nibble_ashr_7):
psrldq $7, %xmm4
movdqa (%rsi, %rcx), %xmm1
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm4, %xmm1
psubb %xmm0, %xmm1
pmovmskb %xmm1, %edx
sub $0x01ff, %edx
jnz LABEL(exit)
#ifdef USE_AS_STRNCMP
cmp $9, %r11
jbe LABEL(strcmp_exitz)
#endif
pxor %xmm0, %xmm0
sub $0x1000, %r10 /* subtract 4K from %r10 */
jmp LABEL(gobble_ashr_7)
/*
* ashr_8 handles the following cases:
* abs(str1 offset - str2 offset) = 8
*/
.p2align 4
LABEL(ashr_8):
pxor %xmm0, %xmm0
movdqa (%rdi), %xmm2
movdqa (%rsi), %xmm1
pcmpeqb %xmm1, %xmm0
pslldq $8, %xmm2
pcmpeqb %xmm1, %xmm2
psubb %xmm0, %xmm2
pmovmskb %xmm2, %r9d
shr %cl, %edx
shr %cl, %r9d
sub %r9d, %edx
jnz LABEL(less32bytes)
movdqa (%rdi), %xmm3
UPDATE_STRNCMP_COUNTER
pxor %xmm0, %xmm0
mov $16, %rcx /* index for loads */
mov $8, %r9d /* rdi bytes already examined. Used in exit code */
/*
* Setup %r10 value allows us to detect crossing a page boundary.
* When %r10 goes positive we are crossing a page boundary and
* need to do a nibble.
*/
lea 8(%rdi), %r10
and $0xfff, %r10 /* offset into 4K page */
sub $0x1000, %r10 /* subtract 4K pagesize */
movdqa %xmm3, %xmm4
.p2align 4
LABEL(loop_ashr_8):
add $16, %r10
jg LABEL(nibble_ashr_8)
LABEL(gobble_ashr_8):
movdqa (%rsi, %rcx), %xmm1
movdqa (%rdi, %rcx), %xmm2
movdqa %xmm2, %xmm4
psrldq $8, %xmm3
pslldq $8, %xmm2
por %xmm3, %xmm2
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
psubb %xmm0, %xmm1
pmovmskb %xmm1, %edx
sub $0xffff, %edx
jnz LABEL(exit)
#ifdef USE_AS_STRNCMP
sub $16, %r11
jbe LABEL(strcmp_exitz)
#endif
add $16, %rcx
movdqa %xmm4, %xmm3
add $16, %r10
jg LABEL(nibble_ashr_8) /* cross page boundary */
movdqa (%rsi, %rcx), %xmm1
movdqa (%rdi, %rcx), %xmm2
movdqa %xmm2, %xmm4
psrldq $8, %xmm3
pslldq $8, %xmm2
por %xmm3, %xmm2
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
psubb %xmm0, %xmm1
pmovmskb %xmm1, %edx
sub $0xffff, %edx
jnz LABEL(exit)
#ifdef USE_AS_STRNCMP
sub $16, %r11
jbe LABEL(strcmp_exitz)
#endif
add $16, %rcx
movdqa %xmm4, %xmm3
jmp LABEL(loop_ashr_8)
.p2align 4
LABEL(nibble_ashr_8):
psrldq $8, %xmm4
movdqa (%rsi, %rcx), %xmm1
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm4, %xmm1
psubb %xmm0, %xmm1
pmovmskb %xmm1, %edx
sub $0x00ff, %edx
jnz LABEL(exit)
#ifdef USE_AS_STRNCMP
cmp $8, %r11
jbe LABEL(strcmp_exitz)
#endif
pxor %xmm0, %xmm0
sub $0x1000, %r10 /* subtract 4K from %r10 */
jmp LABEL(gobble_ashr_8)
/*
* ashr_9 handles the following cases:
* abs(str1 offset - str2 offset) = 7
*/
.p2align 4
LABEL(ashr_9):
pxor %xmm0, %xmm0
movdqa (%rdi), %xmm2
movdqa (%rsi), %xmm1
pcmpeqb %xmm1, %xmm0
pslldq $7, %xmm2
pcmpeqb %xmm1, %xmm2
psubb %xmm0, %xmm2
pmovmskb %xmm2, %r9d
shr %cl, %edx
shr %cl, %r9d
sub %r9d, %edx
jnz LABEL(less32bytes)
movdqa (%rdi), %xmm3
UPDATE_STRNCMP_COUNTER
pxor %xmm0, %xmm0
mov $16, %rcx /* index for loads */
mov $9, %r9d /* rdi bytes already examined. Used in exit code */
/*
* Setup %r10 value allows us to detect crossing a page boundary.
* When %r10 goes positive we are crossing a page boundary and
* need to do a nibble.
*/
lea 9(%rdi), %r10
and $0xfff, %r10 /* offset into 4K page */
sub $0x1000, %r10 /* subtract 4K pagesize */
movdqa %xmm3, %xmm4
.p2align 4
LABEL(loop_ashr_9):
add $16, %r10
jg LABEL(nibble_ashr_9)
LABEL(gobble_ashr_9):
movdqa (%rsi, %rcx), %xmm1
movdqa (%rdi, %rcx), %xmm2
movdqa %xmm2, %xmm4
psrldq $9, %xmm3
pslldq $7, %xmm2
por %xmm3, %xmm2
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
psubb %xmm0, %xmm1
pmovmskb %xmm1, %edx
sub $0xffff, %edx
jnz LABEL(exit)
#ifdef USE_AS_STRNCMP
sub $16, %r11
jbe LABEL(strcmp_exitz)
#endif
add $16, %rcx
movdqa %xmm4, %xmm3
add $16, %r10
jg LABEL(nibble_ashr_9) /* cross page boundary */
movdqa (%rsi, %rcx), %xmm1
movdqa (%rdi, %rcx), %xmm2
movdqa %xmm2, %xmm4
psrldq $9, %xmm3
pslldq $7, %xmm2
por %xmm3, %xmm2
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
psubb %xmm0, %xmm1
pmovmskb %xmm1, %edx
sub $0xffff, %edx
jnz LABEL(exit)
#ifdef USE_AS_STRNCMP
sub $16, %r11
jbe LABEL(strcmp_exitz)
#endif
add $16, %rcx
movdqa %xmm4, %xmm3 /* store for next cycle */
jmp LABEL(loop_ashr_9)
.p2align 4
LABEL(nibble_ashr_9):
psrldq $9, %xmm4
movdqa (%rsi, %rcx), %xmm1
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm4, %xmm1
psubb %xmm0, %xmm1
pmovmskb %xmm1, %edx
sub $0x007f, %edx
jnz LABEL(exit)
#ifdef USE_AS_STRNCMP
cmp $7, %r11
jbe LABEL(strcmp_exitz)
#endif
pxor %xmm0, %xmm0
sub $0x1000, %r10 /* subtract 4K from %r10 */
jmp LABEL(gobble_ashr_9)
/*
* ashr_10 handles the following cases:
* abs(str1 offset - str2 offset) = 6
*/
.p2align 4
LABEL(ashr_10):
pxor %xmm0, %xmm0
movdqa (%rdi), %xmm2
movdqa (%rsi), %xmm1
pcmpeqb %xmm1, %xmm0
pslldq $6, %xmm2
pcmpeqb %xmm1, %xmm2
psubb %xmm0, %xmm2
pmovmskb %xmm2, %r9d
shr %cl, %edx
shr %cl, %r9d
sub %r9d, %edx
jnz LABEL(less32bytes)
movdqa (%rdi), %xmm3
UPDATE_STRNCMP_COUNTER
pxor %xmm0, %xmm0
mov $16, %rcx /* index for loads */
mov $10, %r9d /* rdi bytes already examined. Used in exit code */
/*
* Setup %r10 value allows us to detect crossing a page boundary.
* When %r10 goes positive we are crossing a page boundary and
* need to do a nibble.
*/
lea 10(%rdi), %r10
and $0xfff, %r10 /* offset into 4K page */
sub $0x1000, %r10 /* subtract 4K pagesize */
movdqa %xmm3, %xmm4
.p2align 4
LABEL(loop_ashr_10):
add $16, %r10
jg LABEL(nibble_ashr_10)
LABEL(gobble_ashr_10):
movdqa (%rsi, %rcx), %xmm1
movdqa (%rdi, %rcx), %xmm2
movdqa %xmm2, %xmm4
psrldq $10, %xmm3
pslldq $6, %xmm2
por %xmm3, %xmm2
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
psubb %xmm0, %xmm1
pmovmskb %xmm1, %edx
sub $0xffff, %edx
jnz LABEL(exit)
#ifdef USE_AS_STRNCMP
sub $16, %r11
jbe LABEL(strcmp_exitz)
#endif
add $16, %rcx
movdqa %xmm4, %xmm3
add $16, %r10
jg LABEL(nibble_ashr_10) /* cross page boundary */
movdqa (%rsi, %rcx), %xmm1
movdqa (%rdi, %rcx), %xmm2
movdqa %xmm2, %xmm4
psrldq $10, %xmm3
pslldq $6, %xmm2
por %xmm3, %xmm2
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
psubb %xmm0, %xmm1
pmovmskb %xmm1, %edx
sub $0xffff, %edx
jnz LABEL(exit)
#ifdef USE_AS_STRNCMP
sub $16, %r11
jbe LABEL(strcmp_exitz)
#endif
add $16, %rcx
movdqa %xmm4, %xmm3
jmp LABEL(loop_ashr_10)
.p2align 4
LABEL(nibble_ashr_10):
psrldq $10, %xmm4
movdqa (%rsi, %rcx), %xmm1
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm4, %xmm1
psubb %xmm0, %xmm1
pmovmskb %xmm1, %edx
sub $0x003f, %edx
jnz LABEL(exit)
#ifdef USE_AS_STRNCMP
cmp $6, %r11
jbe LABEL(strcmp_exitz)
#endif
pxor %xmm0, %xmm0
sub $0x1000, %r10 /* subtract 4K from %r10 */
jmp LABEL(gobble_ashr_10)
/*
* ashr_11 handles the following cases:
* abs(str1 offset - str2 offset) = 5
*/
.p2align 4
LABEL(ashr_11):
pxor %xmm0, %xmm0
movdqa (%rdi), %xmm2
movdqa (%rsi), %xmm1
pcmpeqb %xmm1, %xmm0
pslldq $5, %xmm2
pcmpeqb %xmm1, %xmm2
psubb %xmm0, %xmm2
pmovmskb %xmm2, %r9d
shr %cl, %edx
shr %cl, %r9d
sub %r9d, %edx
jnz LABEL(less32bytes)
movdqa (%rdi), %xmm3
UPDATE_STRNCMP_COUNTER
pxor %xmm0, %xmm0
mov $16, %rcx /* index for loads */
mov $11, %r9d /* rdi bytes already examined. Used in exit code */
/*
* Setup %r10 value allows us to detect crossing a page boundary.
* When %r10 goes positive we are crossing a page boundary and
* need to do a nibble.
*/
lea 11(%rdi), %r10
and $0xfff, %r10 /* offset into 4K page */
sub $0x1000, %r10 /* subtract 4K pagesize */
movdqa %xmm3, %xmm4
.p2align 4
LABEL(loop_ashr_11):
add $16, %r10
jg LABEL(nibble_ashr_11)
LABEL(gobble_ashr_11):
movdqa (%rsi, %rcx), %xmm1
movdqa (%rdi, %rcx), %xmm2
movdqa %xmm2, %xmm4
psrldq $11, %xmm3
pslldq $5, %xmm2
por %xmm3, %xmm2
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
psubb %xmm0, %xmm1
pmovmskb %xmm1, %edx
sub $0xffff, %edx
jnz LABEL(exit)
#ifdef USE_AS_STRNCMP
sub $16, %r11
jbe LABEL(strcmp_exitz)
#endif
add $16, %rcx
movdqa %xmm4, %xmm3
add $16, %r10
jg LABEL(nibble_ashr_11) /* cross page boundary */
movdqa (%rsi, %rcx), %xmm1
movdqa (%rdi, %rcx), %xmm2
movdqa %xmm2, %xmm4
psrldq $11, %xmm3
pslldq $5, %xmm2
por %xmm3, %xmm2
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
psubb %xmm0, %xmm1
pmovmskb %xmm1, %edx
sub $0xffff, %edx
jnz LABEL(exit)
#ifdef USE_AS_STRNCMP
sub $16, %r11
jbe LABEL(strcmp_exitz)
#endif
add $16, %rcx
movdqa %xmm4, %xmm3
jmp LABEL(loop_ashr_11)
.p2align 4
LABEL(nibble_ashr_11):
psrldq $11, %xmm4
movdqa (%rsi, %rcx), %xmm1
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm4, %xmm1
psubb %xmm0, %xmm1
pmovmskb %xmm1, %edx
sub $0x001f, %edx
jnz LABEL(exit)
#ifdef USE_AS_STRNCMP
cmp $5, %r11
jbe LABEL(strcmp_exitz)
#endif
pxor %xmm0, %xmm0
sub $0x1000, %r10 /* subtract 4K from %r10 */
jmp LABEL(gobble_ashr_11)
/*
* ashr_12 handles the following cases:
* abs(str1 offset - str2 offset) = 4
*/
.p2align 4
LABEL(ashr_12):
pxor %xmm0, %xmm0
movdqa (%rdi), %xmm2
movdqa (%rsi), %xmm1
pcmpeqb %xmm1, %xmm0
pslldq $4, %xmm2
pcmpeqb %xmm1, %xmm2
psubb %xmm0, %xmm2
pmovmskb %xmm2, %r9d
shr %cl, %edx
shr %cl, %r9d
sub %r9d, %edx
jnz LABEL(less32bytes)
movdqa (%rdi), %xmm3
UPDATE_STRNCMP_COUNTER
pxor %xmm0, %xmm0
mov $16, %rcx /* index for loads */
mov $12, %r9d /* rdi bytes already examined. Used in exit code */
/*
* Setup %r10 value allows us to detect crossing a page boundary.
* When %r10 goes positive we are crossing a page boundary and
* need to do a nibble.
*/
lea 12(%rdi), %r10
and $0xfff, %r10 /* offset into 4K page */
sub $0x1000, %r10 /* subtract 4K pagesize */
movdqa %xmm3, %xmm4
.p2align 4
LABEL(loop_ashr_12):
add $16, %r10
jg LABEL(nibble_ashr_12)
LABEL(gobble_ashr_12):
movdqa (%rsi, %rcx), %xmm1
movdqa (%rdi, %rcx), %xmm2
movdqa %xmm2, %xmm4
psrldq $12, %xmm3
pslldq $4, %xmm2
por %xmm3, %xmm2
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
psubb %xmm0, %xmm1
pmovmskb %xmm1, %edx
sub $0xffff, %edx
jnz LABEL(exit)
#ifdef USE_AS_STRNCMP
sub $16, %r11
jbe LABEL(strcmp_exitz)
#endif
add $16, %rcx
movdqa %xmm4, %xmm3
add $16, %r10
jg LABEL(nibble_ashr_12) /* cross page boundary */
movdqa (%rsi, %rcx), %xmm1
movdqa (%rdi, %rcx), %xmm2
movdqa %xmm2, %xmm4
psrldq $12, %xmm3
pslldq $4, %xmm2
por %xmm3, %xmm2
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
psubb %xmm0, %xmm1
pmovmskb %xmm1, %edx
sub $0xffff, %edx
jnz LABEL(exit)
#ifdef USE_AS_STRNCMP
sub $16, %r11
jbe LABEL(strcmp_exitz)
#endif
add $16, %rcx
movdqa %xmm4, %xmm3
jmp LABEL(loop_ashr_12)
.p2align 4
LABEL(nibble_ashr_12):
psrldq $12, %xmm4
movdqa (%rsi, %rcx), %xmm1
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm4, %xmm1
psubb %xmm0, %xmm1
pmovmskb %xmm1, %edx
sub $0x000f, %edx
jnz LABEL(exit)
#ifdef USE_AS_STRNCMP
cmp $4, %r11
jbe LABEL(strcmp_exitz)
#endif
pxor %xmm0, %xmm0
sub $0x1000, %r10 /* subtract 4K from %r10 */
jmp LABEL(gobble_ashr_12)
/*
* ashr_13 handles the following cases:
* abs(str1 offset - str2 offset) = 3
*/
.p2align 4
LABEL(ashr_13):
pxor %xmm0, %xmm0
movdqa (%rdi), %xmm2
movdqa (%rsi), %xmm1
pcmpeqb %xmm1, %xmm0
pslldq $3, %xmm2
pcmpeqb %xmm1, %xmm2
psubb %xmm0, %xmm2
pmovmskb %xmm2, %r9d
shr %cl, %edx
shr %cl, %r9d
sub %r9d, %edx
jnz LABEL(less32bytes)
movdqa (%rdi), %xmm3
UPDATE_STRNCMP_COUNTER
pxor %xmm0, %xmm0
mov $16, %rcx /* index for loads */
mov $13, %r9d /* rdi bytes already examined. Used in exit code */
/*
* Setup %r10 value allows us to detect crossing a page boundary.
* When %r10 goes positive we are crossing a page boundary and
* need to do a nibble.
*/
lea 13(%rdi), %r10
and $0xfff, %r10 /* offset into 4K page */
sub $0x1000, %r10 /* subtract 4K pagesize */
movdqa %xmm3, %xmm4
.p2align 4
LABEL(loop_ashr_13):
add $16, %r10
jg LABEL(nibble_ashr_13)
LABEL(gobble_ashr_13):
movdqa (%rsi, %rcx), %xmm1
movdqa (%rdi, %rcx), %xmm2
movdqa %xmm2, %xmm4
psrldq $13, %xmm3
pslldq $3, %xmm2
por %xmm3, %xmm2
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
psubb %xmm0, %xmm1
pmovmskb %xmm1, %edx
sub $0xffff, %edx
jnz LABEL(exit)
#ifdef USE_AS_STRNCMP
sub $16, %r11
jbe LABEL(strcmp_exitz)
#endif
add $16, %rcx
movdqa %xmm4, %xmm3
add $16, %r10
jg LABEL(nibble_ashr_13) /* cross page boundary */
movdqa (%rsi, %rcx), %xmm1
movdqa (%rdi, %rcx), %xmm2
movdqa %xmm2, %xmm4
psrldq $13, %xmm3
pslldq $3, %xmm2
por %xmm3, %xmm2
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
psubb %xmm0, %xmm1
pmovmskb %xmm1, %edx
sub $0xffff, %edx
jnz LABEL(exit)
#ifdef USE_AS_STRNCMP
sub $16, %r11
jbe LABEL(strcmp_exitz)
#endif
add $16, %rcx
movdqa %xmm4, %xmm3
jmp LABEL(loop_ashr_13)
.p2align 4
LABEL(nibble_ashr_13):
psrldq $13, %xmm4
movdqa (%rsi, %rcx), %xmm1
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm4, %xmm1
psubb %xmm0, %xmm1
pmovmskb %xmm1, %edx
sub $0x0007, %edx
jnz LABEL(exit)
#ifdef USE_AS_STRNCMP
cmp $3, %r11
jbe LABEL(strcmp_exitz)
#endif
pxor %xmm0, %xmm0
sub $0x1000, %r10 /* subtract 4K from %r10 */
jmp LABEL(gobble_ashr_13)
/*
* ashr_14 handles the following cases:
* abs(str1 offset - str2 offset) = 2
*/
.p2align 4
LABEL(ashr_14):
pxor %xmm0, %xmm0
movdqa (%rdi), %xmm2
movdqa (%rsi), %xmm1
pcmpeqb %xmm1, %xmm0
pslldq $2, %xmm2
pcmpeqb %xmm1, %xmm2
psubb %xmm0, %xmm2
pmovmskb %xmm2, %r9d
shr %cl, %edx
shr %cl, %r9d
sub %r9d, %edx
jnz LABEL(less32bytes)
movdqa (%rdi), %xmm3
UPDATE_STRNCMP_COUNTER
pxor %xmm0, %xmm0
mov $16, %rcx /* index for loads */
mov $14, %r9d /* rdi bytes already examined. Used in exit code */
/*
* Setup %r10 value allows us to detect crossing a page boundary.
* When %r10 goes positive we are crossing a page boundary and
* need to do a nibble.
*/
lea 14(%rdi), %r10
and $0xfff, %r10 /* offset into 4K page */
sub $0x1000, %r10 /* subtract 4K pagesize */
movdqa %xmm3, %xmm4
.p2align 4
LABEL(loop_ashr_14):
add $16, %r10
jg LABEL(nibble_ashr_14)
LABEL(gobble_ashr_14):
movdqa (%rsi, %rcx), %xmm1
movdqa (%rdi, %rcx), %xmm2
movdqa %xmm2, %xmm4
psrldq $14, %xmm3
pslldq $2, %xmm2
por %xmm3, %xmm2
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
psubb %xmm0, %xmm1
pmovmskb %xmm1, %edx
sub $0xffff, %edx
jnz LABEL(exit)
#ifdef USE_AS_STRNCMP
sub $16, %r11
jbe LABEL(strcmp_exitz)
#endif
add $16, %rcx
movdqa %xmm4, %xmm3
add $16, %r10
jg LABEL(nibble_ashr_14) /* cross page boundary */
movdqa (%rsi, %rcx), %xmm1
movdqa (%rdi, %rcx), %xmm2
movdqa %xmm2, %xmm4
psrldq $14, %xmm3
pslldq $2, %xmm2
por %xmm3, %xmm2
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
psubb %xmm0, %xmm1
pmovmskb %xmm1, %edx
sub $0xffff, %edx
jnz LABEL(exit)
#ifdef USE_AS_STRNCMP
sub $16, %r11
jbe LABEL(strcmp_exitz)
#endif
add $16, %rcx
movdqa %xmm4, %xmm3
jmp LABEL(loop_ashr_14)
.p2align 4
LABEL(nibble_ashr_14):
psrldq $14, %xmm4
movdqa (%rsi, %rcx), %xmm1
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm4, %xmm1
psubb %xmm0, %xmm1
pmovmskb %xmm1, %edx
sub $0x0003, %edx
jnz LABEL(exit)
#ifdef USE_AS_STRNCMP
cmp $2, %r11
jbe LABEL(strcmp_exitz)
#endif
pxor %xmm0, %xmm0
sub $0x1000, %r10 /* subtract 4K from %r10 */
jmp LABEL(gobble_ashr_14)
/*
* ashr_15 handles the following cases:
* abs(str1 offset - str2 offset) = 1
*/
.p2align 4
LABEL(ashr_15):
pxor %xmm0, %xmm0
movdqa (%rdi), %xmm2
movdqa (%rsi), %xmm1
pcmpeqb %xmm1, %xmm0
pslldq $1, %xmm2
pcmpeqb %xmm1, %xmm2
psubb %xmm0, %xmm2
pmovmskb %xmm2, %r9d
shr %cl, %edx
shr %cl, %r9d
sub %r9d, %edx
jnz LABEL(less32bytes)
movdqa (%rdi), %xmm3
UPDATE_STRNCMP_COUNTER
pxor %xmm0, %xmm0
mov $16, %rcx /* index for loads */
mov $15, %r9d /* rdi bytes already examined. Used in exit code */
/*
* Setup %r10 value allows us to detect crossing a page boundary.
* When %r10 goes positive we are crossing a page boundary and
* need to do a nibble.
*/
lea 15(%rdi), %r10
and $0xfff, %r10 /* offset into 4K page */
sub $0x1000, %r10 /* subtract 4K pagesize */
movdqa %xmm3, %xmm4
.p2align 4
LABEL(loop_ashr_15):
add $16, %r10
jg LABEL(nibble_ashr_15)
LABEL(gobble_ashr_15):
movdqa (%rsi, %rcx), %xmm1
movdqa (%rdi, %rcx), %xmm2
movdqa %xmm2, %xmm4
psrldq $15, %xmm3
pslldq $1, %xmm2
por %xmm3, %xmm2
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
psubb %xmm0, %xmm1
pmovmskb %xmm1, %edx
sub $0xffff, %edx
jnz LABEL(exit)
#ifdef USE_AS_STRNCMP
sub $16, %r11
jbe LABEL(strcmp_exitz)
#endif
add $16, %rcx
movdqa %xmm4, %xmm3
add $16, %r10
jg LABEL(nibble_ashr_15) /* cross page boundary */
movdqa (%rsi, %rcx), %xmm1
movdqa (%rdi, %rcx), %xmm2
movdqa %xmm2, %xmm4
psrldq $15, %xmm3
pslldq $1, %xmm2
por %xmm3, %xmm2
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
psubb %xmm0, %xmm1
pmovmskb %xmm1, %edx
sub $0xffff, %edx
jnz LABEL(exit)
#ifdef USE_AS_STRNCMP
sub $16, %r11
jbe LABEL(strcmp_exitz)
#endif
add $16, %rcx
movdqa %xmm4, %xmm3
jmp LABEL(loop_ashr_15)
.p2align 4
LABEL(nibble_ashr_15):
psrldq $15, %xmm4
movdqa (%rsi, %rcx), %xmm1
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm4, %xmm1
psubb %xmm0, %xmm1
pmovmskb %xmm1, %edx
sub $0x0001, %edx
jnz LABEL(exit)
#ifdef USE_AS_STRNCMP
cmp $1, %r11
jbe LABEL(strcmp_exitz)
#endif
pxor %xmm0, %xmm0
sub $0x1000, %r10 /* subtract 4K from %r10 */
jmp LABEL(gobble_ashr_15)
.p2align 4
LABEL(exit):
lea -16(%r9, %rcx), %rax /* locate the exact offset for rdi */
LABEL(less32bytes):
lea (%rdi, %rax), %rdi /* locate the exact address for first operand(rdi) */
lea (%rsi, %rcx), %rsi /* locate the exact address for second operand(rsi) */
test %r8d, %r8d
jz LABEL(ret)
xchg %rsi, %rdi /* recover original order according to flag(%r8d) */
.p2align 4
LABEL(ret):
LABEL(less16bytes):
/*
* Check to see if BSF is fast on this processor. If not, use a different
* exit tail.
*/
testl $USE_BSF,.memops_method(%rip)
jz LABEL(AMD_exit)
bsf %rdx, %rdx /* find and store bit index in %rdx */
#ifdef USE_AS_STRNCMP
sub %rdx, %r11
jbe LABEL(strcmp_exitz)
#endif
xor %ecx, %ecx /* clear %ecx */
xor %eax, %eax /* clear %eax */
movb (%rsi, %rdx), %cl
movb (%rdi, %rdx), %al
sub %ecx, %eax
ret
#ifdef USE_AS_STRNCMP
LABEL(strcmp_exitz):
xor %eax, %eax
ret
#endif
/*
* This exit tail does not use the bsf instruction.
*/
.p2align 4
LABEL(AMD_exit):
test %dl, %dl
jz LABEL(next_8_bytes)
test $0x01, %dl
jnz LABEL(Byte0)
test $0x02, %dl
jnz LABEL(Byte1)
test $0x04, %dl
jnz LABEL(Byte2)
test $0x08, %dl
jnz LABEL(Byte3)
test $0x10, %dl
jnz LABEL(Byte4)
test $0x20, %dl
jnz LABEL(Byte5)
test $0x40, %dl
jnz LABEL(Byte6)
#ifdef USE_AS_STRNCMP
sub $7, %r11
jbe LABEL(strcmp_exitz)
#endif
movzx 7(%rsi), %ecx
movzx 7(%rdi), %eax
sub %ecx, %eax
ret
.p2align 4
LABEL(Byte0):
/*
* never need to handle byte 0 for strncmpy
#ifdef USE_AS_STRNCMP
sub $0, %r11
jbe LABEL(strcmp_exitz)
#endif
*/
movzx (%rsi), %ecx
movzx (%rdi), %eax
sub %ecx, %eax
ret
.p2align 4
LABEL(Byte1):
#ifdef USE_AS_STRNCMP
sub $1, %r11
jbe LABEL(strcmp_exitz)
#endif
movzx 1(%rsi), %ecx
movzx 1(%rdi), %eax
sub %ecx, %eax
ret
.p2align 4
LABEL(Byte2):
#ifdef USE_AS_STRNCMP
sub $2, %r11
jbe LABEL(strcmp_exitz)
#endif
movzx 2(%rsi), %ecx
movzx 2(%rdi), %eax
sub %ecx, %eax
ret
.p2align 4
LABEL(Byte3):
#ifdef USE_AS_STRNCMP
sub $3, %r11
jbe LABEL(strcmp_exitz)
#endif
movzx 3(%rsi), %ecx
movzx 3(%rdi), %eax
sub %ecx, %eax
ret
.p2align 4
LABEL(Byte4):
#ifdef USE_AS_STRNCMP
sub $4, %r11
jbe LABEL(strcmp_exitz)
#endif
movzx 4(%rsi), %ecx
movzx 4(%rdi), %eax
sub %ecx, %eax
ret
.p2align 4
LABEL(Byte5):
#ifdef USE_AS_STRNCMP
sub $5, %r11
jbe LABEL(strcmp_exitz)
#endif
movzx 5(%rsi), %ecx
movzx 5(%rdi), %eax
sub %ecx, %eax
ret
.p2align 4
LABEL(Byte6):
#ifdef USE_AS_STRNCMP
sub $6, %r11
jbe LABEL(strcmp_exitz)
#endif
movzx 6(%rsi), %ecx
movzx 6(%rdi), %eax
sub %ecx, %eax
ret
.p2align 4
LABEL(next_8_bytes):
add $8, %rdi
add $8, %rsi
#ifdef USE_AS_STRNCMP
sub $8, %r11
jbe LABEL(strcmp_exitz)
#endif
test $0x01, %dh
jnz LABEL(Byte0)
test $0x02, %dh
jnz LABEL(Byte1)
test $0x04, %dh
jnz LABEL(Byte2)
test $0x08, %dh
jnz LABEL(Byte3)
test $0x10, %dh
jnz LABEL(Byte4)
test $0x20, %dh
jnz LABEL(Byte5)
test $0x40, %dh
jnz LABEL(Byte6)
#ifdef USE_AS_STRNCMP
sub $7, %r11
jbe LABEL(strcmp_exitz)
#endif
movzx 7(%rsi), %ecx
movzx 7(%rdi), %eax
sub %ecx, %eax
ret
.pushsection .rodata
.p2align 4
LABEL(unaligned_table):
.int LABEL(ashr_0) - LABEL(unaligned_table)
.int LABEL(ashr_15) - LABEL(unaligned_table)
.int LABEL(ashr_14) - LABEL(unaligned_table)
.int LABEL(ashr_13) - LABEL(unaligned_table)
.int LABEL(ashr_12) - LABEL(unaligned_table)
.int LABEL(ashr_11) - LABEL(unaligned_table)
.int LABEL(ashr_10) - LABEL(unaligned_table)
.int LABEL(ashr_9) - LABEL(unaligned_table)
.int LABEL(ashr_8) - LABEL(unaligned_table)
.int LABEL(ashr_7) - LABEL(unaligned_table)
.int LABEL(ashr_6) - LABEL(unaligned_table)
.int LABEL(ashr_5) - LABEL(unaligned_table)
.int LABEL(ashr_4) - LABEL(unaligned_table)
.int LABEL(ashr_3) - LABEL(unaligned_table)
.int LABEL(ashr_2) - LABEL(unaligned_table)
.int LABEL(ashr_1) - LABEL(unaligned_table)
.popsection
#ifdef USE_AS_STRNCMP
SET_SIZE(strncmp)
#else
SET_SIZE(strcmp) /* (const char *, const char *) */
#endif