2N/A/*
2N/A * CDDL HEADER START
2N/A *
2N/A * The contents of this file are subject to the terms of the
2N/A * Common Development and Distribution License (the "License").
2N/A * You may not use this file except in compliance with the License.
2N/A *
2N/A * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
2N/A * or http://www.opensolaris.org/os/licensing.
2N/A * See the License for the specific language governing permissions
2N/A * and limitations under the License.
2N/A *
2N/A * When distributing Covered Code, include this CDDL HEADER in each
2N/A * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
2N/A * If applicable, add the following below this CDDL HEADER, with the
2N/A * fields enclosed by brackets "[]" replaced with your own identifying
2N/A * information: Portions Copyright [yyyy] [name of copyright owner]
2N/A *
2N/A * CDDL HEADER END
2N/A */
2N/A
2N/A/*
2N/A * Copyright (c) 2009, Intel Corporation
2N/A * All rights reserved.
2N/A */
2N/A
2N/A/*
2N/A * strlen - calculate the length of string
2N/A */
2N/A
2N/A#include "SYS.h"
2N/A#include "proc64_id.h"
2N/A
2N/A#define LABEL(s) .strlen/**/s
2N/A
2N/A /*
2N/A * strlen(s): compute the number of bytes that precede the first null
2N/A * byte of the string s.
2N/A *
2N/A * This implementation uses SSE instructions to compare up to 16 bytes
2N/A * at a time looking for the end of string (null char).
2N/A *
2N/A * Register roles in the code below:
2N/A *   %rdi   s as passed in; negated at the exit label
2N/A *   %rsi   scan pointer; at the exit label it is 16 bytes past the
2N/A *          address that mask bit 0 stands for
2N/A *   %rcx   s & 15 while handling an unaligned start; bsf result later
2N/A *   %edx   pmovmskb mask, bit i set when byte i compared equal to null
2N/A *   %xmm0  zero before every pcmpeqb (a compare that finds no null
2N/A *          leaves it zero, so the loop does not clear it again)
2N/A *   %rax   the length, computed in the exit tails
2N/A *
2N/A * LABEL(s) is written with an empty C comment between ".strlen" and s.
2N/A * NOTE(review): presumably the preprocessor used for this file drops
2N/A * that comment without leaving a space, giving one symbol such as
2N/A * .strlenexit - confirm against the build's preprocessor mode.
2N/A */
2N/A ENTRY(strlen) /* (const char *s) */
2N/A mov %rdi, %rsi /* scan with %rsi; keep original %rdi value */
2N/A mov %rsi, %rcx /* copy of s, reduced to s & 15 below */
2N/A pxor %xmm0, %xmm0 /* 16 null chars */
2N/A and $15, %rcx /* %rcx = offset of s within its 16-byte block */
2N/A jz LABEL(align16_loop) /* string is 16 byte aligned */
2N/A
2N/A /*
2N/A * Unaligned case. Round down to 16-byte boundary before comparing
2N/A * 16 bytes for a null char. The code then compensates for any extra chars
2N/A * preceding the start of the string.
2N/A */
2N/ALABEL(unalign16):
2N/A and $0xfffffffffffffff0, %rsi /* round %rsi down to a 16-byte boundary */
2N/A
2N/A pcmpeqb (%rsi), %xmm0 /* 0xff in each byte position that holds a null */
2N/A lea 16(%rdi), %rsi /* %rsi = s + 16, the form the exit code expects */
2N/A pmovmskb %xmm0, %edx /* one mask bit per byte of the block */
2N/A
2N/A shr %cl, %edx /* Compensate for bytes preceding the string */
2N/A test %edx, %edx
2N/A jnz LABEL(exit) /* null found inside the first block */
2N/A
2N/A sub %rcx, %rsi /* no null, adjust to next 16-byte boundary */
2N/A pxor %xmm0, %xmm0 /* clear xmm0, may have been changed... */
2N/A
2N/A .p2align 4
2N/ALABEL(align16_loop): /* 16 byte aligned; body is unrolled four times */
2N/A pcmpeqb (%rsi), %xmm0 /* look for null bytes */
2N/A pmovmskb %xmm0, %edx /* move each byte mask of %xmm0 to edx */
2N/A
2N/A add $16, %rsi /* prepare to search next 16 bytes */
2N/A test %edx, %edx /* if no null byte, %edx must be 0 */
2N/A jnz LABEL(exit) /* found a null */
2N/A
2N/A pcmpeqb (%rsi), %xmm0 /* second block; %xmm0 is still zero here */
2N/A pmovmskb %xmm0, %edx
2N/A add $16, %rsi
2N/A test %edx, %edx
2N/A jnz LABEL(exit)
2N/A
2N/A pcmpeqb (%rsi), %xmm0 /* third block */
2N/A pmovmskb %xmm0, %edx
2N/A add $16, %rsi
2N/A test %edx, %edx
2N/A jnz LABEL(exit)
2N/A
2N/A pcmpeqb (%rsi), %xmm0 /* fourth block */
2N/A pmovmskb %xmm0, %edx
2N/A add $16, %rsi
2N/A test %edx, %edx
2N/A jz LABEL(align16_loop) /* no null in these 64 bytes; else fall into exit */
2N/A
2N/A .p2align 4
2N/ALABEL(exit): /* %edx is non-zero; its lowest set bit marks the null */
2N/A neg %rdi /* %rdi = -s, so the lea below subtracts s */
2N/A /*
2N/A * Check to see if BSF is fast on this processor. If not, use a different
2N/A * exit tail to find first bit set indicating null byte match.
2N/A */
2N/A testl $USE_BSF, .memops_method(%rip)
2N/A jz LABEL(AMD_exit) /* USE_BSF flag clear: take the tail without bsf */
2N/A
2N/A lea -16(%rdi, %rsi), %rax /* calculate exact offset */
2N/A bsf %edx, %ecx /* Least significant 1 bit is index of null */
2N/A lea (%rax, %rcx),%rax /* length = offset of mask bit 0 + bit index */
2N/A ret
2N/A
2N/A /*
2N/A * This exit tail does not use the bsf instruction.
2N/A * It tests the mask one bit at a time, lowest bit first: %dl covers
2N/A * bytes 0-7 of the block and %dh covers bytes 8-15. Each exit_tailN
2N/A * adds N to %rax; the bit 7 cases add 7 inline.
2N/A */
2N/A .p2align 4
2N/ALABEL(AMD_exit):
2N/A lea -16(%rdi, %rsi), %rax /* %rax = length if mask bit 0 is set */
2N/A test %dl, %dl /* any null in bytes 0-7? */
2N/A jz LABEL(exit_high) /* no: the null is in bytes 8-15 */
2N/A test $0x01, %dl
2N/A jnz LABEL(exit_tail0)
2N/A
2N/A test $0x02, %dl
2N/A jnz LABEL(exit_tail1)
2N/A
2N/A .p2align 4 /* any padding emitted here sits on the fall-through path */
2N/A test $0x04, %dl
2N/A jnz LABEL(exit_tail2)
2N/A
2N/A test $0x08, %dl
2N/A jnz LABEL(exit_tail3)
2N/A
2N/A test $0x10, %dl
2N/A jnz LABEL(exit_tail4)
2N/A
2N/A test $0x20, %dl
2N/A jnz LABEL(exit_tail5)
2N/A
2N/A test $0x40, %dl
2N/A jnz LABEL(exit_tail6)
2N/A add $7, %rax /* %dl non-zero with bits 0-6 clear: it is bit 7 */
2N/A ret
2N/A
2N/A .p2align 4
2N/ALABEL(exit_high): /* %dl was zero, so the set bit is in %dh */
2N/A add $8, %rax /* bias by 8, then reuse the same tails with %dh */
2N/A test $0x01, %dh
2N/A jnz LABEL(exit_tail0)
2N/A
2N/A test $0x02, %dh
2N/A jnz LABEL(exit_tail1)
2N/A
2N/A test $0x04, %dh
2N/A jnz LABEL(exit_tail2)
2N/A
2N/A test $0x08, %dh
2N/A jnz LABEL(exit_tail3)
2N/A
2N/A test $0x10, %dh
2N/A jnz LABEL(exit_tail4)
2N/A
2N/A test $0x20, %dh
2N/A jnz LABEL(exit_tail5)
2N/A
2N/A test $0x40, %dh
2N/A jnz LABEL(exit_tail6)
2N/A add $7, %rax /* bits 0-6 of %dh clear: only bit 7 is left */
2N/A ret
2N/A
2N/A .p2align 4
2N/ALABEL(exit_tail0): /* %rax already holds the length */
2N/A xor %ecx, %ecx /* NOTE(review): %ecx is not read before ret; looks redundant */
2N/A ret
2N/A
2N/A .p2align 4
2N/ALABEL(exit_tail1):
2N/A add $1, %rax
2N/A ret
2N/A
2N/A .p2align 4
2N/ALABEL(exit_tail2):
2N/A add $2, %rax
2N/A ret
2N/A
2N/A .p2align 4
2N/ALABEL(exit_tail3):
2N/A add $3, %rax
2N/A ret
2N/A
2N/A .p2align 4
2N/ALABEL(exit_tail4):
2N/A add $4, %rax
2N/A ret
2N/A
2N/A .p2align 4
2N/ALABEL(exit_tail5):
2N/A add $5, %rax
2N/A ret
2N/A
2N/A .p2align 4
2N/ALABEL(exit_tail6):
2N/A add $6, %rax
2N/A ret
2N/A SET_SIZE(strlen)