/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
*/
/*
* The ascii_strcasecmp() function is a case-insensitive version of strcmp().
* It assumes the ASCII character set and ignores differences in case
* when comparing lower and upper case characters. In other words, it
* behaves as if both strings had been converted to lower case using
* tolower() in the "C" locale on each byte, and the results had then
* been compared using strcmp().
*
* The assembly code below is an optimized version of the following C
* reference:
*
* static const char charmap[] = {
* '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
* '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
* '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
* '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
* '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
* '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
* '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
* '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
* '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
* '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
* '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
* '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
* '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
* '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
* '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
* '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
* '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
* '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
* '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
* '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
* '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
* '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
* '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
* '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
* '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
* '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
* '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
* '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337',
* '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
* '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
* '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
* '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377',
* };
*
* int
* ascii_strcasecmp(const char *s1, const char *s2)
* {
* const unsigned char *cm = (const unsigned char *)charmap;
* const unsigned char *us1 = (const unsigned char *)s1;
* const unsigned char *us2 = (const unsigned char *)s2;
*
* while (cm[*us1] == cm[*us2++])
* if (*us1++ == '\0')
* return (0);
* return (cm[*us1] - cm[*(us2 - 1)]);
* }
*
* The following algorithm, from a 1987 news posting by Alan Mycroft, is
* used for finding null bytes in a word:
*
* #define has_null(word) ((word - 0x01010101) & (~word & 0x80808080))
*
* The following algorithm is used for a wordwise tolower() operation:
*
* unsigned int
* parallel_tolower (unsigned int x)
* {
* unsigned int p;
* unsigned int q;
*
* unsigned int m1 = 0x80808080;
* unsigned int m2 = 0x3f3f3f3f;
* unsigned int m3 = 0x25252525;
*
* q = x & ~m1;// newb = byte & 0x7F
* p = q + m2; // newb > 0x40 --> MSB set
* q = q + m3; // newb < 0x5B --> MSB clear
* p = p & ~q; // newb > 0x40 && newb < 0x5B --> MSB set
* q = m1 & ~x;// byte < 0x80 --> 0x80
* q = p & q; // newb > 0x40 && newb < 0x5B && byte < 0x80 -> 0x80,else 0
* q = q >> 2; // newb > 0x40 && newb < 0x5B && byte < 0x80 -> 0x20,else 0
* return (x + q); // translate uppercase characters to lowercase
* }
*
* Both algorithms have been tested exhaustively for all possible 2^32 inputs.
*/
#include <sys/asm_linkage.h>
/*
 * Byte-at-a-time head of the comparison (visible portion only).
 *
 * Each step loads one byte from [%i1 + %i2] (the s1 byte, per the inline
 * comments) and one byte from [%i1] (the s2 byte), folds 'A'..'Z' to
 * lower case, and compares the two:
 *
 *   - "sub x, 'A'" followed by an unsigned compare against ('Z' - 'A')
 *     tests 'A' <= x <= 'Z' with a single branch.
 *   - "bleu,a" has the annul bit set, so the add of ('a' - 'A') in its
 *     delay slot executes only when the branch is taken (byte is upper
 *     case).
 *   - "subcc" leaves the difference of the folded bytes in %i0; if it is
 *     non-zero, control goes to .done with that difference in %i0.
 *   - Otherwise %i0 is 0, so "addcc %i0, %g1, %i0" reloads the folded
 *     byte and sets Z when it is the terminating NUL; .done is then
 *     reached with %i0 == 0.
 *
 * NOTE(review): the setup of %i1, %i2 and %i3 is not visible in this
 * text, and the branch target .s1aligned2 has no definition here. The
 * addressing suggests %i1 is the s2 pointer and %i2 holds (s1 - s2), so
 * that the single "inc %i1" advances both strings -- TODO confirm
 * against the complete source.
 */
.align 32
/* First byte pair. */
ldub [%i1 + %i2], %i0 ! s1[0]
ldub [%i1], %g1 ! s2[0]
sub %i0, 'A', %l0 ! transform for faster uppercase check
sub %g1, 'A', %l1 ! transform for faster uppercase check
cmp %l0, ('Z' - 'A') ! s1[0] uppercase?
bleu,a .noxlate11 ! yes
add %i0, ('a' - 'A'), %i0 ! s1[0] = tolower(s1[0])
.noxlate11:
cmp %l1, ('Z' - 'A') ! s2[0] uppercase?
bleu,a .noxlate12 ! yes
add %g1, ('a' - 'A'), %g1 ! s2[0] = tolower(s2[0])
.noxlate12:
subcc %i0, %g1, %i0 ! tolower(s1[0]) != tolower(s2[0]) ?
bne .done ! yup, done
inc %i1 ! s1++, s2++
addcc %i0, %g1, %i0 ! s1[0] == 0 ?
bz .done ! yup, done, strings equal
/*
 * The cmp below sits in the delay slot of the bz above and sets the
 * condition codes for the bz that follows it; the sethi in that
 * branch's delay slot begins building 0x01010101 in %i5.
 */
cmp %i3, 3 ! s1 aligned now?
bz .s1aligned2 ! yup
sethi %hi(0x01010101), %i5 ! start loading Mycroft's magic2
/*
 * Next byte pair; same steps as above.
 * NOTE(review): the inline comments call this pair s1[2]/s2[2], yet it
 * is the second pair loaded in the visible text -- a step may be
 * missing; TODO confirm.
 */
ldub [%i1 + %i2], %i0 ! s1[2]
ldub [%i1], %g1 ! s2[2]
sub %i0, 'A', %l0 ! transform for faster uppercase check
sub %g1, 'A', %l1 ! transform for faster uppercase check
cmp %l0, ('Z' - 'A') ! s1[2] uppercase?
bleu,a .noxlate31 ! yes
add %i0, ('a' - 'A'), %i0 ! s1[2] = tolower(s1[2])
.noxlate31:
cmp %l1, ('Z' - 'A') ! s2[2] uppercase?
bleu,a .noxlate32 ! yes
add %g1, ('a' - 'A'), %g1 ! s2[2] = tolower(s2[2])
.noxlate32:
subcc %i0, %g1, %i0 ! tolower(s1[2]) != tolower(s2[2]) ?
bne .done ! yup, done
inc %i1 ! s1++, s2++
/*
 * The or in the delay slot of the final bz completes the 0x01010101
 * constant in %i5 that the earlier sethi started.
 */
addcc %i0, %g1, %i0 ! s1[2] == 0 ?
bz .done ! yup, done, strings equal
or %i5, %lo(0x01010101),%i5! finish loading Mycroft's magic2
! enough to have a word aligned destination
/*
 * Alignment entry labels and return points (visible portion only).
 *
 * Falling into .s1aligned1 runs sethi + or, leaving 0x01010101 in %i5.
 * Entering at .s1aligned3 skips the sethi, so the upper 22 bits of %i5
 * must already hold %hi(0x01010101) on that path.
 *
 * NOTE(review): in this text .cmp, .doload, .word4 and .cmp4 label
 * nothing but a ret, each ret is followed directly by the next label or
 * the end of the text (no delay-slot instruction or restore is
 * visible), and the .s1aligned2 label branched to earlier in the file
 * is not defined. The word-wise comparison described in the file header
 * does not appear here; the body looks truncated -- TODO confirm
 * against the complete source.
 */
.s1aligned1:
sethi %hi(0x01010101), %i5 ! start loading Mycroft's magic2
.s1aligned3:
or %i5, %lo(0x01010101),%i5! finish loading Mycroft's magic2
.cmp:
.doload:
ret ! return
.word4:
.cmp4:
ret ! return
/*
 * .done is reached from the byte-wise compares with the result already
 * in %i0 (difference of the folded bytes, or 0 for equal strings).
 */
.done:
ret ! return