2N/A/*
2N/A * CDDL HEADER START
2N/A *
2N/A * The contents of this file are subject to the terms of the
2N/A * Common Development and Distribution License (the "License").
2N/A * You may not use this file except in compliance with the License.
2N/A *
2N/A * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
2N/A * or http://www.opensolaris.org/os/licensing.
2N/A * See the License for the specific language governing permissions
2N/A * and limitations under the License.
2N/A *
2N/A * When distributing Covered Code, include this CDDL HEADER in each
2N/A * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
2N/A * If applicable, add the following below this CDDL HEADER, with the
2N/A * fields enclosed by brackets "[]" replaced with your own identifying
2N/A * information: Portions Copyright [yyyy] [name of copyright owner]
2N/A *
2N/A * CDDL HEADER END
2N/A */
2N/A
2N/A/*
2N/A * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
2N/A * Use is subject to license terms.
2N/A */
2N/A
2N/A .file "strncpy.s"
2N/A
2N/A/*
2N/A * strncpy(s1, s2, n)
2N/A *
2N/A * Copy string s2 to s1, truncating or null-padding so that exactly n
2N/A * bytes are always written; return s1.
2N/A *
2N/A * Fast assembler language version of the following C-program for strncpy
2N/A * which represents the `standard' for the C-library.
2N/A *
2N/A * char *
2N/A * strncpy(char *s1, const char *s2, size_t n)
2N/A * {
2N/A * char *os1 = s1;
2N/A *
2N/A * n++;
2N/A * while ((--n != 0) && ((*s1++ = *s2++) != '\0'))
2N/A * ;
2N/A * if (n != 0)
2N/A * while (--n != 0)
2N/A * *s1++ = '\0';
2N/A * return (os1);
2N/A * }
2N/A */
2N/A
2N/A#include <sys/asm_linkage.h>
2N/A
2N/A ! strncpy(s1 = %o0, s2 = %o1, n = %o2) writes exactly n bytes to s1: the
2N/A ! bytes of s2 up to and including its null byte (or only the first n
2N/A ! bytes of s2 if no null byte occurs in them), then null bytes as padding
2N/A ! until n bytes have been written. s1 is returned in %o0.
2N/A !
2N/A ! n < 7 is handled by a byte loop that needs no register window. Otherwise
2N/A ! bytes are copied singly until the src ptr is word aligned; src is then
2N/A ! read a word at a time and stored according to the destination alignment
2N/A ! (word, half-word or byte). Both strings are addressed as end + (-count).
2N/A
2N/A ENTRY(strncpy)
2N/A
2N/A .align 32
2N/A subcc %g0, %o2, %o4 ! %o4 = -n, index that counts up to zero
2N/A bz .doneshort ! if n == 0, done
2N/A cmp %o2, 7 ! (delay slot) n < 7 ?
2N/A add %o1, %o2, %o3 ! %o3 = src + n (end of src)
2N/A blu .shortcpy ! n < 7, use byte-wise copy
2N/A add %o0, %o2, %o2 ! (delay slot) %o2 = dst + n; %o0 stays s1
2N/A andcc %o1, 3, %o5 ! %o5 = src & 3; src word aligned ?
2N/A bz .wordaligned ! yup
2N/A save %sp, -0x40, %sp ! (delay slot, runs either way) new register window
2N/A sub %i5, 4, %i5 ! %i5 = (src & 3) - 4 = -(bytes until src aligned)
2N/A nop ! align loop on 16-byte boundary
2N/A nop ! align loop on 16-byte boundary
2N/A
2N/A.alignsrc:
2N/A ldub [%i3 + %i4], %i1 ! src[]
2N/A stb %i1, [%i2 + %i4] ! dst[] = src[]
2N/A inccc %i4 ! src++, dst++, n--
2N/A bz .done ! n == 0, done
2N/A tst %i1 ! (delay slot) end of src reached (null byte) ?
2N/A bz,a .bytepad ! yes, at least one byte to pad here
2N/A add %i2, %i4, %l0 ! (annulled unless taken) single dest pointer for fill
2N/A inccc %i5 ! src aligned now?
2N/A bnz .alignsrc ! no, copy another byte
2N/A .empty ! delay slot is the first add of .wordaligned (harmless)
2N/A
2N/A.wordaligned:
2N/A add %i2, %i4, %l0 ! %l0 = current dst pointer
2N/A sethi %hi(0x01010101), %l1 ! Alan Mycroft's magic1
2N/A sub %i2, 4, %i2 ! adjust for dest pre-incr in cpy loops
2N/A or %l1, %lo(0x01010101),%l1! finish loading magic1
2N/A andcc %l0, 3, %g1 ! %g1 = dst & 3; destination word aligned ?
2N/A bnz .dstnotaligned ! nope
2N/A sll %l1, 7, %i5 ! (delay slot) Alan Mycroft's magic2 = 0x80808080
2N/A
2N/A.storeword:
2N/A lduw [%i3 + %i4], %i1 ! next src word (4 bytes)
2N/A addcc %i4, 4, %i4 ! n -= 4, src += 4, dst += 4 (index moves up by 4)
2N/A bcs .lastword ! counter wrapped: 4 or fewer bytes left, last word
2N/A andn %i5, %i1, %g1 ! (delay slot) ~word & 0x80808080
2N/A sub %i1, %l1, %l0 ! word - 0x01010101
2N/A andcc %l0, %g1, %g0 ! ((word - 0x01010101) & ~word & 0x80808080)
2N/A bz,a .storeword ! no zero byte if magic expression == 0
2N/A stw %i1, [%i2 + %i4] ! (annulled unless taken) store word to dst (address pre-incremented)
2N/A
2N/A ! n has not expired, but the word in %i1 contains the end of src. store
2N/A ! its bytes, replacing everything after the null byte with null bytes
2N/A
2N/A.zerobyte:
2N/A add %i2, %i4, %l0 ! pointer to dest string
2N/A srl %i1, 24, %g1 ! first byte
2N/A stb %g1, [%l0] ! store it
2N/A sub %g1, 1, %g1 ! byte == 0 ? -1 : byte - 1
2N/A sra %g1, 31, %g1 ! byte == 0 ? -1 : 0
2N/A andn %i1, %g1, %i1 ! if byte == 0, start padding with null bytes
2N/A srl %i1, 16, %g1 ! second byte
2N/A stb %g1, [%l0 + 1] ! store it
2N/A and %g1, 0xff, %g1 ! isolate byte
2N/A sub %g1, 1, %g1 ! byte == 0 ? -1 : byte - 1
2N/A sra %g1, 31, %g1 ! byte == 0 ? -1 : 0
2N/A andn %i1, %g1, %i1 ! if byte == 0, start padding with null bytes
2N/A srl %i1, 8, %g1 ! third byte
2N/A stb %g1, [%l0 + 2] ! store it
2N/A and %g1, 0xff, %g1 ! isolate byte
2N/A sub %g1, 1, %g1 ! byte == 0 ? -1 : byte - 1
2N/A sra %g1, 31, %g1 ! byte == 0 ? -1 : 0
2N/A andn %i1, %g1, %i1 ! if byte == 0, start padding with null bytes
2N/A stb %i1, [%l0 + 3] ! store fourth byte
2N/A addcc %i4, 8, %g0 ! carry set if 8 or fewer bytes are left to pad
2N/A bcs .bytepad ! yes, do simple byte wise fill
2N/A add %l0, 4, %l0 ! (delay slot) dst += 4
2N/A andcc %l0, 3, %l1 ! dst offset relative to word boundary
2N/A bz .fillaligned ! dst already word aligned
2N/A
2N/A ! here there is at least one more byte to zero out: otherwise we would
2N/A ! have exited through label .lastword
2N/A
2N/A sub %l1, 4, %l1 ! (delay slot of bz above) %l1 = -(bytes to align dst)
2N/A.makealigned:
2N/A stb %g0, [%l0] ! dst[] = 0
2N/A addcc %i4, 1, %i4 ! n--
2N/A bz .done ! n == 0, we are done
2N/A addcc %l1, 1, %l1 ! (delay slot) any more byte needed to align; sets cc for bnz
2N/A bnz .makealigned ! yup, pad another byte
2N/A add %l0, 1, %l0 ! (delay slot) dst++
2N/A nop ! pad to align copy loop below
2N/A
2N/A ! here we know that there are at least another 4 bytes to pad, since
2N/A ! we don't get here unless there were more than 8 bytes to pad to begin
2N/A ! with, and we have padded at most 3 bytes during dst aligning
2N/A
2N/A.fillaligned:
2N/A add %i4, 3, %i2 ! -(bytes left) + 3, to round towards zero below
2N/A and %i2, -4, %l1 ! %l1 = -(4 * whole words left to pad)
2N/A and %i2, 4, %i2 ! word count odd ? 4 : 0
2N/A stw %g0, [%l0] ! store first word
2N/A addcc %l1, %i2, %l1 ! zero if word count == 1, else -(8 * dwords to fill)
2N/A add %i4, %i2, %i4 ! if word count odd, n -= 4
2N/A bz .bytepad ! if word count == 1, pad bytes left
2N/A add %l0, %i2, %l0 ! (delay slot) bump dst if word count odd
2N/A
2N/A.fillword:
2N/A addcc %l1, 8, %l1 ! count -= 8; carry set on the last pass
2N/A stw %g0, [%l0] ! dst[n] = 0
2N/A stw %g0, [%l0 + 4] ! dst[n+4] = 0
2N/A add %l0, 8, %l0 ! dst += 8
2N/A bcc .fillword ! fill words until count == 0
2N/A addcc %i4, 8, %i4 ! (delay slot) n -= 8; cc tested by bz after the loop
2N/A bz .done ! if n == 0, we are done
2N/A .empty ! delay slot is the first and of .bytepad (harmless)
2N/A
2N/A.bytepad:
2N/A and %i4, 1, %i2 ! byte count odd ? 1 : 0
2N/A stb %g0, [%l0] ! store first byte (stored again below if count even)
2N/A addcc %i4, %i2, %i4 ! byte count == 1 ?
2N/A bz .done ! yup, we are done
2N/A add %l0, %i2, %l0 ! (delay slot) bump pointer if odd
2N/A
2N/A.fillbyte:
2N/A addcc %i4, 2, %i4 ! n -= 2
2N/A stb %g0, [%l0] ! dst[n] = 0
2N/A stb %g0, [%l0 + 1] ! dst[n+1] = 0
2N/A bnz .fillbyte ! fill until n == 0
2N/A add %l0, 2, %l0 ! (delay slot) dst += 2
2N/A
2N/A.done:
2N/A ret ! done
2N/A restore %i0, %g0, %o0 ! (delay slot) restore reg window, return s1
2N/A
2N/A ! this is the last word (1 to 4 bytes left). It may contain null bytes.
2N/A ! store bytes until n == 0; after a null byte, only null bytes are stored
2N/A
2N/A.lastword:
2N/A sub %i4, 4, %i4 ! undo counter pre-increment
2N/A add %i2, 4, %i2 ! adjust dst for counter un-bumping
2N/A
2N/A srl %i1, 24, %g1 ! first byte
2N/A stb %g1, [%i2 + %i4] ! store it
2N/A inccc %i4 ! n--
2N/A bz .done ! if n == 0, we're done
2N/A sub %g1, 1, %g1 ! (delay slot) byte == 0 ? -1 : byte - 1
2N/A sra %g1, 31, %g1 ! byte == 0 ? -1 : 0
2N/A andn %i1, %g1, %i1 ! if byte == 0, start padding with null
2N/A srl %i1, 16, %g1 ! second byte
2N/A stb %g1, [%i2 + %i4] ! store it
2N/A inccc %i4 ! n--
2N/A bz .done ! if n == 0, we're done
2N/A and %g1, 0xff, %g1 ! (delay slot) isolate byte
2N/A sub %g1, 1, %g1 ! byte == 0 ? -1 : byte - 1
2N/A sra %g1, 31, %g1 ! byte == 0 ? -1 : 0
2N/A andn %i1, %g1, %i1 ! if byte == 0, start padding with null
2N/A srl %i1, 8, %g1 ! third byte
2N/A stb %g1, [%i2 + %i4] ! store it
2N/A inccc %i4 ! n--
2N/A bz .done ! if n == 0, we're done
2N/A and %g1, 0xff, %g1 ! (delay slot) isolate byte
2N/A sub %g1, 1, %g1 ! byte == 0 ? -1 : byte - 1
2N/A sra %g1, 31, %g1 ! byte == 0 ? -1 : 0
2N/A andn %i1, %g1, %i1 ! if byte == 0, start padding with null
2N/A ba .done ! exactly one byte was left, so n is zero after the store
2N/A stb %i1, [%i2 + %i4] ! (delay slot) store fourth byte
2N/A
2N/A.dstnotaligned:
2N/A cmp %g1, 2 ! %g1 = dst & 3; dst half word aligned?
2N/A be .storehalfword2 ! yup, store half word at a time
2N/A .empty ! delay slot is the lduw below; it loads %i1 for .storehalfword2
2N/A.storebyte:
2N/A lduw [%i3 + %i4], %i1 ! x = src[]
2N/A addcc %i4, 4, %i4 ! src += 4, dst += 4, n -= 4
2N/A bcs .lastword ! if counter wraps, last word
2N/A andn %i5, %i1, %g1 ! (delay slot) ~x & 0x80808080
2N/A sub %i1, %l1, %l0 ! x - 0x01010101
2N/A andcc %l0, %g1, %g0 ! ((x - 0x01010101) & ~x & 0x80808080)
2N/A bnz .zerobyte ! end of src found, may need to pad
2N/A add %i2, %i4, %l0 ! (delay slot) dst (in pointer form)
2N/A srl %i1, 24, %g1 ! %g1<7:0> = 1st byte; dst + 1 is half-word aligned
2N/A stb %g1, [%l0] ! store first byte
2N/A srl %i1, 8, %g1 ! %g1<15:0> = bytes 2, 3
2N/A sth %g1, [%l0 + 1] ! store bytes 2, 3
2N/A ba .storebyte ! next word
2N/A stb %i1, [%l0 + 3] ! (delay slot) store fourth byte
2N/A nop ! never executed (follows ba and its delay slot)
2N/A nop ! never executed (follows ba and its delay slot)
2N/A
2N/A.storehalfword:
2N/A lduw [%i3 + %i4], %i1 ! x = src[]
2N/A.storehalfword2:
2N/A addcc %i4, 4, %i4 ! src += 4, dst += 4, n -= 4
2N/A bcs .lastword ! if counter wraps, last word
2N/A andn %i5, %i1, %g1 ! (delay slot) ~x & 0x80808080
2N/A sub %i1, %l1, %l0 ! x - 0x01010101
2N/A andcc %l0, %g1, %g0 ! ((x - 0x01010101) & ~x & 0x80808080)
2N/A bnz .zerobyte ! x has zero byte, handle end cases
2N/A add %i2, %i4, %l0 ! (delay slot) dst (in pointer form)
2N/A srl %i1, 16, %g1 ! %g1<15:0> = bytes 1, 2
2N/A sth %g1, [%l0] ! store bytes 1, 2
2N/A ba .storehalfword ! next word
2N/A sth %i1, [%l0 + 2] ! (delay slot) store bytes 3, 4
2N/A
2N/A.shortcpy:
2N/A ldub [%o3 + %o4], %o5 ! src[]
2N/A stb %o5, [%o2 + %o4] ! dst[] = src[]
2N/A inccc %o4 ! src++, dst++, n--
2N/A bz .doneshort ! if n == 0, done
2N/A tst %o5 ! (delay slot) src[] == 0 ?
2N/A bnz,a .shortcpy ! nope, next byte
2N/A nop ! empty delay slot
2N/A
2N/A.padbyte:
2N/A stb %g0, [%o2 + %o4] ! dst[] = 0
2N/A.padbyte2:
2N/A addcc %o4, 1, %o4 ! dst++, n--
2N/A bnz,a .padbyte2 ! if n != 0, next byte
2N/A stb %g0, [%o2 + %o4] ! (annulled unless taken) dst[] = 0
2N/A nop ! align label below to 16-byte boundary
2N/A
2N/A.doneshort:
2N/A retl ! return from leaf, %o0 still holds s1
2N/A nop ! empty delay slot
2N/A SET_SIZE(strncpy)