/* memset.s revision b1593d50e783f7d66722dde093752b74ffa95176 */
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
/*
* void *memset(void *sp, int c, size_t n)
*
* Set an array of n chars starting at sp to the character c.
* Return sp.
*
* Fast assembler language version of the following C program for memset,
* which represents the `standard' for the C library.
*
*     void *
*     memset(void *sp1, int c, size_t n)
*     {
*             if (n != 0) {
*                     char *sp = sp1;
*                     do {
*                             *sp++ = (char)c;
*                     } while (--n != 0);
*             }
*             return (sp1);
*     }
*/
#include <sys/asm_linkage.h>
/*
* Assembly-time constants used by memset.
*
* STACK_OFFSET is expressed relative to STACK_BIAS, which is not defined in
* the lines shown here (presumably supplied via <sys/asm_linkage.h> -- TODO
* confirm). Both branches of the conditional evaluate to STACK_BIAS + 0;
* only the spelling of the expression differs.
*/
#ifdef __sparcv9
#define STACK_OFFSET (STACK_BIAS + 0)
#else
#define STACK_OFFSET (STACK_BIAS + 0 + 0)
#endif
/* NOTE(review): presumably the offset of a scratch slot -- confirm at its uses. */
#define scratch_offset 0
/*
* ASI number; going by the name, the cache-sparing primary address space.
* NOTE(review): presumably the ASI used with the stxa stores mentioned in
* the optimization notes -- confirm.
*/
#define ASI_CACHE_SPARING_PRIMARY 0xf4
/*
* Line size in bytes, going by the name. The .align 64 directives in this
* file spell the same value as a literal.
*/
#define ICACHE_LINE_SIZE 64
/*
* Mask for the FEF bit of %fprs.
* NOTE(review): FEF is the FPU-enable bit per SPARC V9 -- confirm.
*/
#define FPRS_FEF 0x4
/*
* NOTE(review): presumably the distance, in bytes, that the far prefetch
* address runs ahead of the current destination -- confirm at its uses.
*/
#define PF_FAR 2048
/*
* Text section holding the memset implementation.
*
* NOTE(review): only labels, directives and comments are present between
* here and the end of this listing; the comments below describe the intent
* of each stage, and should be confirmed against the instructions.
*/
.section ".text"
/*
* Optimizations done:
*
* - No stores in the delay slot of branch instructions.
* - Conditional stores where possible.
* - Prefetch before doing stxa.
* - Bank-interleaved writing.
*/
/*
* If there are 0 bytes to transfer, return.
*/
continue:
/*
* If the count is a multiple of 8 and the buffer is aligned to 8 bytes,
* we don't have to look at %fprs.
*/
1:
/*
* Do a partial store of %o2 bytes.
*/
1:
1:
3:
1:
4:
/*
* If %g5 is < 4096, do start_128 only.
*/
6:
.align 64
5:
6:
.exit:
1:
init:
/*
* Local register usage:
* %l3 saves %o5 at the start of the inner loop.
* %l5 iteration counter that makes the buddy loop execute 2 times.
* %l6 iteration counter that makes the inner loop execute 32 times.
* %l7 address far ahead of the current %o5, used to prefetch the
* destination into the L2 cache.
*/
.align 64
/* Section 1 */
/*
* Each iteration of the inner loop below writes 8 sequential lines. This
* loop is iterated 4 times, to move a total of 32 lines, all of which have
* the same value of PA[9], so we increment the base address by 1024 bytes
* in each iteration, which varies PA[10].
*
* NOTE(review): the register-usage comment above gives 32 as the inner-loop
* count, while this text describes 4 iterations of 8 lines (32 lines in
* total) -- confirm which value the %l6 counter actually holds.
*/
/* -------- Now increment by 256 + 512 so we don't toggle PA[9] -------- */
/* ------------------------ END OF INNER LOOP -------------------------- */
/****larryalg_end_here*************/
1:
.align 64
5:
6:
1: