25cf1a301a396c38e8adf52c15f537b80d2483f7jl/*
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * CDDL HEADER START
25cf1a301a396c38e8adf52c15f537b80d2483f7jl *
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * The contents of this file are subject to the terms of the
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * Common Development and Distribution License (the "License").
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * You may not use this file except in compliance with the License.
25cf1a301a396c38e8adf52c15f537b80d2483f7jl *
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * or http://www.opensolaris.org/os/licensing.
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * See the License for the specific language governing permissions
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * and limitations under the License.
25cf1a301a396c38e8adf52c15f537b80d2483f7jl *
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * When distributing Covered Code, include this CDDL HEADER in each
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * If applicable, add the following below this CDDL HEADER, with the
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * fields enclosed by brackets "[]" replaced with your own identifying
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * information: Portions Copyright [yyyy] [name of copyright owner]
25cf1a301a396c38e8adf52c15f537b80d2483f7jl *
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * CDDL HEADER END
25cf1a301a396c38e8adf52c15f537b80d2483f7jl */
25cf1a301a396c38e8adf52c15f537b80d2483f7jl/*
e64c6c3f1a2cffb126880e90fb7670805896f505Michael Bergknoff * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * Use is subject to license terms.
25cf1a301a396c38e8adf52c15f537b80d2483f7jl */
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl#include <sys/param.h>
25cf1a301a396c38e8adf52c15f537b80d2483f7jl#include <sys/errno.h>
25cf1a301a396c38e8adf52c15f537b80d2483f7jl#include <sys/asm_linkage.h>
25cf1a301a396c38e8adf52c15f537b80d2483f7jl#include <sys/vtrace.h>
25cf1a301a396c38e8adf52c15f537b80d2483f7jl#include <sys/machthread.h>
25cf1a301a396c38e8adf52c15f537b80d2483f7jl#include <sys/clock.h>
25cf1a301a396c38e8adf52c15f537b80d2483f7jl#include <sys/asi.h>
25cf1a301a396c38e8adf52c15f537b80d2483f7jl#include <sys/fsr.h>
25cf1a301a396c38e8adf52c15f537b80d2483f7jl#include <sys/privregs.h>
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl#if !defined(lint)
25cf1a301a396c38e8adf52c15f537b80d2483f7jl#include "assym.h"
25cf1a301a396c38e8adf52c15f537b80d2483f7jl#endif /* lint */
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl/*
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * Pseudo-code to aid in understanding the control flow of the
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * bcopy/copyin/copyout routines.
25cf1a301a396c38e8adf52c15f537b80d2483f7jl *
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * On entry:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl *
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * ! Determine whether to use the FP register version
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * ! or the leaf routine version depending on size
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * ! of copy and flags. Set up error handling accordingly.
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * ! The transition point depends on whether the src and
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * ! dst addresses can be aligned to long word, word,
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * ! half word, or byte boundaries.
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * !
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * ! WARNING: <Register usage convention>
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * ! For FP version, %l6 holds previous error handling and
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * ! a flag: TRAMP_FLAG (low bits)
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * ! for leaf routine version, %o4 holds those values.
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * ! So either %l6 or %o4 is reserved and not available for
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * ! any other use.
25cf1a301a396c38e8adf52c15f537b80d2483f7jl *
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * if (length <= VIS_COPY_THRESHOLD) ! start with a quick test
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * go to small_copy; ! to speed short copies
25cf1a301a396c38e8adf52c15f537b80d2483f7jl *
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * if (src,dst long word alignable) {
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * if (hw_copy_limit_8 == 0) ! hw_copy disabled
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * go to small_copy;
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * if (length <= hw_copy_limit_8)
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * go to small_copy;
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * go to FPBLK_copy;
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * }
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * if (src,dst not alignable) {
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * if (hw_copy_limit_1 == 0) ! hw_copy disabled
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * go to small_copy;
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * if (length <= hw_copy_limit_1)
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * go to small_copy;
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * go to FPBLK_copy;
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * }
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * if (src,dst halfword alignable) {
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * if (hw_copy_limit_2 == 0) ! hw_copy disabled
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * go to small_copy;
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * if (length <= hw_copy_limit_2)
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * go to small_copy;
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * go to FPBLK_copy;
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * }
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * if (src,dst word alignable) {
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * if (hw_copy_limit_4 == 0) ! hw_copy disabled
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * go to small_copy;
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * if (length <= hw_copy_limit_4)
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * go to small_copy;
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * go to FPBLK_copy;
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * }
25cf1a301a396c38e8adf52c15f537b80d2483f7jl *
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * small_copy:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * Setup_leaf_rtn_error_handler; ! diffs for each entry point
25cf1a301a396c38e8adf52c15f537b80d2483f7jl *
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * if (count <= 3) ! fast path for tiny copies
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * go to sm_left; ! special finish up code
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * else
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * if (count > CHKSIZE) ! medium sized copies
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * go to sm_med ! tuned by alignment
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * if(src&dst not both word aligned) {
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * sm_movebytes:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * move byte by byte in 4-way unrolled loop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * fall into sm_left;
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * sm_left:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * move 0-3 bytes byte at a time as needed.
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * restore error handler and exit.
25cf1a301a396c38e8adf52c15f537b80d2483f7jl *
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * } else { ! src&dst are word aligned
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * check for at least 8 bytes left,
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * move word at a time, unrolled by 2
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * when fewer than 8 bytes left,
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * sm_half: move half word at a time while 2 or more bytes left
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * sm_byte: move final byte if necessary
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * sm_exit:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * restore error handler and exit.
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * }
25cf1a301a396c38e8adf52c15f537b80d2483f7jl *
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * ! Medium length cases with at least CHKSIZE bytes available
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * ! method: line up src and dst as best possible, then
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * ! move data in 4-way unrolled loops.
25cf1a301a396c38e8adf52c15f537b80d2483f7jl *
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * sm_med:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * if(src&dst unalignable)
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * go to sm_movebytes
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * if(src&dst halfword alignable)
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * go to sm_movehalf
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * if(src&dst word alignable)
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * go to sm_moveword
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * ! fall into long word movement
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * move bytes until src is word aligned
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * if not long word aligned, move a word
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * move long words in 4-way unrolled loop until < 32 bytes left
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * move long words in 1-way unrolled loop until < 8 bytes left
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * if zero bytes left, goto sm_exit
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * if one byte left, go to sm_byte
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * else go to sm_half
25cf1a301a396c38e8adf52c15f537b80d2483f7jl *
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * sm_moveword:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * move bytes until src is word aligned
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * move words in 4-way unrolled loop until < 16 bytes left
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * move words in 1-way unrolled loop until < 4 bytes left
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * if zero bytes left, goto sm_exit
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * if one byte left, go to sm_byte
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * else go to sm_half
25cf1a301a396c38e8adf52c15f537b80d2483f7jl *
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * sm_movehalf:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * move a byte if needed to align src on halfword
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * move halfwords in 4-way unrolled loop until < 8 bytes left
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * if zero bytes left, goto sm_exit
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * if one byte left, go to sm_byte
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * else go to sm_half
25cf1a301a396c38e8adf52c15f537b80d2483f7jl *
25cf1a301a396c38e8adf52c15f537b80d2483f7jl *
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * FPBLK_copy:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * %l6 = curthread->t_lofault;
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * if (%l6 != NULL) {
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * membar #Sync
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * curthread->t_lofault = .copyerr;
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * caller_error_handler = TRUE ! %l6 |= 2
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * }
25cf1a301a396c38e8adf52c15f537b80d2483f7jl *
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * ! for FPU testing we must not migrate cpus
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * if (curthread->t_lwp == NULL) {
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * ! Kernel threads do not have pcb's in which to store
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * ! the floating point state, so disallow preemption during
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * ! the copy. This also prevents cpu migration.
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * kpreempt_disable(curthread);
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * } else {
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * thread_nomigrate();
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * }
25cf1a301a396c38e8adf52c15f537b80d2483f7jl *
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * old_fprs = %fprs;
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * old_gsr = %gsr;
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * if (%fprs.fef) {
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * %fprs.fef = 1;
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * save current fpregs on stack using blockstore
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * } else {
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * %fprs.fef = 1;
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * }
25cf1a301a396c38e8adf52c15f537b80d2483f7jl *
25cf1a301a396c38e8adf52c15f537b80d2483f7jl *
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * do_blockcopy_here;
25cf1a301a396c38e8adf52c15f537b80d2483f7jl *
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * In lofault handler:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * curthread->t_lofault = .copyerr2;
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * Continue on with the normal exit handler
25cf1a301a396c38e8adf52c15f537b80d2483f7jl *
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * On normal exit:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * %gsr = old_gsr;
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * if (old_fprs & FPRS_FEF)
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * restore fpregs from stack using blockload
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * else
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * zero fpregs
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * %fprs = old_fprs;
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * membar #Sync
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * curthread->t_lofault = (%l6 & ~3);
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * ! following test omitted from copyin/copyout as they
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * ! will always have a current thread
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * if (curthread->t_lwp == NULL)
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * kpreempt_enable(curthread);
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * else
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * thread_allowmigrate();
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * return (0)
25cf1a301a396c38e8adf52c15f537b80d2483f7jl *
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * In second lofault handler (.copyerr2):
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * We've tried to restore fp state from the stack and failed. To
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * avoid returning with a corrupted fp state, we will panic.
25cf1a301a396c38e8adf52c15f537b80d2483f7jl */
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl/*
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * Comments about optimization choices
25cf1a301a396c38e8adf52c15f537b80d2483f7jl *
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * The initial optimization decision in this code is to determine
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * whether to use the FP registers for a copy or not. If we don't
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * use the FP registers, we can execute the copy as a leaf routine,
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * saving a register save and restore. Also, less elaborate setup
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * is required, allowing short copies to be completed more quickly.
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * For longer copies, especially unaligned ones (where the src and
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * dst do not align to allow simple ldx,stx operation), the FP
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * registers allow much faster copy operations.
25cf1a301a396c38e8adf52c15f537b80d2483f7jl *
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * The estimated extra cost of the FP path will vary depending on
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * src/dst alignment, dst offset from the next 64 byte FPblock store
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * boundary, remaining src data after the last full dst cache line is
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * moved, whether the FP registers need to be saved, and some other
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * minor issues. The average additional overhead is estimated to be
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * 400 clocks. Since each non-repeated/predicted tst and branch costs
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * around 10 clocks, elaborate calculation would slow down all
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * longer copies and only benefit a small portion of medium sized
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * copies. Rather than incur such cost, we chose fixed transition
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * points for each of the alignment choices.
25cf1a301a396c38e8adf52c15f537b80d2483f7jl *
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * For the inner loop, here is a comparison of the per cache line
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * costs for each alignment when src&dst are in cache:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl *
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * byte aligned: 108 clocks slower for non-FPBLK
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * half aligned: 44 clocks slower for non-FPBLK
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * word aligned: 12 clocks slower for non-FPBLK
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * long aligned: 4 clocks >>faster<< for non-FPBLK
25cf1a301a396c38e8adf52c15f537b80d2483f7jl *
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * The long aligned loop runs faster because it does no prefetching.
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * That wins if the data is already in cache or there is too little
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * data to gain much benefit from prefetching. But when there
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * is more data and that data is not in cache, failing to prefetch
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * can run much slower. In addition, there is a 2 Kbyte store queue
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * which will cause the non-FPBLK inner loop to slow for larger copies.
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * The exact tradeoff is strongly load and application dependent, with
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * increasing risk of a customer visible performance regression if the
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * non-FPBLK code is used for larger copies. Studies of synthetic in-cache
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * vs out-of-cache copy tests in user space suggest 1024 bytes as a safe
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * upper limit for the non-FPBLK code. To minimize performance regression
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * risk while still gaining the primary benefits of the improvements to
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * the non-FPBLK code, we set an upper bound of 1024 bytes for the various
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * hw_copy_limit_*. Later experimental studies using different values
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * of hw_copy_limit_* can be used to make further adjustments if
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * appropriate.
25cf1a301a396c38e8adf52c15f537b80d2483f7jl *
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * hw_copy_limit_1 = src and dst are byte aligned but not halfword aligned
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * hw_copy_limit_2 = src and dst are halfword aligned but not word aligned
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * hw_copy_limit_4 = src and dst are word aligned but not longword aligned
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * hw_copy_limit_8 = src and dst are longword aligned
25cf1a301a396c38e8adf52c15f537b80d2483f7jl *
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * To say that src and dst are word aligned means that after
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * some initial alignment activity of moving 0 to 3 bytes,
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * both the src and dst will be on word boundaries so that
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * word loads and stores may be used.
25cf1a301a396c38e8adf52c15f537b80d2483f7jl *
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * Default values as of May 2005 are:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * hw_copy_limit_1 = 256
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * hw_copy_limit_2 = 512
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * hw_copy_limit_4 = 1024
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * hw_copy_limit_8 = 1024 (or 1536 on some systems)
25cf1a301a396c38e8adf52c15f537b80d2483f7jl *
25cf1a301a396c38e8adf52c15f537b80d2483f7jl *
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * If hw_copy_limit_? is set to zero, then use of FPBLK copy is
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * disabled for that alignment choice.
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * If hw_copy_limit_? is set to a value between 1 and VIS_COPY_THRESHOLD (256)
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * the value of VIS_COPY_THRESHOLD is used.
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * It is not envisioned that hw_copy_limit_? will be changed in the field.
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * It is provided to allow for disabling FPBLK copies and to allow
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * easy testing of alternate values on future HW implementations
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * that might have different cache sizes, clock rates or instruction
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * timing rules.
25cf1a301a396c38e8adf52c15f537b80d2483f7jl *
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * Our first test for FPBLK copies vs non-FPBLK copies checks a minimum
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * threshold to speedup all shorter copies (less than 256). That
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * saves an alignment test, memory reference, and enabling test
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * for all short copies, or an estimated 24 clocks.
25cf1a301a396c38e8adf52c15f537b80d2483f7jl *
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * The order in which these limits are checked does matter since each
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * non-predicted tst and branch costs around 10 clocks.
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * If src and dst are randomly selected addresses,
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * 4 of 8 will not be alignable.
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * 2 of 8 will be half word alignable.
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * 1 of 8 will be word alignable.
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * 1 of 8 will be long word alignable.
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * But, tests on running kernels show that src and dst to copy code
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * are typically not on random alignments. Structure copies and
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * copies of larger data sizes are often on long word boundaries.
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * So we test the long word alignment case first, then
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * the byte alignment, then halfword, then word alignment.
25cf1a301a396c38e8adf52c15f537b80d2483f7jl *
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * Several times, tests for length are made to split the code
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * into subcases. These tests often allow later tests to be
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * avoided. For example, within the non-FPBLK copy, we first
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * check for tiny copies of 3 bytes or less. That allows us
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * to use a 4-way unrolled loop for the general byte copy case
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * without a test on loop entry.
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * We subdivide the non-FPBLK case further into CHKSIZE bytes and less
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * vs longer cases. For the really short case, we don't attempt
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * to align src and dst. We try to minimize special case tests in
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * the shortest loops as each test adds a significant percentage
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * to the total time.
25cf1a301a396c38e8adf52c15f537b80d2483f7jl *
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * For the medium sized cases, we allow ourselves to adjust the
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * src and dst alignment and provide special cases for each of
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * the four adjusted alignment cases. The CHKSIZE that was used
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * to decide between short and medium size was chosen to be 39
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * as that allows for the worst case of 7 bytes of alignment
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * shift and 4 times 8 bytes for the first long word unrolling.
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * That knowledge saves an initial test for length on entry into
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * the medium cases. If the general loop unrolling factor were
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * to be increased, this number would also need to be adjusted.
25cf1a301a396c38e8adf52c15f537b80d2483f7jl *
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * For all cases in the non-FPBLK code where it is known that at
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * least 4 chunks of data are available for movement, the
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * loop is unrolled by four. This 4-way loop runs in 8 clocks
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * or 2 clocks per data element.
25cf1a301a396c38e8adf52c15f537b80d2483f7jl *
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * Instruction alignment is forced by use of .align 16 directives
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * and nops which are not executed in the code. This
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * combination of operations shifts the alignment of following
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * loops to ensure that loops are aligned so that their instructions
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * fall within the minimum number of 4 instruction fetch groups.
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * If instructions are inserted or removed between the .align
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * instruction and the unrolled loops, then the alignment needs
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * to be readjusted. Misaligned loops can add a clock per loop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * iteration to the loop timing.
25cf1a301a396c38e8adf52c15f537b80d2483f7jl *
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * In a few cases, code is duplicated to avoid a branch. Since
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * a non-predicted tst and branch takes 10 clocks, this savings
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * is judged an appropriate time-space tradeoff.
25cf1a301a396c38e8adf52c15f537b80d2483f7jl *
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * Within the FPBLK-code, the prefetch method in the inner
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * loop needs to be explained as it is not standard. Two
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * prefetches are issued for each cache line instead of one.
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * The primary one is at the maximum reach of 8 cache lines.
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * Most of the time, that maximum prefetch reach gives the
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * cache line more time to reach the processor for systems with
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * higher processor clocks. But, sometimes memory interference
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * can cause that prefetch to be dropped. Putting a second
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * prefetch at a reach of 5 cache lines catches the drops
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * three iterations later and shows a measured improvement
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * in performance over any similar loop with a single prefetch.
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * The prefetches are placed in the loop so they overlap with
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * non-memory instructions, so that there is no extra cost
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * when the data is already in-cache.
25cf1a301a396c38e8adf52c15f537b80d2483f7jl *
25cf1a301a396c38e8adf52c15f537b80d2483f7jl */
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl/*
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * Notes on preserving existing fp state and on membars.
25cf1a301a396c38e8adf52c15f537b80d2483f7jl *
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * When a copyOP decides to use fp we may have to preserve existing
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * floating point state. It is not the caller's state that we need to
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * preserve - the rest of the kernel does not use fp and, anyway, fp
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * registers are volatile across a call. Some examples:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl *
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * - userland has fp state and is interrupted (device interrupt
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * or trap) and within the interrupt/trap handling we use
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * bcopy()
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * - another (higher level) interrupt or trap handler uses bcopy
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * while a bcopy from an earlier interrupt is still active
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * - an asynchronous error trap occurs while fp state exists (in
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * userland or in kernel copy) and the tl0 component of the handling
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * uses bcopy
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * - a user process with fp state incurs a copy-on-write fault and
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * hwblkpagecopy always uses fp
25cf1a301a396c38e8adf52c15f537b80d2483f7jl *
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * We therefore need a per-call place in which to preserve fp state -
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * using our stack is ideal (and since fp copy cannot be leaf optimized
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * because of calls it makes, this is no hardship).
25cf1a301a396c38e8adf52c15f537b80d2483f7jl *
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * When we have finished fp copy (with its repeated block stores)
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * we must membar #Sync so that our block stores may complete before
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * we either restore the original fp state into the fp registers or
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * return to a caller which may initiate other fp operations that could
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * modify the fp regs we used before the block stores complete.
25cf1a301a396c38e8adf52c15f537b80d2483f7jl *
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * Synchronous faults (eg, unresolvable DMMU miss) that occur while
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * t_lofault is not NULL will not panic but will instead trampoline
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * to the registered lofault handler. There is no need for any
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * membars for these - eg, our store to t_lofault will always be visible to
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * ourselves and it is our cpu which will take any trap.
25cf1a301a396c38e8adf52c15f537b80d2483f7jl *
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * Asynchronous faults (eg, uncorrectable ECC error from memory) that occur
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * while t_lofault is not NULL will also not panic. Since we're copying
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * to or from userland the extent of the damage is known - the destination
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * buffer is incomplete. So trap handlers will trampoline to the lofault
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * handler in this case which should take some form of error action to
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * avoid using the incomplete buffer. The trap handler also flags the
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * fault so that later return-from-trap handling (for the trap that brought
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * this thread into the kernel in the first place) can notify the process
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * and reboot the system (or restart the service with Greenline/Contracts).
25cf1a301a396c38e8adf52c15f537b80d2483f7jl *
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * Asynchronous faults (eg, uncorrectable ECC error from memory) can
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * result in deferred error traps - the trap is taken sometime after
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * the event and the trap PC may not be the PC of the faulting access.
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * Delivery of such pending traps can be forced by a membar #Sync, acting
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * as an "error barrier" in this role. To accurately apply the user/kernel
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * separation described in the preceding paragraph we must force delivery
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * of deferred traps affecting kernel state before we install a lofault
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * handler (if we interpose a new lofault handler on an existing one there
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * is no need to repeat this), and we must force delivery of deferred
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * errors affecting the lofault-protected region before we clear t_lofault.
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * Failure to do so results in lost kernel state being interpreted as
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * affecting a copyin/copyout only, or of an error that really only
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * affects copy data being interpreted as losing kernel state.
25cf1a301a396c38e8adf52c15f537b80d2483f7jl *
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * Since the copy operations may preserve and later restore floating
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * point state that does not belong to the caller (see examples above),
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * we must be careful in how we do this in order to prevent corruption
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * of another program.
25cf1a301a396c38e8adf52c15f537b80d2483f7jl *
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * To make sure that floating point state is always saved and restored
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * correctly, the following "big rules" must be followed when the floating
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * point registers will be used:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl *
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * 1. %l6 always holds the caller's lofault handler. Also in this register,
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * the lowest-order bit (FPUSED_FLAG, value 1) indicates that the floating
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * point registers are in use. The next bit (TRAMP_FLAG, value 2) indicates
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * that the call was to bcopy, and a lofault handler was set coming in.
25cf1a301a396c38e8adf52c15f537b80d2483f7jl *
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * 2. The FPUSED flag indicates that all FP state has been successfully stored
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * on the stack. It should not be set until this save has been completed.
25cf1a301a396c38e8adf52c15f537b80d2483f7jl *
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * 3. The FPUSED flag should not be cleared on exit until all FP state has
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * been restored from the stack. If an error occurs while restoring
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * data from the stack, the error handler can check this flag to see if
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * a restore is necessary.
25cf1a301a396c38e8adf52c15f537b80d2483f7jl *
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * 4. Code run under the new lofault handler must be kept to a minimum. In
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * particular, any calls to FP_ALLOWMIGRATE, which could result in a call
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * to kpreempt(), should not be made until after the lofault handler has
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * been restored.
25cf1a301a396c38e8adf52c15f537b80d2483f7jl */
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl/*
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * VIS_COPY_THRESHOLD indicates the minimum number of bytes needed
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * to "break even" using FP/VIS-accelerated memory operations.
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * The FPBLK code assumes a minimum number of bytes are available
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * to be moved on entry. Check that code carefully before
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * reducing VIS_COPY_THRESHOLD below 256.
25cf1a301a396c38e8adf52c15f537b80d2483f7jl */
25cf1a301a396c38e8adf52c15f537b80d2483f7jl/*
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * This shadows sys/machsystm.h which can't be included due to the lack of
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * _ASM guards in include files it references. Change it here, change it there.
25cf1a301a396c38e8adf52c15f537b80d2483f7jl */
25cf1a301a396c38e8adf52c15f537b80d2483f7jl#define VIS_COPY_THRESHOLD 256 /* bytes; keep in step with sys/machsystm.h */
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl/*
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * TEST for very short copies
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * Be aware that the maximum unroll for the short unaligned case
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * is SHORTCOPY+1
25cf1a301a396c38e8adf52c15f537b80d2483f7jl */
25cf1a301a396c38e8adf52c15f537b80d2483f7jl#define SHORTCOPY 3
25cf1a301a396c38e8adf52c15f537b80d2483f7jl#define CHKSIZE 39
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl/*
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * TRAMP_FLAG indicates that we're to trampoline to the error handler.
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * Entry points bcopy, copyin_noerr, and copyout_noerr use this flag.
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * kcopy, copyout, xcopyout, copyin, and xcopyin do not set this flag.
25cf1a301a396c38e8adf52c15f537b80d2483f7jl */
25cf1a301a396c38e8adf52c15f537b80d2483f7jl#define FPUSED_FLAG 1 /* fp regs in use, FP state saved on stack */
25cf1a301a396c38e8adf52c15f537b80d2483f7jl#define TRAMP_FLAG 2 /* trampoline to the previous lofault handler */
25cf1a301a396c38e8adf52c15f537b80d2483f7jl#define MASK_FLAGS 3 /* FPUSED_FLAG | TRAMP_FLAG */
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl/*
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * Number of outstanding prefetches.
c8a722abb8fd974fb16523acbd90ea75d5dcbeb2pm * first prefetch moves data from L2 to L1 (n_reads)
c8a722abb8fd974fb16523acbd90ea75d5dcbeb2pm * second prefetch moves data from memory to L2 (one_read)
25cf1a301a396c38e8adf52c15f537b80d2483f7jl */
c8a722abb8fd974fb16523acbd90ea75d5dcbeb2pm#define OLYMPUS_C_PREFETCH 24 /* presumably the first prefetch - confirm at use sites */
c8a722abb8fd974fb16523acbd90ea75d5dcbeb2pm#define OLYMPUS_C_2ND_PREFETCH 12 /* presumably the second prefetch - confirm at use sites */
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl#define VIS_BLOCKSIZE 64 /* bytes per block load/store */
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl/*
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * Size of stack frame in order to accommodate a 64-byte aligned
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * floating-point register save area and 2 64-bit temp locations.
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * All copy functions use two quadrants of fp registers; to assure a
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * block-aligned two block buffer in which to save we must reserve
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * three blocks on stack. Not all functions preserve %fprs on stack
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * or need to preserve %gsr but we use HWCOPYFRAMESIZE for all.
25cf1a301a396c38e8adf52c15f537b80d2483f7jl *
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * _______________________________________ <-- %fp + STACK_BIAS
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * | We may need to preserve 2 quadrants |
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * | of fp regs, but since we do so with |
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * | BST/BLD we need room in which to |
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * | align to VIS_BLOCKSIZE bytes. So |
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * | this area is 3 * VIS_BLOCKSIZE. | <-- - SAVED_FPREGS_OFFSET
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * |-------------------------------------|
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * | 8 bytes to save %fprs | <-- - SAVED_FPRS_OFFSET
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * |-------------------------------------|
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * | 8 bytes to save %gsr | <-- - SAVED_GSR_OFFSET
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * ---------------------------------------
25cf1a301a396c38e8adf52c15f537b80d2483f7jl */
25cf1a301a396c38e8adf52c15f537b80d2483f7jl#define HWCOPYFRAMESIZE ((VIS_BLOCKSIZE * (2 + 1)) + (2 * 8)) /* 3 blocks + two 8-byte slots */
25cf1a301a396c38e8adf52c15f537b80d2483f7jl#define SAVED_FPREGS_OFFSET (VIS_BLOCKSIZE * 3) /* fp save area extends this far below %fp + STACK_BIAS */
25cf1a301a396c38e8adf52c15f537b80d2483f7jl#define SAVED_FPREGS_ADJUST ((VIS_BLOCKSIZE * 2) - 1) /* subtracted before rounding down to a block boundary */
25cf1a301a396c38e8adf52c15f537b80d2483f7jl#define SAVED_FPRS_OFFSET (SAVED_FPREGS_OFFSET + 8) /* slot for %fprs */
25cf1a301a396c38e8adf52c15f537b80d2483f7jl#define SAVED_GSR_OFFSET (SAVED_FPRS_OFFSET + 8) /* slot for %gsr */
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl/*
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * Common macros used by the various versions of the block copy
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * routines in this file.
25cf1a301a396c38e8adf52c15f537b80d2483f7jl */
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl/*
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * In FP copies if we do not have preserved data to restore over
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * the fp regs we used then we must zero those regs to avoid
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * exposing portions of the data to later threads (data security).
25cf1a301a396c38e8adf52c15f537b80d2483f7jl *
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * Copy functions use either quadrants 1 and 3 or 2 and 4.
25cf1a301a396c38e8adf52c15f537b80d2483f7jl *
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * FZEROQ1Q3: Zero quadrants 1 and 3, ie %f0 - %f15 and %f32 - %f47
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * FZEROQ2Q4: Zero quadrants 2 and 4, ie %f16 - %f31 and %f48 - %f63
25cf1a301a396c38e8adf52c15f537b80d2483f7jl *
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * The instructions below are quicker than repeated fzero instructions
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * since they can dispatch down two fp pipelines.
25cf1a301a396c38e8adf52c15f537b80d2483f7jl */
25cf1a301a396c38e8adf52c15f537b80d2483f7jl#define FZEROQ1Q3 \
25cf1a301a396c38e8adf52c15f537b80d2483f7jl fzero %f0 /* seed: zero %f0, then replicate it */ ;\
25cf1a301a396c38e8adf52c15f537b80d2483f7jl fmovd %f0, %f2 ;\
25cf1a301a396c38e8adf52c15f537b80d2483f7jl fmovd %f0, %f4 ;\
25cf1a301a396c38e8adf52c15f537b80d2483f7jl fmovd %f0, %f6 ;\
25cf1a301a396c38e8adf52c15f537b80d2483f7jl fmovd %f0, %f8 ;\
25cf1a301a396c38e8adf52c15f537b80d2483f7jl fmovd %f0, %f10 ;\
25cf1a301a396c38e8adf52c15f537b80d2483f7jl fmovd %f0, %f12 ;\
25cf1a301a396c38e8adf52c15f537b80d2483f7jl fmovd %f0, %f14 /* end of quadrant 1 */ ;\
25cf1a301a396c38e8adf52c15f537b80d2483f7jl fmovd %f0, %f32 /* start of quadrant 3 */ ;\
25cf1a301a396c38e8adf52c15f537b80d2483f7jl fmovd %f0, %f34 ;\
25cf1a301a396c38e8adf52c15f537b80d2483f7jl fmovd %f0, %f36 ;\
25cf1a301a396c38e8adf52c15f537b80d2483f7jl fmovd %f0, %f38 ;\
25cf1a301a396c38e8adf52c15f537b80d2483f7jl fmovd %f0, %f40 ;\
25cf1a301a396c38e8adf52c15f537b80d2483f7jl fmovd %f0, %f42 ;\
25cf1a301a396c38e8adf52c15f537b80d2483f7jl fmovd %f0, %f44 ;\
25cf1a301a396c38e8adf52c15f537b80d2483f7jl fmovd %f0, %f46
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl#define FZEROQ2Q4 \
25cf1a301a396c38e8adf52c15f537b80d2483f7jl fzero %f16 /* seed: zero %f16; %f0 is quadrant 1 and is not zeroed here */ ;\
25cf1a301a396c38e8adf52c15f537b80d2483f7jl fmovd %f16, %f18 ;\
25cf1a301a396c38e8adf52c15f537b80d2483f7jl fmovd %f16, %f20 ;\
25cf1a301a396c38e8adf52c15f537b80d2483f7jl fmovd %f16, %f22 ;\
25cf1a301a396c38e8adf52c15f537b80d2483f7jl fmovd %f16, %f24 ;\
25cf1a301a396c38e8adf52c15f537b80d2483f7jl fmovd %f16, %f26 ;\
25cf1a301a396c38e8adf52c15f537b80d2483f7jl fmovd %f16, %f28 ;\
25cf1a301a396c38e8adf52c15f537b80d2483f7jl fmovd %f16, %f30 /* end of quadrant 2 */ ;\
25cf1a301a396c38e8adf52c15f537b80d2483f7jl fmovd %f16, %f48 /* start of quadrant 4 */ ;\
25cf1a301a396c38e8adf52c15f537b80d2483f7jl fmovd %f16, %f50 ;\
25cf1a301a396c38e8adf52c15f537b80d2483f7jl fmovd %f16, %f52 ;\
25cf1a301a396c38e8adf52c15f537b80d2483f7jl fmovd %f16, %f54 ;\
25cf1a301a396c38e8adf52c15f537b80d2483f7jl fmovd %f16, %f56 ;\
25cf1a301a396c38e8adf52c15f537b80d2483f7jl fmovd %f16, %f58 ;\
25cf1a301a396c38e8adf52c15f537b80d2483f7jl fmovd %f16, %f60 ;\
25cf1a301a396c38e8adf52c15f537b80d2483f7jl fmovd %f16, %f62
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl/*
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * Macros to save and restore quadrants 1 and 3 or 2 and 4 to/from the stack.
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * Used to save and restore in-use fp registers when we want to use FP
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * and find fp already in use and copy size still large enough to justify
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * the additional overhead of this save and restore.
25cf1a301a396c38e8adf52c15f537b80d2483f7jl *
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * A membar #Sync is needed before save to sync fp ops initiated before
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * the call to the copy function (by whoever has fp in use); for example
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * an earlier block load to the quadrant we are about to save may still be
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * "in flight". A membar #Sync is required at the end of the save to
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * sync our block store (the copy code is about to begin ldd's to the
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * first quadrant).
25cf1a301a396c38e8adf52c15f537b80d2483f7jl *
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * Similarly: a membar #Sync before restore allows the block stores of
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * the copy operation to complete before we fill the quadrants with their
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * original data, and a membar #Sync after restore lets the block loads
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * of the restore complete before we return to whoever has the fp regs
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * in use. To avoid repeated membar #Sync we make it the responsibility
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * of the copy code to membar #Sync immediately after copy is complete
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * and before using the BLD_*_FROMSTACK macro.
25cf1a301a396c38e8adf52c15f537b80d2483f7jl */
25cf1a301a396c38e8adf52c15f537b80d2483f7jl#if !defined(lint)
25cf1a301a396c38e8adf52c15f537b80d2483f7jl#define BST_FPQ1Q3_TOSTACK(tmp1) \
25cf1a301a396c38e8adf52c15f537b80d2483f7jl /* membar #Sync - NOTE(review): not issued here; confirm whether callers must supply it */ ;\
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add %fp, STACK_BIAS - SAVED_FPREGS_ADJUST, tmp1 ;\
25cf1a301a396c38e8adf52c15f537b80d2483f7jl and tmp1, -VIS_BLOCKSIZE, tmp1 /* block align */ ;\
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stda %f0, [tmp1]ASI_BLK_P /* save quadrant 1 */ ;\
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add tmp1, VIS_BLOCKSIZE, tmp1 /* advance to second block */ ;\
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stda %f32, [tmp1]ASI_BLK_P /* save quadrant 3 */ ;\
25cf1a301a396c38e8adf52c15f537b80d2483f7jl membar #Sync
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl#define BLD_FPQ1Q3_FROMSTACK(tmp1) \
25cf1a301a396c38e8adf52c15f537b80d2483f7jl /* membar #Sync - provided at copy completion */ ;\
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add %fp, STACK_BIAS - SAVED_FPREGS_ADJUST, tmp1 ;\
25cf1a301a396c38e8adf52c15f537b80d2483f7jl and tmp1, -VIS_BLOCKSIZE, tmp1 /* block align */ ;\
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldda [tmp1]ASI_BLK_P, %f0 /* reload quadrant 1 */ ;\
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add tmp1, VIS_BLOCKSIZE, tmp1 /* advance to second block */ ;\
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldda [tmp1]ASI_BLK_P, %f32 /* reload quadrant 3 */ ;\
25cf1a301a396c38e8adf52c15f537b80d2483f7jl membar #Sync
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl#define BST_FPQ2Q4_TOSTACK(tmp1) \
25cf1a301a396c38e8adf52c15f537b80d2483f7jl /* membar #Sync - NOTE(review): not issued here; confirm whether callers must supply it */ ;\
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add %fp, STACK_BIAS - SAVED_FPREGS_ADJUST, tmp1 ;\
25cf1a301a396c38e8adf52c15f537b80d2483f7jl and tmp1, -VIS_BLOCKSIZE, tmp1 /* block align */ ;\
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stda %f16, [tmp1]ASI_BLK_P /* save quadrant 2 */ ;\
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add tmp1, VIS_BLOCKSIZE, tmp1 /* advance to second block */ ;\
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stda %f48, [tmp1]ASI_BLK_P /* save quadrant 4 */ ;\
25cf1a301a396c38e8adf52c15f537b80d2483f7jl membar #Sync
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl#define BLD_FPQ2Q4_FROMSTACK(tmp1) \
25cf1a301a396c38e8adf52c15f537b80d2483f7jl /* membar #Sync - provided at copy completion */ ;\
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add %fp, STACK_BIAS - SAVED_FPREGS_ADJUST, tmp1 ;\
25cf1a301a396c38e8adf52c15f537b80d2483f7jl and tmp1, -VIS_BLOCKSIZE, tmp1 /* block align */ ;\
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldda [tmp1]ASI_BLK_P, %f16 /* reload quadrant 2 */ ;\
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add tmp1, VIS_BLOCKSIZE, tmp1 /* advance to second block */ ;\
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldda [tmp1]ASI_BLK_P, %f48 /* reload quadrant 4 */ ;\
25cf1a301a396c38e8adf52c15f537b80d2483f7jl membar #Sync
25cf1a301a396c38e8adf52c15f537b80d2483f7jl#endif
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl/*
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * FP_NOMIGRATE and FP_ALLOWMIGRATE. Prevent migration (or, stronger,
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * prevent preemption if there is no t_lwp to save FP state to on context
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * switch) before commencing a FP copy, and reallow it on completion or
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * in error trampoline paths when we were using FP copy.
25cf1a301a396c38e8adf52c15f537b80d2483f7jl *
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * Both macros may call other functions, so be aware that all outputs are
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * forfeit after using these macros. For this reason we do not pass registers
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * to use - we just use any outputs we want.
25cf1a301a396c38e8adf52c15f537b80d2483f7jl *
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * Pseudo code:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl *
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * FP_NOMIGRATE:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl *
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * if (curthread->t_lwp) {
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * thread_nomigrate();
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * } else {
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * kpreempt_disable();
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * }
25cf1a301a396c38e8adf52c15f537b80d2483f7jl *
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * FP_ALLOWMIGRATE:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl *
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * if (curthread->t_lwp) {
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * thread_allowmigrate();
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * } else {
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * kpreempt_enable();
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * }
25cf1a301a396c38e8adf52c15f537b80d2483f7jl */
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl#define FP_NOMIGRATE(label1, label2) \
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldn [THREAD_REG + T_LWP], %o0 /* curthread->t_lwp */ ;\
25cf1a301a396c38e8adf52c15f537b80d2483f7jl brz,a,pn %o0, label1/**/f /* no lwp: take the kpreempt_disable path */ ;\
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldsb [THREAD_REG + T_PREEMPT], %o1 /* annulled delay slot: runs only if branch taken */ ;\
25cf1a301a396c38e8adf52c15f537b80d2483f7jl call thread_nomigrate ;\
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop ;\
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ba label2/**/f ;\
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop ;\
25cf1a301a396c38e8adf52c15f537b80d2483f7jllabel1: ;\
25cf1a301a396c38e8adf52c15f537b80d2483f7jl inc %o1 /* bump the preempt count loaded above */ ;\
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stb %o1, [THREAD_REG + T_PREEMPT] /* and store it back */ ;\
25cf1a301a396c38e8adf52c15f537b80d2483f7jllabel2:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl#define FP_ALLOWMIGRATE(label1, label2) \
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldn [THREAD_REG + T_LWP], %o0 /* curthread->t_lwp */ ;\
25cf1a301a396c38e8adf52c15f537b80d2483f7jl brz,a,pn %o0, label1/**/f /* no lwp: take the kpreempt_enable path */ ;\
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldsb [THREAD_REG + T_PREEMPT], %o1 /* annulled delay slot: runs only if branch taken */ ;\
25cf1a301a396c38e8adf52c15f537b80d2483f7jl call thread_allowmigrate ;\
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop ;\
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ba label2/**/f ;\
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop ;\
25cf1a301a396c38e8adf52c15f537b80d2483f7jllabel1: ;\
25cf1a301a396c38e8adf52c15f537b80d2483f7jl dec %o1 /* drop the preempt count loaded above */ ;\
25cf1a301a396c38e8adf52c15f537b80d2483f7jl brnz,pn %o1, label2/**/f /* count still nonzero: done */ ;\
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stb %o1, [THREAD_REG + T_PREEMPT] /* delay slot: new count is always stored */ ;\
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldn [THREAD_REG + T_CPU], %o0 ;\
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldub [%o0 + CPU_KPRUNRUN], %o0 /* CPU_KPRUNRUN byte of the current cpu */ ;\
25cf1a301a396c38e8adf52c15f537b80d2483f7jl brz,pt %o0, label2/**/f /* zero: no need to call kpreempt */ ;\
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop ;\
25cf1a301a396c38e8adf52c15f537b80d2483f7jl call kpreempt ;\
25cf1a301a396c38e8adf52c15f537b80d2483f7jl rdpr %pil, %o0 /* delay slot: current %pil is the argument */ ;\
25cf1a301a396c38e8adf52c15f537b80d2483f7jllabel2:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl/*
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * Copy a block of storage, returning an error code if `from' or
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * `to' takes a kernel pagefault which cannot be resolved.
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * Returns errno value on pagefault error, 0 if all ok
25cf1a301a396c38e8adf52c15f537b80d2483f7jl */
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl#if defined(lint)
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl/* ARGSUSED */
25cf1a301a396c38e8adf52c15f537b80d2483f7jlint
25cf1a301a396c38e8adf52c15f537b80d2483f7jlkcopy(const void *from, void *to, size_t count)
25cf1a301a396c38e8adf52c15f537b80d2483f7jl{ return(0); } /* lint-only stub; the real kcopy is the assembly in the non-lint branch */
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl#else /* lint */
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl .seg ".text"
25cf1a301a396c38e8adf52c15f537b80d2483f7jl .align 4
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ENTRY(kcopy)
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ! %o0 = from, %o1 = to, %o2 = count; choose the small or large copy path
25cf1a301a396c38e8adf52c15f537b80d2483f7jl cmp %o2, VIS_COPY_THRESHOLD ! check for leaf rtn case
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bleu,pt %ncc, .kcopy_small ! count <= threshold: small copy
25cf1a301a396c38e8adf52c15f537b80d2483f7jl xor %o0, %o1, %o3 ! are src, dst alignable?
25cf1a301a396c38e8adf52c15f537b80d2483f7jl btst 7, %o3 ! do the low 3 address bits agree?
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pt %ncc, .kcopy_8 ! check for longword alignment
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl btst 1, %o3 ! does the low address bit agree?
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pt %ncc, .kcopy_2 ! check for half-word
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl sethi %hi(hw_copy_limit_1), %o3 ! Check copy limit
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ld [%o3 + %lo(hw_copy_limit_1)], %o3
25cf1a301a396c38e8adf52c15f537b80d2483f7jl tst %o3
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pn %icc, .kcopy_small ! if zero, disable HW copy
25cf1a301a396c38e8adf52c15f537b80d2483f7jl cmp %o2, %o3 ! if length <= limit
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bleu,pt %ncc, .kcopy_small ! go to small copy
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ba,pt %ncc, .kcopy_more ! otherwise go to large copy
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.kcopy_2:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl btst 3, %o3 ! do the low 2 address bits agree?
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pt %ncc, .kcopy_4 ! check for word alignment
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl sethi %hi(hw_copy_limit_2), %o3 ! Check copy limit
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ld [%o3 + %lo(hw_copy_limit_2)], %o3
25cf1a301a396c38e8adf52c15f537b80d2483f7jl tst %o3
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pn %icc, .kcopy_small ! if zero, disable HW copy
25cf1a301a396c38e8adf52c15f537b80d2483f7jl cmp %o2, %o3 ! if length <= limit
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bleu,pt %ncc, .kcopy_small ! go to small copy
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ba,pt %ncc, .kcopy_more ! otherwise go to large copy
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.kcopy_4:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ! already checked longword, must be word aligned
25cf1a301a396c38e8adf52c15f537b80d2483f7jl sethi %hi(hw_copy_limit_4), %o3 ! Check copy limit
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ld [%o3 + %lo(hw_copy_limit_4)], %o3
25cf1a301a396c38e8adf52c15f537b80d2483f7jl tst %o3
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pn %icc, .kcopy_small ! if zero, disable HW copy
25cf1a301a396c38e8adf52c15f537b80d2483f7jl cmp %o2, %o3 ! if length <= limit
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bleu,pt %ncc, .kcopy_small ! go to small copy
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ba,pt %ncc, .kcopy_more ! otherwise go to large copy
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.kcopy_8:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl sethi %hi(hw_copy_limit_8), %o3 ! Check copy limit
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ld [%o3 + %lo(hw_copy_limit_8)], %o3
25cf1a301a396c38e8adf52c15f537b80d2483f7jl tst %o3
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pn %icc, .kcopy_small ! if zero, disable HW copy
25cf1a301a396c38e8adf52c15f537b80d2483f7jl cmp %o2, %o3 ! if length <= limit
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bleu,pt %ncc, .kcopy_small ! go to small copy
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ba,pt %ncc, .kcopy_more ! otherwise go to large copy
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.kcopy_small:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl sethi %hi(.sm_copyerr), %o5 ! sm_copyerr is lofault value
25cf1a301a396c38e8adf52c15f537b80d2483f7jl or %o5, %lo(.sm_copyerr), %o5
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldn [THREAD_REG + T_LOFAULT], %o4 ! save existing handler
25cf1a301a396c38e8adf52c15f537b80d2483f7jl membar #Sync ! sync error barrier
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ba,pt %ncc, .sm_do_copy ! common code
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stn %o5, [THREAD_REG + T_LOFAULT] ! set t_lofault (delay slot)
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.kcopy_more:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl save %sp, -SA(MINFRAME + HWCOPYFRAMESIZE), %sp ! new window with room for the fp save area
25cf1a301a396c38e8adf52c15f537b80d2483f7jl sethi %hi(.copyerr), %l7 ! copyerr is lofault value
25cf1a301a396c38e8adf52c15f537b80d2483f7jl or %l7, %lo(.copyerr), %l7
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldn [THREAD_REG + T_LOFAULT], %l6 ! save existing handler
25cf1a301a396c38e8adf52c15f537b80d2483f7jl membar #Sync ! sync error barrier
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ba,pt %ncc, .do_copy ! common code
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stn %l7, [THREAD_REG + T_LOFAULT] ! set t_lofault (delay slot)
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl/*
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * We got here because of a fault during bcopy_more, called from kcopy or bcopy.
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * Errno value is in %g1. bcopy_more uses fp quadrants 1 and 3.
25cf1a301a396c38e8adf52c15f537b80d2483f7jl */
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.copyerr:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl set .copyerr2, %l0 ! a fault from here on goes to .copyerr2
25cf1a301a396c38e8adf52c15f537b80d2483f7jl membar #Sync ! sync error barrier
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stn %l0, [THREAD_REG + T_LOFAULT] ! set t_lofault
25cf1a301a396c38e8adf52c15f537b80d2483f7jl btst FPUSED_FLAG, %l6 ! was FP state saved on the stack?
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz %ncc, 1f ! no: skip the fp restore
25cf1a301a396c38e8adf52c15f537b80d2483f7jl and %l6, TRAMP_FLAG, %l0 ! copy trampoline flag to %l0
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldx [%fp + STACK_BIAS - SAVED_GSR_OFFSET], %o2 ! restore gsr
25cf1a301a396c38e8adf52c15f537b80d2483f7jl wr %o2, 0, %gsr
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ld [%fp + STACK_BIAS - SAVED_FPRS_OFFSET], %o3 ! saved %fprs (NOTE(review): 32-bit load; confirm the save side uses a 32-bit store)
25cf1a301a396c38e8adf52c15f537b80d2483f7jl btst FPRS_FEF, %o3 ! FEF set in the saved %fprs?
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pt %icc, 4f ! no: zero the regs instead of reloading
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl BLD_FPQ1Q3_FROMSTACK(%o2)
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ba,pt %ncc, 1f
25cf1a301a396c38e8adf52c15f537b80d2483f7jl wr %o3, 0, %fprs ! restore fprs
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl4:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl FZEROQ1Q3
25cf1a301a396c38e8adf52c15f537b80d2483f7jl wr %o3, 0, %fprs ! restore fprs
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl !
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ! Need to cater for the different expectations of kcopy
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ! and bcopy. kcopy will *always* set a t_lofault handler.
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ! If it fires, we're expected to just return the error code
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ! and *not* to invoke any existing error handler. As far as
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ! bcopy is concerned, we only set t_lofault if there was an
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ! existing lofault handler. In that case we're expected to
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ! invoke the previously existing handler after resetting the
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ! t_lofault value.
25cf1a301a396c38e8adf52c15f537b80d2483f7jl !
25cf1a301a396c38e8adf52c15f537b80d2483f7jl1:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl andn %l6, MASK_FLAGS, %l6 ! strip both flag bits, leaving the old handler
25cf1a301a396c38e8adf52c15f537b80d2483f7jl membar #Sync ! sync error barrier
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stn %l6, [THREAD_REG + T_LOFAULT] ! restore old t_lofault
25cf1a301a396c38e8adf52c15f537b80d2483f7jl FP_ALLOWMIGRATE(5, 6)
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl btst TRAMP_FLAG, %l0 ! trampoline flag was set on entry?
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bnz,pn %ncc, 3f ! yes: jump to the previous handler
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ret
25cf1a301a396c38e8adf52c15f537b80d2483f7jl restore %g1, 0, %o0 ! return errno from %g1 (NOTE(review): assumes %g1 survives the calls in FP_ALLOWMIGRATE - confirm)
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl3:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl !
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ! We're here via bcopy. There *must* have been an error handler
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ! in place otherwise we would have died a nasty death already.
25cf1a301a396c38e8adf52c15f537b80d2483f7jl !
25cf1a301a396c38e8adf52c15f537b80d2483f7jl jmp %l6 ! goto real handler
25cf1a301a396c38e8adf52c15f537b80d2483f7jl restore %g0, 0, %o0 ! dispose of copy window
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl/*
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * We got here because of a fault in .copyerr. We can't safely restore fp
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * state, so we panic.
25cf1a301a396c38e8adf52c15f537b80d2483f7jl */
25cf1a301a396c38e8adf52c15f537b80d2483f7jlfp_panic_msg:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl .asciz "Unable to restore fp state after copy operation"
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl .align 4
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.copyerr2:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl set fp_panic_msg, %o0
25cf1a301a396c38e8adf52c15f537b80d2483f7jl call panic
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl/*
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * We got here because of a fault during a small kcopy or bcopy.
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * No floating point registers are used by the small copies.
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * Errno value is in %g1.
25cf1a301a396c38e8adf52c15f537b80d2483f7jl */
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.sm_copyerr:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl1:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl btst TRAMP_FLAG, %o4
25cf1a301a396c38e8adf52c15f537b80d2483f7jl membar #Sync
25cf1a301a396c38e8adf52c15f537b80d2483f7jl andn %o4, TRAMP_FLAG, %o4
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bnz,pn %ncc, 3f
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stn %o4, [THREAD_REG + T_LOFAULT] ! restore old t_lofault
25cf1a301a396c38e8adf52c15f537b80d2483f7jl retl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl mov %g1, %o0
25cf1a301a396c38e8adf52c15f537b80d2483f7jl3:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl jmp %o4 ! goto real handler
25cf1a301a396c38e8adf52c15f537b80d2483f7jl mov %g0, %o0 !
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl SET_SIZE(kcopy)
25cf1a301a396c38e8adf52c15f537b80d2483f7jl#endif /* lint */
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl/*
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * Copy a block of storage - must not overlap (from + len <= to).
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * Registers: l6 - saved t_lofault
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * (for short copies, o4 - saved t_lofault)
25cf1a301a396c38e8adf52c15f537b80d2483f7jl *
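25cf1a301a396c38e8adf52c15f537b80d2483f7jl * NOTE(review): the following sentence looks stale for bcopy, which also
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * handles short and unaligned copies through the small/medium paths below:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * "Copy a page of memory. Assumes double word alignment and a count >= 256."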
25cf1a301a396c38e8adf52c15f537b80d2483f7jl */
25cf1a301a396c38e8adf52c15f537b80d2483f7jl#if defined(lint)
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl/* ARGSUSED */
25cf1a301a396c38e8adf52c15f537b80d2483f7jlvoid
25cf1a301a396c38e8adf52c15f537b80d2483f7jlbcopy(const void *from, void *to, size_t count)
25cf1a301a396c38e8adf52c15f537b80d2483f7jl{}	/* empty on purpose: compiled only under lint; the assembly in the #else branch is the real bcopy */
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl#else /* lint */
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ENTRY(bcopy)
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl cmp %o2, VIS_COPY_THRESHOLD ! check for leaf rtn case
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bleu,pt %ncc, .bcopy_small ! count <= threshold: small (leaf) copy
25cf1a301a396c38e8adf52c15f537b80d2483f7jl xor %o0, %o1, %o3 ! are src, dst alignable? (delay slot)
25cf1a301a396c38e8adf52c15f537b80d2483f7jl btst 7, %o3 ! low 3 bits of src ^ dst
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pt %ncc, .bcopy_8 ! all clear: long word alignable
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl btst 1, %o3 ! bit 0 of src ^ dst
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pt %ncc, .bcopy_2 ! clear: at least half-word alignable
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl sethi %hi(hw_copy_limit_1), %o3 ! Check copy limit (byte alignable only)
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ld [%o3 + %lo(hw_copy_limit_1)], %o3
25cf1a301a396c38e8adf52c15f537b80d2483f7jl tst %o3
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pn %icc, .bcopy_small ! if zero, disable HW copy
25cf1a301a396c38e8adf52c15f537b80d2483f7jl cmp %o2, %o3 ! if length <= limit (delay slot)
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bleu,pt %ncc, .bcopy_small ! go to small copy
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ba,pt %ncc, .bcopy_more ! otherwise go to large copy
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.bcopy_2:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl btst 3, %o3 ! low 2 bits of src ^ dst
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pt %ncc, .bcopy_4 ! check for word alignment
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl sethi %hi(hw_copy_limit_2), %o3 ! Check copy limit
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ld [%o3 + %lo(hw_copy_limit_2)], %o3
25cf1a301a396c38e8adf52c15f537b80d2483f7jl tst %o3
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pn %icc, .bcopy_small ! if zero, disable HW copy
25cf1a301a396c38e8adf52c15f537b80d2483f7jl cmp %o2, %o3 ! if length <= limit (delay slot)
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bleu,pt %ncc, .bcopy_small ! go to small copy
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ba,pt %ncc, .bcopy_more ! otherwise go to large copy
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.bcopy_4:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ! already checked longword, must be word aligned
25cf1a301a396c38e8adf52c15f537b80d2483f7jl sethi %hi(hw_copy_limit_4), %o3 ! Check copy limit
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ld [%o3 + %lo(hw_copy_limit_4)], %o3
25cf1a301a396c38e8adf52c15f537b80d2483f7jl tst %o3
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pn %icc, .bcopy_small ! if zero, disable HW copy
25cf1a301a396c38e8adf52c15f537b80d2483f7jl cmp %o2, %o3 ! if length <= limit (delay slot)
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bleu,pt %ncc, .bcopy_small ! go to small copy
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ba,pt %ncc, .bcopy_more ! otherwise go to large copy
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.bcopy_8:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl sethi %hi(hw_copy_limit_8), %o3 ! Check copy limit
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ld [%o3 + %lo(hw_copy_limit_8)], %o3
25cf1a301a396c38e8adf52c15f537b80d2483f7jl tst %o3
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pn %icc, .bcopy_small ! if zero, disable HW copy
25cf1a301a396c38e8adf52c15f537b80d2483f7jl cmp %o2, %o3 ! if length <= limit (delay slot)
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bleu,pt %ncc, .bcopy_small ! go to small copy
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ba,pt %ncc, .bcopy_more ! otherwise go to large copy
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl .align 16
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.bcopy_small: ! leaf path: no save, returns via retl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldn [THREAD_REG + T_LOFAULT], %o4 ! save t_lofault
25cf1a301a396c38e8adf52c15f537b80d2483f7jl tst %o4
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pt %icc, .sm_do_copy ! none set: copy without installing a handler. NOTE(review): %icc tests only the low 32 bits; .bcopy_more uses %ncc for the same check
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl sethi %hi(.sm_copyerr), %o5
25cf1a301a396c38e8adf52c15f537b80d2483f7jl or %o5, %lo(.sm_copyerr), %o5
25cf1a301a396c38e8adf52c15f537b80d2483f7jl membar #Sync ! sync error barrier
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stn %o5, [THREAD_REG + T_LOFAULT] ! install new vector
25cf1a301a396c38e8adf52c15f537b80d2483f7jl or %o4, TRAMP_FLAG, %o4 ! error should trampoline
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.sm_do_copy:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl cmp %o2, SHORTCOPY ! check for really short case
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bleu,pt %ncc, .bc_sm_left ! count <= SHORTCOPY: move the bytes singly
25cf1a301a396c38e8adf52c15f537b80d2483f7jl cmp %o2, CHKSIZE ! check for medium length cases
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bgu,pn %ncc, .bc_med ! count > CHKSIZE: medium copy
25cf1a301a396c38e8adf52c15f537b80d2483f7jl or %o0, %o1, %o3 ! prepare alignment check
25cf1a301a396c38e8adf52c15f537b80d2483f7jl andcc %o3, 0x3, %g0 ! test for alignment
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pt %ncc, .bc_sm_word ! branch to word aligned case
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.bc_sm_movebytes:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl sub %o2, 3, %o2 ! adjust count to allow cc zero test (also the delay slot of the bz above)
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.bc_sm_notalign4:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldub [%o0], %o3 ! read byte
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stb %o3, [%o1] ! write byte
25cf1a301a396c38e8adf52c15f537b80d2483f7jl subcc %o2, 4, %o2 ! reduce count by 4
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldub [%o0 + 1], %o3 ! repeat for a total of 4 bytes
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add %o0, 4, %o0 ! advance SRC by 4
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stb %o3, [%o1 + 1]
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldub [%o0 - 2], %o3
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add %o1, 4, %o1 ! advance DST by 4
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stb %o3, [%o1 - 2]
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldub [%o0 - 1], %o3
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bgt,pt %ncc, .bc_sm_notalign4 ! loop til 3 or fewer bytes remain
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stb %o3, [%o1 - 1]
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add %o2, 3, %o2 ! restore count
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.bc_sm_left:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl tst %o2
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pt %ncc, .bc_sm_exit ! check for zero length
25cf1a301a396c38e8adf52c15f537b80d2483f7jl deccc %o2 ! reduce count for cc test
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldub [%o0], %o3 ! move one byte
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pt %ncc, .bc_sm_exit
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stb %o3, [%o1]
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldub [%o0 + 1], %o3 ! move another byte
25cf1a301a396c38e8adf52c15f537b80d2483f7jl deccc %o2 ! check for more
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pt %ncc, .bc_sm_exit
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stb %o3, [%o1 + 1]
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldub [%o0 + 2], %o3 ! move final byte
e64c6c3f1a2cffb126880e90fb7670805896f505Michael Bergknoff ba,pt %ncc, .bc_sm_exit
e64c6c3f1a2cffb126880e90fb7670805896f505Michael Bergknoff stb %o3, [%o1 + 2]
25cf1a301a396c38e8adf52c15f537b80d2483f7jl .align 16
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop ! instruction alignment
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ! see discussion at start of file
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.bc_sm_words:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl lduw [%o0], %o3 ! read word
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.bc_sm_wordx:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl subcc %o2, 8, %o2 ! update count
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stw %o3, [%o1] ! write word
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add %o0, 8, %o0 ! update SRC
25cf1a301a396c38e8adf52c15f537b80d2483f7jl lduw [%o0 - 4], %o3 ! read word
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add %o1, 8, %o1 ! update DST
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bgt,pt %ncc, .bc_sm_words ! loop til done
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stw %o3, [%o1 - 4] ! write word
25cf1a301a396c38e8adf52c15f537b80d2483f7jl addcc %o2, 7, %o2 ! restore count
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pt %ncc, .bc_sm_exit
25cf1a301a396c38e8adf52c15f537b80d2483f7jl deccc %o2
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pt %ncc, .bc_sm_byte ! exactly one byte left (the subcc below is this branch's delay slot)
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.bc_sm_half:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl subcc %o2, 2, %o2 ! reduce count by 2
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add %o0, 2, %o0 ! advance SRC by 2
25cf1a301a396c38e8adf52c15f537b80d2483f7jl lduh [%o0 - 2], %o3 ! read half word
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add %o1, 2, %o1 ! advance DST by 2
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bgt,pt %ncc, .bc_sm_half ! loop til done
25cf1a301a396c38e8adf52c15f537b80d2483f7jl sth %o3, [%o1 - 2] ! write half word
25cf1a301a396c38e8adf52c15f537b80d2483f7jl addcc %o2, 1, %o2 ! restore count
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pt %ncc, .bc_sm_exit
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.bc_sm_byte:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldub [%o0], %o3
e64c6c3f1a2cffb126880e90fb7670805896f505Michael Bergknoff ba,pt %ncc, .bc_sm_exit
e64c6c3f1a2cffb126880e90fb7670805896f505Michael Bergknoff stb %o3, [%o1]
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.bc_sm_word: ! from the alignment check above, count arrives already reduced by 3
25cf1a301a396c38e8adf52c15f537b80d2483f7jl subcc %o2, 4, %o2 ! update count
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bgt,pt %ncc, .bc_sm_wordx
25cf1a301a396c38e8adf52c15f537b80d2483f7jl lduw [%o0], %o3 ! read word
25cf1a301a396c38e8adf52c15f537b80d2483f7jl addcc %o2, 3, %o2 ! restore count
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pt %ncc, .bc_sm_exit
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stw %o3, [%o1] ! write word
25cf1a301a396c38e8adf52c15f537b80d2483f7jl deccc %o2 ! reduce count for cc test
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldub [%o0 + 4], %o3 ! load one byte
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pt %ncc, .bc_sm_exit
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stb %o3, [%o1 + 4] ! store one byte
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldub [%o0 + 5], %o3 ! load second byte
25cf1a301a396c38e8adf52c15f537b80d2483f7jl deccc %o2
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pt %ncc, .bc_sm_exit
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stb %o3, [%o1 + 5] ! store second byte
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldub [%o0 + 6], %o3 ! load third byte
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stb %o3, [%o1 + 6] ! store third byte
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.bc_sm_exit:
e64c6c3f1a2cffb126880e90fb7670805896f505Michael Bergknoff ldn [THREAD_REG + T_LOFAULT], %o3 ! current t_lofault
e64c6c3f1a2cffb126880e90fb7670805896f505Michael Bergknoff brz,pt %o3, .bc_sm_done ! zero: skip the restore below
0090fbab0a5716f1626d1e206b44fbb23585525ekm nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl membar #Sync ! sync error barrier
25cf1a301a396c38e8adf52c15f537b80d2483f7jl andn %o4, TRAMP_FLAG, %o4 ! strip the flag from the saved value
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stn %o4, [THREAD_REG + T_LOFAULT] ! restore old t_lofault
0090fbab0a5716f1626d1e206b44fbb23585525ekm.bc_sm_done:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl retl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl mov %g0, %o0 ! return 0
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl .align 16
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.bc_med: ! medium copy: pick the widest unit src and dst can share
25cf1a301a396c38e8adf52c15f537b80d2483f7jl xor %o0, %o1, %o3 ! setup alignment check
25cf1a301a396c38e8adf52c15f537b80d2483f7jl btst 1, %o3
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bnz,pt %ncc, .bc_sm_movebytes ! unaligned
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl btst 3, %o3
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bnz,pt %ncc, .bc_med_half ! halfword aligned
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl btst 7, %o3
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bnz,pt %ncc, .bc_med_word ! word aligned
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.bc_med_long:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl btst 3, %o0 ! check for
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pt %ncc, .bc_med_long1 ! word alignment
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.bc_med_long0:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldub [%o0], %o3 ! load one byte
25cf1a301a396c38e8adf52c15f537b80d2483f7jl inc %o0
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stb %o3,[%o1] ! store byte
25cf1a301a396c38e8adf52c15f537b80d2483f7jl inc %o1
25cf1a301a396c38e8adf52c15f537b80d2483f7jl btst 3, %o0
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bnz,pt %ncc, .bc_med_long0 ! repeat until SRC is word aligned
25cf1a301a396c38e8adf52c15f537b80d2483f7jl dec %o2 ! (delay slot) account for the byte just moved
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.bc_med_long1: ! word aligned
25cf1a301a396c38e8adf52c15f537b80d2483f7jl btst 7, %o0 ! check for long word
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pt %ncc, .bc_med_long2
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl lduw [%o0], %o3 ! load word
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add %o0, 4, %o0 ! advance SRC by 4
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stw %o3, [%o1] ! store word
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add %o1, 4, %o1 ! advance DST by 4
25cf1a301a396c38e8adf52c15f537b80d2483f7jl sub %o2, 4, %o2 ! reduce count by 4
25cf1a301a396c38e8adf52c15f537b80d2483f7jl!
25cf1a301a396c38e8adf52c15f537b80d2483f7jl! Now long word aligned and have at least 32 bytes to move
25cf1a301a396c38e8adf52c15f537b80d2483f7jl!
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.bc_med_long2:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl sub %o2, 31, %o2 ! adjust count to allow cc zero test
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.bc_med_lmove:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldx [%o0], %o3 ! read long word
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stx %o3, [%o1] ! write long word
25cf1a301a396c38e8adf52c15f537b80d2483f7jl subcc %o2, 32, %o2 ! reduce count by 32
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldx [%o0 + 8], %o3 ! repeat for a total of 4 long words
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add %o0, 32, %o0 ! advance SRC by 32
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stx %o3, [%o1 + 8]
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldx [%o0 - 16], %o3
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add %o1, 32, %o1 ! advance DST by 32
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stx %o3, [%o1 - 16]
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldx [%o0 - 8], %o3
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bgt,pt %ncc, .bc_med_lmove ! loop til 31 or fewer bytes left
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stx %o3, [%o1 - 8]
25cf1a301a396c38e8adf52c15f537b80d2483f7jl addcc %o2, 24, %o2 ! restore count to long word offset
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ble,pt %ncc, .bc_med_lextra ! check for more long words to move
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.bc_med_lword:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldx [%o0], %o3 ! read long word
25cf1a301a396c38e8adf52c15f537b80d2483f7jl subcc %o2, 8, %o2 ! reduce count by 8
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stx %o3, [%o1] ! write long word
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add %o0, 8, %o0 ! advance SRC by 8
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bgt,pt %ncc, .bc_med_lword ! loop til 7 or fewer bytes left
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add %o1, 8, %o1 ! advance DST by 8
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.bc_med_lextra:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl addcc %o2, 7, %o2 ! restore rest of count
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pt %ncc, .bc_sm_exit ! if zero, then done
25cf1a301a396c38e8adf52c15f537b80d2483f7jl deccc %o2
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pt %ncc, .bc_sm_byte ! exactly one byte left
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ba,pt %ncc, .bc_sm_half ! more than one byte left
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl .align 16
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.bc_med_word: ! medium copy in 4-byte units
25cf1a301a396c38e8adf52c15f537b80d2483f7jl btst 3, %o0 ! check for
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pt %ncc, .bc_med_word1 ! word alignment
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.bc_med_word0:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldub [%o0], %o3 ! load one byte
25cf1a301a396c38e8adf52c15f537b80d2483f7jl inc %o0
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stb %o3,[%o1] ! store byte
25cf1a301a396c38e8adf52c15f537b80d2483f7jl inc %o1
25cf1a301a396c38e8adf52c15f537b80d2483f7jl btst 3, %o0
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bnz,pt %ncc, .bc_med_word0 ! repeat until SRC is word aligned
25cf1a301a396c38e8adf52c15f537b80d2483f7jl dec %o2 ! (delay slot) account for the byte just moved
25cf1a301a396c38e8adf52c15f537b80d2483f7jl!
25cf1a301a396c38e8adf52c15f537b80d2483f7jl! Now word aligned and have at least 36 bytes to move
25cf1a301a396c38e8adf52c15f537b80d2483f7jl!
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.bc_med_word1:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl sub %o2, 15, %o2 ! adjust count to allow cc zero test
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.bc_med_wmove:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl lduw [%o0], %o3 ! read word
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stw %o3, [%o1] ! write word
25cf1a301a396c38e8adf52c15f537b80d2483f7jl subcc %o2, 16, %o2 ! reduce count by 16
25cf1a301a396c38e8adf52c15f537b80d2483f7jl lduw [%o0 + 4], %o3 ! repeat for a total of 4 words
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add %o0, 16, %o0 ! advance SRC by 16
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stw %o3, [%o1 + 4]
25cf1a301a396c38e8adf52c15f537b80d2483f7jl lduw [%o0 - 8], %o3
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add %o1, 16, %o1 ! advance DST by 16
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stw %o3, [%o1 - 8]
25cf1a301a396c38e8adf52c15f537b80d2483f7jl lduw [%o0 - 4], %o3
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bgt,pt %ncc, .bc_med_wmove ! loop til 15 or fewer bytes left
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stw %o3, [%o1 - 4]
25cf1a301a396c38e8adf52c15f537b80d2483f7jl addcc %o2, 12, %o2 ! restore count to word offset
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ble,pt %ncc, .bc_med_wextra ! check for more words to move
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.bc_med_word2:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl lduw [%o0], %o3 ! read word
25cf1a301a396c38e8adf52c15f537b80d2483f7jl subcc %o2, 4, %o2 ! reduce count by 4
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stw %o3, [%o1] ! write word
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add %o0, 4, %o0 ! advance SRC by 4
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bgt,pt %ncc, .bc_med_word2 ! loop til 3 or fewer bytes left
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add %o1, 4, %o1 ! advance DST by 4
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.bc_med_wextra:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl addcc %o2, 3, %o2 ! restore rest of count
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pt %ncc, .bc_sm_exit ! if zero, then done
25cf1a301a396c38e8adf52c15f537b80d2483f7jl deccc %o2
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pt %ncc, .bc_sm_byte ! exactly one byte left
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ba,pt %ncc, .bc_sm_half ! more than one byte left
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl .align 16
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.bc_med_half: ! medium copy in 2-byte units
25cf1a301a396c38e8adf52c15f537b80d2483f7jl btst 1, %o0 ! check for
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pt %ncc, .bc_med_half1 ! half word alignment
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldub [%o0], %o3 ! load one byte
25cf1a301a396c38e8adf52c15f537b80d2483f7jl inc %o0
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stb %o3,[%o1] ! store byte
25cf1a301a396c38e8adf52c15f537b80d2483f7jl inc %o1
25cf1a301a396c38e8adf52c15f537b80d2483f7jl dec %o2 ! account for the byte just moved
25cf1a301a396c38e8adf52c15f537b80d2483f7jl!
25cf1a301a396c38e8adf52c15f537b80d2483f7jl! Now half word aligned and have at least 38 bytes to move
25cf1a301a396c38e8adf52c15f537b80d2483f7jl!
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.bc_med_half1:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl sub %o2, 7, %o2 ! adjust count to allow cc zero test
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.bc_med_hmove:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl lduh [%o0], %o3 ! read half word
25cf1a301a396c38e8adf52c15f537b80d2483f7jl sth %o3, [%o1] ! write half word
25cf1a301a396c38e8adf52c15f537b80d2483f7jl subcc %o2, 8, %o2 ! reduce count by 8
25cf1a301a396c38e8adf52c15f537b80d2483f7jl lduh [%o0 + 2], %o3 ! repeat for a total of 4 halfwords
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add %o0, 8, %o0 ! advance SRC by 8
25cf1a301a396c38e8adf52c15f537b80d2483f7jl sth %o3, [%o1 + 2]
25cf1a301a396c38e8adf52c15f537b80d2483f7jl lduh [%o0 - 4], %o3
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add %o1, 8, %o1 ! advance DST by 8
25cf1a301a396c38e8adf52c15f537b80d2483f7jl sth %o3, [%o1 - 4]
25cf1a301a396c38e8adf52c15f537b80d2483f7jl lduh [%o0 - 2], %o3
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bgt,pt %ncc, .bc_med_hmove ! loop til 7 or fewer bytes left
25cf1a301a396c38e8adf52c15f537b80d2483f7jl sth %o3, [%o1 - 2]
25cf1a301a396c38e8adf52c15f537b80d2483f7jl addcc %o2, 7, %o2 ! restore count
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pt %ncc, .bc_sm_exit ! if zero, then done
25cf1a301a396c38e8adf52c15f537b80d2483f7jl deccc %o2
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pt %ncc, .bc_sm_byte ! exactly one byte left
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ba,pt %ncc, .bc_sm_half ! more than one byte left
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl SET_SIZE(bcopy)
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl/*
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * The _more entry points are not intended to be used directly by
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * any caller from outside this file. They are provided to allow
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * profiling and dtrace of the portions of the copy code that use
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * the floating point registers.
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * This entry is particularly important as DTRACE (at least as of
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * 4/2004) does not support leaf functions.
25cf1a301a396c38e8adf52c15f537b80d2483f7jl */
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ENTRY(bcopy_more)
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.bcopy_more:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl prefetch [%o0], #n_reads
25cf1a301a396c38e8adf52c15f537b80d2483f7jl save %sp, -SA(MINFRAME + HWCOPYFRAMESIZE), %sp
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldn [THREAD_REG + T_LOFAULT], %l6 ! save t_lofault
25cf1a301a396c38e8adf52c15f537b80d2483f7jl tst %l6
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pt %ncc, .do_copy
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl sethi %hi(.copyerr), %o2
25cf1a301a396c38e8adf52c15f537b80d2483f7jl or %o2, %lo(.copyerr), %o2
25cf1a301a396c38e8adf52c15f537b80d2483f7jl membar #Sync ! sync error barrier
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stn %o2, [THREAD_REG + T_LOFAULT] ! install new vector
25cf1a301a396c38e8adf52c15f537b80d2483f7jl !
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ! We've already captured whether t_lofault was zero on entry.
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ! We need to mark ourselves as being from bcopy since both
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ! kcopy and bcopy use the same code path. If TRAMP_FLAG is set
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ! and the saved lofault was zero, we won't reset lofault on
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ! returning.
25cf1a301a396c38e8adf52c15f537b80d2483f7jl !
25cf1a301a396c38e8adf52c15f537b80d2483f7jl or %l6, TRAMP_FLAG, %l6
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl/*
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * Copies that reach here are larger than VIS_COPY_THRESHOLD bytes
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * Also, use of FP registers has been tested to be enabled
25cf1a301a396c38e8adf52c15f537b80d2483f7jl */
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.do_copy:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl FP_NOMIGRATE(6, 7)
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl rd %fprs, %o2 ! check for unused fp
25cf1a301a396c38e8adf52c15f537b80d2483f7jl st %o2, [%fp + STACK_BIAS - SAVED_FPRS_OFFSET] ! save orig %fprs
25cf1a301a396c38e8adf52c15f537b80d2483f7jl btst FPRS_FEF, %o2
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,a,pt %icc, .do_blockcopy
25cf1a301a396c38e8adf52c15f537b80d2483f7jl wr %g0, FPRS_FEF, %fprs
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl BST_FPQ1Q3_TOSTACK(%o2)
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.do_blockcopy:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl rd %gsr, %o2
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stx %o2, [%fp + STACK_BIAS - SAVED_GSR_OFFSET] ! save gsr
25cf1a301a396c38e8adf52c15f537b80d2483f7jl or %l6, FPUSED_FLAG, %l6
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl#define REALSRC %i0
25cf1a301a396c38e8adf52c15f537b80d2483f7jl#define DST %i1
25cf1a301a396c38e8adf52c15f537b80d2483f7jl#define CNT %i2
25cf1a301a396c38e8adf52c15f537b80d2483f7jl#define SRC %i3
25cf1a301a396c38e8adf52c15f537b80d2483f7jl#define TMP %i5
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl andcc DST, VIS_BLOCKSIZE - 1, TMP
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pt %ncc, 2f
25cf1a301a396c38e8adf52c15f537b80d2483f7jl neg TMP
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add TMP, VIS_BLOCKSIZE, TMP
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ! TMP = bytes required to align DST on FP_BLOCK boundary
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ! Using SRC as a tmp here
25cf1a301a396c38e8adf52c15f537b80d2483f7jl cmp TMP, 3
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bleu,pt %ncc, 1f
25cf1a301a396c38e8adf52c15f537b80d2483f7jl sub CNT,TMP,CNT ! adjust main count
25cf1a301a396c38e8adf52c15f537b80d2483f7jl sub TMP, 3, TMP ! adjust for end of loop test
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.bc_blkalign:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldub [REALSRC], SRC ! move 4 bytes per loop iteration
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stb SRC, [DST]
25cf1a301a396c38e8adf52c15f537b80d2483f7jl subcc TMP, 4, TMP
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldub [REALSRC + 1], SRC
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add REALSRC, 4, REALSRC
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stb SRC, [DST + 1]
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldub [REALSRC - 2], SRC
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add DST, 4, DST
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stb SRC, [DST - 2]
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldub [REALSRC - 1], SRC
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bgu,pt %ncc, .bc_blkalign
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stb SRC, [DST - 1]
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl addcc TMP, 3, TMP ! restore count adjustment
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pt %ncc, 2f ! no bytes left?
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl1: ldub [REALSRC], SRC
25cf1a301a396c38e8adf52c15f537b80d2483f7jl inc REALSRC
25cf1a301a396c38e8adf52c15f537b80d2483f7jl inc DST
25cf1a301a396c38e8adf52c15f537b80d2483f7jl deccc TMP
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bgu %ncc, 1b
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stb SRC, [DST - 1]
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl2:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl membar #StoreLoad
25cf1a301a396c38e8adf52c15f537b80d2483f7jl andn REALSRC, 0x7, SRC
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ! SRC - 8-byte aligned
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ! DST - 64-byte aligned
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldd [SRC], %f0
25cf1a301a396c38e8adf52c15f537b80d2483f7jl prefetch [SRC + (1 * VIS_BLOCKSIZE)], #n_reads
25cf1a301a396c38e8adf52c15f537b80d2483f7jl alignaddr REALSRC, %g0, %g0
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldd [SRC + 0x08], %f2
25cf1a301a396c38e8adf52c15f537b80d2483f7jl prefetch [SRC + (2 * VIS_BLOCKSIZE)], #n_reads
25cf1a301a396c38e8adf52c15f537b80d2483f7jl faligndata %f0, %f2, %f32
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldd [SRC + 0x10], %f4
c8a722abb8fd974fb16523acbd90ea75d5dcbeb2pm prefetch [SRC + (3 * VIS_BLOCKSIZE)], #n_reads
25cf1a301a396c38e8adf52c15f537b80d2483f7jl faligndata %f2, %f4, %f34
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldd [SRC + 0x18], %f6
25cf1a301a396c38e8adf52c15f537b80d2483f7jl prefetch [SRC + (4 * VIS_BLOCKSIZE)], #one_read
25cf1a301a396c38e8adf52c15f537b80d2483f7jl faligndata %f4, %f6, %f36
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldd [SRC + 0x20], %f8
c8a722abb8fd974fb16523acbd90ea75d5dcbeb2pm prefetch [SRC + (8 * VIS_BLOCKSIZE)], #one_read
25cf1a301a396c38e8adf52c15f537b80d2483f7jl faligndata %f6, %f8, %f38
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldd [SRC + 0x28], %f10
c8a722abb8fd974fb16523acbd90ea75d5dcbeb2pm prefetch [SRC + (12 * VIS_BLOCKSIZE)], #one_read
25cf1a301a396c38e8adf52c15f537b80d2483f7jl faligndata %f8, %f10, %f40
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldd [SRC + 0x30], %f12
c8a722abb8fd974fb16523acbd90ea75d5dcbeb2pm prefetch [SRC + (16 * VIS_BLOCKSIZE)], #one_read
25cf1a301a396c38e8adf52c15f537b80d2483f7jl faligndata %f10, %f12, %f42
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldd [SRC + 0x38], %f14
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldd [SRC + VIS_BLOCKSIZE], %f0
25cf1a301a396c38e8adf52c15f537b80d2483f7jl sub CNT, VIS_BLOCKSIZE, CNT
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add SRC, VIS_BLOCKSIZE, SRC
c8a722abb8fd974fb16523acbd90ea75d5dcbeb2pm prefetch [SRC + (19 * VIS_BLOCKSIZE)], #one_read
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add REALSRC, VIS_BLOCKSIZE, REALSRC
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ba,pt %ncc, 1f
c8a722abb8fd974fb16523acbd90ea75d5dcbeb2pm prefetch [SRC + (23 * VIS_BLOCKSIZE)], #one_read
25cf1a301a396c38e8adf52c15f537b80d2483f7jl .align 32
25cf1a301a396c38e8adf52c15f537b80d2483f7jl1:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldd [SRC + 0x08], %f2
25cf1a301a396c38e8adf52c15f537b80d2483f7jl faligndata %f12, %f14, %f44
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldd [SRC + 0x10], %f4
25cf1a301a396c38e8adf52c15f537b80d2483f7jl faligndata %f14, %f0, %f46
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stda %f32, [DST]ASI_BLK_P
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldd [SRC + 0x18], %f6
25cf1a301a396c38e8adf52c15f537b80d2483f7jl faligndata %f0, %f2, %f32
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldd [SRC + 0x20], %f8
25cf1a301a396c38e8adf52c15f537b80d2483f7jl faligndata %f2, %f4, %f34
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldd [SRC + 0x28], %f10
25cf1a301a396c38e8adf52c15f537b80d2483f7jl faligndata %f4, %f6, %f36
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldd [SRC + 0x30], %f12
25cf1a301a396c38e8adf52c15f537b80d2483f7jl faligndata %f6, %f8, %f38
c8a722abb8fd974fb16523acbd90ea75d5dcbeb2pm sub CNT, VIS_BLOCKSIZE, CNT
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldd [SRC + 0x38], %f14
25cf1a301a396c38e8adf52c15f537b80d2483f7jl faligndata %f8, %f10, %f40
c8a722abb8fd974fb16523acbd90ea75d5dcbeb2pm add DST, VIS_BLOCKSIZE, DST
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldd [SRC + VIS_BLOCKSIZE], %f0
25cf1a301a396c38e8adf52c15f537b80d2483f7jl faligndata %f10, %f12, %f42
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add REALSRC, VIS_BLOCKSIZE, REALSRC
c8a722abb8fd974fb16523acbd90ea75d5dcbeb2pm prefetch [SRC + (3 * VIS_BLOCKSIZE)], #n_reads
c8a722abb8fd974fb16523acbd90ea75d5dcbeb2pm add SRC, VIS_BLOCKSIZE, SRC
c8a722abb8fd974fb16523acbd90ea75d5dcbeb2pm prefetch [SRC + ((OLYMPUS_C_PREFETCH) * VIS_BLOCKSIZE)], #one_read
25cf1a301a396c38e8adf52c15f537b80d2483f7jl cmp CNT, VIS_BLOCKSIZE + 8
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bgu,pt %ncc, 1b
c8a722abb8fd974fb16523acbd90ea75d5dcbeb2pm prefetch [SRC + ((OLYMPUS_C_2ND_PREFETCH) * VIS_BLOCKSIZE)], #one_read
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ! only if REALSRC & 0x7 is 0
25cf1a301a396c38e8adf52c15f537b80d2483f7jl cmp CNT, VIS_BLOCKSIZE
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bne %ncc, 3f
25cf1a301a396c38e8adf52c15f537b80d2483f7jl andcc REALSRC, 0x7, %g0
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pt %ncc, 2f
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl3:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl faligndata %f12, %f14, %f44
25cf1a301a396c38e8adf52c15f537b80d2483f7jl faligndata %f14, %f0, %f46
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stda %f32, [DST]ASI_BLK_P
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add DST, VIS_BLOCKSIZE, DST
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ba,pt %ncc, 3f
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl2:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldd [SRC + 0x08], %f2
25cf1a301a396c38e8adf52c15f537b80d2483f7jl fsrc1 %f12, %f44
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldd [SRC + 0x10], %f4
25cf1a301a396c38e8adf52c15f537b80d2483f7jl fsrc1 %f14, %f46
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stda %f32, [DST]ASI_BLK_P
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldd [SRC + 0x18], %f6
25cf1a301a396c38e8adf52c15f537b80d2483f7jl fsrc1 %f0, %f32
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldd [SRC + 0x20], %f8
25cf1a301a396c38e8adf52c15f537b80d2483f7jl fsrc1 %f2, %f34
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldd [SRC + 0x28], %f10
25cf1a301a396c38e8adf52c15f537b80d2483f7jl fsrc1 %f4, %f36
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldd [SRC + 0x30], %f12
25cf1a301a396c38e8adf52c15f537b80d2483f7jl fsrc1 %f6, %f38
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldd [SRC + 0x38], %f14
25cf1a301a396c38e8adf52c15f537b80d2483f7jl fsrc1 %f8, %f40
25cf1a301a396c38e8adf52c15f537b80d2483f7jl sub CNT, VIS_BLOCKSIZE, CNT
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add DST, VIS_BLOCKSIZE, DST
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add SRC, VIS_BLOCKSIZE, SRC
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add REALSRC, VIS_BLOCKSIZE, REALSRC
25cf1a301a396c38e8adf52c15f537b80d2483f7jl fsrc1 %f10, %f42
25cf1a301a396c38e8adf52c15f537b80d2483f7jl fsrc1 %f12, %f44
25cf1a301a396c38e8adf52c15f537b80d2483f7jl fsrc1 %f14, %f46
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stda %f32, [DST]ASI_BLK_P
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add DST, VIS_BLOCKSIZE, DST
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ba,a,pt %ncc, .bcb_exit
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl3: tst CNT
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,a,pt %ncc, .bcb_exit
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl5: ldub [REALSRC], TMP
25cf1a301a396c38e8adf52c15f537b80d2483f7jl inc REALSRC
25cf1a301a396c38e8adf52c15f537b80d2483f7jl inc DST
25cf1a301a396c38e8adf52c15f537b80d2483f7jl deccc CNT
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bgu %ncc, 5b
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stb TMP, [DST - 1]
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.bcb_exit:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl membar #Sync
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldx [%fp + STACK_BIAS - SAVED_GSR_OFFSET], %o2 ! restore gsr
25cf1a301a396c38e8adf52c15f537b80d2483f7jl wr %o2, 0, %gsr
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ld [%fp + STACK_BIAS - SAVED_FPRS_OFFSET], %o3
25cf1a301a396c38e8adf52c15f537b80d2483f7jl btst FPRS_FEF, %o3
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pt %icc, 4f
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl BLD_FPQ1Q3_FROMSTACK(%o2)
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ba,pt %ncc, 2f
25cf1a301a396c38e8adf52c15f537b80d2483f7jl wr %o3, 0, %fprs ! restore fprs
25cf1a301a396c38e8adf52c15f537b80d2483f7jl4:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl FZEROQ1Q3
25cf1a301a396c38e8adf52c15f537b80d2483f7jl wr %o3, 0, %fprs ! restore fprs
25cf1a301a396c38e8adf52c15f537b80d2483f7jl2:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl membar #Sync ! sync error barrier
25cf1a301a396c38e8adf52c15f537b80d2483f7jl andn %l6, MASK_FLAGS, %l6
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stn %l6, [THREAD_REG + T_LOFAULT] ! restore old t_lofault
25cf1a301a396c38e8adf52c15f537b80d2483f7jl FP_ALLOWMIGRATE(5, 6)
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ret
25cf1a301a396c38e8adf52c15f537b80d2483f7jl restore %g0, 0, %o0
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl SET_SIZE(bcopy_more)
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl#endif /* lint */
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl/*
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * Block copy with possibly overlapped operands.
25cf1a301a396c38e8adf52c15f537b80d2483f7jl */
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl#if defined(lint)
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl/*ARGSUSED*/
25cf1a301a396c38e8adf52c15f537b80d2483f7jlvoid
25cf1a301a396c38e8adf52c15f537b80d2483f7jlovbcopy(const void *from, void *to, size_t count)
25cf1a301a396c38e8adf52c15f537b80d2483f7jl{}
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl#else /* lint */
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ENTRY(ovbcopy) ! copy %o2 bytes from %o0 to %o1, tolerating overlap; branches to bcopy when the regions do not overlap
25cf1a301a396c38e8adf52c15f537b80d2483f7jl tst %o2 ! check count
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bgu,a %ncc, 1f ! count nonzero: go check for overlap; otherwise nothing to do
25cf1a301a396c38e8adf52c15f537b80d2483f7jl subcc %o0, %o1, %o3 ! difference of from and to address (annulled delay slot: runs only when the branch is taken)
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl retl ! return (count was zero)
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl1:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bneg,a %ncc, 2f ! from - to is negative: negate it in the annulled delay slot
25cf1a301a396c38e8adf52c15f537b80d2483f7jl neg %o3 ! if < 0, make it positive
25cf1a301a396c38e8adf52c15f537b80d2483f7jl2: cmp %o2, %o3 ! cmp size and abs(from - to)
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bleu %ncc, bcopy ! if size <= abs(diff): use bcopy,
25cf1a301a396c38e8adf52c15f537b80d2483f7jl .empty ! no overlap
25cf1a301a396c38e8adf52c15f537b80d2483f7jl cmp %o0, %o1 ! compare from and to addresses (sits in the delay slot of the bleu above)
25cf1a301a396c38e8adf52c15f537b80d2483f7jl blu %ncc, .ov_bkwd ! if from < to, copy backwards
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl !
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ! Copy forwards.
25cf1a301a396c38e8adf52c15f537b80d2483f7jl !
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.ov_fwd:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldub [%o0], %o3 ! read from address
25cf1a301a396c38e8adf52c15f537b80d2483f7jl inc %o0 ! inc from address
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stb %o3, [%o1] ! write to address
25cf1a301a396c38e8adf52c15f537b80d2483f7jl deccc %o2 ! dec count
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bgu %ncc, .ov_fwd ! loop till done
25cf1a301a396c38e8adf52c15f537b80d2483f7jl inc %o1 ! inc to address (delay slot, runs on every pass)
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl retl ! return
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl !
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ! Copy backwards.
25cf1a301a396c38e8adf52c15f537b80d2483f7jl !
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.ov_bkwd:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl deccc %o2 ! dec count; the new count is also the offset of the byte to move
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldub [%o0 + %o2], %o3 ! get byte at end of src
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bgu %ncc, .ov_bkwd ! loop till done
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stb %o3, [%o1 + %o2] ! delay slot, store at end of dst
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl retl ! return
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl SET_SIZE(ovbcopy)
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl#endif /* lint */
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl/*
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * hwblkpagecopy()
25cf1a301a396c38e8adf52c15f537b80d2483f7jl *
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * Copies exactly one page. This routine assumes the caller (ppcopy)
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * has already disabled kernel preemption and has checked
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * use_hw_bcopy. Preventing preemption also prevents cpu migration.
25cf1a301a396c38e8adf52c15f537b80d2483f7jl */
25cf1a301a396c38e8adf52c15f537b80d2483f7jl#ifdef lint
25cf1a301a396c38e8adf52c15f537b80d2483f7jl/*ARGSUSED*/
25cf1a301a396c38e8adf52c15f537b80d2483f7jlvoid
25cf1a301a396c38e8adf52c15f537b80d2483f7jlhwblkpagecopy(const void *src, void *dst)
25cf1a301a396c38e8adf52c15f537b80d2483f7jl{ }
25cf1a301a396c38e8adf52c15f537b80d2483f7jl#else /* lint */
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ENTRY(hwblkpagecopy) ! copy PAGESIZE bytes from src (%o0) to dst (%o1) using block stores
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ! get another window w/space for three aligned blocks of saved fpregs
25cf1a301a396c38e8adf52c15f537b80d2483f7jl prefetch [%o0], #n_reads ! warm the start of the source page
25cf1a301a396c38e8adf52c15f537b80d2483f7jl save %sp, -SA(MINFRAME + HWCOPYFRAMESIZE), %sp
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ! %i0 - source address (arg)
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ! %i1 - destination address (arg)
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ! %i2 - length of region (not arg)
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ! %l0 - saved fprs
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ! %l1 - pointer to saved fpregs
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl rd %fprs, %l0 ! check for unused fp
25cf1a301a396c38e8adf52c15f537b80d2483f7jl btst FPRS_FEF, %l0
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,a,pt %icc, 1f ! FEF clear: nothing to save, just enable the FPU
25cf1a301a396c38e8adf52c15f537b80d2483f7jl wr %g0, FPRS_FEF, %fprs ! annulled delay slot: executed only when the branch is taken
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl BST_FPQ1Q3_TOSTACK(%l1) ! FEF set: save fp regs to the stack first (macro defined outside this view)
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl1: set PAGESIZE, CNT ! CNT = bytes still to account for
25cf1a301a396c38e8adf52c15f537b80d2483f7jl mov REALSRC, SRC ! SRC = working source pointer
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldd [SRC], %f0 ! prime the pipeline: load the first source block into %f0-%f14
25cf1a301a396c38e8adf52c15f537b80d2483f7jl prefetch [SRC + (1 * VIS_BLOCKSIZE)], #n_reads
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldd [SRC + 0x08], %f2
25cf1a301a396c38e8adf52c15f537b80d2483f7jl prefetch [SRC + (2 * VIS_BLOCKSIZE)], #n_reads
25cf1a301a396c38e8adf52c15f537b80d2483f7jl fmovd %f0, %f32 ! stage loaded data in %f32-%f46, the registers the block store writes out
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldd [SRC + 0x10], %f4
c8a722abb8fd974fb16523acbd90ea75d5dcbeb2pm prefetch [SRC + (3 * VIS_BLOCKSIZE)], #n_reads
25cf1a301a396c38e8adf52c15f537b80d2483f7jl fmovd %f2, %f34
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldd [SRC + 0x18], %f6
25cf1a301a396c38e8adf52c15f537b80d2483f7jl prefetch [SRC + (4 * VIS_BLOCKSIZE)], #one_read
25cf1a301a396c38e8adf52c15f537b80d2483f7jl fmovd %f4, %f36
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldd [SRC + 0x20], %f8
c8a722abb8fd974fb16523acbd90ea75d5dcbeb2pm prefetch [SRC + (8 * VIS_BLOCKSIZE)], #one_read
25cf1a301a396c38e8adf52c15f537b80d2483f7jl fmovd %f6, %f38
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldd [SRC + 0x28], %f10
c8a722abb8fd974fb16523acbd90ea75d5dcbeb2pm prefetch [SRC + (12 * VIS_BLOCKSIZE)], #one_read
25cf1a301a396c38e8adf52c15f537b80d2483f7jl fmovd %f8, %f40
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldd [SRC + 0x30], %f12
c8a722abb8fd974fb16523acbd90ea75d5dcbeb2pm prefetch [SRC + (16 * VIS_BLOCKSIZE)], #one_read
25cf1a301a396c38e8adf52c15f537b80d2483f7jl fmovd %f10, %f42
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldd [SRC + 0x38], %f14
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldd [SRC + VIS_BLOCKSIZE], %f0 ! look-ahead: first doubleword of the next block
25cf1a301a396c38e8adf52c15f537b80d2483f7jl sub CNT, VIS_BLOCKSIZE, CNT ! one block has been loaded
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add SRC, VIS_BLOCKSIZE, SRC
c8a722abb8fd974fb16523acbd90ea75d5dcbeb2pm prefetch [SRC + (19 * VIS_BLOCKSIZE)], #one_read
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ba,pt %ncc, 2f ! enter the aligned main loop
c8a722abb8fd974fb16523acbd90ea75d5dcbeb2pm prefetch [SRC + (23 * VIS_BLOCKSIZE)], #one_read
25cf1a301a396c38e8adf52c15f537b80d2483f7jl .align 32
25cf1a301a396c38e8adf52c15f537b80d2483f7jl2: ! main loop: store the staged block while loading and staging the next one
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldd [SRC + 0x08], %f2
25cf1a301a396c38e8adf52c15f537b80d2483f7jl fmovd %f12, %f44 ! finish staging the previous block
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldd [SRC + 0x10], %f4
25cf1a301a396c38e8adf52c15f537b80d2483f7jl fmovd %f14, %f46
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stda %f32, [DST]ASI_BLK_P ! block store of the staged registers to DST
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldd [SRC + 0x18], %f6
25cf1a301a396c38e8adf52c15f537b80d2483f7jl fmovd %f0, %f32 ! begin staging the block now being loaded
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldd [SRC + 0x20], %f8
25cf1a301a396c38e8adf52c15f537b80d2483f7jl fmovd %f2, %f34
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldd [SRC + 0x28], %f10
25cf1a301a396c38e8adf52c15f537b80d2483f7jl fmovd %f4, %f36
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldd [SRC + 0x30], %f12
25cf1a301a396c38e8adf52c15f537b80d2483f7jl fmovd %f6, %f38
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldd [SRC + 0x38], %f14
25cf1a301a396c38e8adf52c15f537b80d2483f7jl fmovd %f8, %f40
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldd [SRC + VIS_BLOCKSIZE], %f0 ! look-ahead: first doubleword of the next block
25cf1a301a396c38e8adf52c15f537b80d2483f7jl fmovd %f10, %f42
25cf1a301a396c38e8adf52c15f537b80d2483f7jl sub CNT, VIS_BLOCKSIZE, CNT
c8a722abb8fd974fb16523acbd90ea75d5dcbeb2pm prefetch [SRC + (3 * VIS_BLOCKSIZE)], #n_reads
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add DST, VIS_BLOCKSIZE, DST
c8a722abb8fd974fb16523acbd90ea75d5dcbeb2pm prefetch [SRC + ((OLYMPUS_C_PREFETCH) * VIS_BLOCKSIZE)], #one_read
c8a722abb8fd974fb16523acbd90ea75d5dcbeb2pm add SRC, VIS_BLOCKSIZE, SRC
25cf1a301a396c38e8adf52c15f537b80d2483f7jl cmp CNT, VIS_BLOCKSIZE + 8
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bgu,pt %ncc, 2b ! loop while CNT > VIS_BLOCKSIZE + 8
c8a722abb8fd974fb16523acbd90ea75d5dcbeb2pm prefetch [SRC + ((OLYMPUS_C_2ND_PREFETCH) * VIS_BLOCKSIZE)], #one_read
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ! trailing block: flush the staged block, then copy the final one (no look-ahead load)
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldd [SRC + 0x08], %f2
25cf1a301a396c38e8adf52c15f537b80d2483f7jl fsrc1 %f12, %f44
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldd [SRC + 0x10], %f4
25cf1a301a396c38e8adf52c15f537b80d2483f7jl fsrc1 %f14, %f46
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stda %f32, [DST]ASI_BLK_P ! store the next-to-last block
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldd [SRC + 0x18], %f6
25cf1a301a396c38e8adf52c15f537b80d2483f7jl fsrc1 %f0, %f32
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldd [SRC + 0x20], %f8
25cf1a301a396c38e8adf52c15f537b80d2483f7jl fsrc1 %f2, %f34
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldd [SRC + 0x28], %f10
25cf1a301a396c38e8adf52c15f537b80d2483f7jl fsrc1 %f4, %f36
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldd [SRC + 0x30], %f12
25cf1a301a396c38e8adf52c15f537b80d2483f7jl fsrc1 %f6, %f38
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldd [SRC + 0x38], %f14
25cf1a301a396c38e8adf52c15f537b80d2483f7jl fsrc1 %f8, %f40
25cf1a301a396c38e8adf52c15f537b80d2483f7jl sub CNT, VIS_BLOCKSIZE, CNT
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add DST, VIS_BLOCKSIZE, DST
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add SRC, VIS_BLOCKSIZE, SRC
25cf1a301a396c38e8adf52c15f537b80d2483f7jl fsrc1 %f10, %f42
25cf1a301a396c38e8adf52c15f537b80d2483f7jl fsrc1 %f12, %f44
25cf1a301a396c38e8adf52c15f537b80d2483f7jl fsrc1 %f14, %f46
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stda %f32, [DST]ASI_BLK_P ! store the last block
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl membar #Sync
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl btst FPRS_FEF, %l0 ! was the FPU in use on entry?
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pt %icc, 2f ! no: nothing was saved, take the FZEROQ1Q3 path
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl BLD_FPQ1Q3_FROMSTACK(%l3) ! yes: reload the fp regs saved at entry (macro defined outside this view)
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ba 3f
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl2: FZEROQ1Q3 ! macro defined outside this view; presumably zeroes the fp regs used above - confirm
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl3: wr %l0, 0, %fprs ! restore fprs
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ret
25cf1a301a396c38e8adf52c15f537b80d2483f7jl restore %g0, 0, %o0 ! delay slot: pop the window and leave 0 in the caller %o0
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl SET_SIZE(hwblkpagecopy)
25cf1a301a396c38e8adf52c15f537b80d2483f7jl#endif /* lint */
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl/*
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * Transfer data to and from user space -
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * Note that these routines can cause faults
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * It is assumed that the kernel has nothing at
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * less than KERNELBASE in the virtual address space.
25cf1a301a396c38e8adf52c15f537b80d2483f7jl *
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * Note that copyin(9F) and copyout(9F) are part of the
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * DDI/DKI which specifies that they return '-1' on "errors."
25cf1a301a396c38e8adf52c15f537b80d2483f7jl *
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * Sigh.
25cf1a301a396c38e8adf52c15f537b80d2483f7jl *
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * So there are two extremely similar routines - xcopyin() and xcopyout()
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * which return the errno that we've faithfully computed. This
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * allows other callers (e.g. uiomove(9F)) to work correctly.
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * Given that these are used pretty heavily, we expand the calling
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * sequences inline for all flavours (rather than making wrappers).
25cf1a301a396c38e8adf52c15f537b80d2483f7jl *
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * There are also stub routines for xcopyout_little and xcopyin_little,
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * which currently are intended to handle requests of <= 16 bytes from
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * do_unaligned. Future enhancement to make them handle 8k pages efficiently
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * is left as an exercise...
25cf1a301a396c38e8adf52c15f537b80d2483f7jl */
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl/*
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * Copy user data to kernel space (copyOP/xcopyOP/copyOP_noerr)
25cf1a301a396c38e8adf52c15f537b80d2483f7jl *
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * General theory of operation:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl *
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * The only difference between copy{in,out} and
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * xcopy{in,out} is in the error handling routine they invoke
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * when a memory access error occurs. xcopyOP returns the errno
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * while copyOP returns -1 (see above). copy{in,out}_noerr set
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * a special flag (by oring the TRAMP_FLAG into the fault handler address)
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * if they are called with a fault handler already in place. That flag
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * causes the default handlers to trampoline to the previous handler
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * upon an error.
25cf1a301a396c38e8adf52c15f537b80d2483f7jl *
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * None of the copyops routines grab a window until it's decided that
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * we need to do a HW block copy operation. This saves a window
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * spill/fill when we're called during socket ops. The typical IO
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * path won't cause spill/fill traps.
25cf1a301a396c38e8adf52c15f537b80d2483f7jl *
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * This code uses a set of 4 limits, selected by the relative alignment
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * of the input/output addresses, for the maximum size that will be copied
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * by the plain copy loops; longer copies use FPBLK. If the value for a
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * particular limit is zero, FPBLK is disabled for that alignment.
25cf1a301a396c38e8adf52c15f537b80d2483f7jl *
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * See the description of bcopy above for more details of the
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * data copying algorithm and the default limits.
25cf1a301a396c38e8adf52c15f537b80d2483f7jl *
25cf1a301a396c38e8adf52c15f537b80d2483f7jl */
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl/*
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * Copy kernel data to user space (copyout/xcopyout/xcopyout_little).
25cf1a301a396c38e8adf52c15f537b80d2483f7jl */
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl#if defined(lint)
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl#else /* lint */
25cf1a301a396c38e8adf52c15f537b80d2483f7jl/*
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * We save the arguments in the following registers in case of a fault:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * kaddr - %l1
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * uaddr - %l2
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * count - %l3
25cf1a301a396c38e8adf52c15f537b80d2483f7jl */
25cf1a301a396c38e8adf52c15f537b80d2483f7jl#define SAVE_SRC %l1
25cf1a301a396c38e8adf52c15f537b80d2483f7jl#define SAVE_DST %l2
25cf1a301a396c38e8adf52c15f537b80d2483f7jl#define SAVE_COUNT %l3
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl#define SM_SAVE_SRC %g4
25cf1a301a396c38e8adf52c15f537b80d2483f7jl#define SM_SAVE_DST %g5
25cf1a301a396c38e8adf52c15f537b80d2483f7jl#define SM_SAVE_COUNT %o5
25cf1a301a396c38e8adf52c15f537b80d2483f7jl#define ERRNO %l5
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl#define REAL_LOFAULT %l4
25cf1a301a396c38e8adf52c15f537b80d2483f7jl/*
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * Generic copyio fault handler. This is the first line of defense when a
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * fault occurs in (x)copyin/(x)copyout. In order for this to function
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * properly, the value of the 'real' lofault handler should be in REAL_LOFAULT.
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * This allows us to share common code for all the flavors of the copy
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * operations, including the _noerr versions.
25cf1a301a396c38e8adf52c15f537b80d2483f7jl *
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * Note that this function will restore the original input parameters before
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * calling REAL_LOFAULT. So the real handler can vector to the appropriate
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * member of the t_copyop structure, if needed.
25cf1a301a396c38e8adf52c15f537b80d2483f7jl */
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ENTRY(copyio_fault) ! common fault path: undo FP state and t_lofault, then jump to REAL_LOFAULT
25cf1a301a396c38e8adf52c15f537b80d2483f7jl membar #Sync
25cf1a301a396c38e8adf52c15f537b80d2483f7jl mov %g1,ERRNO ! save errno in ERRNO
25cf1a301a396c38e8adf52c15f537b80d2483f7jl btst FPUSED_FLAG, %l6 ! did the faulting copy use the FP registers?
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz %ncc, 1f ! no: skip the FP state restore
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldx [%fp + STACK_BIAS - SAVED_GSR_OFFSET], %o2
25cf1a301a396c38e8adf52c15f537b80d2483f7jl wr %o2, 0, %gsr ! restore gsr
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ld [%fp + STACK_BIAS - SAVED_FPRS_OFFSET], %o3 ! fprs value saved in the frame
25cf1a301a396c38e8adf52c15f537b80d2483f7jl btst FPRS_FEF, %o3
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pt %icc, 4f ! FEF was clear: no saved fp regs to reload
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl BLD_FPQ2Q4_FROMSTACK(%o2) ! reload saved fp regs (macro defined outside this view)
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ba,pt %ncc, 1f
25cf1a301a396c38e8adf52c15f537b80d2483f7jl wr %o3, 0, %fprs ! restore fprs (delay slot)
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl4:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl FZEROQ2Q4 ! macro defined outside this view; presumably zeroes the fp regs the copy used - confirm
25cf1a301a396c38e8adf52c15f537b80d2483f7jl wr %o3, 0, %fprs ! restore fprs
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl1:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl andn %l6, FPUSED_FLAG, %l6 ! clear the flag bit before writing %l6 back
25cf1a301a396c38e8adf52c15f537b80d2483f7jl membar #Sync
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stn %l6, [THREAD_REG + T_LOFAULT] ! restore old t_lofault
25cf1a301a396c38e8adf52c15f537b80d2483f7jl FP_ALLOWMIGRATE(5, 6) ! macro defined outside this view
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl mov SAVE_SRC, %i0 ! put the original arguments back for the real handler
25cf1a301a396c38e8adf52c15f537b80d2483f7jl mov SAVE_DST, %i1
25cf1a301a396c38e8adf52c15f537b80d2483f7jl jmp REAL_LOFAULT
25cf1a301a396c38e8adf52c15f537b80d2483f7jl mov SAVE_COUNT, %i2 ! delay slot: third argument restored as the jump is taken
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl SET_SIZE(copyio_fault)
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl#endif
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl#if defined(lint)
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl/*ARGSUSED*/
25cf1a301a396c38e8adf52c15f537b80d2483f7jlint
25cf1a301a396c38e8adf52c15f537b80d2483f7jlcopyout(const void *kaddr, void *uaddr, size_t count)
25cf1a301a396c38e8adf52c15f537b80d2483f7jl{ return (0); }
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl#else /* lint */
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
/*
 * copyout entry: choose between the small (leaf) copy and the large copy.
 *
 * Following the lint prototype copyout(kaddr, uaddr, count), the
 * arguments arrive as %o0 = kaddr (source), %o1 = uaddr (destination)
 * and %o2 = count.
 *
 * A count at or below VIS_COPY_THRESHOLD always goes to .copyout_small.
 * Otherwise the low bits of (src ^ dst) select which tunable applies:
 *   low 3 bits clear -> .copyout_8 -> hw_copy_limit_8
 *   low 2 bits clear -> .copyout_4 -> hw_copy_limit_4
 *   bit 0 clear      -> .copyout_2 -> hw_copy_limit_2
 *   bit 0 set        -> fall through -> hw_copy_limit_1
 * and for the chosen limit:
 *   limit == 0       -> .copyout_small
 *   count <= limit   -> .copyout_small
 *   count >  limit   -> .copyout_more
 *
 * None of the branches here are annulled, so the instruction after each
 * one is its delay slot and always executes.  In particular each
 * "cmp %o2, %o3" sits in the delay slot of the preceding bz,pn and
 * supplies the condition codes for the bleu that follows it.
 */
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ENTRY(copyout)
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl cmp %o2, VIS_COPY_THRESHOLD ! check for leaf rtn case
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bleu,pt %ncc, .copyout_small ! count <= threshold: small (leaf) copy
25cf1a301a396c38e8adf52c15f537b80d2483f7jl xor %o0, %o1, %o3 ! delay slot: are src, dst alignable?
25cf1a301a396c38e8adf52c15f537b80d2483f7jl btst 7, %o3 ! low 3 bits of src ^ dst clear?
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pt %ncc, .copyout_8 ! check for longword alignment
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl btst 1, %o3 ! bit 0 of src ^ dst clear?
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pt %ncc, .copyout_2 ! check for half-word
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl sethi %hi(hw_copy_limit_1), %o3 ! Check copy limit
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ld [%o3 + %lo(hw_copy_limit_1)], %o3
25cf1a301a396c38e8adf52c15f537b80d2483f7jl tst %o3
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pn %icc, .copyout_small ! if zero, disable HW copy
25cf1a301a396c38e8adf52c15f537b80d2483f7jl cmp %o2, %o3 ! if length <= limit
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bleu,pt %ncc, .copyout_small ! go to small copy
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ba,pt %ncc, .copyout_more ! otherwise go to large copy
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.copyout_2:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl btst 3, %o3 ! low 2 bits of src ^ dst clear?
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pt %ncc, .copyout_4 ! check for word alignment
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl sethi %hi(hw_copy_limit_2), %o3 ! Check copy limit
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ld [%o3 + %lo(hw_copy_limit_2)], %o3
25cf1a301a396c38e8adf52c15f537b80d2483f7jl tst %o3
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pn %icc, .copyout_small ! if zero, disable HW copy
25cf1a301a396c38e8adf52c15f537b80d2483f7jl cmp %o2, %o3 ! if length <= limit
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bleu,pt %ncc, .copyout_small ! go to small copy
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ba,pt %ncc, .copyout_more ! otherwise go to large copy
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.copyout_4:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ! already checked longword, must be word aligned
25cf1a301a396c38e8adf52c15f537b80d2483f7jl sethi %hi(hw_copy_limit_4), %o3 ! Check copy limit
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ld [%o3 + %lo(hw_copy_limit_4)], %o3
25cf1a301a396c38e8adf52c15f537b80d2483f7jl tst %o3
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pn %icc, .copyout_small ! if zero, disable HW copy
25cf1a301a396c38e8adf52c15f537b80d2483f7jl cmp %o2, %o3 ! if length <= limit
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bleu,pt %ncc, .copyout_small ! go to small copy
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ba,pt %ncc, .copyout_more ! otherwise go to large copy
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.copyout_8:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl sethi %hi(hw_copy_limit_8), %o3 ! Check copy limit
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ld [%o3 + %lo(hw_copy_limit_8)], %o3
25cf1a301a396c38e8adf52c15f537b80d2483f7jl tst %o3
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pn %icc, .copyout_small ! if zero, disable HW copy
25cf1a301a396c38e8adf52c15f537b80d2483f7jl cmp %o2, %o3 ! if length <= limit
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bleu,pt %ncc, .copyout_small ! go to small copy
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ba,pt %ncc, .copyout_more ! otherwise go to large copy
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
/*
 * Small-copy path of copyout.  It returns with retl and uses only
 * %o registers plus the SM_SAVE_* names.
 *
 * Setup: .sm_copyout_err is installed as t_lofault, the previous
 * handler is kept in %o4, and the original arguments are copied to
 * SM_SAVE_SRC / SM_SAVE_DST / SM_SAVE_COUNT.  Source bytes are read with
 * ordinary loads from [%o0]; destination bytes are written with
 * ASI_USER stores to [%o1].
 *
 * Dispatch on %o2 (count):
 *   count <= SHORTCOPY                 -> .co_sm_left  (0-3 byte tail)
 *   count >  CHKSIZE                   -> .co_med
 *   (src | dst) & 3 == 0               -> .co_sm_word  (word copy)
 *   otherwise                          -> .co_sm_movebytes (byte copy)
 *
 * .co_sm_half and .co_sm_byte are shared tail code; they expect %o2 to
 * hold (bytes remaining - 1).  Every success exit issues membar #Sync,
 * restores t_lofault from %o4 and returns 0.
 *
 * None of the branches here are annulled, so the instruction after each
 * branch is its delay slot and always executes, including when that
 * instruction is the first one under the next label.
 *
 * NOTE(review): .co_sm_left copies at most 3 bytes without re-checking
 * the count, so SHORTCOPY is presumably <= 3 -- confirm against its
 * definition.
 */
25cf1a301a396c38e8adf52c15f537b80d2483f7jl .align 16
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop ! instruction alignment
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ! see discussion at start of file
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.copyout_small:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl sethi %hi(.sm_copyout_err), %o5 ! .sm_copyout_err is lofault
25cf1a301a396c38e8adf52c15f537b80d2483f7jl or %o5, %lo(.sm_copyout_err), %o5
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldn [THREAD_REG + T_LOFAULT], %o4 ! save existing handler
25cf1a301a396c38e8adf52c15f537b80d2483f7jl membar #Sync ! sync error barrier
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stn %o5, [THREAD_REG + T_LOFAULT] ! set t_lofault
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.sm_do_copyout:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl mov %o0, SM_SAVE_SRC
25cf1a301a396c38e8adf52c15f537b80d2483f7jl mov %o1, SM_SAVE_DST
25cf1a301a396c38e8adf52c15f537b80d2483f7jl cmp %o2, SHORTCOPY ! check for really short case
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bleu,pt %ncc, .co_sm_left !
25cf1a301a396c38e8adf52c15f537b80d2483f7jl mov %o2, SM_SAVE_COUNT
25cf1a301a396c38e8adf52c15f537b80d2483f7jl cmp %o2, CHKSIZE ! check for medium length cases
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bgu,pn %ncc, .co_med !
25cf1a301a396c38e8adf52c15f537b80d2483f7jl or %o0, %o1, %o3 ! prepare alignment check
25cf1a301a396c38e8adf52c15f537b80d2483f7jl andcc %o3, 0x3, %g0 ! test for alignment
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pt %ncc, .co_sm_word ! branch to word aligned case
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.co_sm_movebytes:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl sub %o2, 3, %o2 ! adjust count to allow cc zero test (also delay slot of bz above)
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.co_sm_notalign4:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldub [%o0], %o3 ! read byte
25cf1a301a396c38e8adf52c15f537b80d2483f7jl subcc %o2, 4, %o2 ! reduce count by 4
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stba %o3, [%o1]ASI_USER ! write byte
25cf1a301a396c38e8adf52c15f537b80d2483f7jl inc %o1 ! advance DST by 1
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldub [%o0 + 1], %o3 ! repeat for a total of 4 bytes
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add %o0, 4, %o0 ! advance SRC by 4
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stba %o3, [%o1]ASI_USER
25cf1a301a396c38e8adf52c15f537b80d2483f7jl inc %o1 ! advance DST by 1
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldub [%o0 - 2], %o3
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stba %o3, [%o1]ASI_USER
25cf1a301a396c38e8adf52c15f537b80d2483f7jl inc %o1 ! advance DST by 1
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldub [%o0 - 1], %o3
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stba %o3, [%o1]ASI_USER
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bgt,pt %ncc, .co_sm_notalign4 ! loop til 3 or fewer bytes remain
25cf1a301a396c38e8adf52c15f537b80d2483f7jl inc %o1 ! delay slot: advance DST by 1
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add %o2, 3, %o2 ! restore count
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.co_sm_left:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl tst %o2
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pt %ncc, .co_sm_exit ! check for zero length
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldub [%o0], %o3 ! load one byte
25cf1a301a396c38e8adf52c15f537b80d2483f7jl deccc %o2 ! reduce count for cc test
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pt %ncc, .co_sm_exit
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stba %o3,[%o1]ASI_USER ! delay slot: store one byte
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldub [%o0 + 1], %o3 ! load second byte
25cf1a301a396c38e8adf52c15f537b80d2483f7jl deccc %o2
25cf1a301a396c38e8adf52c15f537b80d2483f7jl inc %o1
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pt %ncc, .co_sm_exit
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stba %o3,[%o1]ASI_USER ! delay slot: store second byte
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldub [%o0 + 2], %o3 ! load third byte
25cf1a301a396c38e8adf52c15f537b80d2483f7jl inc %o1
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stba %o3,[%o1]ASI_USER ! store third byte
25cf1a301a396c38e8adf52c15f537b80d2483f7jl membar #Sync ! sync error barrier
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stn %o4, [THREAD_REG + T_LOFAULT] ! restore old t_lofault
25cf1a301a396c38e8adf52c15f537b80d2483f7jl retl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl mov %g0, %o0 ! return 0
25cf1a301a396c38e8adf52c15f537b80d2483f7jl .align 16
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.co_sm_words:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl lduw [%o0], %o3 ! read word
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.co_sm_wordx:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl subcc %o2, 8, %o2 ! update count
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stwa %o3, [%o1]ASI_USER ! write word
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add %o0, 8, %o0 ! update SRC
25cf1a301a396c38e8adf52c15f537b80d2483f7jl lduw [%o0 - 4], %o3 ! read word
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add %o1, 4, %o1 ! update DST
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stwa %o3, [%o1]ASI_USER ! write word
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bgt,pt %ncc, .co_sm_words ! loop til done
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add %o1, 4, %o1 ! update DST
25cf1a301a396c38e8adf52c15f537b80d2483f7jl addcc %o2, 7, %o2 ! restore count
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pt %ncc, .co_sm_exit
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl deccc %o2
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pt %ncc, .co_sm_byte
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.co_sm_half:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl subcc %o2, 2, %o2 ! reduce count by 2 (also delay slot of bz above)
25cf1a301a396c38e8adf52c15f537b80d2483f7jl lduh [%o0], %o3 ! read half word
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add %o0, 2, %o0 ! advance SRC by 2
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stha %o3, [%o1]ASI_USER ! write half word
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bgt,pt %ncc, .co_sm_half ! loop til done
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add %o1, 2, %o1 ! advance DST by 2
25cf1a301a396c38e8adf52c15f537b80d2483f7jl addcc %o2, 1, %o2 ! restore count
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pt %ncc, .co_sm_exit
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.co_sm_byte:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldub [%o0], %o3 ! read final byte
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stba %o3, [%o1]ASI_USER ! write final byte
25cf1a301a396c38e8adf52c15f537b80d2483f7jl membar #Sync ! sync error barrier
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stn %o4, [THREAD_REG + T_LOFAULT] ! restore old t_lofault
25cf1a301a396c38e8adf52c15f537b80d2483f7jl retl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl mov %g0, %o0 ! return 0
25cf1a301a396c38e8adf52c15f537b80d2483f7jl .align 16
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.co_sm_word:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl subcc %o2, 4, %o2 ! update count
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bgt,pt %ncc, .co_sm_wordx
25cf1a301a396c38e8adf52c15f537b80d2483f7jl lduw [%o0], %o3 ! delay slot: read word
25cf1a301a396c38e8adf52c15f537b80d2483f7jl addcc %o2, 3, %o2 ! restore count
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pt %ncc, .co_sm_exit
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stwa %o3, [%o1]ASI_USER ! delay slot: write word
25cf1a301a396c38e8adf52c15f537b80d2483f7jl deccc %o2 ! reduce count for cc test
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldub [%o0 + 4], %o3 ! load one byte
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add %o1, 4, %o1
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pt %ncc, .co_sm_exit
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stba %o3, [%o1]ASI_USER ! delay slot: store one byte
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldub [%o0 + 5], %o3 ! load second byte
25cf1a301a396c38e8adf52c15f537b80d2483f7jl deccc %o2
25cf1a301a396c38e8adf52c15f537b80d2483f7jl inc %o1
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pt %ncc, .co_sm_exit
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stba %o3, [%o1]ASI_USER ! delay slot: store second byte
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldub [%o0 + 6], %o3 ! load third byte
25cf1a301a396c38e8adf52c15f537b80d2483f7jl inc %o1
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stba %o3, [%o1]ASI_USER ! store third byte
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.co_sm_exit:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl membar #Sync ! sync error barrier
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stn %o4, [THREAD_REG + T_LOFAULT] ! restore old t_lofault
25cf1a301a396c38e8adf52c15f537b80d2483f7jl retl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl mov %g0, %o0 ! return 0
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
/*
 * Medium-length copyout, entered from the small path when
 * count > CHKSIZE.  %o0 = source, %o1 = user destination, %o2 = count.
 *
 * The low bits of (src ^ dst) pick the widest usable access size:
 *   bit 0 set          -> .co_sm_movebytes (byte loop)
 *   low 2 bits nonzero -> .co_med_half
 *   low 3 bits nonzero -> .co_med_word
 *   otherwise          -> .co_med_long (falls through below)
 *
 * .co_med_long copies single bytes until SRC is word aligned, one extra
 * word if SRC is not yet 8-byte aligned, then 32 bytes per iteration
 * with ldx/stxa, then 8 bytes per iteration.  The remaining 0-7 bytes
 * are handed to .co_sm_exit (none), .co_sm_byte (one) or .co_sm_half
 * with %o2 = (bytes remaining - 1).
 *
 * Branches are not annulled, so each delay-slot instruction executes on
 * both the taken and the fall-through path.
 */
25cf1a301a396c38e8adf52c15f537b80d2483f7jl .align 16
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.co_med:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl xor %o0, %o1, %o3 ! setup alignment check
25cf1a301a396c38e8adf52c15f537b80d2483f7jl btst 1, %o3
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bnz,pt %ncc, .co_sm_movebytes ! unaligned
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl btst 3, %o3
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bnz,pt %ncc, .co_med_half ! halfword aligned
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl btst 7, %o3
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bnz,pt %ncc, .co_med_word ! word aligned
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.co_med_long:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl btst 3, %o0 ! check for
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pt %ncc, .co_med_long1 ! word alignment
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.co_med_long0:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldub [%o0], %o3 ! load one byte
25cf1a301a396c38e8adf52c15f537b80d2483f7jl inc %o0
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stba %o3,[%o1]ASI_USER ! store byte
25cf1a301a396c38e8adf52c15f537b80d2483f7jl inc %o1
25cf1a301a396c38e8adf52c15f537b80d2483f7jl btst 3, %o0
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bnz,pt %ncc, .co_med_long0
25cf1a301a396c38e8adf52c15f537b80d2483f7jl dec %o2 ! delay slot: one byte copied
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.co_med_long1: ! word aligned
25cf1a301a396c38e8adf52c15f537b80d2483f7jl btst 7, %o0 ! check for long word
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pt %ncc, .co_med_long2
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl lduw [%o0], %o3 ! load word
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add %o0, 4, %o0 ! advance SRC by 4
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stwa %o3, [%o1]ASI_USER ! store word
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add %o1, 4, %o1 ! advance DST by 4
25cf1a301a396c38e8adf52c15f537b80d2483f7jl sub %o2, 4, %o2 ! reduce count by 4
25cf1a301a396c38e8adf52c15f537b80d2483f7jl!
25cf1a301a396c38e8adf52c15f537b80d2483f7jl! Now long word aligned and have at least 32 bytes to move
25cf1a301a396c38e8adf52c15f537b80d2483f7jl!
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.co_med_long2:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl sub %o2, 31, %o2 ! adjust count to allow cc zero test
25cf1a301a396c38e8adf52c15f537b80d2483f7jl sub %o1, 8, %o1 ! adjust pointer to allow store in
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ! branch delay slot instead of add
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.co_med_lmove:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add %o1, 8, %o1 ! advance DST by 8
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldx [%o0], %o3 ! read long word
25cf1a301a396c38e8adf52c15f537b80d2483f7jl subcc %o2, 32, %o2 ! reduce count by 32
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stxa %o3, [%o1]ASI_USER ! write long word
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add %o1, 8, %o1 ! advance DST by 8
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldx [%o0 + 8], %o3 ! repeat for a total of 4 long words
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add %o0, 32, %o0 ! advance SRC by 32
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stxa %o3, [%o1]ASI_USER
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldx [%o0 - 16], %o3
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add %o1, 8, %o1 ! advance DST by 8
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stxa %o3, [%o1]ASI_USER
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldx [%o0 - 8], %o3
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add %o1, 8, %o1 ! advance DST by 8
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bgt,pt %ncc, .co_med_lmove ! loop til 31 or fewer bytes left
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stxa %o3, [%o1]ASI_USER ! delay slot: write fourth long word
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add %o1, 8, %o1 ! advance DST by 8
25cf1a301a396c38e8adf52c15f537b80d2483f7jl addcc %o2, 24, %o2 ! restore count to long word offset
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ble,pt %ncc, .co_med_lextra ! check for more long words to move
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.co_med_lword:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldx [%o0], %o3 ! read long word
25cf1a301a396c38e8adf52c15f537b80d2483f7jl subcc %o2, 8, %o2 ! reduce count by 8
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stxa %o3, [%o1]ASI_USER ! write long word
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add %o0, 8, %o0 ! advance SRC by 8
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bgt,pt %ncc, .co_med_lword ! loop til 7 or fewer bytes left
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add %o1, 8, %o1 ! advance DST by 8
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.co_med_lextra:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl addcc %o2, 7, %o2 ! restore rest of count
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pt %ncc, .co_sm_exit ! if zero, then done
25cf1a301a396c38e8adf52c15f537b80d2483f7jl deccc %o2 ! delay slot: count - 1, zero if one byte left
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pt %ncc, .co_sm_byte
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ba,pt %ncc, .co_sm_half
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
/*
 * Medium-length copyout using word accesses; .co_med branches here when
 * the low 2 bits of (src ^ dst) are zero but the low 3 bits are not.
 *
 * Copies single bytes until SRC is word aligned, then 16 bytes per
 * iteration with lduw/stwa, then one word per iteration.  The remaining
 * 0-3 bytes are handed to .co_sm_exit (none), .co_sm_byte (one) or
 * .co_sm_half with %o2 = (bytes remaining - 1).
 *
 * Branches are not annulled, so each delay-slot instruction always
 * executes.
 */
25cf1a301a396c38e8adf52c15f537b80d2483f7jl .align 16
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop ! instruction alignment
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ! see discussion at start of file
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.co_med_word:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl btst 3, %o0 ! check for
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pt %ncc, .co_med_word1 ! word alignment
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.co_med_word0:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldub [%o0], %o3 ! load one byte
25cf1a301a396c38e8adf52c15f537b80d2483f7jl inc %o0
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stba %o3,[%o1]ASI_USER ! store byte
25cf1a301a396c38e8adf52c15f537b80d2483f7jl inc %o1
25cf1a301a396c38e8adf52c15f537b80d2483f7jl btst 3, %o0
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bnz,pt %ncc, .co_med_word0
25cf1a301a396c38e8adf52c15f537b80d2483f7jl dec %o2 ! delay slot: one byte copied
25cf1a301a396c38e8adf52c15f537b80d2483f7jl!
25cf1a301a396c38e8adf52c15f537b80d2483f7jl! Now word aligned and have at least 36 bytes to move
25cf1a301a396c38e8adf52c15f537b80d2483f7jl!
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.co_med_word1:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl sub %o2, 15, %o2 ! adjust count to allow cc zero test
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.co_med_wmove:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl lduw [%o0], %o3 ! read word
25cf1a301a396c38e8adf52c15f537b80d2483f7jl subcc %o2, 16, %o2 ! reduce count by 16
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stwa %o3, [%o1]ASI_USER ! write word
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add %o1, 4, %o1 ! advance DST by 4
25cf1a301a396c38e8adf52c15f537b80d2483f7jl lduw [%o0 + 4], %o3 ! repeat for a total of 4 words
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add %o0, 16, %o0 ! advance SRC by 16
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stwa %o3, [%o1]ASI_USER
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add %o1, 4, %o1 ! advance DST by 4
25cf1a301a396c38e8adf52c15f537b80d2483f7jl lduw [%o0 - 8], %o3
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stwa %o3, [%o1]ASI_USER
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add %o1, 4, %o1 ! advance DST by 4
25cf1a301a396c38e8adf52c15f537b80d2483f7jl lduw [%o0 - 4], %o3
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stwa %o3, [%o1]ASI_USER
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bgt,pt %ncc, .co_med_wmove ! loop til 15 or fewer bytes left
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add %o1, 4, %o1 ! advance DST by 4
25cf1a301a396c38e8adf52c15f537b80d2483f7jl addcc %o2, 12, %o2 ! restore count to word offset
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ble,pt %ncc, .co_med_wextra ! check for more words to move
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.co_med_word2:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl lduw [%o0], %o3 ! read word
25cf1a301a396c38e8adf52c15f537b80d2483f7jl subcc %o2, 4, %o2 ! reduce count by 4
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stwa %o3, [%o1]ASI_USER ! write word
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add %o0, 4, %o0 ! advance SRC by 4
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bgt,pt %ncc, .co_med_word2 ! loop til 3 or fewer bytes left
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add %o1, 4, %o1 ! advance DST by 4
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.co_med_wextra:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl addcc %o2, 3, %o2 ! restore rest of count
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pt %ncc, .co_sm_exit ! if zero, then done
25cf1a301a396c38e8adf52c15f537b80d2483f7jl deccc %o2 ! delay slot: count - 1, zero if one byte left
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pt %ncc, .co_sm_byte
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ba,pt %ncc, .co_sm_half
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
/*
 * Medium-length copyout using half-word accesses; .co_med branches here
 * when bit 0 of (src ^ dst) is zero but bit 1 is not.
 *
 * Copies one byte if SRC is odd, then 8 bytes per iteration with
 * lduh/stha.  The remaining 0-7 bytes are handed to .co_sm_exit (none),
 * .co_sm_byte (one) or .co_sm_half with %o2 = (bytes remaining - 1).
 *
 * Branches are not annulled, so each delay-slot instruction always
 * executes.
 */
25cf1a301a396c38e8adf52c15f537b80d2483f7jl .align 16
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop ! instruction alignment
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop ! see discussion at start of file
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.co_med_half:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl btst 1, %o0 ! check for
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pt %ncc, .co_med_half1 ! half word alignment
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldub [%o0], %o3 ! load one byte
25cf1a301a396c38e8adf52c15f537b80d2483f7jl inc %o0
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stba %o3,[%o1]ASI_USER ! store byte
25cf1a301a396c38e8adf52c15f537b80d2483f7jl inc %o1
25cf1a301a396c38e8adf52c15f537b80d2483f7jl dec %o2
25cf1a301a396c38e8adf52c15f537b80d2483f7jl!
25cf1a301a396c38e8adf52c15f537b80d2483f7jl! Now half word aligned and have at least 38 bytes to move
25cf1a301a396c38e8adf52c15f537b80d2483f7jl!
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.co_med_half1:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl sub %o2, 7, %o2 ! adjust count to allow cc zero test
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.co_med_hmove:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl lduh [%o0], %o3 ! read half word
25cf1a301a396c38e8adf52c15f537b80d2483f7jl subcc %o2, 8, %o2 ! reduce count by 8
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stha %o3, [%o1]ASI_USER ! write half word
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add %o1, 2, %o1 ! advance DST by 2
25cf1a301a396c38e8adf52c15f537b80d2483f7jl lduh [%o0 + 2], %o3 ! repeat for a total of 4 halfwords
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add %o0, 8, %o0 ! advance SRC by 8
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stha %o3, [%o1]ASI_USER
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add %o1, 2, %o1 ! advance DST by 2
25cf1a301a396c38e8adf52c15f537b80d2483f7jl lduh [%o0 - 4], %o3
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stha %o3, [%o1]ASI_USER
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add %o1, 2, %o1 ! advance DST by 2
25cf1a301a396c38e8adf52c15f537b80d2483f7jl lduh [%o0 - 2], %o3
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stha %o3, [%o1]ASI_USER
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bgt,pt %ncc, .co_med_hmove ! loop til 7 or fewer bytes left
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add %o1, 2, %o1 ! advance DST by 2
25cf1a301a396c38e8adf52c15f537b80d2483f7jl addcc %o2, 7, %o2 ! restore count
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pt %ncc, .co_sm_exit
25cf1a301a396c38e8adf52c15f537b80d2483f7jl deccc %o2 ! delay slot: count - 1, zero if one byte left
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pt %ncc, .co_sm_byte
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ba,pt %ncc, .co_sm_half
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl/*
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * We got here because of a fault during short copyout.
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * Errno value is in ERRNO, but DDI/DKI says return -1 (sigh).
25cf1a301a396c38e8adf52c15f537b80d2483f7jl */
/*
 * This label is the t_lofault value installed at .copyout_small.
 * It puts back the previous t_lofault (kept in %o4) and reloads the
 * original arguments from SM_SAVE_SRC / SM_SAVE_DST / SM_SAVE_COUNT
 * into %o0-%o2.  If the thread's T_COPYOPS pointer is non-zero it jumps
 * (without linking) to the CP_COPYOUT entry of that table; otherwise it
 * returns -1.
 * NOTE(review): ERRNO is not referenced anywhere in this handler.
 */
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.sm_copyout_err:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl membar #Sync
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stn %o4, [THREAD_REG + T_LOFAULT] ! restore old t_lofault
25cf1a301a396c38e8adf52c15f537b80d2483f7jl mov SM_SAVE_SRC, %o0
25cf1a301a396c38e8adf52c15f537b80d2483f7jl mov SM_SAVE_DST, %o1
25cf1a301a396c38e8adf52c15f537b80d2483f7jl mov SM_SAVE_COUNT, %o2
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldn [THREAD_REG + T_COPYOPS], %o3 ! check for copyop handler
25cf1a301a396c38e8adf52c15f537b80d2483f7jl tst %o3
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pt %ncc, 3f ! if not, return error
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldn [%o3 + CP_COPYOUT], %o5 ! if handler, invoke it with
25cf1a301a396c38e8adf52c15f537b80d2483f7jl jmp %o5 ! original arguments
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl3:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl retl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl or %g0, -1, %o0 ! delay slot: return error value
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl SET_SIZE(copyout)
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl/*
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * The _more entry points are not intended to be used directly by
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * any caller from outside this file. They are provided to allow
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * profiling and dtrace of the portions of the copy code that use
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * the floating point registers.
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * This entry is particularly important as DTRACE (at least as of
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * 4/2004) does not support leaf functions.
25cf1a301a396c38e8adf52c15f537b80d2483f7jl */
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ENTRY(copyout_more)
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.copyout_more:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl prefetch [%o0], #n_reads
25cf1a301a396c38e8adf52c15f537b80d2483f7jl save %sp, -SA(MINFRAME + HWCOPYFRAMESIZE), %sp
25cf1a301a396c38e8adf52c15f537b80d2483f7jl set .copyout_err, REAL_LOFAULT
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl/*
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * Copy outs that reach here are larger than VIS_COPY_THRESHOLD bytes
25cf1a301a396c38e8adf52c15f537b80d2483f7jl */
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.do_copyout:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl set copyio_fault, %l7 ! .copyio_fault is lofault val
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldn [THREAD_REG + T_LOFAULT], %l6 ! save existing handler
25cf1a301a396c38e8adf52c15f537b80d2483f7jl membar #Sync ! sync error barrier
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stn %l7, [THREAD_REG + T_LOFAULT] ! set t_lofault
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl mov %i0, SAVE_SRC
25cf1a301a396c38e8adf52c15f537b80d2483f7jl mov %i1, SAVE_DST
25cf1a301a396c38e8adf52c15f537b80d2483f7jl mov %i2, SAVE_COUNT
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl FP_NOMIGRATE(6, 7)
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl rd %fprs, %o2 ! check for unused fp
25cf1a301a396c38e8adf52c15f537b80d2483f7jl st %o2, [%fp + STACK_BIAS - SAVED_FPRS_OFFSET] ! save orig %fprs
25cf1a301a396c38e8adf52c15f537b80d2483f7jl btst FPRS_FEF, %o2
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,a,pt %icc, .do_blockcopyout
25cf1a301a396c38e8adf52c15f537b80d2483f7jl wr %g0, FPRS_FEF, %fprs
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl BST_FPQ2Q4_TOSTACK(%o2)
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.do_blockcopyout:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl rd %gsr, %o2
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stx %o2, [%fp + STACK_BIAS - SAVED_GSR_OFFSET] ! save gsr
25cf1a301a396c38e8adf52c15f537b80d2483f7jl or %l6, FPUSED_FLAG, %l6
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl andcc DST, VIS_BLOCKSIZE - 1, TMP
25cf1a301a396c38e8adf52c15f537b80d2483f7jl mov ASI_USER, %asi
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pt %ncc, 2f
25cf1a301a396c38e8adf52c15f537b80d2483f7jl neg TMP
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add TMP, VIS_BLOCKSIZE, TMP
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ! TMP = bytes required to align DST on FP_BLOCK boundary
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ! Using SRC as a tmp here
25cf1a301a396c38e8adf52c15f537b80d2483f7jl cmp TMP, 3
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bleu,pt %ncc, 1f
25cf1a301a396c38e8adf52c15f537b80d2483f7jl sub CNT,TMP,CNT ! adjust main count
25cf1a301a396c38e8adf52c15f537b80d2483f7jl sub TMP, 3, TMP ! adjust for end of loop test
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.co_blkalign:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldub [REALSRC], SRC ! move 4 bytes per loop iteration
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stba SRC, [DST]%asi
25cf1a301a396c38e8adf52c15f537b80d2483f7jl subcc TMP, 4, TMP
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldub [REALSRC + 1], SRC
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add REALSRC, 4, REALSRC
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stba SRC, [DST + 1]%asi
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldub [REALSRC - 2], SRC
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add DST, 4, DST
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stba SRC, [DST - 2]%asi
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldub [REALSRC - 1], SRC
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bgu,pt %ncc, .co_blkalign
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stba SRC, [DST - 1]%asi
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl addcc TMP, 3, TMP ! restore count adjustment
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pt %ncc, 2f ! no bytes left?
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl1: ldub [REALSRC], SRC
25cf1a301a396c38e8adf52c15f537b80d2483f7jl inc REALSRC
25cf1a301a396c38e8adf52c15f537b80d2483f7jl inc DST
25cf1a301a396c38e8adf52c15f537b80d2483f7jl deccc TMP
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bgu %ncc, 1b
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stba SRC, [DST - 1]%asi
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl2:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl membar #StoreLoad
25cf1a301a396c38e8adf52c15f537b80d2483f7jl andn REALSRC, 0x7, SRC
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ! SRC - 8-byte aligned
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ! DST - 64-byte aligned
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldd [SRC], %f16
25cf1a301a396c38e8adf52c15f537b80d2483f7jl prefetch [SRC + (1 * VIS_BLOCKSIZE)], #n_reads
25cf1a301a396c38e8adf52c15f537b80d2483f7jl alignaddr REALSRC, %g0, %g0
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldd [SRC + 0x08], %f18
25cf1a301a396c38e8adf52c15f537b80d2483f7jl prefetch [SRC + (2 * VIS_BLOCKSIZE)], #n_reads
25cf1a301a396c38e8adf52c15f537b80d2483f7jl faligndata %f16, %f18, %f48
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldd [SRC + 0x10], %f20
c8a722abb8fd974fb16523acbd90ea75d5dcbeb2pm prefetch [SRC + (3 * VIS_BLOCKSIZE)], #n_reads
25cf1a301a396c38e8adf52c15f537b80d2483f7jl faligndata %f18, %f20, %f50
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldd [SRC + 0x18], %f22
25cf1a301a396c38e8adf52c15f537b80d2483f7jl prefetch [SRC + (4 * VIS_BLOCKSIZE)], #one_read
25cf1a301a396c38e8adf52c15f537b80d2483f7jl faligndata %f20, %f22, %f52
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldd [SRC + 0x20], %f24
c8a722abb8fd974fb16523acbd90ea75d5dcbeb2pm prefetch [SRC + (8 * VIS_BLOCKSIZE)], #one_read
25cf1a301a396c38e8adf52c15f537b80d2483f7jl faligndata %f22, %f24, %f54
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldd [SRC + 0x28], %f26
c8a722abb8fd974fb16523acbd90ea75d5dcbeb2pm prefetch [SRC + (12 * VIS_BLOCKSIZE)], #one_read
25cf1a301a396c38e8adf52c15f537b80d2483f7jl faligndata %f24, %f26, %f56
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldd [SRC + 0x30], %f28
c8a722abb8fd974fb16523acbd90ea75d5dcbeb2pm prefetch [SRC + (16 * VIS_BLOCKSIZE)], #one_read
25cf1a301a396c38e8adf52c15f537b80d2483f7jl faligndata %f26, %f28, %f58
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldd [SRC + 0x38], %f30
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldd [SRC + VIS_BLOCKSIZE], %f16
25cf1a301a396c38e8adf52c15f537b80d2483f7jl sub CNT, VIS_BLOCKSIZE, CNT
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add SRC, VIS_BLOCKSIZE, SRC
c8a722abb8fd974fb16523acbd90ea75d5dcbeb2pm prefetch [SRC + (19 * VIS_BLOCKSIZE)], #one_read
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add REALSRC, VIS_BLOCKSIZE, REALSRC
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ba,pt %ncc, 1f
c8a722abb8fd974fb16523acbd90ea75d5dcbeb2pm prefetch [SRC + (23 * VIS_BLOCKSIZE)], #one_read
25cf1a301a396c38e8adf52c15f537b80d2483f7jl .align 32
25cf1a301a396c38e8adf52c15f537b80d2483f7jl1:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldd [SRC + 0x08], %f18
25cf1a301a396c38e8adf52c15f537b80d2483f7jl faligndata %f28, %f30, %f60
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldd [SRC + 0x10], %f20
25cf1a301a396c38e8adf52c15f537b80d2483f7jl faligndata %f30, %f16, %f62
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stda %f48, [DST]ASI_BLK_AIUS
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldd [SRC + 0x18], %f22
25cf1a301a396c38e8adf52c15f537b80d2483f7jl faligndata %f16, %f18, %f48
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldd [SRC + 0x20], %f24
25cf1a301a396c38e8adf52c15f537b80d2483f7jl faligndata %f18, %f20, %f50
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldd [SRC + 0x28], %f26
25cf1a301a396c38e8adf52c15f537b80d2483f7jl faligndata %f20, %f22, %f52
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldd [SRC + 0x30], %f28
25cf1a301a396c38e8adf52c15f537b80d2483f7jl faligndata %f22, %f24, %f54
c8a722abb8fd974fb16523acbd90ea75d5dcbeb2pm sub CNT, VIS_BLOCKSIZE, CNT
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldd [SRC + 0x38], %f30
25cf1a301a396c38e8adf52c15f537b80d2483f7jl faligndata %f24, %f26, %f56
c8a722abb8fd974fb16523acbd90ea75d5dcbeb2pm add DST, VIS_BLOCKSIZE, DST
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldd [SRC + VIS_BLOCKSIZE], %f16
25cf1a301a396c38e8adf52c15f537b80d2483f7jl faligndata %f26, %f28, %f58
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add REALSRC, VIS_BLOCKSIZE, REALSRC
c8a722abb8fd974fb16523acbd90ea75d5dcbeb2pm prefetch [SRC + (3 * VIS_BLOCKSIZE)], #n_reads
c8a722abb8fd974fb16523acbd90ea75d5dcbeb2pm add SRC, VIS_BLOCKSIZE, SRC
c8a722abb8fd974fb16523acbd90ea75d5dcbeb2pm prefetch [SRC + ((OLYMPUS_C_PREFETCH) * VIS_BLOCKSIZE)], #one_read
25cf1a301a396c38e8adf52c15f537b80d2483f7jl cmp CNT, VIS_BLOCKSIZE + 8
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bgu,pt %ncc, 1b
c8a722abb8fd974fb16523acbd90ea75d5dcbeb2pm prefetch [SRC + ((OLYMPUS_C_2ND_PREFETCH) * VIS_BLOCKSIZE)], #one_read
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ! only if REALSRC & 0x7 is 0
25cf1a301a396c38e8adf52c15f537b80d2483f7jl cmp CNT, VIS_BLOCKSIZE
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bne %ncc, 3f
25cf1a301a396c38e8adf52c15f537b80d2483f7jl andcc REALSRC, 0x7, %g0
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pt %ncc, 2f
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl3:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl faligndata %f28, %f30, %f60
25cf1a301a396c38e8adf52c15f537b80d2483f7jl faligndata %f30, %f16, %f62
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stda %f48, [DST]ASI_BLK_AIUS
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add DST, VIS_BLOCKSIZE, DST
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ba,pt %ncc, 3f
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl2:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldd [SRC + 0x08], %f18
25cf1a301a396c38e8adf52c15f537b80d2483f7jl fsrc1 %f28, %f60
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldd [SRC + 0x10], %f20
25cf1a301a396c38e8adf52c15f537b80d2483f7jl fsrc1 %f30, %f62
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stda %f48, [DST]ASI_BLK_AIUS
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldd [SRC + 0x18], %f22
25cf1a301a396c38e8adf52c15f537b80d2483f7jl fsrc1 %f16, %f48
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldd [SRC + 0x20], %f24
25cf1a301a396c38e8adf52c15f537b80d2483f7jl fsrc1 %f18, %f50
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldd [SRC + 0x28], %f26
25cf1a301a396c38e8adf52c15f537b80d2483f7jl fsrc1 %f20, %f52
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldd [SRC + 0x30], %f28
25cf1a301a396c38e8adf52c15f537b80d2483f7jl fsrc1 %f22, %f54
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldd [SRC + 0x38], %f30
25cf1a301a396c38e8adf52c15f537b80d2483f7jl fsrc1 %f24, %f56
25cf1a301a396c38e8adf52c15f537b80d2483f7jl sub CNT, VIS_BLOCKSIZE, CNT
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add DST, VIS_BLOCKSIZE, DST
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add SRC, VIS_BLOCKSIZE, SRC
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add REALSRC, VIS_BLOCKSIZE, REALSRC
25cf1a301a396c38e8adf52c15f537b80d2483f7jl fsrc1 %f26, %f58
25cf1a301a396c38e8adf52c15f537b80d2483f7jl fsrc1 %f28, %f60
25cf1a301a396c38e8adf52c15f537b80d2483f7jl fsrc1 %f30, %f62
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stda %f48, [DST]ASI_BLK_AIUS
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add DST, VIS_BLOCKSIZE, DST
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ba,a,pt %ncc, 4f
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl3: tst CNT
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,a %ncc, 4f
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl5: ldub [REALSRC], TMP
25cf1a301a396c38e8adf52c15f537b80d2483f7jl inc REALSRC
25cf1a301a396c38e8adf52c15f537b80d2483f7jl inc DST
25cf1a301a396c38e8adf52c15f537b80d2483f7jl deccc CNT
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bgu %ncc, 5b
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stba TMP, [DST - 1]%asi
25cf1a301a396c38e8adf52c15f537b80d2483f7jl4:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.copyout_exit:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl membar #Sync
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldx [%fp + STACK_BIAS - SAVED_GSR_OFFSET], %o2
25cf1a301a396c38e8adf52c15f537b80d2483f7jl wr %o2, 0, %gsr ! restore gsr
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ld [%fp + STACK_BIAS - SAVED_FPRS_OFFSET], %o3
25cf1a301a396c38e8adf52c15f537b80d2483f7jl btst FPRS_FEF, %o3
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pt %icc, 4f
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl BLD_FPQ2Q4_FROMSTACK(%o2)
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ba,pt %ncc, 1f
25cf1a301a396c38e8adf52c15f537b80d2483f7jl wr %o3, 0, %fprs ! restore fprs
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl4:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl FZEROQ2Q4
25cf1a301a396c38e8adf52c15f537b80d2483f7jl wr %o3, 0, %fprs ! restore fprs
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl1:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl membar #Sync
25cf1a301a396c38e8adf52c15f537b80d2483f7jl andn %l6, FPUSED_FLAG, %l6
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stn %l6, [THREAD_REG + T_LOFAULT] ! restore old t_lofault
25cf1a301a396c38e8adf52c15f537b80d2483f7jl FP_ALLOWMIGRATE(5, 6)
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ret
25cf1a301a396c38e8adf52c15f537b80d2483f7jl restore %g0, 0, %o0
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl/*
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * We got here because of a fault during copyout.
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * Errno value is in ERRNO, but DDI/DKI says return -1 (sigh).
25cf1a301a396c38e8adf52c15f537b80d2483f7jl */
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.copyout_err:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldn [THREAD_REG + T_COPYOPS], %o4 ! check for copyop handler
25cf1a301a396c38e8adf52c15f537b80d2483f7jl tst %o4
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pt %ncc, 2f ! if not, return error
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldn [%o4 + CP_COPYOUT], %g2 ! if handler, invoke it with
25cf1a301a396c38e8adf52c15f537b80d2483f7jl jmp %g2 ! original arguments
25cf1a301a396c38e8adf52c15f537b80d2483f7jl restore %g0, 0, %g0 ! dispose of copy window
25cf1a301a396c38e8adf52c15f537b80d2483f7jl2:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ret
25cf1a301a396c38e8adf52c15f537b80d2483f7jl restore %g0, -1, %o0 ! return error value
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl SET_SIZE(copyout_more)
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl#endif /* lint */
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl#ifdef lint
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl/*ARGSUSED*/
25cf1a301a396c38e8adf52c15f537b80d2483f7jlint
25cf1a301a396c38e8adf52c15f537b80d2483f7jlxcopyout(const void *kaddr, void *uaddr, size_t count)
25cf1a301a396c38e8adf52c15f537b80d2483f7jl{ return (0); }
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl#else /* lint */
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
/*
 * xcopyout: copy count bytes from kaddr to uaddr, selecting one of two
 * copy paths whose bodies live outside this routine (.sm_do_copyout and
 * .do_copyout).
 *
 * Register use on entry, following the lint prototype above:
 *	%o0 = kaddr (source), %o1 = uaddr (destination), %o2 = count
 *
 * Path selection:
 *   1. count <= VIS_COPY_THRESHOLD branches straight to .xcopyout_small.
 *   2. Otherwise %o3 = %o0 ^ %o1 is tested to find the largest size
 *	(8, 4, 2 or 1 bytes) to which source and destination can be
 *	aligned together, and the matching hw_copy_limit_{8,4,2,1}
 *	variable is loaded.
 *   3. A limit of zero, or count <= limit, selects .xcopyout_small;
 *	anything larger selects .xcopyout_more.
 *
 * In each limit check the "cmp %o2, %o3" sits in the delay slot of the
 * preceding "bz": it executes whether or not that branch is taken and
 * sets the condition codes consumed by the following "bleu".
 *
 * On a fault, both error paths first look for a copyops handler
 * (T_COPYOPS / CP_XCOPYOUT) on the current thread and jump to it with
 * the original arguments if one is installed; otherwise they return
 * the errno value.
 */
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ENTRY(xcopyout)
25cf1a301a396c38e8adf52c15f537b80d2483f7jl cmp %o2, VIS_COPY_THRESHOLD ! check for leaf rtn case
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bleu,pt %ncc, .xcopyout_small ! go to larger cases
25cf1a301a396c38e8adf52c15f537b80d2483f7jl xor %o0, %o1, %o3 ! are src, dst alignable?
25cf1a301a396c38e8adf52c15f537b80d2483f7jl btst 7, %o3 !
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pt %ncc, .xcopyout_8 !
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl btst 1, %o3 !
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pt %ncc, .xcopyout_2 ! check for half-word
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
/* Bit 0 of the xor is set: byte alignment only, use hw_copy_limit_1. */
25cf1a301a396c38e8adf52c15f537b80d2483f7jl sethi %hi(hw_copy_limit_1), %o3 ! Check copy limit
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ld [%o3 + %lo(hw_copy_limit_1)], %o3
25cf1a301a396c38e8adf52c15f537b80d2483f7jl tst %o3
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pn %icc, .xcopyout_small ! if zero, disable HW copy
25cf1a301a396c38e8adf52c15f537b80d2483f7jl cmp %o2, %o3 ! if length <= limit
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bleu,pt %ncc, .xcopyout_small ! go to small copy
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ba,pt %ncc, .xcopyout_more ! otherwise go to large copy
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
/* Bit 0 of the xor is clear: test bits 0-1 to tell word from half-word. */
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.xcopyout_2:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl btst 3, %o3 !
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pt %ncc, .xcopyout_4 ! check for word alignment
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl sethi %hi(hw_copy_limit_2), %o3 ! Check copy limit
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ld [%o3 + %lo(hw_copy_limit_2)], %o3
25cf1a301a396c38e8adf52c15f537b80d2483f7jl tst %o3
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pn %icc, .xcopyout_small ! if zero, disable HW copy
25cf1a301a396c38e8adf52c15f537b80d2483f7jl cmp %o2, %o3 ! if length <= limit
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bleu,pt %ncc, .xcopyout_small ! go to small copy
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ba,pt %ncc, .xcopyout_more ! otherwise go to large copy
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.xcopyout_4:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ! already checked longword, must be word aligned
25cf1a301a396c38e8adf52c15f537b80d2483f7jl sethi %hi(hw_copy_limit_4), %o3 ! Check copy limit
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ld [%o3 + %lo(hw_copy_limit_4)], %o3
25cf1a301a396c38e8adf52c15f537b80d2483f7jl tst %o3
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pn %icc, .xcopyout_small ! if zero, disable HW copy
25cf1a301a396c38e8adf52c15f537b80d2483f7jl cmp %o2, %o3 ! if length <= limit
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bleu,pt %ncc, .xcopyout_small ! go to small copy
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ba,pt %ncc, .xcopyout_more ! otherwise go to large copy
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
/* Low three bits of source and destination match: use hw_copy_limit_8. */
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.xcopyout_8:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl sethi %hi(hw_copy_limit_8), %o3 ! Check copy limit
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ld [%o3 + %lo(hw_copy_limit_8)], %o3
25cf1a301a396c38e8adf52c15f537b80d2483f7jl tst %o3
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pn %icc, .xcopyout_small ! if zero, disable HW copy
25cf1a301a396c38e8adf52c15f537b80d2483f7jl cmp %o2, %o3 ! if length <= limit
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bleu,pt %ncc, .xcopyout_small ! go to small copy
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ba,pt %ncc, .xcopyout_more ! otherwise go to large copy
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
/*
 * Small-copy path: no register window is allocated here.  The previous
 * t_lofault value is kept in %o4 and .sm_xcopyout_err is installed in
 * its place (the store happens in the branch delay slot) before joining
 * the shared code at .sm_do_copyout.
 */
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.xcopyout_small:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl sethi %hi(.sm_xcopyout_err), %o5 ! .sm_xcopyout_err is lofault
25cf1a301a396c38e8adf52c15f537b80d2483f7jl or %o5, %lo(.sm_xcopyout_err), %o5
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldn [THREAD_REG + T_LOFAULT], %o4 ! save existing handler
25cf1a301a396c38e8adf52c15f537b80d2483f7jl membar #Sync ! sync error barrier
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ba,pt %ncc, .sm_do_copyout ! common code
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stn %o5, [THREAD_REG + T_LOFAULT] ! set t_lofault
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
/*
 * Large-copy path: allocate a new register window and stack frame, load
 * the address of .xcopyout_err into REAL_LOFAULT (the low bits are or'ed
 * in from the branch delay slot) and join the shared code at .do_copyout.
 */
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.xcopyout_more:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl save %sp, -SA(MINFRAME + HWCOPYFRAMESIZE), %sp
25cf1a301a396c38e8adf52c15f537b80d2483f7jl sethi %hi(.xcopyout_err), REAL_LOFAULT
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ba,pt %ncc, .do_copyout ! common code
25cf1a301a396c38e8adf52c15f537b80d2483f7jl or REAL_LOFAULT, %lo(.xcopyout_err), REAL_LOFAULT
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl/*
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * We got here because of fault during xcopyout
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * Errno value is in ERRNO
25cf1a301a396c38e8adf52c15f537b80d2483f7jl */
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.xcopyout_err:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldn [THREAD_REG + T_COPYOPS], %o4 ! check for copyop handler
25cf1a301a396c38e8adf52c15f537b80d2483f7jl tst %o4
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pt %ncc, 2f ! if not, return error
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldn [%o4 + CP_XCOPYOUT], %g2 ! if handler, invoke it with
25cf1a301a396c38e8adf52c15f537b80d2483f7jl jmp %g2 ! original arguments
25cf1a301a396c38e8adf52c15f537b80d2483f7jl restore %g0, 0, %g0 ! dispose of copy window
/* No copyops handler: pop the window, placing ERRNO in the caller's %o0. */
25cf1a301a396c38e8adf52c15f537b80d2483f7jl2:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ret
25cf1a301a396c38e8adf52c15f537b80d2483f7jl restore ERRNO, 0, %o0 ! return errno value
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
/*
 * Fault handler for the small-copy path.  Restores the t_lofault value
 * saved in %o4, reloads the original arguments from SM_SAVE_SRC,
 * SM_SAVE_DST and SM_SAVE_COUNT into %o0-%o2, then either jumps to the
 * copyops CP_XCOPYOUT handler or returns the value held in %g1 as the
 * errno.
 */
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.sm_xcopyout_err:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl membar #Sync
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stn %o4, [THREAD_REG + T_LOFAULT] ! restore old t_lofault
25cf1a301a396c38e8adf52c15f537b80d2483f7jl mov SM_SAVE_SRC, %o0
25cf1a301a396c38e8adf52c15f537b80d2483f7jl mov SM_SAVE_DST, %o1
25cf1a301a396c38e8adf52c15f537b80d2483f7jl mov SM_SAVE_COUNT, %o2
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldn [THREAD_REG + T_COPYOPS], %o3 ! check for copyop handler
25cf1a301a396c38e8adf52c15f537b80d2483f7jl tst %o3
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pt %ncc, 3f ! if not, return error
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldn [%o3 + CP_XCOPYOUT], %o5 ! if handler, invoke it with
25cf1a301a396c38e8adf52c15f537b80d2483f7jl jmp %o5 ! original arguments
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl3:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl retl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl or %g1, 0, %o0 ! return errno value
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl SET_SIZE(xcopyout)
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl#endif /* lint */
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl#ifdef lint
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl/*ARGSUSED*/
25cf1a301a396c38e8adf52c15f537b80d2483f7jlint
25cf1a301a396c38e8adf52c15f537b80d2483f7jlxcopyout_little(const void *kaddr, void *uaddr, size_t count)
25cf1a301a396c38e8adf52c15f537b80d2483f7jl{ return (0); }
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl#else /* lint */
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
/*
 * xcopyout_little: copy count bytes from kaddr to uaddr one byte at a
 * time, reversing their order: the byte at kaddr[count - 1 - i] is
 * stored to uaddr[i].  Stores are issued through ASI_AIUSL.
 *
 * Register use (arguments per the lint prototype above):
 *	%o0	kaddr on entry; rebased to kaddr + 2 * count - 1 so that
 *		%o0 + %o3 addresses the next source byte
 *	%o1	uaddr on entry; rebased to uaddr + count
 *	%o2	count
 *	%o3	negative index, runs from -count up to 0
 *	%o4	old t_lofault briefly, then the byte in flight
 *	%o5	.xcopyio_err address, then the saved old t_lofault
 *
 * Each iteration increments %o3 and subtracts 2 from %o0, so the source
 * address %o0 + %o3 moves back by one while the destination address
 * %o1 + %o3 moves forward by one.  "inccc" produces a carry only when
 * %o3 wraps to zero, which ends the "bcc" loop; because the branch is
 * annulled, the delay-slot load is skipped on that final pass.
 *
 * A count of zero skips the loop entirely.  Returns 0 on the
 * non-faulting path.  Faults are routed to .xcopyio_err, which is
 * defined outside this routine.
 */
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ENTRY(xcopyout_little)
25cf1a301a396c38e8adf52c15f537b80d2483f7jl sethi %hi(.xcopyio_err), %o5
25cf1a301a396c38e8adf52c15f537b80d2483f7jl or %o5, %lo(.xcopyio_err), %o5
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldn [THREAD_REG + T_LOFAULT], %o4
25cf1a301a396c38e8adf52c15f537b80d2483f7jl membar #Sync ! sync error barrier
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stn %o5, [THREAD_REG + T_LOFAULT]
/* Park the old handler in %o5; %o4 is reused below for the data byte. */
25cf1a301a396c38e8adf52c15f537b80d2483f7jl mov %o4, %o5
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
/*
 * %o3 = -count (also sets the zero flag tested by the bz below).  The
 * "sub" in the bz delay slot runs whether or not the branch is taken.
 */
25cf1a301a396c38e8adf52c15f537b80d2483f7jl subcc %g0, %o2, %o3
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add %o0, %o2, %o0
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pn %ncc, 2f ! check for zero bytes
25cf1a301a396c38e8adf52c15f537b80d2483f7jl sub %o2, 1, %o4
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add %o0, %o4, %o0 ! start w/last byte
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add %o1, %o2, %o1
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldub [%o0 + %o3], %o4
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
/* Store the byte in flight, step the index, and fetch the previous byte. */
25cf1a301a396c38e8adf52c15f537b80d2483f7jl1: stba %o4, [%o1 + %o3]ASI_AIUSL
25cf1a301a396c38e8adf52c15f537b80d2483f7jl inccc %o3
25cf1a301a396c38e8adf52c15f537b80d2483f7jl sub %o0, 2, %o0 ! get next byte
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bcc,a,pt %ncc, 1b
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldub [%o0 + %o3], %o4
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl2:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl membar #Sync ! sync error barrier
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stn %o5, [THREAD_REG + T_LOFAULT] ! restore old t_lofault
25cf1a301a396c38e8adf52c15f537b80d2483f7jl retl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl mov %g0, %o0 ! return (0)
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl SET_SIZE(xcopyout_little)
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl#endif /* lint */
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl/*
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * Copy user data to kernel space (copyin/xcopyin/xcopyin_little)
25cf1a301a396c38e8adf52c15f537b80d2483f7jl */
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl#if defined(lint)
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl/*ARGSUSED*/
25cf1a301a396c38e8adf52c15f537b80d2483f7jlint
25cf1a301a396c38e8adf52c15f537b80d2483f7jlcopyin(const void *uaddr, void *kaddr, size_t count)
25cf1a301a396c38e8adf52c15f537b80d2483f7jl{ return (0); }
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl#else /* lint */
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ENTRY(copyin)
25cf1a301a396c38e8adf52c15f537b80d2483f7jl cmp %o2, VIS_COPY_THRESHOLD ! check for leaf rtn case
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bleu,pt %ncc, .copyin_small ! go to larger cases
25cf1a301a396c38e8adf52c15f537b80d2483f7jl xor %o0, %o1, %o3 ! are src, dst alignable?
25cf1a301a396c38e8adf52c15f537b80d2483f7jl btst 7, %o3 !
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pt %ncc, .copyin_8 ! check for longword alignment
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl btst 1, %o3 !
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pt %ncc, .copyin_2 ! check for half-word
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl sethi %hi(hw_copy_limit_1), %o3 ! Check copy limit
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ld [%o3 + %lo(hw_copy_limit_1)], %o3
25cf1a301a396c38e8adf52c15f537b80d2483f7jl tst %o3
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pn %icc, .copyin_small ! if zero, disable HW copy
25cf1a301a396c38e8adf52c15f537b80d2483f7jl cmp %o2, %o3 ! if length <= limit
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bleu,pt %ncc, .copyin_small ! go to small copy
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ba,pt %ncc, .copyin_more ! otherwise go to large copy
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.copyin_2:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl btst 3, %o3 !
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pt %ncc, .copyin_4 ! check for word alignment
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl sethi %hi(hw_copy_limit_2), %o3 ! Check copy limit
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ld [%o3 + %lo(hw_copy_limit_2)], %o3
25cf1a301a396c38e8adf52c15f537b80d2483f7jl tst %o3
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pn %icc, .copyin_small ! if zero, disable HW copy
25cf1a301a396c38e8adf52c15f537b80d2483f7jl cmp %o2, %o3 ! if length <= limit
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bleu,pt %ncc, .copyin_small ! go to small copy
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ba,pt %ncc, .copyin_more ! otherwise go to large copy
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.copyin_4:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ! already checked longword, must be word aligned
25cf1a301a396c38e8adf52c15f537b80d2483f7jl sethi %hi(hw_copy_limit_4), %o3 ! Check copy limit
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ld [%o3 + %lo(hw_copy_limit_4)], %o3
25cf1a301a396c38e8adf52c15f537b80d2483f7jl tst %o3
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pn %icc, .copyin_small ! if zero, disable HW copy
25cf1a301a396c38e8adf52c15f537b80d2483f7jl cmp %o2, %o3 ! if length <= limit
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bleu,pt %ncc, .copyin_small ! go to small copy
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ba,pt %ncc, .copyin_more ! otherwise go to large copy
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.copyin_8:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl sethi %hi(hw_copy_limit_8), %o3 ! Check copy limit
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ld [%o3 + %lo(hw_copy_limit_8)], %o3
25cf1a301a396c38e8adf52c15f537b80d2483f7jl tst %o3
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pn %icc, .copyin_small ! if zero, disable HW copy
25cf1a301a396c38e8adf52c15f537b80d2483f7jl cmp %o2, %o3 ! if length <= limit
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bleu,pt %ncc, .copyin_small ! go to small copy
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ba,pt %ncc, .copyin_more ! otherwise go to large copy
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl .align 16
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop ! instruction alignment
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ! see discussion at start of file
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.copyin_small:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl sethi %hi(.sm_copyin_err), %o5 ! .sm_copyin_err is lofault
25cf1a301a396c38e8adf52c15f537b80d2483f7jl or %o5, %lo(.sm_copyin_err), %o5
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldn [THREAD_REG + T_LOFAULT], %o4 ! set/save t_lofault, no tramp
25cf1a301a396c38e8adf52c15f537b80d2483f7jl membar #Sync ! sync error barrier
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stn %o5, [THREAD_REG + T_LOFAULT]
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.sm_do_copyin:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl mov %o0, SM_SAVE_SRC
25cf1a301a396c38e8adf52c15f537b80d2483f7jl mov %o1, SM_SAVE_DST
25cf1a301a396c38e8adf52c15f537b80d2483f7jl cmp %o2, SHORTCOPY ! check for really short case
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bleu,pt %ncc, .ci_sm_left !
25cf1a301a396c38e8adf52c15f537b80d2483f7jl mov %o2, SM_SAVE_COUNT
25cf1a301a396c38e8adf52c15f537b80d2483f7jl cmp %o2, CHKSIZE ! check for medium length cases
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bgu,pn %ncc, .ci_med !
25cf1a301a396c38e8adf52c15f537b80d2483f7jl or %o0, %o1, %o3 ! prepare alignment check
25cf1a301a396c38e8adf52c15f537b80d2483f7jl andcc %o3, 0x3, %g0 ! test for alignment
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pt %ncc, .ci_sm_word ! branch to word aligned case
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.ci_sm_movebytes:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl sub %o2, 3, %o2 ! adjust count to allow cc zero test
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.ci_sm_notalign4:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl lduba [%o0]ASI_USER, %o3 ! read byte
25cf1a301a396c38e8adf52c15f537b80d2483f7jl subcc %o2, 4, %o2 ! reduce count by 4
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stb %o3, [%o1] ! write byte
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add %o0, 1, %o0 ! advance SRC by 1
25cf1a301a396c38e8adf52c15f537b80d2483f7jl lduba [%o0]ASI_USER, %o3 ! repeat for a total of 4 bytes
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add %o0, 1, %o0 ! advance SRC by 1
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stb %o3, [%o1 + 1]
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add %o1, 4, %o1 ! advance DST by 4
25cf1a301a396c38e8adf52c15f537b80d2483f7jl lduba [%o0]ASI_USER, %o3
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add %o0, 1, %o0 ! advance SRC by 1
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stb %o3, [%o1 - 2]
25cf1a301a396c38e8adf52c15f537b80d2483f7jl lduba [%o0]ASI_USER, %o3
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add %o0, 1, %o0 ! advance SRC by 1
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bgt,pt %ncc, .ci_sm_notalign4 ! loop til 3 or fewer bytes remain
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stb %o3, [%o1 - 1]
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add %o2, 3, %o2 ! restore count
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.ci_sm_left:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl tst %o2
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pt %ncc, .ci_sm_exit
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl lduba [%o0]ASI_USER, %o3 ! load one byte
25cf1a301a396c38e8adf52c15f537b80d2483f7jl deccc %o2 ! reduce count for cc test
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pt %ncc, .ci_sm_exit
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stb %o3,[%o1] ! store one byte
25cf1a301a396c38e8adf52c15f537b80d2483f7jl inc %o0
25cf1a301a396c38e8adf52c15f537b80d2483f7jl lduba [%o0]ASI_USER, %o3 ! load second byte
25cf1a301a396c38e8adf52c15f537b80d2483f7jl deccc %o2
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pt %ncc, .ci_sm_exit
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stb %o3,[%o1 + 1] ! store second byte
25cf1a301a396c38e8adf52c15f537b80d2483f7jl inc %o0
25cf1a301a396c38e8adf52c15f537b80d2483f7jl lduba [%o0]ASI_USER, %o3 ! load third byte
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stb %o3,[%o1 + 2] ! store third byte
25cf1a301a396c38e8adf52c15f537b80d2483f7jl membar #Sync ! sync error barrier
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stn %o4, [THREAD_REG + T_LOFAULT] ! restore old t_lofault
25cf1a301a396c38e8adf52c15f537b80d2483f7jl retl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl mov %g0, %o0 ! return 0
25cf1a301a396c38e8adf52c15f537b80d2483f7jl .align 16
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.ci_sm_words:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl lduwa [%o0]ASI_USER, %o3 ! read word
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.ci_sm_wordx:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl subcc %o2, 8, %o2 ! update count
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stw %o3, [%o1] ! write word
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add %o0, 4, %o0 ! update SRC
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add %o1, 8, %o1 ! update DST
25cf1a301a396c38e8adf52c15f537b80d2483f7jl lduwa [%o0]ASI_USER, %o3 ! read word
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add %o0, 4, %o0 ! update SRC
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bgt,pt %ncc, .ci_sm_words ! loop til done
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stw %o3, [%o1 - 4] ! write word
25cf1a301a396c38e8adf52c15f537b80d2483f7jl addcc %o2, 7, %o2 ! restore count
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pt %ncc, .ci_sm_exit
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl deccc %o2
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pt %ncc, .ci_sm_byte
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.ci_sm_half:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl subcc %o2, 2, %o2 ! reduce count by 2
25cf1a301a396c38e8adf52c15f537b80d2483f7jl lduha [%o0]ASI_USER, %o3 ! read half word
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add %o0, 2, %o0 ! advance SRC by 2
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add %o1, 2, %o1 ! advance DST by 2
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bgt,pt %ncc, .ci_sm_half ! loop til done
25cf1a301a396c38e8adf52c15f537b80d2483f7jl sth %o3, [%o1 - 2] ! write half word
25cf1a301a396c38e8adf52c15f537b80d2483f7jl addcc %o2, 1, %o2 ! restore count
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pt %ncc, .ci_sm_exit
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.ci_sm_byte:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl lduba [%o0]ASI_USER, %o3
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stb %o3, [%o1]
25cf1a301a396c38e8adf52c15f537b80d2483f7jl membar #Sync ! sync error barrier
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stn %o4, [THREAD_REG + T_LOFAULT] ! restore old t_lofault
25cf1a301a396c38e8adf52c15f537b80d2483f7jl retl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl mov %g0, %o0 ! return 0
25cf1a301a396c38e8adf52c15f537b80d2483f7jl .align 16
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.ci_sm_word:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl subcc %o2, 4, %o2 ! update count
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bgt,pt %ncc, .ci_sm_wordx
25cf1a301a396c38e8adf52c15f537b80d2483f7jl lduwa [%o0]ASI_USER, %o3 ! read word
25cf1a301a396c38e8adf52c15f537b80d2483f7jl addcc %o2, 3, %o2 ! restore count
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pt %ncc, .ci_sm_exit
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stw %o3, [%o1] ! write word
25cf1a301a396c38e8adf52c15f537b80d2483f7jl deccc %o2 ! reduce count for cc test
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add %o0, 4, %o0
25cf1a301a396c38e8adf52c15f537b80d2483f7jl lduba [%o0]ASI_USER, %o3 ! load one byte
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pt %ncc, .ci_sm_exit
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stb %o3, [%o1 + 4] ! store one byte
25cf1a301a396c38e8adf52c15f537b80d2483f7jl inc %o0
25cf1a301a396c38e8adf52c15f537b80d2483f7jl lduba [%o0]ASI_USER, %o3 ! load second byte
25cf1a301a396c38e8adf52c15f537b80d2483f7jl deccc %o2
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pt %ncc, .ci_sm_exit
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stb %o3, [%o1 + 5] ! store second byte
25cf1a301a396c38e8adf52c15f537b80d2483f7jl inc %o0
25cf1a301a396c38e8adf52c15f537b80d2483f7jl lduba [%o0]ASI_USER, %o3 ! load third byte
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stb %o3, [%o1 + 6] ! store third byte
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.ci_sm_exit:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl membar #Sync ! sync error barrier
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stn %o4, [THREAD_REG + T_LOFAULT] ! restore old t_lofault
25cf1a301a396c38e8adf52c15f537b80d2483f7jl retl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl mov %g0, %o0 ! return 0
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl .align 16
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.ci_med:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl xor %o0, %o1, %o3 ! setup alignment check
25cf1a301a396c38e8adf52c15f537b80d2483f7jl btst 1, %o3
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bnz,pt %ncc, .ci_sm_movebytes ! unaligned
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl btst 3, %o3
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bnz,pt %ncc, .ci_med_half ! halfword aligned
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl btst 7, %o3
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bnz,pt %ncc, .ci_med_word ! word aligned
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.ci_med_long:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl btst 3, %o0 ! check for
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pt %ncc, .ci_med_long1 ! word alignment
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.ci_med_long0:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl lduba [%o0]ASI_USER, %o3 ! load one byte
25cf1a301a396c38e8adf52c15f537b80d2483f7jl inc %o0
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stb %o3,[%o1] ! store byte
25cf1a301a396c38e8adf52c15f537b80d2483f7jl inc %o1
25cf1a301a396c38e8adf52c15f537b80d2483f7jl btst 3, %o0
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bnz,pt %ncc, .ci_med_long0
25cf1a301a396c38e8adf52c15f537b80d2483f7jl dec %o2
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.ci_med_long1: ! word aligned
25cf1a301a396c38e8adf52c15f537b80d2483f7jl btst 7, %o0 ! check for long word
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pt %ncc, .ci_med_long2
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl lduwa [%o0]ASI_USER, %o3 ! load word
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add %o0, 4, %o0 ! advance SRC by 4
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stw %o3, [%o1] ! store word
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add %o1, 4, %o1 ! advance DST by 4
25cf1a301a396c38e8adf52c15f537b80d2483f7jl sub %o2, 4, %o2 ! reduce count by 4
25cf1a301a396c38e8adf52c15f537b80d2483f7jl!
25cf1a301a396c38e8adf52c15f537b80d2483f7jl! Now long word aligned and have at least 32 bytes to move
25cf1a301a396c38e8adf52c15f537b80d2483f7jl!
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.ci_med_long2:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl sub %o2, 31, %o2 ! adjust count to allow cc zero test
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.ci_med_lmove:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldxa [%o0]ASI_USER, %o3 ! read long word
25cf1a301a396c38e8adf52c15f537b80d2483f7jl subcc %o2, 32, %o2 ! reduce count by 32
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stx %o3, [%o1] ! write long word
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add %o0, 8, %o0 ! advance SRC by 8
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldxa [%o0]ASI_USER, %o3 ! repeat for a total for 4 long words
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add %o0, 8, %o0 ! advance SRC by 8
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stx %o3, [%o1 + 8]
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add %o1, 32, %o1 ! advance DST by 32
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldxa [%o0]ASI_USER, %o3
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add %o0, 8, %o0 ! advance SRC by 8
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stx %o3, [%o1 - 16]
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldxa [%o0]ASI_USER, %o3
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add %o0, 8, %o0 ! advance SRC by 8
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bgt,pt %ncc, .ci_med_lmove ! loop til 31 or fewer bytes left
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stx %o3, [%o1 - 8]
25cf1a301a396c38e8adf52c15f537b80d2483f7jl addcc %o2, 24, %o2 ! restore count to long word offset
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ble,pt %ncc, .ci_med_lextra ! check for more long words to move
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.ci_med_lword:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldxa [%o0]ASI_USER, %o3 ! read long word
25cf1a301a396c38e8adf52c15f537b80d2483f7jl subcc %o2, 8, %o2 ! reduce count by 8
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stx %o3, [%o1] ! write long word
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add %o0, 8, %o0 ! advance SRC by 8
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bgt,pt %ncc, .ci_med_lword ! loop til 7 or fewer bytes left
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add %o1, 8, %o1 ! advance DST by 8
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.ci_med_lextra:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl addcc %o2, 7, %o2 ! restore rest of count
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pt %ncc, .ci_sm_exit ! if zero, then done
25cf1a301a396c38e8adf52c15f537b80d2483f7jl deccc %o2
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pt %ncc, .ci_sm_byte
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ba,pt %ncc, .ci_sm_half
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl .align 16
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop ! instruction alignment
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ! see discussion at start of file
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.ci_med_word:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl btst 3, %o0 ! check for
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pt %ncc, .ci_med_word1 ! word alignment
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.ci_med_word0:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl lduba [%o0]ASI_USER, %o3 ! load one byte
25cf1a301a396c38e8adf52c15f537b80d2483f7jl inc %o0
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stb %o3,[%o1] ! store byte
25cf1a301a396c38e8adf52c15f537b80d2483f7jl inc %o1
25cf1a301a396c38e8adf52c15f537b80d2483f7jl btst 3, %o0
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bnz,pt %ncc, .ci_med_word0
25cf1a301a396c38e8adf52c15f537b80d2483f7jl dec %o2
25cf1a301a396c38e8adf52c15f537b80d2483f7jl!
25cf1a301a396c38e8adf52c15f537b80d2483f7jl! Now word aligned and have at least 36 bytes to move
25cf1a301a396c38e8adf52c15f537b80d2483f7jl!
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.ci_med_word1:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl sub %o2, 15, %o2 ! adjust count to allow cc zero test
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.ci_med_wmove:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl lduwa [%o0]ASI_USER, %o3 ! read word
25cf1a301a396c38e8adf52c15f537b80d2483f7jl subcc %o2, 16, %o2 ! reduce count by 16
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stw %o3, [%o1] ! write word
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add %o0, 4, %o0 ! advance SRC by 4
25cf1a301a396c38e8adf52c15f537b80d2483f7jl lduwa [%o0]ASI_USER, %o3 ! repeat for a total for 4 words
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add %o0, 4, %o0 ! advance SRC by 4
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stw %o3, [%o1 + 4]
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add %o1, 16, %o1 ! advance DST by 16
25cf1a301a396c38e8adf52c15f537b80d2483f7jl lduwa [%o0]ASI_USER, %o3
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add %o0, 4, %o0 ! advance SRC by 4
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stw %o3, [%o1 - 8]
25cf1a301a396c38e8adf52c15f537b80d2483f7jl lduwa [%o0]ASI_USER, %o3
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add %o0, 4, %o0 ! advance SRC by 4
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bgt,pt %ncc, .ci_med_wmove ! loop til 15 or fewer bytes left
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stw %o3, [%o1 - 4]
25cf1a301a396c38e8adf52c15f537b80d2483f7jl addcc %o2, 12, %o2 ! restore count to word offset
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ble,pt %ncc, .ci_med_wextra ! check for more words to move
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.ci_med_word2:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl lduwa [%o0]ASI_USER, %o3 ! read word
25cf1a301a396c38e8adf52c15f537b80d2483f7jl subcc %o2, 4, %o2 ! reduce count by 4
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stw %o3, [%o1] ! write word
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add %o0, 4, %o0 ! advance SRC by 4
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bgt,pt %ncc, .ci_med_word2 ! loop til 3 or fewer bytes left
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add %o1, 4, %o1 ! advance DST by 4
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.ci_med_wextra:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl addcc %o2, 3, %o2 ! restore rest of count
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pt %ncc, .ci_sm_exit ! if zero, then done
25cf1a301a396c38e8adf52c15f537b80d2483f7jl deccc %o2
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pt %ncc, .ci_sm_byte
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ba,pt %ncc, .ci_sm_half
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl .align 16
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop ! instruction alignment
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ! see discussion at start of file
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.ci_med_half:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl btst 1, %o0 ! check for
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pt %ncc, .ci_med_half1 ! half word alignment
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl lduba [%o0]ASI_USER, %o3 ! load one byte
25cf1a301a396c38e8adf52c15f537b80d2483f7jl inc %o0
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stb %o3,[%o1] ! store byte
25cf1a301a396c38e8adf52c15f537b80d2483f7jl inc %o1
25cf1a301a396c38e8adf52c15f537b80d2483f7jl dec %o2
25cf1a301a396c38e8adf52c15f537b80d2483f7jl!
25cf1a301a396c38e8adf52c15f537b80d2483f7jl! Now half word aligned and have at least 38 bytes to move
25cf1a301a396c38e8adf52c15f537b80d2483f7jl!
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.ci_med_half1:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl sub %o2, 7, %o2 ! adjust count to allow cc zero test
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.ci_med_hmove:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl lduha [%o0]ASI_USER, %o3 ! read half word
25cf1a301a396c38e8adf52c15f537b80d2483f7jl subcc %o2, 8, %o2 ! reduce count by 8
25cf1a301a396c38e8adf52c15f537b80d2483f7jl sth %o3, [%o1] ! write half word
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add %o0, 2, %o0 ! advance SRC by 2
25cf1a301a396c38e8adf52c15f537b80d2483f7jl lduha [%o0]ASI_USER, %o3 ! repeat for a total for 4 halfwords
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add %o0, 2, %o0 ! advance SRC by 2
25cf1a301a396c38e8adf52c15f537b80d2483f7jl sth %o3, [%o1 + 2]
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add %o1, 8, %o1 ! advance DST by 8
25cf1a301a396c38e8adf52c15f537b80d2483f7jl lduha [%o0]ASI_USER, %o3
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add %o0, 2, %o0 ! advance SRC by 2
25cf1a301a396c38e8adf52c15f537b80d2483f7jl sth %o3, [%o1 - 4]
25cf1a301a396c38e8adf52c15f537b80d2483f7jl lduha [%o0]ASI_USER, %o3
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add %o0, 2, %o0 ! advance SRC by 2
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bgt,pt %ncc, .ci_med_hmove ! loop til 7 or fewer bytes left
25cf1a301a396c38e8adf52c15f537b80d2483f7jl sth %o3, [%o1 - 2]
25cf1a301a396c38e8adf52c15f537b80d2483f7jl addcc %o2, 7, %o2 ! restore count
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pt %ncc, .ci_sm_exit
25cf1a301a396c38e8adf52c15f537b80d2483f7jl deccc %o2
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pt %ncc, .ci_sm_byte
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ba,pt %ncc, .ci_sm_half
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.sm_copyin_err:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl membar #Sync
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stn %o4, [THREAD_REG + T_LOFAULT] ! restore old t_lofault
25cf1a301a396c38e8adf52c15f537b80d2483f7jl mov SM_SAVE_SRC, %o0
25cf1a301a396c38e8adf52c15f537b80d2483f7jl mov SM_SAVE_DST, %o1
25cf1a301a396c38e8adf52c15f537b80d2483f7jl mov SM_SAVE_COUNT, %o2
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldn [THREAD_REG + T_COPYOPS], %o3 ! check for copyop handler
25cf1a301a396c38e8adf52c15f537b80d2483f7jl tst %o3
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pt %ncc, 3f ! if not, return error
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldn [%o3 + CP_COPYIN], %o5 ! if handler, invoke it with
25cf1a301a396c38e8adf52c15f537b80d2483f7jl jmp %o5 ! original arguments
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl3:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl retl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl or %g0, -1, %o0 ! return errno value
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl SET_SIZE(copyin)
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl/*
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * The _more entry points are not intended to be used directly by
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * any caller from outside this file. They are provided to allow
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * profiling and dtrace of the portions of the copy code that use
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * the floating point registers.
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * This entry is particularly important as DTRACE (at least as of
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * 4/2004) does not support leaf functions.
25cf1a301a396c38e8adf52c15f537b80d2483f7jl */
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
/*
 * copyin_more -- large-copy path for copyin.  Data is moved through the
 * floating point registers and written with VIS_BLOCKSIZE-byte block
 * stores.
 *
 * Entry (before the save): %o0 = source, %o1 = destination,
 * %o2 = byte count.  The source is read through ASI_USER; the
 * destination is written with ordinary stores and ASI_BLK_P block
 * stores.
 *
 * Returns 0 on success.  On a fault, .copyin_err either jumps to the
 * CP_COPYIN handler found through t_copyops or returns -1.
 *
 * NOTE(review): REALSRC, SRC, DST, CNT, TMP, SAVE_SRC, SAVE_DST,
 * SAVE_COUNT and REAL_LOFAULT are register aliases defined elsewhere in
 * this file.  The notes below assume REALSRC/DST/CNT name the incoming
 * %i0/%i1/%i2 -- confirm against their definitions.
 */
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ENTRY(copyin_more)
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.copyin_more:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl prefetch [%o0], #n_reads ! begin fetching the source
25cf1a301a396c38e8adf52c15f537b80d2483f7jl save %sp, -SA(MINFRAME + HWCOPYFRAMESIZE), %sp ! new window and frame
25cf1a301a396c38e8adf52c15f537b80d2483f7jl set .copyin_err, REAL_LOFAULT ! error exit for this entry point
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl/*
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * Copy ins that reach here are larger than VIS_COPY_THRESHOLD bytes
25cf1a301a396c38e8adf52c15f537b80d2483f7jl */
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.do_copyin:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl set copyio_fault, %l7 ! copyio_fault is the new t_lofault value
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldn [THREAD_REG + T_LOFAULT], %l6 ! save existing handler
25cf1a301a396c38e8adf52c15f537b80d2483f7jl membar #Sync ! sync error barrier
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stn %l7, [THREAD_REG + T_LOFAULT] ! set t_lofault
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl mov %i0, SAVE_SRC ! keep copies of the incoming
25cf1a301a396c38e8adf52c15f537b80d2483f7jl mov %i1, SAVE_DST ! source, destination and
25cf1a301a396c38e8adf52c15f537b80d2483f7jl mov %i2, SAVE_COUNT ! count arguments
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
/*
 * NOTE(review): FP_NOMIGRATE is defined elsewhere; presumably it keeps
 * the thread from migrating while the FP registers are borrowed, and
 * its two arguments look like local label numbers -- confirm.
 */
25cf1a301a396c38e8adf52c15f537b80d2483f7jl FP_NOMIGRATE(6, 7)
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl rd %fprs, %o2 ! check for unused fp
25cf1a301a396c38e8adf52c15f537b80d2483f7jl st %o2, [%fp + STACK_BIAS - SAVED_FPRS_OFFSET] ! save orig %fprs
25cf1a301a396c38e8adf52c15f537b80d2483f7jl btst FPRS_FEF, %o2
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,a,pt %icc, .do_blockcopyin ! FEF clear: no FP state to save
25cf1a301a396c38e8adf52c15f537b80d2483f7jl wr %g0, FPRS_FEF, %fprs ! annulled slot: runs only if branch taken
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
/*
 * FEF was set on entry.  NOTE(review): BST_FPQ2Q4_TOSTACK is defined
 * elsewhere; presumably it spills the FP registers this routine
 * overwrites into the stack frame -- confirm.
 */
25cf1a301a396c38e8adf52c15f537b80d2483f7jl BST_FPQ2Q4_TOSTACK(%o2)
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.do_blockcopyin:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl rd %gsr, %o2
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stx %o2, [%fp + STACK_BIAS - SAVED_GSR_OFFSET] ! save gsr
25cf1a301a396c38e8adf52c15f537b80d2483f7jl or %l6, FPUSED_FLAG, %l6 ! tag the saved t_lofault value
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl andcc DST, VIS_BLOCKSIZE - 1, TMP ! TMP = DST offset within a block
25cf1a301a396c38e8adf52c15f537b80d2483f7jl mov ASI_USER, %asi ! loads via %asi now use ASI_USER
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pt %ncc, 2f ! DST already block aligned
25cf1a301a396c38e8adf52c15f537b80d2483f7jl neg TMP ! delay slot, executed on both paths
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add TMP, VIS_BLOCKSIZE, TMP ! TMP = VIS_BLOCKSIZE - offset
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ! TMP = bytes required to align DST on FP_BLOCK boundary
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ! Using SRC as a tmp here
25cf1a301a396c38e8adf52c15f537b80d2483f7jl cmp TMP, 3
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bleu,pt %ncc, 1f ! 3 or fewer: byte loop only
25cf1a301a396c38e8adf52c15f537b80d2483f7jl sub CNT,TMP,CNT ! adjust main count
25cf1a301a396c38e8adf52c15f537b80d2483f7jl sub TMP, 3, TMP ! adjust for end of loop test
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.ci_blkalign:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl lduba [REALSRC]%asi, SRC ! move 4 bytes per loop iteration
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stb SRC, [DST]
25cf1a301a396c38e8adf52c15f537b80d2483f7jl subcc TMP, 4, TMP
25cf1a301a396c38e8adf52c15f537b80d2483f7jl lduba [REALSRC + 1]%asi, SRC
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add REALSRC, 4, REALSRC
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stb SRC, [DST + 1]
25cf1a301a396c38e8adf52c15f537b80d2483f7jl lduba [REALSRC - 2]%asi, SRC
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add DST, 4, DST
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stb SRC, [DST - 2]
25cf1a301a396c38e8adf52c15f537b80d2483f7jl lduba [REALSRC - 1]%asi, SRC
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bgu,pt %ncc, .ci_blkalign
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stb SRC, [DST - 1]
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl addcc TMP, 3, TMP ! restore count adjustment
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pt %ncc, 2f ! no bytes left?
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl1: lduba [REALSRC]%asi, SRC ! remaining alignment bytes, one at a time
25cf1a301a396c38e8adf52c15f537b80d2483f7jl inc REALSRC
25cf1a301a396c38e8adf52c15f537b80d2483f7jl inc DST
25cf1a301a396c38e8adf52c15f537b80d2483f7jl deccc TMP
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bgu %ncc, 1b
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stb SRC, [DST - 1]
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl2:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl membar #StoreLoad
25cf1a301a396c38e8adf52c15f537b80d2483f7jl andn REALSRC, 0x7, SRC ! SRC = REALSRC rounded down to 8 bytes
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ! SRC - 8-byte aligned
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ! DST - 64-byte aligned
/*
 * Prime the pipeline: load the first eight source doublewords into
 * %f16-%f30, set the GSR alignment offset from REALSRC (alignaddr) and
 * start building the first output block in %f48-%f58 with faligndata.
 * %f16 is then reloaded with the first doubleword of the next block,
 * and CNT, SRC and REALSRC are stepped by one block.
 */
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldda [SRC]%asi, %f16
25cf1a301a396c38e8adf52c15f537b80d2483f7jl prefetcha [SRC + (1 * VIS_BLOCKSIZE)]%asi, #n_reads
25cf1a301a396c38e8adf52c15f537b80d2483f7jl alignaddr REALSRC, %g0, %g0
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldda [SRC + 0x08]%asi, %f18
25cf1a301a396c38e8adf52c15f537b80d2483f7jl prefetcha [SRC + (2 * VIS_BLOCKSIZE)]%asi, #n_reads
25cf1a301a396c38e8adf52c15f537b80d2483f7jl faligndata %f16, %f18, %f48
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldda [SRC + 0x10]%asi, %f20
c8a722abb8fd974fb16523acbd90ea75d5dcbeb2pm prefetcha [SRC + (3 * VIS_BLOCKSIZE)]%asi, #n_reads
25cf1a301a396c38e8adf52c15f537b80d2483f7jl faligndata %f18, %f20, %f50
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldda [SRC + 0x18]%asi, %f22
25cf1a301a396c38e8adf52c15f537b80d2483f7jl prefetcha [SRC + (4 * VIS_BLOCKSIZE)]%asi, #one_read
25cf1a301a396c38e8adf52c15f537b80d2483f7jl faligndata %f20, %f22, %f52
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldda [SRC + 0x20]%asi, %f24
c8a722abb8fd974fb16523acbd90ea75d5dcbeb2pm prefetcha [SRC + (8 * VIS_BLOCKSIZE)]%asi, #one_read
25cf1a301a396c38e8adf52c15f537b80d2483f7jl faligndata %f22, %f24, %f54
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldda [SRC + 0x28]%asi, %f26
c8a722abb8fd974fb16523acbd90ea75d5dcbeb2pm prefetcha [SRC + (12 * VIS_BLOCKSIZE)]%asi, #one_read
25cf1a301a396c38e8adf52c15f537b80d2483f7jl faligndata %f24, %f26, %f56
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldda [SRC + 0x30]%asi, %f28
c8a722abb8fd974fb16523acbd90ea75d5dcbeb2pm prefetcha [SRC + (16 * VIS_BLOCKSIZE)]%asi, #one_read
25cf1a301a396c38e8adf52c15f537b80d2483f7jl faligndata %f26, %f28, %f58
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldda [SRC + 0x38]%asi, %f30
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldda [SRC + VIS_BLOCKSIZE]%asi, %f16
25cf1a301a396c38e8adf52c15f537b80d2483f7jl sub CNT, VIS_BLOCKSIZE, CNT
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add SRC, VIS_BLOCKSIZE, SRC
c8a722abb8fd974fb16523acbd90ea75d5dcbeb2pm prefetcha [SRC + (19 * VIS_BLOCKSIZE)]%asi, #one_read
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add REALSRC, VIS_BLOCKSIZE, REALSRC
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ba,pt %ncc, 1f
c8a722abb8fd974fb16523acbd90ea75d5dcbeb2pm prefetcha [SRC + (23 * VIS_BLOCKSIZE)]%asi, #one_read
/*
 * Main loop.  Each pass completes the pending block (%f60, %f62),
 * block stores %f48-%f62 to DST, loads the next source doublewords and
 * starts aligning them into %f48-%f58.  It repeats while CNT is
 * greater than VIS_BLOCKSIZE + 8 (unsigned compare).
 */
25cf1a301a396c38e8adf52c15f537b80d2483f7jl .align 32
25cf1a301a396c38e8adf52c15f537b80d2483f7jl1:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldda [SRC + 0x08]%asi, %f18
25cf1a301a396c38e8adf52c15f537b80d2483f7jl faligndata %f28, %f30, %f60
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldda [SRC + 0x10]%asi, %f20
25cf1a301a396c38e8adf52c15f537b80d2483f7jl faligndata %f30, %f16, %f62
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stda %f48, [DST]ASI_BLK_P ! block store the completed block
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldda [SRC + 0x18]%asi, %f22
25cf1a301a396c38e8adf52c15f537b80d2483f7jl faligndata %f16, %f18, %f48
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldda [SRC + 0x20]%asi, %f24
25cf1a301a396c38e8adf52c15f537b80d2483f7jl faligndata %f18, %f20, %f50
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldda [SRC + 0x28]%asi, %f26
25cf1a301a396c38e8adf52c15f537b80d2483f7jl faligndata %f20, %f22, %f52
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldda [SRC + 0x30]%asi, %f28
25cf1a301a396c38e8adf52c15f537b80d2483f7jl faligndata %f22, %f24, %f54
c8a722abb8fd974fb16523acbd90ea75d5dcbeb2pm sub CNT, VIS_BLOCKSIZE, CNT
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldda [SRC + 0x38]%asi, %f30
25cf1a301a396c38e8adf52c15f537b80d2483f7jl faligndata %f24, %f26, %f56
c8a722abb8fd974fb16523acbd90ea75d5dcbeb2pm add DST, VIS_BLOCKSIZE, DST
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldda [SRC + VIS_BLOCKSIZE]%asi, %f16
25cf1a301a396c38e8adf52c15f537b80d2483f7jl faligndata %f26, %f28, %f58
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add REALSRC, VIS_BLOCKSIZE, REALSRC
c8a722abb8fd974fb16523acbd90ea75d5dcbeb2pm prefetcha [SRC + (3 * VIS_BLOCKSIZE)]%asi, #n_reads
c8a722abb8fd974fb16523acbd90ea75d5dcbeb2pm add SRC, VIS_BLOCKSIZE, SRC
c8a722abb8fd974fb16523acbd90ea75d5dcbeb2pm prefetcha [SRC + ((OLYMPUS_C_PREFETCH) * VIS_BLOCKSIZE)]%asi, #one_read
25cf1a301a396c38e8adf52c15f537b80d2483f7jl cmp CNT, VIS_BLOCKSIZE + 8
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bgu,pt %ncc, 1b
c8a722abb8fd974fb16523acbd90ea75d5dcbeb2pm prefetcha [SRC + ((OLYMPUS_C_2ND_PREFETCH) * VIS_BLOCKSIZE)]%asi, #one_read
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ! only if REALSRC & 0x7 is 0
25cf1a301a396c38e8adf52c15f537b80d2483f7jl cmp CNT, VIS_BLOCKSIZE
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bne %ncc, 3f ! not exactly one block left
25cf1a301a396c38e8adf52c15f537b80d2483f7jl andcc REALSRC, 0x7, %g0 ! delay slot: test source alignment
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pt %ncc, 2f ! aligned: copy last block with fsrc1
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl3:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl faligndata %f28, %f30, %f60 ! complete the pending block
25cf1a301a396c38e8adf52c15f537b80d2483f7jl faligndata %f30, %f16, %f62
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stda %f48, [DST]ASI_BLK_P ! and block store it
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add DST, VIS_BLOCKSIZE, DST
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ba,pt %ncc, 3f ! go copy any leftover bytes
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
/*
 * Exactly one block remains and REALSRC is 8-byte aligned: store the
 * pending block, then copy the last block with plain fsrc1 register
 * moves instead of faligndata.
 */
25cf1a301a396c38e8adf52c15f537b80d2483f7jl2:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldda [SRC + 0x08]%asi, %f18
25cf1a301a396c38e8adf52c15f537b80d2483f7jl fsrc1 %f28, %f60
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldda [SRC + 0x10]%asi, %f20
25cf1a301a396c38e8adf52c15f537b80d2483f7jl fsrc1 %f30, %f62
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stda %f48, [DST]ASI_BLK_P ! store the pending block
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldda [SRC + 0x18]%asi, %f22
25cf1a301a396c38e8adf52c15f537b80d2483f7jl fsrc1 %f16, %f48
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldda [SRC + 0x20]%asi, %f24
25cf1a301a396c38e8adf52c15f537b80d2483f7jl fsrc1 %f18, %f50
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldda [SRC + 0x28]%asi, %f26
25cf1a301a396c38e8adf52c15f537b80d2483f7jl fsrc1 %f20, %f52
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldda [SRC + 0x30]%asi, %f28
25cf1a301a396c38e8adf52c15f537b80d2483f7jl fsrc1 %f22, %f54
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldda [SRC + 0x38]%asi, %f30
25cf1a301a396c38e8adf52c15f537b80d2483f7jl fsrc1 %f24, %f56
25cf1a301a396c38e8adf52c15f537b80d2483f7jl sub CNT, VIS_BLOCKSIZE, CNT
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add DST, VIS_BLOCKSIZE, DST
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add SRC, VIS_BLOCKSIZE, SRC
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add REALSRC, VIS_BLOCKSIZE, REALSRC
25cf1a301a396c38e8adf52c15f537b80d2483f7jl fsrc1 %f26, %f58
25cf1a301a396c38e8adf52c15f537b80d2483f7jl fsrc1 %f28, %f60
25cf1a301a396c38e8adf52c15f537b80d2483f7jl fsrc1 %f30, %f62
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stda %f48, [DST]ASI_BLK_P ! store the final block
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add DST, VIS_BLOCKSIZE, DST
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ba,a,pt %ncc, 4f ! annulled: the nop below is skipped
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl3: tst CNT
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,a %ncc, 4f ! nothing left over
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl5: lduba [REALSRC]ASI_USER, TMP ! trailing bytes, one at a time
25cf1a301a396c38e8adf52c15f537b80d2483f7jl inc REALSRC
25cf1a301a396c38e8adf52c15f537b80d2483f7jl inc DST
25cf1a301a396c38e8adf52c15f537b80d2483f7jl deccc CNT
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bgu %ncc, 5b
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stb TMP, [DST - 1]
25cf1a301a396c38e8adf52c15f537b80d2483f7jl4:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.copyin_exit:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl membar #Sync
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldx [%fp + STACK_BIAS - SAVED_GSR_OFFSET], %o2 ! restore gsr
25cf1a301a396c38e8adf52c15f537b80d2483f7jl wr %o2, 0, %gsr
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ld [%fp + STACK_BIAS - SAVED_FPRS_OFFSET], %o3 ! %fprs value saved on entry
25cf1a301a396c38e8adf52c15f537b80d2483f7jl btst FPRS_FEF, %o3
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pt %icc, 4f ! FEF was clear on entry
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
/*
 * NOTE(review): BLD_FPQ2Q4_FROMSTACK is defined elsewhere; presumably it
 * reloads the FP registers spilled by BST_FPQ2Q4_TOSTACK -- confirm.
 */
25cf1a301a396c38e8adf52c15f537b80d2483f7jl BLD_FPQ2Q4_FROMSTACK(%o2)
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ba,pt %ncc, 1f
25cf1a301a396c38e8adf52c15f537b80d2483f7jl wr %o3, 0, %fprs ! restore fprs
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
/*
 * NOTE(review): FZEROQ2Q4 is defined elsewhere; presumably it zeroes the
 * FP registers used above so no copied data is left in them -- confirm.
 */
25cf1a301a396c38e8adf52c15f537b80d2483f7jl4:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl FZEROQ2Q4
25cf1a301a396c38e8adf52c15f537b80d2483f7jl wr %o3, 0, %fprs ! restore fprs
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl1:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl membar #Sync ! sync error barrier
25cf1a301a396c38e8adf52c15f537b80d2483f7jl andn %l6, FPUSED_FLAG, %l6 ! strip the FPUSED_FLAG tag
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stn %l6, [THREAD_REG + T_LOFAULT] ! restore old t_lofault
/* NOTE(review): presumably undoes FP_NOMIGRATE above -- confirm. */
25cf1a301a396c38e8adf52c15f537b80d2483f7jl FP_ALLOWMIGRATE(5, 6)
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ret
25cf1a301a396c38e8adf52c15f537b80d2483f7jl restore %g0, 0, %o0 ! return 0
25cf1a301a396c38e8adf52c15f537b80d2483f7jl/*
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * We got here because of a fault during copyin
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * Errno value is in ERRNO, but DDI/DKI says return -1 (sigh).
25cf1a301a396c38e8adf52c15f537b80d2483f7jl */
/*
 * NOTE(review): this label is only referenced through REAL_LOFAULT, so
 * it is presumably reached from the copyio_fault handler, which would
 * also have to put the original arguments back in %i0-%i2 before the
 * restore below hands them to the copyops handler -- confirm.
 */
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.copyin_err:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldn [THREAD_REG + T_COPYOPS], %o4 ! check for copyop handler
25cf1a301a396c38e8adf52c15f537b80d2483f7jl tst %o4
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pt %ncc, 2f ! if not, return error
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldn [%o4 + CP_COPYIN], %g2 ! if handler, invoke it with
25cf1a301a396c38e8adf52c15f537b80d2483f7jl jmp %g2 ! original arguments
25cf1a301a396c38e8adf52c15f537b80d2483f7jl restore %g0, 0, %g0 ! dispose of copy window
25cf1a301a396c38e8adf52c15f537b80d2483f7jl2:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ret
25cf1a301a396c38e8adf52c15f537b80d2483f7jl restore %g0, -1, %o0 ! return error value
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl SET_SIZE(copyin_more)
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl#endif /* lint */
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl#ifdef lint
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
/*
 * Lint-only stub for xcopyin.  It is compiled only when lint is defined;
 * the real routine is the assembly ENTRY(xcopyin) in the #else branch
 * that follows.  The stub ignores its arguments and returns 0.
 */
25cf1a301a396c38e8adf52c15f537b80d2483f7jl/*ARGSUSED*/
25cf1a301a396c38e8adf52c15f537b80d2483f7jlint
25cf1a301a396c38e8adf52c15f537b80d2483f7jlxcopyin(const void *uaddr, void *kaddr, size_t count)
25cf1a301a396c38e8adf52c15f537b80d2483f7jl{ return (0); }
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl#else /* lint */
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ENTRY(xcopyin)
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl cmp %o2, VIS_COPY_THRESHOLD ! check for leaf rtn case
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bleu,pt %ncc, .xcopyin_small ! go to larger cases
25cf1a301a396c38e8adf52c15f537b80d2483f7jl xor %o0, %o1, %o3 ! are src, dst alignable?
25cf1a301a396c38e8adf52c15f537b80d2483f7jl btst 7, %o3 !
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pt %ncc, .xcopyin_8 ! check for longword alignment
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl btst 1, %o3 !
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pt %ncc, .xcopyin_2 ! check for half-word
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl sethi %hi(hw_copy_limit_1), %o3 ! Check copy limit
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ld [%o3 + %lo(hw_copy_limit_1)], %o3
25cf1a301a396c38e8adf52c15f537b80d2483f7jl tst %o3
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pn %icc, .xcopyin_small ! if zero, disable HW copy
25cf1a301a396c38e8adf52c15f537b80d2483f7jl cmp %o2, %o3 ! if length <= limit
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bleu,pt %ncc, .xcopyin_small ! go to small copy
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ba,pt %ncc, .xcopyin_more ! otherwise go to large copy
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.xcopyin_2:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl btst 3, %o3 !
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pt %ncc, .xcopyin_4 ! check for word alignment
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl sethi %hi(hw_copy_limit_2), %o3 ! Check copy limit
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ld [%o3 + %lo(hw_copy_limit_2)], %o3
25cf1a301a396c38e8adf52c15f537b80d2483f7jl tst %o3
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pn %icc, .xcopyin_small ! if zero, disable HW copy
25cf1a301a396c38e8adf52c15f537b80d2483f7jl cmp %o2, %o3 ! if length <= limit
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bleu,pt %ncc, .xcopyin_small ! go to small copy
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ba,pt %ncc, .xcopyin_more ! otherwise go to large copy
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.xcopyin_4:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ! already checked longword, must be word aligned
25cf1a301a396c38e8adf52c15f537b80d2483f7jl sethi %hi(hw_copy_limit_4), %o3 ! Check copy limit
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ld [%o3 + %lo(hw_copy_limit_4)], %o3
25cf1a301a396c38e8adf52c15f537b80d2483f7jl tst %o3
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pn %icc, .xcopyin_small ! if zero, disable HW copy
25cf1a301a396c38e8adf52c15f537b80d2483f7jl cmp %o2, %o3 ! if length <= limit
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bleu,pt %ncc, .xcopyin_small ! go to small copy
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ba,pt %ncc, .xcopyin_more ! otherwise go to large copy
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.xcopyin_8:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl sethi %hi(hw_copy_limit_8), %o3 ! Check copy limit
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ld [%o3 + %lo(hw_copy_limit_8)], %o3
25cf1a301a396c38e8adf52c15f537b80d2483f7jl tst %o3
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pn %icc, .xcopyin_small ! if zero, disable HW copy
25cf1a301a396c38e8adf52c15f537b80d2483f7jl cmp %o2, %o3 ! if length <= limit
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bleu,pt %ncc, .xcopyin_small ! go to small copy
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ba,pt %ncc, .xcopyin_more ! otherwise go to large copy
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.xcopyin_small:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl sethi %hi(.sm_xcopyin_err), %o5 ! .sm_xcopyin_err is lofault value
25cf1a301a396c38e8adf52c15f537b80d2483f7jl or %o5, %lo(.sm_xcopyin_err), %o5
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldn [THREAD_REG + T_LOFAULT], %o4 ! set/save t_lofaul
25cf1a301a396c38e8adf52c15f537b80d2483f7jl membar #Sync ! sync error barrier
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ba,pt %ncc, .sm_do_copyin ! common code
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stn %o5, [THREAD_REG + T_LOFAULT]
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.xcopyin_more:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl save %sp, -SA(MINFRAME + HWCOPYFRAMESIZE), %sp
25cf1a301a396c38e8adf52c15f537b80d2483f7jl sethi %hi(.xcopyin_err), REAL_LOFAULT ! .xcopyin_err is lofault value
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ba,pt %ncc, .do_copyin
25cf1a301a396c38e8adf52c15f537b80d2483f7jl or REAL_LOFAULT, %lo(.xcopyin_err), REAL_LOFAULT
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl/*
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * We got here because of fault during xcopyin
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * Errno value is in ERRNO
25cf1a301a396c38e8adf52c15f537b80d2483f7jl */
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.xcopyin_err:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldn [THREAD_REG + T_COPYOPS], %o4 ! check for copyop handler
25cf1a301a396c38e8adf52c15f537b80d2483f7jl tst %o4
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pt %ncc, 2f ! if not, return error
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldn [%o4 + CP_XCOPYIN], %g2 ! if handler, invoke it with
25cf1a301a396c38e8adf52c15f537b80d2483f7jl jmp %g2 ! original arguments
25cf1a301a396c38e8adf52c15f537b80d2483f7jl restore %g0, 0, %g0 ! dispose of copy window
25cf1a301a396c38e8adf52c15f537b80d2483f7jl2:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ret
25cf1a301a396c38e8adf52c15f537b80d2483f7jl restore ERRNO, 0, %o0 ! return errno value
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.sm_xcopyin_err:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl membar #Sync
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stn %o4, [THREAD_REG + T_LOFAULT] ! restore old t_lofault
25cf1a301a396c38e8adf52c15f537b80d2483f7jl mov SM_SAVE_SRC, %o0
25cf1a301a396c38e8adf52c15f537b80d2483f7jl mov SM_SAVE_DST, %o1
25cf1a301a396c38e8adf52c15f537b80d2483f7jl mov SM_SAVE_COUNT, %o2
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldn [THREAD_REG + T_COPYOPS], %o3 ! check for copyop handler
25cf1a301a396c38e8adf52c15f537b80d2483f7jl tst %o3
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pt %ncc, 3f ! if not, return error
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldn [%o3 + CP_XCOPYIN], %o5 ! if handler, invoke it with
25cf1a301a396c38e8adf52c15f537b80d2483f7jl jmp %o5 ! original arguments
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl3:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl retl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl or %g1, 0, %o0 ! return errno value
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl SET_SIZE(xcopyin)
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl#endif /* lint */
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl#ifdef lint
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl/*ARGSUSED*/
25cf1a301a396c38e8adf52c15f537b80d2483f7jlint
25cf1a301a396c38e8adf52c15f537b80d2483f7jlxcopyin_little(const void *uaddr, void *kaddr, size_t count)
25cf1a301a396c38e8adf52c15f537b80d2483f7jl{ return (0); }
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl#else /* lint */
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ENTRY(xcopyin_little)
25cf1a301a396c38e8adf52c15f537b80d2483f7jl sethi %hi(.xcopyio_err), %o5 ! %o5 = address of .xcopyio_err
25cf1a301a396c38e8adf52c15f537b80d2483f7jl or %o5, %lo(.xcopyio_err), %o5
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldn [THREAD_REG + T_LOFAULT], %o4 ! %o4 = previous t_lofault
25cf1a301a396c38e8adf52c15f537b80d2483f7jl membar #Sync ! sync error barrier
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stn %o5, [THREAD_REG + T_LOFAULT] ! install .xcopyio_err as t_lofault
25cf1a301a396c38e8adf52c15f537b80d2483f7jl mov %o4, %o5 ! keep previous t_lofault in %o5 (%o4 is reused below)
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ! byte loop: reads the source from its last byte down, stores the destination from its first byte up
25cf1a301a396c38e8adf52c15f537b80d2483f7jl subcc %g0, %o2, %o3 ! %o3 = -count, index that counts up to 0
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add %o0, %o2, %o0 ! %o0 = src + count
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pn %ncc, 2f ! check for zero bytes
25cf1a301a396c38e8adf52c15f537b80d2483f7jl sub %o2, 1, %o4 ! (delay slot) %o4 = count - 1
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add %o0, %o4, %o0 ! start w/last byte
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add %o1, %o2, %o1 ! %o1 = dst + count, so %o1 + %o3 starts at dst
25cf1a301a396c38e8adf52c15f537b80d2483f7jl lduba [%o0 + %o3]ASI_AIUSL, %o4 ! first load is from src + count - 1
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl1: stb %o4, [%o1 + %o3] ! store the byte just loaded
25cf1a301a396c38e8adf52c15f537b80d2483f7jl inccc %o3 ! step index; carry is set when it wraps to 0
25cf1a301a396c38e8adf52c15f537b80d2483f7jl sub %o0, 2, %o0 ! get next byte (%o0 + %o3 drops by 1 overall)
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bcc,a,pt %ncc, 1b ! loop until the index wraps to 0
25cf1a301a396c38e8adf52c15f537b80d2483f7jl lduba [%o0 + %o3]ASI_AIUSL, %o4 ! (annulled delay slot) runs only when looping
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl2:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl membar #Sync ! sync error barrier
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stn %o5, [THREAD_REG + T_LOFAULT] ! restore old t_lofault
25cf1a301a396c38e8adf52c15f537b80d2483f7jl retl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl mov %g0, %o0 ! return (0)
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.xcopyio_err:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl membar #Sync ! sync error barrier
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stn %o5, [THREAD_REG + T_LOFAULT] ! restore old t_lofault
25cf1a301a396c38e8adf52c15f537b80d2483f7jl retl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl mov %g1, %o0 ! (delay slot) return the value in %g1 (presumably errno - confirm)
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl SET_SIZE(xcopyin_little)
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl#endif /* lint */
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl/*
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * copyin_noerr: copy a block of storage - must not overlap (from + len <= to).
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * No error code is returned; on a small-copy fault control jumps to the t_lofault value found on entry (to be called under on_fault())
25cf1a301a396c38e8adf52c15f537b80d2483f7jl */
25cf1a301a396c38e8adf52c15f537b80d2483f7jl#if defined(lint)
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl/* ARGSUSED */
25cf1a301a396c38e8adf52c15f537b80d2483f7jlvoid
25cf1a301a396c38e8adf52c15f537b80d2483f7jlcopyin_noerr(const void *ufrom, void *kto, size_t count)
25cf1a301a396c38e8adf52c15f537b80d2483f7jl{}
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl#else /* lint */
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ENTRY(copyin_noerr)
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ! %o0 = src, %o1 = dst, %o2 = length; picks small or large copy by length and alignment
25cf1a301a396c38e8adf52c15f537b80d2483f7jl cmp %o2, VIS_COPY_THRESHOLD ! check for leaf rtn case
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bleu,pt %ncc, .copyin_ne_small ! length <= threshold: go to small copy
25cf1a301a396c38e8adf52c15f537b80d2483f7jl xor %o0, %o1, %o3 ! (delay slot) are src, dst alignable?
25cf1a301a396c38e8adf52c15f537b80d2483f7jl btst 7, %o3 ! test low 3 bits of src ^ dst
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pt %ncc, .copyin_ne_8 ! check for longword alignment
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl btst 1, %o3 ! test low bit of src ^ dst
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pt %ncc, .copyin_ne_2 ! check for half-word
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl sethi %hi(hw_copy_limit_1), %o3 ! Check copy limit
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ld [%o3 + %lo(hw_copy_limit_1)], %o3
25cf1a301a396c38e8adf52c15f537b80d2483f7jl tst %o3
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pn %icc, .copyin_ne_small ! if zero, disable HW copy
25cf1a301a396c38e8adf52c15f537b80d2483f7jl cmp %o2, %o3 ! (delay slot) if length <= limit
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bleu,pt %ncc, .copyin_ne_small ! go to small copy
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ba,pt %ncc, .copyin_noerr_more ! otherwise go to large copy
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.copyin_ne_2:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl btst 3, %o3 ! test low 2 bits of src ^ dst
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pt %ncc, .copyin_ne_4 ! check for word alignment
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl sethi %hi(hw_copy_limit_2), %o3 ! Check copy limit
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ld [%o3 + %lo(hw_copy_limit_2)], %o3
25cf1a301a396c38e8adf52c15f537b80d2483f7jl tst %o3
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pn %icc, .copyin_ne_small ! if zero, disable HW copy
25cf1a301a396c38e8adf52c15f537b80d2483f7jl cmp %o2, %o3 ! (delay slot) if length <= limit
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bleu,pt %ncc, .copyin_ne_small ! go to small copy
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ba,pt %ncc, .copyin_noerr_more ! otherwise go to large copy
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.copyin_ne_4:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ! already checked longword, must be word aligned
25cf1a301a396c38e8adf52c15f537b80d2483f7jl sethi %hi(hw_copy_limit_4), %o3 ! Check copy limit
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ld [%o3 + %lo(hw_copy_limit_4)], %o3
25cf1a301a396c38e8adf52c15f537b80d2483f7jl tst %o3
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pn %icc, .copyin_ne_small ! if zero, disable HW copy
25cf1a301a396c38e8adf52c15f537b80d2483f7jl cmp %o2, %o3 ! (delay slot) if length <= limit
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bleu,pt %ncc, .copyin_ne_small ! go to small copy
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ba,pt %ncc, .copyin_noerr_more ! otherwise go to large copy
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.copyin_ne_8:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl sethi %hi(hw_copy_limit_8), %o3 ! Check copy limit
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ld [%o3 + %lo(hw_copy_limit_8)], %o3
25cf1a301a396c38e8adf52c15f537b80d2483f7jl tst %o3
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pn %icc, .copyin_ne_small ! if zero, disable HW copy
25cf1a301a396c38e8adf52c15f537b80d2483f7jl cmp %o2, %o3 ! (delay slot) if length <= limit
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bleu,pt %ncc, .copyin_ne_small ! go to small copy
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ba,pt %ncc, .copyin_noerr_more ! otherwise go to large copy
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.copyin_ne_small:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldn [THREAD_REG + T_LOFAULT], %o4 ! %o4 = t_lofault on entry
25cf1a301a396c38e8adf52c15f537b80d2483f7jl tst %o4
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pn %ncc, .sm_do_copyin ! none set: copy without installing a handler
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl sethi %hi(.sm_copyio_noerr), %o5 ! %o5 = address of .sm_copyio_noerr
25cf1a301a396c38e8adf52c15f537b80d2483f7jl or %o5, %lo(.sm_copyio_noerr), %o5
25cf1a301a396c38e8adf52c15f537b80d2483f7jl membar #Sync ! sync error barrier
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ba,pt %ncc, .sm_do_copyin ! common small-copy code
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stn %o5, [THREAD_REG + T_LOFAULT] ! (delay slot) set t_lofault; old value stays in %o4
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.copyin_noerr_more:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl save %sp, -SA(MINFRAME + HWCOPYFRAMESIZE), %sp ! new window and frame for the large copy
25cf1a301a396c38e8adf52c15f537b80d2483f7jl sethi %hi(.copyio_noerr), REAL_LOFAULT ! .copyio_noerr is lofault value
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ba,pt %ncc, .do_copyin ! common large-copy code
25cf1a301a396c38e8adf52c15f537b80d2483f7jl or REAL_LOFAULT, %lo(.copyio_noerr), REAL_LOFAULT ! (delay slot) finish building the address
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.copyio_noerr:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl jmp %l6 ! NOTE(review): %l6 presumably holds the saved t_lofault - confirm
25cf1a301a396c38e8adf52c15f537b80d2483f7jl restore %g0,0,%g0 ! (delay slot) dispose of copy window
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.sm_copyio_noerr:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl membar #Sync
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stn %o4, [THREAD_REG + T_LOFAULT] ! restore t_lofault
25cf1a301a396c38e8adf52c15f537b80d2483f7jl jmp %o4 ! continue in the handler that was set on entry
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl SET_SIZE(copyin_noerr)
25cf1a301a396c38e8adf52c15f537b80d2483f7jl#endif /* lint */
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl/*
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * copyout_noerr: copy a block of storage - must not overlap (from + len <= to).
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * No error code is returned; fault paths use .copyio_noerr and .sm_copyio_noerr, defined in copyin_noerr (to be called under on_fault())
25cf1a301a396c38e8adf52c15f537b80d2483f7jl */
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl#if defined(lint)
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl/* ARGSUSED */
25cf1a301a396c38e8adf52c15f537b80d2483f7jlvoid
25cf1a301a396c38e8adf52c15f537b80d2483f7jlcopyout_noerr(const void *kfrom, void *uto, size_t count)
25cf1a301a396c38e8adf52c15f537b80d2483f7jl{}
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl#else /* lint */
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ENTRY(copyout_noerr)
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ! %o0 = src, %o1 = dst, %o2 = length; picks small or large copy by length and alignment
25cf1a301a396c38e8adf52c15f537b80d2483f7jl cmp %o2, VIS_COPY_THRESHOLD ! check for leaf rtn case
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bleu,pt %ncc, .copyout_ne_small ! length <= threshold: go to small copy
25cf1a301a396c38e8adf52c15f537b80d2483f7jl xor %o0, %o1, %o3 ! (delay slot) are src, dst alignable?
25cf1a301a396c38e8adf52c15f537b80d2483f7jl btst 7, %o3 ! test low 3 bits of src ^ dst
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pt %ncc, .copyout_ne_8 ! check for longword alignment
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl btst 1, %o3 ! test low bit of src ^ dst
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pt %ncc, .copyout_ne_2 ! check for half-word
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl sethi %hi(hw_copy_limit_1), %o3 ! Check copy limit
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ld [%o3 + %lo(hw_copy_limit_1)], %o3
25cf1a301a396c38e8adf52c15f537b80d2483f7jl tst %o3
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pn %icc, .copyout_ne_small ! if zero, disable HW copy
25cf1a301a396c38e8adf52c15f537b80d2483f7jl cmp %o2, %o3 ! (delay slot) if length <= limit
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bleu,pt %ncc, .copyout_ne_small ! go to small copy
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ba,pt %ncc, .copyout_noerr_more ! otherwise go to large copy
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.copyout_ne_2:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl btst 3, %o3 ! test low 2 bits of src ^ dst
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pt %ncc, .copyout_ne_4 ! check for word alignment
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl sethi %hi(hw_copy_limit_2), %o3 ! Check copy limit
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ld [%o3 + %lo(hw_copy_limit_2)], %o3
25cf1a301a396c38e8adf52c15f537b80d2483f7jl tst %o3
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pn %icc, .copyout_ne_small ! if zero, disable HW copy
25cf1a301a396c38e8adf52c15f537b80d2483f7jl cmp %o2, %o3 ! (delay slot) if length <= limit
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bleu,pt %ncc, .copyout_ne_small ! go to small copy
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ba,pt %ncc, .copyout_noerr_more ! otherwise go to large copy
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.copyout_ne_4:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ! already checked longword, must be word aligned
25cf1a301a396c38e8adf52c15f537b80d2483f7jl sethi %hi(hw_copy_limit_4), %o3 ! Check copy limit
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ld [%o3 + %lo(hw_copy_limit_4)], %o3
25cf1a301a396c38e8adf52c15f537b80d2483f7jl tst %o3
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pn %icc, .copyout_ne_small ! if zero, disable HW copy
25cf1a301a396c38e8adf52c15f537b80d2483f7jl cmp %o2, %o3 ! (delay slot) if length <= limit
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bleu,pt %ncc, .copyout_ne_small ! go to small copy
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ba,pt %ncc, .copyout_noerr_more ! otherwise go to large copy
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.copyout_ne_8:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl sethi %hi(hw_copy_limit_8), %o3 ! Check copy limit
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ld [%o3 + %lo(hw_copy_limit_8)], %o3
25cf1a301a396c38e8adf52c15f537b80d2483f7jl tst %o3
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pn %icc, .copyout_ne_small ! if zero, disable HW copy
25cf1a301a396c38e8adf52c15f537b80d2483f7jl cmp %o2, %o3 ! (delay slot) if length <= limit
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bleu,pt %ncc, .copyout_ne_small ! go to small copy
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ba,pt %ncc, .copyout_noerr_more ! otherwise go to large copy
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.copyout_ne_small:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldn [THREAD_REG + T_LOFAULT], %o4 ! %o4 = t_lofault on entry
25cf1a301a396c38e8adf52c15f537b80d2483f7jl tst %o4
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pn %ncc, .sm_do_copyout ! none set: copy without installing a handler
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl sethi %hi(.sm_copyio_noerr), %o5 ! %o5 = address of .sm_copyio_noerr
25cf1a301a396c38e8adf52c15f537b80d2483f7jl or %o5, %lo(.sm_copyio_noerr), %o5
25cf1a301a396c38e8adf52c15f537b80d2483f7jl membar #Sync ! sync error barrier
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ba,pt %ncc, .sm_do_copyout ! common small-copy code
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stn %o5, [THREAD_REG + T_LOFAULT] ! (delay slot) set t_lofault; old value stays in %o4
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.copyout_noerr_more:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl save %sp, -SA(MINFRAME + HWCOPYFRAMESIZE), %sp ! new window and frame for the large copy
25cf1a301a396c38e8adf52c15f537b80d2483f7jl sethi %hi(.copyio_noerr), REAL_LOFAULT ! .copyio_noerr is lofault value
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ba,pt %ncc, .do_copyout ! common large-copy code
25cf1a301a396c38e8adf52c15f537b80d2483f7jl or REAL_LOFAULT, %lo(.copyio_noerr), REAL_LOFAULT ! (delay slot) finish building the address
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl SET_SIZE(copyout_noerr)
25cf1a301a396c38e8adf52c15f537b80d2483f7jl#endif /* lint */
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl/*
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * hwblkclr - clears block-aligned, block-multiple-sized regions that are
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * at least 256 bytes in length using spitfire's block stores. If
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * the criteria for using this routine are not met then it calls bzero
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * and returns 1. Otherwise 0 is returned indicating success.
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * Caller is responsible for ensuring use_hw_bzero is true and that
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * kpreempt_disable() has been called.
25cf1a301a396c38e8adf52c15f537b80d2483f7jl */
25cf1a301a396c38e8adf52c15f537b80d2483f7jl#ifdef lint
25cf1a301a396c38e8adf52c15f537b80d2483f7jl/*ARGSUSED*/
25cf1a301a396c38e8adf52c15f537b80d2483f7jlint
25cf1a301a396c38e8adf52c15f537b80d2483f7jlhwblkclr(void *addr, size_t len)
25cf1a301a396c38e8adf52c15f537b80d2483f7jl{
25cf1a301a396c38e8adf52c15f537b80d2483f7jl return(0);
25cf1a301a396c38e8adf52c15f537b80d2483f7jl}
25cf1a301a396c38e8adf52c15f537b80d2483f7jl#else /* lint */
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ! %i0 - start address
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ! %i1 - length of region (multiple of 64)
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ! %l0 - saved fprs
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ! %l1 - pointer to saved %d0 block
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ! %l2 - saved curthread->t_lwp (NOTE(review): %l2 is not referenced in the code below)
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ENTRY(hwblkclr)
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ! get another window w/space for one aligned block of saved fpregs
25cf1a301a396c38e8adf52c15f537b80d2483f7jl save %sp, -SA(MINFRAME + 2*VIS_BLOCKSIZE), %sp
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ! Must be block-aligned
25cf1a301a396c38e8adf52c15f537b80d2483f7jl andcc %i0, (VIS_BLOCKSIZE-1), %g0
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bnz,pn %ncc, 1f ! misaligned start: punt to bzero
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ! ... and must be 256 bytes or more
25cf1a301a396c38e8adf52c15f537b80d2483f7jl cmp %i1, 256
25cf1a301a396c38e8adf52c15f537b80d2483f7jl blu,pn %ncc, 1f ! shorter than 256: punt to bzero
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ! ... and length must be a multiple of VIS_BLOCKSIZE
25cf1a301a396c38e8adf52c15f537b80d2483f7jl andcc %i1, (VIS_BLOCKSIZE-1), %g0
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pn %ncc, 2f ! all criteria met: use block stores
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl1: ! punt, call bzero but notify the caller that bzero was used
25cf1a301a396c38e8adf52c15f537b80d2483f7jl mov %i0, %o0
25cf1a301a396c38e8adf52c15f537b80d2483f7jl call bzero
25cf1a301a396c38e8adf52c15f537b80d2483f7jl mov %i1, %o1 ! (delay slot) second argument to bzero
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ret
25cf1a301a396c38e8adf52c15f537b80d2483f7jl restore %g0, 1, %o0 ! return (1) - did not use block operations
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl2: rd %fprs, %l0 ! check for unused fp
25cf1a301a396c38e8adf52c15f537b80d2483f7jl btst FPRS_FEF, %l0
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,pt %icc, 1f ! FEF clear: skip saving the fp registers
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ! save in-use fpregs on stack
25cf1a301a396c38e8adf52c15f537b80d2483f7jl membar #Sync
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add %fp, STACK_BIAS - 65, %l1 ! address just below the top of this frame
25cf1a301a396c38e8adf52c15f537b80d2483f7jl and %l1, -VIS_BLOCKSIZE, %l1 ! round down to a VIS_BLOCKSIZE boundary
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stda %d0, [%l1]ASI_BLK_P ! block-store the registers starting at %d0
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl1: membar #StoreStore|#StoreLoad|#LoadStore
25cf1a301a396c38e8adf52c15f537b80d2483f7jl wr %g0, FPRS_FEF, %fprs ! set fprs to FPRS_FEF for the fp block stores
25cf1a301a396c38e8adf52c15f537b80d2483f7jl wr %g0, ASI_BLK_P, %asi ! %asi is used by the stda instructions below
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ! Clear block
25cf1a301a396c38e8adf52c15f537b80d2483f7jl fzero %d0
25cf1a301a396c38e8adf52c15f537b80d2483f7jl fzero %d2
25cf1a301a396c38e8adf52c15f537b80d2483f7jl fzero %d4
25cf1a301a396c38e8adf52c15f537b80d2483f7jl fzero %d6
25cf1a301a396c38e8adf52c15f537b80d2483f7jl fzero %d8
25cf1a301a396c38e8adf52c15f537b80d2483f7jl fzero %d10
25cf1a301a396c38e8adf52c15f537b80d2483f7jl fzero %d12
25cf1a301a396c38e8adf52c15f537b80d2483f7jl fzero %d14
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl mov 256, %i3 ! %i3 = bytes cleared per full pass
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ba,pt %ncc, .pz_doblock
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.pz_blkstart:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ! stda %d0, [%i0 + 192]%asi ! in dly slot of branch that got us here
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stda %d0, [%i0 + 128]%asi
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stda %d0, [%i0 + 64]%asi
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stda %d0, [%i0]%asi
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.pz_zinst:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add %i0, %i3, %i0 ! advance address by the amount just cleared
25cf1a301a396c38e8adf52c15f537b80d2483f7jl sub %i1, %i3, %i1 ! and reduce the remaining length
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.pz_doblock:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl cmp %i1, 256
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bgeu,a %ncc, .pz_blkstart ! 256 or more left: do a full pass
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stda %d0, [%i0 + 192]%asi ! (annulled delay slot) runs only when branch is taken
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl cmp %i1, 64
25cf1a301a396c38e8adf52c15f537b80d2483f7jl blu %ncc, .pz_finish ! less than one block left: done
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl andn %i1, (64-1), %i3 ! (delay slot) %i3 = remaining length rounded down to 64
25cf1a301a396c38e8adf52c15f537b80d2483f7jl srl %i3, 4, %i2 ! using blocks, 1 instr / 16 words
25cf1a301a396c38e8adf52c15f537b80d2483f7jl set .pz_zinst, %i4
25cf1a301a396c38e8adf52c15f537b80d2483f7jl sub %i4, %i2, %i4 ! back up 4 bytes (one stda) per 64-byte block left
25cf1a301a396c38e8adf52c15f537b80d2483f7jl jmp %i4 ! enter the stda run so only the needed stores execute
25cf1a301a396c38e8adf52c15f537b80d2483f7jl nop
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.pz_finish:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl membar #Sync
25cf1a301a396c38e8adf52c15f537b80d2483f7jl btst FPRS_FEF, %l0 ! was FEF set in the saved fprs?
25cf1a301a396c38e8adf52c15f537b80d2483f7jl bz,a .pz_finished ! no: nothing was saved, just restore fprs
25cf1a301a396c38e8adf52c15f537b80d2483f7jl wr %l0, 0, %fprs ! restore fprs (annulled delay slot)
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ! restore fpregs from stack
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldda [%l1]ASI_BLK_P, %d0
25cf1a301a396c38e8adf52c15f537b80d2483f7jl membar #Sync
25cf1a301a396c38e8adf52c15f537b80d2483f7jl wr %l0, 0, %fprs ! restore fprs
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl.pz_finished:
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ret
25cf1a301a396c38e8adf52c15f537b80d2483f7jl restore %g0, 0, %o0 ! return (0) - block operations were used
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl SET_SIZE(hwblkclr)
25cf1a301a396c38e8adf52c15f537b80d2483f7jl#endif /* lint */
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl#ifdef lint
25cf1a301a396c38e8adf52c15f537b80d2483f7jl/*ARGSUSED*/
25cf1a301a396c38e8adf52c15f537b80d2483f7jlvoid
25cf1a301a396c38e8adf52c15f537b80d2483f7jlhw_pa_bcopy32(uint64_t src, uint64_t dst)
25cf1a301a396c38e8adf52c15f537b80d2483f7jl{}
25cf1a301a396c38e8adf52c15f537b80d2483f7jl#else /*!lint */
25cf1a301a396c38e8adf52c15f537b80d2483f7jl /*
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * Copy 32 bytes of data from src (%o0) to dst (%o1)
25cf1a301a396c38e8adf52c15f537b80d2483f7jl * using physical addresses, with PSTATE_IE cleared for the duration.
25cf1a301a396c38e8adf52c15f537b80d2483f7jl */
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ENTRY_NP(hw_pa_bcopy32)
25cf1a301a396c38e8adf52c15f537b80d2483f7jl rdpr %pstate, %g1 ! %g1 = pstate on entry
25cf1a301a396c38e8adf52c15f537b80d2483f7jl andn %g1, PSTATE_IE, %g2 ! %g2 = pstate with PSTATE_IE cleared
25cf1a301a396c38e8adf52c15f537b80d2483f7jl wrpr %g0, %g2, %pstate ! write the modified pstate
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl rdpr %pstate, %g0 ! NOTE(review): result discarded; presumably ensures the wrpr took effect - confirm
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldxa [%o0]ASI_MEM, %o2 ! load source bytes 0-7
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add %o0, 8, %o0
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldxa [%o0]ASI_MEM, %o3 ! load source bytes 8-15
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add %o0, 8, %o0
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldxa [%o0]ASI_MEM, %o4 ! load source bytes 16-23
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add %o0, 8, %o0
25cf1a301a396c38e8adf52c15f537b80d2483f7jl ldxa [%o0]ASI_MEM, %o5 ! load source bytes 24-31
25cf1a301a396c38e8adf52c15f537b80d2483f7jl membar #Sync ! all four loads are issued before any store
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stxa %o2, [%o1]ASI_MEM ! store destination bytes 0-7
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add %o1, 8, %o1
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stxa %o3, [%o1]ASI_MEM ! store destination bytes 8-15
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add %o1, 8, %o1
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stxa %o4, [%o1]ASI_MEM ! store destination bytes 16-23
25cf1a301a396c38e8adf52c15f537b80d2483f7jl add %o1, 8, %o1
25cf1a301a396c38e8adf52c15f537b80d2483f7jl stxa %o5, [%o1]ASI_MEM ! store destination bytes 24-31
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl retl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl wrpr %g0, %g1, %pstate ! (delay slot) restore pstate from entry
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl SET_SIZE(hw_pa_bcopy32)
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl#endif /* lint */
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl#if defined(lint)
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jlint use_hw_bcopy = 1; /* lint view of the data word defined below; initial value 1 */
25cf1a301a396c38e8adf52c15f537b80d2483f7jlint use_hw_bzero = 1; /* lint view of the data word defined below; initial value 1 */
25cf1a301a396c38e8adf52c15f537b80d2483f7jluint_t hw_copy_limit_1 = 0; /* the four hw_copy_limit_N words all start at 0 */
25cf1a301a396c38e8adf52c15f537b80d2483f7jluint_t hw_copy_limit_2 = 0;
25cf1a301a396c38e8adf52c15f537b80d2483f7jluint_t hw_copy_limit_4 = 0;
25cf1a301a396c38e8adf52c15f537b80d2483f7jluint_t hw_copy_limit_8 = 0;
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl#else /* !lint */
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl DGDEF(use_hw_bcopy) ! NOTE(review): presumably a switch read by the copy routines elsewhere in this file - confirm
25cf1a301a396c38e8adf52c15f537b80d2483f7jl .word 1 ! initial value 1, matches the lint definition
25cf1a301a396c38e8adf52c15f537b80d2483f7jl DGDEF(use_hw_bzero) ! NOTE(review): presumably a switch read by the zeroing routines elsewhere in this file - confirm
25cf1a301a396c38e8adf52c15f537b80d2483f7jl .word 1 ! initial value 1, matches the lint definition
25cf1a301a396c38e8adf52c15f537b80d2483f7jl DGDEF(hw_copy_limit_1) ! NOTE(review): the _1/_2/_4/_8 suffixes look like alignment classes - confirm against the users
25cf1a301a396c38e8adf52c15f537b80d2483f7jl .word 0 ! initial value 0
25cf1a301a396c38e8adf52c15f537b80d2483f7jl DGDEF(hw_copy_limit_2)
25cf1a301a396c38e8adf52c15f537b80d2483f7jl .word 0 ! initial value 0
25cf1a301a396c38e8adf52c15f537b80d2483f7jl DGDEF(hw_copy_limit_4)
25cf1a301a396c38e8adf52c15f537b80d2483f7jl .word 0 ! initial value 0
25cf1a301a396c38e8adf52c15f537b80d2483f7jl DGDEF(hw_copy_limit_8)
25cf1a301a396c38e8adf52c15f537b80d2483f7jl .word 0 ! initial value 0
25cf1a301a396c38e8adf52c15f537b80d2483f7jl
25cf1a301a396c38e8adf52c15f537b80d2483f7jl .align 64 ! 64-byte alignment, issued before the section switch below
25cf1a301a396c38e8adf52c15f537b80d2483f7jl .section ".text" ! anything assembled after this point goes to .text
25cf1a301a396c38e8adf52c15f537b80d2483f7jl#endif /* !lint */