/*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#include "precompiled.hpp"
#include "asm/assembler.hpp"
#include "assembler_sparc.inline.hpp"
#include "interpreter/interpreter.hpp"
#include "nativeInst_sparc.hpp"
#include "oops/instanceOop.hpp"
#include "oops/methodOop.hpp"
#include "oops/objArrayKlass.hpp"
#include "oops/oop.inline.hpp"
#include "prims/methodHandles.hpp"
#include "runtime/frame.inline.hpp"
#include "runtime/handles.inline.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/stubCodeGenerator.hpp"
#include "runtime/stubRoutines.hpp"
#ifdef TARGET_OS_FAMILY_linux
# include "thread_linux.inline.hpp"
#endif
#ifdef TARGET_OS_FAMILY_solaris
# include "thread_solaris.inline.hpp"
#endif
#ifdef COMPILER2
#include "opto/runtime.hpp"
#endif
// Declaration and definition of StubGenerator (no .hpp file).
// For a more detailed description of the stub routine structure
// see the comment in stubRoutines.hpp.
#define __ _masm->
#ifdef PRODUCT
#define BLOCK_COMMENT(str) /* nothing */
#else
#define BLOCK_COMMENT(str) __ block_comment(str)
#endif
// Note: The register L7 is used as L7_thread_cache, and may not be used
// any other way within this module.
// -------------------------------------------------------------------------------------------------------------------------
// Stub Code definitions
static address handle_unsafe_access() {
JavaThread* thread = JavaThread::current();
address pc  = thread->saved_exception_pc();
address npc = thread->saved_exception_npc();
// pc is the instruction which we must emulate
// doing a no-op is fine: return garbage from the load
// request an async exception
thread->set_pending_unsafe_access_error();
// return address of next instruction to execute
return npc;
}
class StubGenerator: public StubCodeGenerator {
private:
#ifdef PRODUCT
#define inc_counter_np(a,b,c) (0)
#else
#define inc_counter_np(counter, t1, t2) \
BLOCK_COMMENT("inc_counter " #counter); \
__ inc_counter(&counter, t1, t2);
#endif
//----------------------------------------------------------------------------------------------------
// Call stubs are used to call Java from C
// Incoming arguments:
//
// o0 : call wrapper address
// o1 : result (address)
// o2 : result type
// o3 : method
// o4 : (interpreter) entry point
// o5 : parameters (address)
// [sp + 0x5c]: parameter size (in words)
// [sp + 0x60]: thread
//
// +---------------+ <--- sp + 0
// | |
// . reg save area .
// | |
// +---------------+ <--- sp + 0x40
// | |
// . extra 7 slots .
// | |
// +---------------+ <--- sp + 0x5c
// | param. size |
// +---------------+ <--- sp + 0x60
// | thread |
// +---------------+
// | |
// note: if the link argument position changes, adjust
// the code in frame::entry_frame_call_wrapper()
// setup thread register
#ifdef ASSERT
// make sure we have no pending exceptions
{ const Register t = G3_scratch;
Label L;
__ ld_ptr(G2_thread, in_bytes(Thread::pending_exception_offset()), t);
__ br_null_short(t, Assembler::pt, L);
__ stop("StubRoutines::call_stub: entered with pending exception");
__ bind(L);
}
#endif
// create activation frame & allocate space for parameters
{ const Register t = G3_scratch;
}
// +---------------+ <--- sp + 0
// | |
// . reg save area .
// | |
// +---------------+ <--- sp + 0x40
// | |
// . extra 7 slots .
// | |
// +---------------+ <--- sp + 0x5c
// | empty slot | (only if parameter size is even)
// +---------------+
// | |
// . parameters .
// | |
// +---------------+ <--- fp + 0
// | |
// . reg save area .
// | |
// +---------------+ <--- fp + 0x40
// | |
// . extra 7 slots .
// | |
// +---------------+ <--- fp + 0x5c
// | param. size |
// +---------------+ <--- fp + 0x60
// | thread |
// +---------------+
// | |
// pass parameters if any
BLOCK_COMMENT("pass parameters if any");
// test if any parameters & setup of Lentry_args
// copy parameters if any
// Store parameter value
// done
}
// setup parameters, method & call Java function
#ifdef ASSERT
// layout_activation_impl checks its notion of saved SP against
// this register, so if this changes update it as well.
const Register saved_SP = Lscratch;
__ mov(SP, saved_SP); // keep track of SP before call
#endif
// setup parameters
const Register t = G3_scratch;
#ifdef _LP64
__ add(Gargs, STACK_BIAS, Gargs); // account for LP64 stack bias
#endif
// do the call
//
// the following registers must be set up:
//
// G2_thread
// G5_method
// Gargs
BLOCK_COMMENT("call Java function");
BLOCK_COMMENT("call_stub_return_address:");
// The callee, if it wasn't interpreted, can return with SP changed so
// we can no longer assert that SP is unchanged.
// store result depending on type
// (everything that is not T_OBJECT, T_LONG, T_FLOAT, or T_DOUBLE
// is treated as T_INT)
// store int result
#ifdef _LP64
#else
#if defined(COMPILER2)
// All return values are where we want them, except for Longs.  C2 returns
// longs in G1 in the 32-bit build whereas the interpreter wants them in O0/O1,
// so we simply always use G1.
// Note: I tried to make C2 return longs in O0/O1 and G1 so we wouldn't have to
// do this here. Unfortunately if we did a rethrow we'd see a MachEpilog node
// first, which would move G1 -> O0/O1 and destroy the exception we were throwing.
#else
#endif /* COMPILER2 */
#endif /* _LP64 */
}
return start;
}
//----------------------------------------------------------------------------------------------------
// Return point for a Java call if there's an exception thrown in Java code.
// The exception is caught and transformed into a pending exception stored in
// JavaThread that can be tested from within the VM.
//
// Oexception: exception oop
// verify that thread corresponds
__ verify_thread();
// set pending exception
// complete return to VM
return start;
}
//----------------------------------------------------------------------------------------------------
// Continuation point for runtime calls returning with a pending exception
// The pending exception check happened in the runtime or native call stub
// The pending exception in Thread is converted into a Java-level exception
//
// Contract with Java-level exception handler: O0 = exception
// O1 = throwing pc
// Upon entry, O7 has the return address returning into Java
// (interpreted or compiled) code; i.e. the return address
// becomes the throwing pc.
#ifdef ASSERT
// make sure that this code is only executed if there is a pending exception
{ Label L;
}
#endif
// compute exception handler into handler_reg
__ get_thread();
BLOCK_COMMENT("call exception_handler_for_return_address");
__ call_VM_leaf(L7_thread_cache, CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), G2_thread, Lscratch);
#ifdef ASSERT
// make sure exception is set
{ Label L;
}
#endif
// jump to exception handler
// clear pending exception
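// A hedged sketch of the elided dispatch: jump to the handler and clear the
// pending exception in the delay slot ('handler_reg' and 'exception_addr'
// are defined by the elided code above).
//   __ jmp(handler_reg, 0);
//   __ delayed()->st_ptr(G0, exception_addr);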
return start;
}
//------------------------------------------------------------------------------------------------------------------------
// Continuation point for throwing of implicit exceptions that are not handled in
// the current activation. Fabricates an exception oop and initiates normal
// exception dispatching in this frame. Only callee-saved registers are preserved
// (through the normal register window / RegisterMap handling).
// If the compiler needs all registers to be preserved between the fault
// point and the exception handler then it must assume responsibility for that in
// AbstractCompiler::continuation_for_implicit_null_exception or
// continuation_for_implicit_division_by_zero_exception. All other implicit
// exceptions (e.g., NullPointerException or AbstractMethodError on entry) are
// either at call sites or otherwise assume that stack unwinding will be initiated,
// so caller saved registers were assumed volatile in the compiler.
// Note that we generate only this stub into a RuntimeStub, because it needs to be
// properly traversed and ignored during GC, so we change the meaning of the "__"
// macro within this method.
#ifdef ASSERT
#else
#endif /* ASSERT */
__ verify_thread();
// This is an inlined and slightly modified version of call_VM
// which has the ability to fetch the return PC out of thread-local storage
// Note that we always push a frame because on the SPARC
// architecture, for all of our implicit exception kinds at call
// sites, the implicit exception is taken before the callee frame
// is pushed.
__ save_frame(0);
// Note that we always have a runtime stub frame on the top of stack by this point
// 64-bit last_java_sp is biased!
}
}
// do the call
BLOCK_COMMENT("call runtime_entry");
__ call(runtime_entry, relocInfo::runtime_call_type);
if (!VerifyThread)
__ delayed()->mov(G2_thread, O0); // pass thread as first argument
else
__ delayed()->nop();              // (thread already passed)
// check for pending exceptions. use Gtemp as scratch register.
#ifdef ASSERT
Label L;
#endif // ASSERT
BLOCK_COMMENT("call forward_exception_entry");
// we use O7 linkage so that forward_exception_entry has the issuing PC
RuntimeStub* stub = RuntimeStub::new_runtime_stub(name, &code, frame_complete, masm->total_frame_size_in_bytes(0), NULL, false);
return stub->entry_point();
}
// Generate a routine that sets all the registers so we
// can tell if the stop routine prints them correctly.
int i;
__ save_frame(0);
// put addr in L0, then load through L0 to F0
// use add to put 2..18 in F2..F18
for ( i = 2; i <= 18; ++i ) {
}
// Now put double 2 in F16, double 18 in F18
// use add to put 20..32 in F20..F32
for (i = 20; i < 32; i += 2) {
}
// put 0..7 in i's, 8..15 in l's, 16..23 in o's, 24..31 in g's
for ( i = 0; i < 8; ++i ) {
if (i < 6) {
}
}
return start;
}
return start;
}
__ flush_windows();
// The returned value must be a stack pointer whose register save area
// is flushed, and will stay flushed while the caller executes.
return start;
}
// Helper functions for v8 atomic operations.
//
if (mark_oop_reg == noreg) {
} else {
}
}
void generate_v8_lock_prologue(Register lock_reg, Register lock_ptr_reg, Register yield_reg, Label& retry, Label& dontyield, Register mark_oop_reg = noreg, Register scratch_reg = noreg) {
// Initialize yield counter
__ cmp_and_br_short(yield_reg, V8AtomicOperationUnderLockSpinCount, Assembler::less, Assembler::pt, dontyield);
// This code can only be called from inside the VM; this
// stub is only invoked from Atomic::add(). We do not
// want to use call_VM, because _last_java_sp and such
// must already be set.
//
// Save the regs and make space for a C call
BLOCK_COMMENT("call os::naked_sleep");
// reset the counter
// try to get lock
// did we get the lock?
// yes, got lock. do the operation here.
}
void generate_v8_lock_epilogue(Register lock_reg, Register lock_ptr_reg, Register yield_reg, Label& retry, Label& dontyield, Register mark_oop_reg = noreg, Register scratch_reg = noreg) {
}
// Support for jint Atomic::xchg(jint exchange_value, volatile jint* dest).
//
// Arguments :
//
// exchange_value: O0
// dest: O1
//
// Results:
//
// O0: the value previously stored in dest
//
if (UseCASForSwap) {
// Use CAS instead of swap, just in case the MP hardware
// prefers to work with just one kind of synch. instruction.
// try to replace O2 with O3
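// A plausible sketch of the CAS retry loop this branch implements ('retry'
// label and surrounding setup are elided; the cas helper wraps v9 'casa'):
//   __ BIND(retry);
//   __ mov(O0, O3);            // O3 = value to install
//   __ ld(O1, 0, O2);          // observe the previous value in [O1]
//   __ cas(O1, O2, O3);        // store O3 into [O1] iff [O1] == O2
//   __ cmp_and_br_short(O2, O3, Assembler::notEqual, Assembler::pn, retry);
//   __ retl(false);
//   __ delayed()->mov(O2, O0); // report previous value to caller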
} else {
if (VM_Version::v9_instructions_work()) {
} else {
// got the lock, do the swap
}
}
return start;
}
// Support for jint Atomic::cmpxchg(jint exchange_value, volatile jint* dest, jint compare_value)
//
// Arguments :
//
// exchange_value: O0
// dest: O1
// compare_value: O2
//
// Results:
//
// O0: the value previously stored in dest
//
// Overwrites (v8): O3,O4,O5
//
// cmpxchg(dest, compare_value, exchange_value)
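// A hedged sketch of the v9 path: 'cas' compares [O1] with the compare value
// in O2 and, on a match, stores O0; either way the previous memory value is
// left in O0, which is exactly the required return value.
//   __ cas(O1, O2, O0);
//   __ retl(false);
//   __ delayed()->nop();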
return start;
}
// Support for jlong Atomic::cmpxchg(jlong exchange_value, volatile jlong *dest, jlong compare_value)
//
// Arguments :
//
// exchange_value: O1:O0
// dest: O2
// compare_value: O4:O3
//
// Results:
//
// O1:O0: the value previously stored in dest
//
// This only works on V9; on V8 we don't generate any
// code and just return NULL.
//
// Overwrites: G1,G2,G3
//
if (!VM_Version::supports_cx8())
return NULL;
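// One plausible shape of the elided v9 sequence (packing details assumed):
// combine each 32-bit register pair into one 64-bit register, 'casx' on the
// destination, then unpack the previous value back into a register pair.
//   __ sllx(O0, 32, O0);  __ srl(O1, 0, O1);  __ or3(O0, O1, O0);  // exchange_value
//   __ sllx(O3, 32, O3);  __ srl(O4, 0, O4);  __ or3(O3, O4, O3);  // compare_value
//   __ casx(O2, O3, O0);             // previous 64-bit value now in O0
//   __ srl(O0, 0, O1);               // low  half -> O1
//   __ retl(false);
//   __ delayed()->srlx(O0, 32, O0);  // high half -> O0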
return start;
}
// Support for jint Atomic::add(jint add_value, volatile jint* dest).
//
// Arguments :
//
// add_value: O0 (e.g., +1 or -1)
// dest: O1
//
// Results:
//
// O0: the new value stored in dest
//
// Overwrites (v9): O3
// Overwrites (v8): O3,O4,O5
//
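// A plausible sketch of the v9 fetch-and-add loop generated below ('retry'
// label assumed): load, add, try to CAS the sum back, retry on contention.
//   __ BIND(retry);
//   __ lduw(O1, 0, O2);               // O2 = current value at [O1]
//   __ add(O0, O2, O3);               // O3 = add_value + current
//   __ cas(O1, O2, O3);               // install O3 iff [O1] is still O2
//   __ cmp_and_br_short(O2, O3, Assembler::notEqual, Assembler::pn, retry);
//   __ retl(false);
//   __ delayed()->add(O0, O2, O0);    // return the new value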
if (VM_Version::v9_instructions_work()) {
} else {
// got lock, do the increment
// %%% only for RMO and PSO
}
return start;
}
//------------------------------------------------------------------------------------------------------------------------
// The following routine generates a subroutine to throw an asynchronous
// UnknownError when an unsafe access gets a fault that could not be
// reasonably prevented by the programmer.  (Example: SIGBUS/OBJERR.)
//
// Arguments :
//
// trapping PC: O7
//
// Results:
// posts an asynchronous exception, skips the trapping instruction
//
int i;
__ save_frame(0);
}
BLOCK_COMMENT("call handle_unsafe_access");
}
__ verify_thread();
return start;
}
// Support for uint StubRoutine::Sparc::partial_subtype_check( Klass sub, Klass super );
// Arguments :
//
// ret : O0, returned
// sub : O1, argument, not changed
// super: O2, argument, not changed
// raddr: O7, blown by call
#if defined(COMPILER2) && !defined(_LP64)
// Do not use a 'save' because it blows the 64-bit O registers.
#else
__ save_frame(0);
#endif
// Match falls through here.
#if defined(COMPILER2) && !defined(_LP64)
#else
#endif
#if defined(COMPILER2) && !defined(_LP64)
#else
#endif
return start;
}
// Called from MacroAssembler::verify_oop
//
return start;
}
//
// Verify that a register contains a clean 32-bit positive value
// (high 32 bits are 0) so it can be used in 64-bit shifts (sllx, srax).
//
// Input:
//   Rint  -  32-bit value
//   Rtmp  -  scratch
//
#if defined(ASSERT) && defined(_LP64)
#endif
}
//
// Generate overlap test for array copy stubs
//
// Input:
// O0 - array1
// O1 - array2
// O2 - element count
//
// Kills temps: O3, O4
//
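// A hedged sketch of the test itself: a forward (disjoint) copy is safe iff
// to <= from or to >= from + byte_count; both failing comparisons fall into
// the conjoint (backward) code ('no_overlap_target' names the disjoint stub):
//   __ subcc(to, from, to_from);
//   __ sll_ptr(count, log2_elem_size, byte_count);
//   __ brx(Assembler::lessEqualUnsigned, false, Assembler::pt, no_overlap_target);
//   __ delayed()->cmp(to_from, byte_count);
//   __ brx(Assembler::greaterEqualUnsigned, false, Assembler::pt, no_overlap_target);
//   __ delayed()->nop();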
}
}
else
else
}
//
// Generate pre-write barrier for array.
//
// Input:
// addr - register containing starting address
// count - register containing element count
// tmp - scratch register
//
// The input registers are overwritten.
//
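// A hedged sketch of the G1 case below: open a new register window so the
// O-registers are free, then call the shared pre-barrier entry with (addr, count):
//   __ save_frame(0);
//   __ mov(addr->after_save(), O0);
//   __ call(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_pre));
//   __ delayed()->mov(count->after_save(), O1);   // get the count into O1
//   __ restore();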
case BarrierSet::G1SATBCT:
case BarrierSet::G1SATBCTLogging:
// With G1, don't generate the call if we statically know that the target is uninitialized
if (!dest_uninitialized) {
__ save_frame(0);
// Save the necessary global regs... will be used after.
}
}
// Get the count into O1
}
}
}
break;
case BarrierSet::CardTableModRef:
case BarrierSet::CardTableExtension:
case BarrierSet::ModRef:
break;
default:
}
}
//
// Generate post-write barrier for array.
//
// Input:
// addr - register containing starting address
// count - register containing element count
// tmp - scratch register
//
// The input registers are overwritten.
//
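// A hedged sketch of the card-table case below: compute the first and last
// card for [addr, addr+count) and dirty every card in between with a byte
// store of zero (the store loop itself is elided):
//   __ sll_ptr(count, LogBytesPerHeapOop, count);
//   __ sub(count, BytesPerHeapOop, count);
//   __ add(count, addr, count);        // count = address of the last element
//   __ srl_ptr(addr,  CardTableModRefBS::card_shift, addr);
//   __ srl_ptr(count, CardTableModRefBS::card_shift, count);
//   __ sub(count, addr, count);        // number of cards minus one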
case BarrierSet::G1SATBCT:
case BarrierSet::G1SATBCTLogging:
{
// Get some new fresh output registers.
__ save_frame(0);
}
break;
case BarrierSet::CardTableModRef:
case BarrierSet::CardTableExtension:
{
// Use two shifts to clear out those low order two bits! (Cannot opt. into 1.)
}
break;
case BarrierSet::ModRef:
break;
default:
}
}
//
// Generate main code for disjoint arraycopy
//
typedef void (StubGenerator::*CopyLoopFunc)(Register from, Register to, Register count, int count_dec,
Label& L_loop, bool use_prefetch, bool use_bis);
prefetch_dist = (prefetch_dist + (iter_size-1)) & (-iter_size); // round up to one iteration copy size
if (UseBlockCopy) {
// 64 bytes tail + bytes copied in one loop iteration
// Use BIS copy only for big arrays since it requires membar.
// This code is for disjoint source and destination:
// to <= from || to >= from+count
// but BIS will stomp over 'from' if (to > from-tail_size && to <= from)
__ cmp_and_br_short(O4, (tail_size>>4), Assembler::lessEqualUnsigned, Assembler::pn, L_skip_block_copy);
// BIS should not be used to copy tail (64 bytes+iter_size)
// to avoid zeroing of following values.
if (prefetch_count > 0) { // rounded up to one iteration count
// Do prefetching only if copy size is bigger
// than prefetch distance.
} // prefetch_count > 0
// BIS needs membar.
// Copy tail
} // UseBlockCopy
if (prefetch_count > 0) { // rounded up to one iteration count
// Do prefetching only if copy size is bigger
// than prefetch distance.
} // prefetch_count > 0
}
//
// Helper methods for copy_16_bytes_forward_with_shift()
//
if (use_prefetch) {
if (ArraycopySrcPrefetchDistance > 0) {
}
if (ArraycopyDstPrefetchDistance > 0) {
}
}
if (use_bis) {
} else {
}
}
// Copy big chunks forward with shift
//
// Inputs:
// from - source array
// to - destination array aligned to 8-bytes
// count - elements count to copy >= the count equivalent to 16 bytes
// count_dec - elements count's decrement equivalent to 16 bytes
// L_copy_bytes - copy exit label
//
// if both arrays have the same alignment mod 8, do 8 bytes aligned copy
//
// Load 2 aligned 8-bytes chunks and use one from previous iteration
// to form 2 aligned 8-bytes chunks to store.
//
// copy 8 bytes, part of them already loaded in O3
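// A hedged sketch of one shifted step: each aligned 8-byte store is built
// from the tail of the previous aligned load and the head of the next one
// ('left_shift'/'right_shift' depend on the misalignment and are assumed):
//   __ sllx(O3, left_shift, O3);    // tail of the chunk from last iteration
//   __ ldx(from, 0, O4);            // next aligned 8-byte chunk
//   __ srlx(O4, right_shift, G3);   // its head
//   __ or3(O3, G3, G3);             // combined, aligned 8 bytes
//   __ stx(G3, to, 0);              // one aligned store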
}
// Copy big chunks backward with shift
//
// Inputs:
// end_from - source array end address
// end_to - destination array end address aligned to 8-bytes
// count - elements count to copy >= the count equivalent to 16 bytes
// count_dec - elements count's decrement equivalent to 16 bytes
// L_aligned_copy - aligned copy exit label
// L_copy_bytes - copy exit label
//
// if both arrays have the same alignment mod 8, do 8 bytes aligned copy
//
// Load 2 aligned 8-bytes chunks and use one from previous iteration
// to form 2 aligned 8-bytes chunks to store.
//
// copy 8 bytes, part of them already loaded in O3
}
//
// Generate stub for disjoint byte copy. If "aligned" is true, the
// "from" and "to" addresses are assumed to be heapword aligned.
//
// Arguments for generated stub:
// from: O0
// to: O1
// count: O2 treated as signed
//
// O3, O4, G3, G4 are used as temp registers
// caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
BLOCK_COMMENT("Entry:");
}
// for short arrays, just do single element copy
if (aligned) {
// 'aligned' == true when it is known statically during compilation
// of this arraycopy call site that both 'from' and 'to' addresses
// are HeapWordSize aligned (see LibraryCallKit::basictype2arraycopy()).
//
// Aligned arrays have 4-byte alignment in the 32-bit VM
// and 8-byte alignment in the 64-bit VM, so we do it only for the 32-bit VM.
//
#ifndef _LP64
// copy a 4-byte word if necessary to align 'to' to 8 bytes
#endif
} else {
// copy bytes to align 'to' on an 8-byte boundary
}
#ifdef _LP64
if (!aligned)
#endif
{
// Copy with shift 16 bytes per iteration if arrays do not have
// the same alignment mod 8, otherwise fall through to the next
// code for aligned copy.
// The compare above (count >= 23) guarantees 'count' >= 16 bytes.
// Also jump over aligned copy after the copy with shift completed.
}
// Both arrays are 8-byte aligned, copy 16 bytes at a time
// copy trailing bytes
// O3, O4 are used as temp registers
return start;
}
//
// Generate stub for conjoint byte copy. If "aligned" is true, the
// "from" and "to" addresses are assumed to be heapword aligned.
//
// Arguments for generated stub:
// from: O0
// to: O1
// count: O2 treated as signed
//
// Do reverse copy.
// caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
BLOCK_COMMENT("Entry:");
}
// for short arrays, just do single element copy
{
// Align the ends of the arrays since they may not be aligned even
// when the arrays themselves are aligned.
// copy bytes to align 'end_to' on an 8-byte boundary
}
#ifdef _LP64
if (aligned) {
// Both arrays are aligned to 8 bytes in the 64-bit VM.
// The 'count' is decremented in copy_16_bytes_backward_with_shift()
// in unaligned case.
} else
#endif
{
// Copy with shift 16 bytes per iteration if arrays do not have
// the same alignment mod 8, otherwise jump to the next
// code for aligned copy (and subtracting 16 from 'count' before the jump).
// The compare above (count >= 11) guarantees 'count' >= 16 bytes.
// Also jump over aligned copy after the copy with shift completed.
}
// copy 4 elements (16 bytes) at a time
// copy 1 element (2 bytes) at a time
// O3, O4 are used as temp registers
return start;
}
//
// Generate stub for disjoint short copy. If "aligned" is true, the
// "from" and "to" addresses are assumed to be heapword aligned.
//
// Arguments for generated stub:
// from: O0
// to: O1
// count: O2 treated as signed
//
// O3, O4, G3, G4 are used as temp registers
// caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
BLOCK_COMMENT("Entry:");
}
// for short arrays, just do single element copy
if (aligned) {
// 'aligned' == true when it is known statically during compilation
// of this arraycopy call site that both 'from' and 'to' addresses
// are HeapWordSize aligned (see LibraryCallKit::basictype2arraycopy()).
//
// Aligned arrays have 4-byte alignment in the 32-bit VM
// and 8-byte alignment in the 64-bit VM.
//
#ifndef _LP64
// copy one 2-element word if necessary to align 'to' to 8 bytes
#endif
} else {
// copy 1 element if necessary to align 'to' on a 4-byte boundary
// copy 2 elements to align 'to' on an 8-byte boundary
}
#ifdef _LP64
if (!aligned)
#endif
{
// Copy with shift 16 bytes per iteration if arrays do not have
// the same alignment mod 8, otherwise fall through to the next
// code for aligned copy.
// The compare above (count >= 11) guarantees 'count' >= 16 bytes.
// Also jump over aligned copy after the copy with shift completed.
}
// Both arrays are 8-byte aligned, copy 16 bytes at a time
// copy 1 element at a time
// O3, O4 are used as temp registers
return start;
}
//
// Generate stub for disjoint short fill. If "aligned" is true, the
// "to" address is assumed to be heapword aligned.
//
// Arguments for generated stub:
// to: O0
// value: O1
// count: O2 treated as signed
//
// O3 is used as a temp register
switch (t) {
case T_BYTE:
shift = 2;
break;
case T_SHORT:
shift = 1;
break;
case T_INT:
shift = 0;
break;
default: ShouldNotReachHere();
}
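// A hedged sketch of the core idea used below: replicate the value across
// 64 bits, then fill 32 bytes per iteration with 8-byte stores ('value',
// 'to' and 'count' are the stub arguments; the label name is assumed):
//   __ sllx(value, 32, O3);
//   __ or3(value, O3, value);          // value now repeated in both halves
//   __ BIND(L_fill_32_bytes_loop);
//   __ stx(value, to, 0);
//   __ stx(value, to, 8);
//   __ stx(value, to, 16);
//   __ stx(value, to, 24);
//   __ subcc(count, 8 << shift, count); // count is in elements
//   __ brx(Assembler::greaterEqual, false, Assembler::pt, L_fill_32_bytes_loop);
//   __ delayed()->add(to, 32, to);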
BLOCK_COMMENT("Entry:");
if (t == T_BYTE) {
// Zero extend value
}
if (t == T_SHORT) {
// Zero extend value
}
}
// align the 'to' address on a 4-byte boundary
if (t == T_BYTE) {
// One-byte misalignment happens only for byte arrays
}
// Two-byte misalignment happens only for byte and short (char) arrays
}
#ifdef _LP64
if (!aligned) {
#endif
// align to 8 bytes, we know we are 4 byte aligned to start
#ifdef _LP64
}
#endif
if (t == T_INT) {
// Zero extend value
}
}
// Fill 32-byte chunks
//
// length is too short, just fill 8 bytes at a time
//
// fill trailing 4 bytes
if (t == T_INT) {
}
} else {
}
// fill trailing 2 bytes
if (t == T_BYTE) {
// fill trailing byte
} else {
}
} else {
}
// Handle copies less than 8 bytes. Int is handled elsewhere.
if (t == T_BYTE) {
// in delay slot __ andcc(count, 1, G0);
}
if (t == T_SHORT) {
// in delay slot __ andcc(count, 1, G0);
}
return start;
}
//
// Generate stub for conjoint short copy. If "aligned" is true, the
// "from" and "to" addresses are assumed to be heapword aligned.
//
// Arguments for generated stub:
// from: O0
// to: O1
// count: O2 treated as signed
//
// Do reverse copy.
// caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
BLOCK_COMMENT("Entry:");
}
// for short arrays, just do single element copy
{
// Align the ends of the arrays since they may not be aligned even
// when the arrays themselves are aligned.
// copy 1 element if necessary to align 'end_to' on a 4-byte boundary
// copy 2 elements to align 'end_to' on an 8-byte boundary
}
#ifdef _LP64
if (aligned) {
// Both arrays are aligned to 8 bytes in the 64-bit VM.
// The 'count' is decremented in copy_16_bytes_backward_with_shift()
// in unaligned case.
} else
#endif
{
// Copy with shift 16 bytes per iteration if arrays do not have
// the same alignment mod 8, otherwise jump to the next
// code for aligned copy (and subtracting 8 from 'count' before the jump).
// The compare above (count >= 11) guarantees 'count' >= 16 bytes.
// Also jump over aligned copy after the copy with shift completed.
}
// copy 4 elements (16 bytes) at a time
// copy 1 element (2 bytes) at a time
// O3, O4 are used as temp registers
return start;
}
//
// Helper methods for generate_disjoint_int_copy_core()
//
if (use_prefetch) {
if (ArraycopySrcPrefetchDistance > 0) {
}
if (ArraycopyDstPrefetchDistance > 0) {
}
}
if (use_bis) {
} else {
}
}
//
// Generate core code for disjoint int copy (and oop copy on 32-bit).
// If "aligned" is true, the "from" and "to" addresses are assumed
// to be heapword aligned.
//
// Arguments:
// from: O0
// to: O1
// count: O2 treated as signed
//
// O3, O4, G3, G4 are used as temp registers
// 'aligned' == true when it is known statically during compilation
// of this arraycopy call site that both 'from' and 'to' addresses
// are HeapWordSize aligned (see LibraryCallKit::basictype2arraycopy()).
//
// Aligned arrays have 4-byte alignment in the 32-bit VM
// and 8-byte alignment in the 64-bit VM.
//
#ifdef _LP64
if (!aligned)
#endif
{
// The next check could be put under 'ifndef' since the code in
// generate_disjoint_long_copy_core() has its own checks and sets 'offset'.
// for short arrays, just do single element copy
// copy 1 element to align 'to' on an 8 byte boundary
// if arrays have same alignment mod 8, do 4 elements copy
//
// Load 2 aligned 8-bytes chunks and use one from previous iteration
// to form 2 aligned 8-bytes chunks to store.
//
// copy_16_bytes_forward_with_shift() is not used here since this
// code is more optimal.
// copy with shift 4 elements (16 bytes) at a time
} // !aligned
// copy 4 elements (16 bytes) at a time
// copy 1 element at a time
}
//
// Generate stub for disjoint int copy. If "aligned" is true, the
// "from" and "to" addresses are assumed to be heapword aligned.
//
// Arguments for generated stub:
// from: O0
// to: O1
// count: O2 treated as signed
//
// caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
BLOCK_COMMENT("Entry:");
}
// O3, O4 are used as temp registers
return start;
}
//
// Generate core code for conjoint int copy (and oop copy on 32-bit).
// If "aligned" is true, the "from" and "to" addresses are assumed
// to be heapword aligned.
//
// Arguments:
// from: O0
// to: O1
// count: O2 treated as signed
//
// Do reverse copy.
// O3, O4, O5, G3 are used as temp registers
// copy 1 element to align 'to' on an 8 byte boundary
// Check if 'end_from' and 'end_to' have the same alignment.
// copy with shift 4 elements (16 bytes) at a time
//
// Load 2 aligned 8-bytes chunks and use one from previous iteration
// to form 2 aligned 8-bytes chunks to store.
//
// copy 4 elements (16 bytes) at a time
// copy 1 element (4 bytes) at a time
}
//
// Generate stub for conjoint int copy. If "aligned" is true, the
// "from" and "to" addresses are assumed to be heapword aligned.
//
// Arguments for generated stub:
// from: O0
// to: O1
// count: O2 treated as signed
//
// caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
BLOCK_COMMENT("Entry:");
}
// O3, O4 are used as temp registers
return start;
}
//
// Helper methods for generate_disjoint_long_copy_core()
//
if (ArraycopySrcPrefetchDistance > 0) {
}
if (ArraycopyDstPrefetchDistance > 0) {
}
}
if (use_bis) {
} else {
}
}
}
//
// Generate core code for disjoint long copy (and oop copy on 64-bit).
// "aligned" is ignored, because we must make the stronger
// assumption that both addresses are always 64-bit aligned.
//
// Arguments:
// from: O0
// to: O1
// count: O2 treated as signed
//
// count -= 2;
// if ( count >= 0 ) { // >= 2 elements
// if ( count > 6) { // >= 8 elements
// count -= 6; // original count - 8
// do {
// copy_8_elements;
// count -= 8;
// } while ( count >= 0 );
// count += 6;
// }
// if ( count >= 0 ) { // >= 2 elements
// do {
// copy_2_elements;
// } while ( (count=count-2) >= 0 );
// }
// }
// count += 2;
// if ( count != 0 ) { // 1 element left
// copy_1_element;
// }
//
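// A hedged sketch of the 'copy_2_elements' step from the pseudo-code above
// (label name assumed; the delay slots carry the pointer increments):
//   __ BIND(L_copy_16_bytes);
//   __ ldx(from, 0, O3);
//   __ ldx(from, 8, G3);
//   __ deccc(count, 2);
//   __ stx(O3, to, 0);
//   __ inc(from, 16);
//   __ stx(G3, to, 8);
//   __ brx(Assembler::greaterEqual, false, Assembler::pt, L_copy_16_bytes);
//   __ delayed()->inc(to, 16);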
// Copy by 64 bytes chunks
// Now we can use O4(offset0), O5(offset8) as temps
// count >= 0 (original count - 8)
// Restore O4(offset0), O5(offset8)
// Copy by 16 bytes chunks
// Copy last 8 bytes
}
//
// Generate stub for disjoint long copy.
// "aligned" is ignored, because we must make the stronger
// assumption that both addresses are always 64-bit aligned.
//
// Arguments for generated stub:
// from: O0
// to: O1
// count: O2 treated as signed
//
// caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
BLOCK_COMMENT("Entry:");
}
// O3, O4 are used as temp registers
return start;
}
//
// Generate core code for conjoint long copy (and oop copy on 64-bit).
// "aligned" is ignored, because we must make the stronger
// assumption that both addresses are always 64-bit aligned.
//
// Arguments:
// from: O0
// to: O1
// count: O2 treated as signed
//
// Do reverse copy.
}
// Generate stub for conjoint long copy.
// "aligned" is ignored, because we must make the stronger
// assumption that both addresses are always 64-bit aligned.
//
// Arguments for generated stub:
// from: O0
// to: O1
// count: O2 treated as signed
//
// caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
BLOCK_COMMENT("Entry:");
}
// O3, O4 are used as temp registers
return start;
}
// Generate stub for disjoint oop copy. If "aligned" is true, the
// "from" and "to" addresses are assumed to be heapword aligned.
//
// Arguments for generated stub:
// from: O0
// to: O1
// count: O2 treated as signed
//
address generate_disjoint_oop_copy(bool aligned, address *entry, const char *name,
bool dest_uninitialized = false) {
// caller can pass a 64-bit byte count here
BLOCK_COMMENT("Entry:");
}
// save arguments for barrier generation
#ifdef _LP64
if (UseCompressedOops) {
} else {
}
#else
#endif
// O0 is used as temp register
// O3, O4 are used as temp registers
return start;
}
// Generate stub for conjoint oop copy. If "aligned" is true, the
// "from" and "to" addresses are assumed to be heapword aligned.
//
// Arguments for generated stub:
// from: O0
// to: O1
// count: O2 treated as signed
//
address generate_conjoint_oop_copy(bool aligned, address nooverlap_target, address *entry,
const char *name, bool dest_uninitialized = false) {
// caller can pass a 64-bit byte count here
BLOCK_COMMENT("Entry:");
}
// save arguments for barrier generation
#ifdef _LP64
if (UseCompressedOops) {
} else {
}
#else
#endif
// O0 is used as temp register
// O3, O4 are used as temp registers
return start;
}
// Helper for generating a dynamic type check.
// Smashes only the given temp registers.
BLOCK_COMMENT("type_check:");
BLOCK_COMMENT("type_check_slow_path:");
__ save_frame(0);
__ check_klass_subtype_slow_path(sub_klass->after_save(), super_klass->after_save(),
L0, L1, L2, L4,
NULL, &L_pop_to_miss);
// Fall through on failure!
}
// Generate stub for checked oop copy.
//
// Arguments for generated stub:
// from: O0
// to: O1
// count: O2 treated as signed
// ckoff: O3 (super_check_offset)
// ckval: O4 (super_klass)
// ret: O0 zero for success; (-1^K) where K is partial transfer count
//
address generate_checkcast_copy(const char *name, address *entry, bool dest_uninitialized = false) {
#ifdef ASSERT
// We sometimes save a frame (see generate_type_check below).
// If this will cause trouble, let's fail now instead of later.
__ save_frame(0);
#endif
#ifdef ASSERT
// caller guarantees that the arrays really are different
// otherwise, we would have to make conjoint checks
{ Label L;
}
#endif //ASSERT
// caller can pass a 64-bit byte count here (from generic stub)
BLOCK_COMMENT("Entry:");
}
// Empty array: Nothing to do.
// ======== begin loop ========
// (Loop is rotated; its entry is load_element.)
// Loop variables:
// (O5 = 0; ; O5 += wordSize) --- offset from src, dest arrays
// (O2 = len; O2 != 0; O2--) --- number of oops *remaining*
// G3, G4, G5 --- current oop, oop.klass, oop.klass.super
// ======== loop entry is here ========
// branch to this on success:
// ======== end loop ========
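// A hedged reconstruction of the rotated loop outlined above:
//   __ BIND(store_element);
//   __ deccc(G1_remain);                          // decrement the count
//   __ store_heap_oop(G3_oop, O1_to, O5_offset);  // store the oop
//   __ inc(O5_offset, heapOopSize);               // step to next offset
//   __ brx(Assembler::zero, true, Assembler::pt, do_card_marks);
//   __ delayed()->set(0, O0);                     // on success, return 0
//   __ BIND(load_element);
//   __ load_heap_oop(O0_from, O5_offset, G3_oop); // load the oop
//   __ br_null_short(G3_oop, Assembler::pt, store_element);
//   __ load_klass(G3_oop, G4_klass);              // query the object klass
//   generate_type_check(G4_klass, O3_ckoff, O4_ckval, G5_super,
//                       store_element);           // branch there on success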
// It was a real error; we must depend on the caller to finish the job.
// Register G1 has number of *remaining* oops, O2 number of *total* oops.
// Emit GC store barriers for the oops we have copied (O2 minus G1),
// and report their number to the caller.
return start;
}
// Generate 'unsafe' array copy stub
// Though just as safe as the other stubs, it takes an unscaled
// size_t argument instead of an element count.
//
// Arguments for generated stub:
// from: O0
// to: O1
// count: O2 byte count, treated as ssize_t, can be zero
//
// Examines the alignment of the operands and dispatches
// to a long, int, short, or byte copy loop.
//
// bump this on entry, not on exit:
// scale the count on the way out:
// scale the count on the way out:
// scale the count on the way out:
return start;
}
// Perform range checks on the proposed arraycopy.
// Kills the two temps, but nothing else.
// Also, clean the sign bits of src_pos and dst_pos.
BLOCK_COMMENT("arraycopy_range_checks:");
// if (src_pos + length > arrayOop(src)->length() ) FAIL;
// Note: This next instruction may be in the delay slot of a branch:
// if (dst_pos + length > arrayOop(dst)->length() ) FAIL;
// Have to clean up high 32-bits of 'src_pos' and 'dst_pos'.
// Move with sign extension can be used since they are positive.
BLOCK_COMMENT("arraycopy_range_checks done");
}
//
// Generate generic array copy stubs
//
// Input:
// O0 - src oop
// O1 - src_pos
// O2 - dst oop
// O3 - dst_pos
// O4 - element count
//
// Output:
// O0 == 0 - success
// O0 == -1 - need to call System.arraycopy
//
// Input registers
// registers used as temp
// bump this on entry, not on exit:
// In principle, the int arguments could be dirty.
//assert_clean_int(src_pos, G1);
//assert_clean_int(dst_pos, G1);
//assert_clean_int(length, G1);
//-----------------------------------------------------------------------
// Assembler stubs will be used for this call to arraycopy
// if the following conditions are met:
//
// (1) src and dst must not be null.
// (2) src_pos must not be negative.
// (3) dst_pos must not be negative.
// (4) length must not be negative.
// (5) src klass and dst klass should be the same and not NULL.
// (6) src and dst should be arrays.
// (7) src_pos + length must not exceed length of src.
// (8) dst_pos + length must not exceed length of dst.
BLOCK_COMMENT("arraycopy initial argument checks");
// if (src == NULL) return -1;
// if (src_pos < 0) return -1;
// if (dst == NULL) return -1;
// if (dst_pos < 0) return -1;
// if (length < 0) return -1;
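// A hedged sketch of how checks (1)-(4) are coded: every failing test
// branches to a common 'L_failed' exit that returns -1 (label assumed):
//   __ br_null(src, false, Assembler::pn, L_failed);             // (1) src != NULL
//   __ delayed()->tst(src_pos);
//   __ br(Assembler::negative, false, Assembler::pn, L_failed);  // (2) src_pos >= 0
//   __ delayed()->nop();
//   // ... and likewise for dst, dst_pos and length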
BLOCK_COMMENT("arraycopy argument klass checks");
// get src->klass()
if (UseCompressedOops) {
} else {
}
#ifdef ASSERT
// assert(src->klass() != NULL);
BLOCK_COMMENT("assert klasses not null");
BLOCK_COMMENT("assert done");
}
#endif
// Load layout helper
//
// |array_tag|     | header_size | element_type |     |log2_element_size|
//  32        30    24            16              8     2                0
//
// array_tag: typeArray = 0x3, objArray = 0x2, non-array = 0x0
//
// Load 32-bit signed value. Use br() instruction with it to check icc.
if (UseCompressedOops) {
}
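// A hedged sketch of the objArray dispatch: compare the loaded layout helper
// against the objArray pattern (temp register names assumed):
//   __ set(Klass::array_layout_helper(T_OBJECT), O5_temp);
//   __ cmp(G5_lh, O5_temp);
//   __ br(Assembler::equal, false, Assembler::pt, L_objArray);
//   __ delayed()->nop();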
// Handle objArrays completely differently...
if (UseCompressedOops) {
} else {
}
// if (src->klass() != dst->klass()) return -1;
// if (!src->is_Array()) return -1;
// At this point, it is known to be a typeArray (array_tag 0x3).
#ifdef ASSERT
{ Label L;
}
#else
#endif
// typeArrayKlass
//
// src_addr = (src + array_header_in_bytes()) + (src_pos << log2elemsize);
// dst_addr = (dst + array_header_in_bytes()) + (dst_pos << log2elemsize);
//
// next registers should be set before the jump to corresponding stub
// 'from', 'to', 'count' registers should be set in this order
// since they are the same as 'src', 'src_pos', 'dst'.
BLOCK_COMMENT("scale indexes to element size");
BLOCK_COMMENT("choose copy loop based on element size");
#ifdef ASSERT
{ Label L;
}
#endif
// objArrayKlass
// live at this point: G3_src_klass, G4_dst_klass, src[_pos], dst[_pos], length
// test array classes for subtyping
// Identically typed arrays can be copied without element-wise checks.
// live at this point: G3_src_klass, G4_dst_klass
{
// Before looking at dst.length, make sure dst is also an objArray.
// lduw(G4_dst_klass, lh_offset, O5_temp); // hoisted to delay slot
// It is safe to examine both src.length and dst.length.
// Marshal the base address arguments now, freeing registers.
// Generate the type check.
// Fetch destination element klass from the objArrayKlass header.
// the checkcast_copy loop needs two extra arguments:
// lduw(O4, sco_offset, O3); // sco of elem klass
}
return start;
}
//
// Generate stub for heap zeroing.
// "to" address is aligned to jlong (8 bytes).
//
// Arguments for generated stub:
// to: O0
// count: O1 treated as signed (count of HeapWord)
// count could be 0
//
// Use BIS for zeroing
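// A hedged sketch: the elided body leans on the assembler helper
//   __ bis_zeroing(to, count, temp, Ldone);
// which emits 64-byte block-initializing stores (BIS) of zero plus the
// required trailing membar (helper name as in the SPARC macro assembler).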
return start;
}
void generate_arraycopy_stubs() {
//*** jbyte
// Always need aligned and unaligned versions
StubRoutines::_jbyte_disjoint_arraycopy         = generate_disjoint_byte_copy(false, &entry,
"jbyte_disjoint_arraycopy");
StubRoutines::_jbyte_arraycopy                  = generate_conjoint_byte_copy(false, entry, &entry_jbyte_arraycopy,
"jbyte_arraycopy");
StubRoutines::_arrayof_jbyte_disjoint_arraycopy = generate_disjoint_byte_copy(true, &entry,
"arrayof_jbyte_disjoint_arraycopy");
StubRoutines::_arrayof_jbyte_arraycopy          = generate_conjoint_byte_copy(true, entry, NULL,
"arrayof_jbyte_arraycopy");
//*** jshort
// Always need aligned and unaligned versions
StubRoutines::_jshort_disjoint_arraycopy         = generate_disjoint_short_copy(false, &entry,
"jshort_disjoint_arraycopy");
StubRoutines::_jshort_arraycopy                  = generate_conjoint_short_copy(false, entry, &entry_jshort_arraycopy,
"jshort_arraycopy");
StubRoutines::_arrayof_jshort_disjoint_arraycopy = generate_disjoint_short_copy(true, &entry,
"arrayof_jshort_disjoint_arraycopy");
StubRoutines::_arrayof_jshort_arraycopy          = generate_conjoint_short_copy(true, entry, NULL,
"arrayof_jshort_arraycopy");
//*** jint
// Aligned versions
StubRoutines::_arrayof_jint_disjoint_arraycopy = generate_disjoint_int_copy(true, &entry,
"arrayof_jint_disjoint_arraycopy");
StubRoutines::_arrayof_jint_arraycopy = generate_conjoint_int_copy(true, entry, &entry_jint_arraycopy,
"arrayof_jint_arraycopy");
#ifdef _LP64
// In 64 bit we need both aligned and unaligned versions of jint arraycopy.
// entry_jint_arraycopy always points to the unaligned version (notice that we overwrite it).
"jint_disjoint_arraycopy");
"jint_arraycopy");
#else
// In 32 bit jints are always HeapWordSize aligned, so always use the aligned
// version (it still has a pre-loop to reach the 8-byte-aligned main loop).
StubRoutines::_jint_disjoint_arraycopy = StubRoutines::_arrayof_jint_disjoint_arraycopy;
StubRoutines::_jint_arraycopy          = StubRoutines::_arrayof_jint_arraycopy;
#endif
//*** jlong
// It is always aligned
StubRoutines::_arrayof_jlong_disjoint_arraycopy = generate_disjoint_long_copy(true, &entry,
"arrayof_jlong_disjoint_arraycopy");
StubRoutines::_arrayof_jlong_arraycopy = generate_conjoint_long_copy(true, entry, &entry_jlong_arraycopy,
"arrayof_jlong_arraycopy");
//*** oops
// Aligned versions
StubRoutines::_arrayof_oop_disjoint_arraycopy = generate_disjoint_oop_copy(true, &entry,
"arrayof_oop_disjoint_arraycopy");
StubRoutines::_arrayof_oop_arraycopy = generate_conjoint_oop_copy(true, entry, &entry_oop_arraycopy,
"arrayof_oop_arraycopy");
// Aligned versions without pre-barriers
StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit = generate_disjoint_oop_copy(true, &entry,
"arrayof_oop_disjoint_arraycopy_uninit",
/*dest_uninitialized*/true);
StubRoutines::_arrayof_oop_arraycopy_uninit          = generate_conjoint_oop_copy(true, entry, NULL,
"arrayof_oop_arraycopy_uninit",
/*dest_uninitialized*/true);
#ifdef _LP64
if (UseCompressedOops) {
// With compressed oops we need unaligned versions, notice that we overwrite entry_oop_arraycopy.
StubRoutines::_oop_disjoint_arraycopy        = generate_disjoint_oop_copy(false, &entry,
"oop_disjoint_arraycopy");
StubRoutines::_oop_arraycopy                 = generate_conjoint_oop_copy(false, entry, &entry_oop_arraycopy,
"oop_arraycopy");
// Unaligned versions without pre-barriers
StubRoutines::_oop_disjoint_arraycopy_uninit = generate_disjoint_oop_copy(false, &entry,
"oop_disjoint_arraycopy_uninit",
/*dest_uninitialized*/true);
StubRoutines::_oop_arraycopy_uninit          = generate_conjoint_oop_copy(false, entry, NULL,
"oop_arraycopy_uninit",
/*dest_uninitialized*/true);
} else
#endif
{
// oop arraycopy is always aligned on 32bit and 64bit without compressed oops
StubRoutines::_oop_disjoint_arraycopy        = StubRoutines::_arrayof_oop_disjoint_arraycopy;
StubRoutines::_oop_arraycopy                 = StubRoutines::_arrayof_oop_arraycopy;
StubRoutines::_oop_disjoint_arraycopy_uninit = StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit;
StubRoutines::_oop_arraycopy_uninit          = StubRoutines::_arrayof_oop_arraycopy_uninit;
}
StubRoutines::_checkcast_arraycopy = generate_checkcast_copy("checkcast_arraycopy", &entry_checkcast_arraycopy);
StubRoutines::_checkcast_arraycopy_uninit = generate_checkcast_copy("checkcast_arraycopy_uninit", NULL,
/*dest_uninitialized*/true);
if (UseBlockZeroing) {
StubRoutines::_zero_aligned_words = generate_zero_aligned("zero_aligned_words");
}
}
void generate_initial() {
// Generates all stubs and initializes the entry points
//------------------------------------------------------------------------------------------------------------------------
// entry points that exist in all platforms
// Note: This is code that could be shared among different platforms - however the benefit seems to be smaller than
// the disadvantage of having a much more complicated generator structure. See also comment in stubRoutines.hpp.
//------------------------------------------------------------------------------------------------------------------------
// entry points that are platform specific
StubRoutines::Sparc::_flush_callers_register_windows_entry = generate_flush_callers_register_windows();
#if !defined(COMPILER2) && !defined(_LP64)
#endif // COMPILER2 !=> _LP64
// Build this early so it's available for the interpreter.
StubRoutines::_throw_StackOverflowError_entry = generate_throw_exception("StackOverflowError throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_StackOverflowError));
}
void generate_all() {
// Generates all stubs and initializes the entry points
// Generate partial_subtype_check first here since its code depends on
// UseZeroBaseCompressedOops which is defined after heap initialization.
// These entry points require SharedInfo::stack0 to be set up in non-core builds
StubRoutines::_throw_AbstractMethodError_entry = generate_throw_exception("AbstractMethodError throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_AbstractMethodError));
StubRoutines::_throw_IncompatibleClassChangeError_entry= generate_throw_exception("IncompatibleClassChangeError throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_IncompatibleClassChangeError));
StubRoutines::_throw_NullPointerException_at_call_entry= generate_throw_exception("NullPointerException at call throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_NullPointerException_at_call));
// support for verify_oop (must happen after universe_init)
// arraycopy stubs used by compilers
// Don't initialize the platform math functions since sparc
// doesn't have intrinsics for these operations.
}
public:
StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) {
// replace the standard masm with a special one:
_masm = new MacroAssembler(code);
if (all) {
generate_all();
} else {
generate_initial();
}
// make sure this stub is available for all local calls
if (_atomic_add_stub.is_unbound()) {
// generate a second time, if necessary
(void) generate_atomic_add();
}
}
private:
int _stub_count;
# ifdef ASSERT
// put extra information in the stub code, to make it more readable
#ifdef _LP64
// Write the high part of the address
// [RGV] Check if there is a dependency on the size of this prolog
#endif
# endif
align(true);
}
// %%%%% move this constant somewhere else
// UltraSPARC cache line size is 8 instructions:
static const unsigned int icache_line_size = 32;
static const unsigned int icache_half_line_size = 16;
void align(bool at_header = false) {
if (at_header) {
_masm->align(icache_line_size);
} else {
_masm->align(icache_half_line_size);
}
}
}; // end class declaration
void StubGenerator_generate(CodeBuffer* code, bool all) {
StubGenerator g(code, all);
}