assembler_sparc.cpp revision 113
4046N/A * Copyright 1997-2007 Sun Microsystems, Inc. All Rights Reserved. 0N/A * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 0N/A * This code is free software; you can redistribute it and/or modify it 0N/A * under the terms of the GNU General Public License version 2 only, as 0N/A * published by the Free Software Foundation. 0N/A * This code is distributed in the hope that it will be useful, but WITHOUT 0N/A * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 0N/A * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 0N/A * version 2 for more details (a copy is included in the LICENSE file that 0N/A * accompanied this code). 0N/A * You should have received a copy of the GNU General Public License version 0N/A * 2 along with this work; if not, write to the Free Software Foundation, 0N/A * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 2362N/A * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, 2362N/A * CA 95054 USA or visit www.sun.com if you need additional information or 0N/A#
include "incls/_precompiled.incl" 0N/A// Implementation of Address 0N/A// Warning: In LP64 mode, _disp will occupy more than 10 bits. 0N/A// This is inconsistent with the other constructors but op 0N/A// codes such as ld or ldx, only access disp() to get their 0N/A {
"A0",
"P0"}, {
"A1",
"P1"}, {
"A2",
"P2"}, {
"A3",
"P3"}, {
"A4",
"P4"},
0N/A {
"A5",
"P5"}, {
"A6",
"P6"}, {
"A7",
"P7"}, {
"A8",
"P8"}, {
"A9",
"P9"},
0N/A default: s =
"????";
break;
4046N/A default: s =
"????";
break;
0N/A// Patch instruction inst at offset inst_pos to refer to dest_pos 0N/A// and return the resulting instruction. 0N/A// We should have pcs, not offsets, but since all is relative, it will work out 0N/A int m;
// mask for displacement field 0N/A int v;
// new value for displacement field // Return the offset of the branch destination of instruction inst // Should have pcs, but since all is relative, it works out. return 0x00;
// illegal instruction 0x00000000 // Generate a bunch 'o stuff (including v9's // Generate a bunch 'o stuff unique to V8 stc(
30,
L6, -(
1 <<
12) );
// Implementation of MacroAssembler // provoke OS NULL exception if reg = NULL by // accessing M[reg] w/o changing any registers // nothing to do, (later) access of M[reg + offset] // will provoke OS NULL exception if reg = NULL // This can only be traceable if r1 & r2 are visible after a window save // get nearby pc, store jmp target // This can only be traceable if r1 is visible after a window save // get nearby pc, store jmp target // This code sequence is relocatable to any address, even on LP64. // Force fixed length sethi because NativeJump and NativeFarCall don't handle // variable length instruction streams. sethi(a,
/*ForceRelocatable=*/ true);
// Must do the add here so relocation can find the remainder of the // value to be relocated. // get nearby pc, store jmp target // Convert to C varargs format // spill register-resident args to their memory slots // (SPARC calling convention requires callers to have already preallocated these) // Note that the inArg might in fact be an outgoing argument, // if a leaf routine or stub does some tricky argument shuffling. // This routine must work even though one of the saved arguments // is in the d register (e.g., set_varargs(Argument(0, false), O0)). // return the address of the first memory slot // Conditional breakpoint (for assertion checks in assembly code) // We want to use ST_BREAKPOINT here, but the debugger is confused by it. // flush windows (except current) using flushw instruction if avail. // Write serialization page so VM thread can do a pseudo remote membar // We use the current thread pointer to calculate a thread specific // offset to write to within the page. This minimizes bus traffic // due to cache line collision. // Get the condition codes the V8 way. // This is a test of V8 which has icc but not xcc // so mask off the xcc bits // Compare condition codes from the V8 and V9 ways. // Write out the saved condition codes the V8 way // Read back the condition codes using the V9 instruction // This is a test of V8 which has icc but not xcc // so mask off the xcc bits // Compare the V8 way with the V9 way. // Test code sequence used on V8. Do not move above rdccr. // Test code sequence used on V8. Do not move below wrccr. // call this when G2_thread is not known to be valid mov(
G1,
L0);
// avoid clobbering G1 mov(
G3,
L2);
// avoid clobbering G3 also mov(
G4,
L5);
// avoid clobbering G4 // NOTE: this chops off the heads of the 64-bit O registers. // make sure G2_thread contains the right value mov(
G1,
L1);
// avoid clobbering G1 mov(
G3,
L3);
// avoid clobbering G3 mov(
G4,
L4);
// avoid clobbering G4 // Save & restore possible 64-bit Long arguments in G-regs // Save & restore possible 64-bit Long arguments in G-regs sllx(
L0,
32,
G2);
// Move old high G1 bits high in G2 sllx(
G1, 0,
G1);
// Clear current high G1 bits sllx(
L6,
32,
G2);
// Move old high G4 bits high in G2 sllx(
G4, 0,
G4);
// Clear current high G4 bits // smash G2_thread, as if the VM were about to anyway // %%% maybe get rid of [re]set_last_Java_frame // Always set last_Java_pc and flags first because once last_Java_sp is visible // has_last_Java_frame is true and users will look at the rest of the fields. // (Note: flags should always be zero before we get here so doesn't need to be set.) // Verify that flags was zeroed on return to Java stop(
"last_Java_pc not zeroed before leaving Java");
// Verify that flags was zeroed on return to Java stop(
"flags not zeroed before leaving Java");
// When returning from calling out from Java mode the frame anchor's last_Java_pc // will always be set to NULL. It is set here so that if we are doing a call to // native (not VM) that we capture the known pc and don't have to rely on the // native call having a standard frame linkage where we can find the pc. // Make sure that we have an odd stack stop(
"Stack Not Biased in set_last_Java_frame");
// check that it WAS previously set // Always return last_Java_pc to zero // Always null flags after return to Java // determine last_java_sp register // 64-bit last_java_sp is biased! // check for pending exceptions. use Gtemp as scratch register. // get oop result if there is one and reset the value in the thread // we use O7 linkage so that forward_exception_entry has the issuing PC // O0 is reserved for the thread // O0 is reserved for the thread // O0 is reserved for the thread // Note: The following call_VM overloadings are useful when a "save" // has already been performed by a stub, and the last Java frame is // the previous one. In that case, last_java_sp must be passed as FP // O0 is reserved for the thread // O0 is reserved for the thread // O0 is reserved for the thread // We require that C code which does not return a value in vm_result will // Check that we are not overwriting any other oop. // Use two shifts to clear out those low order two bits! (Cannot opt. into 1.) /* $$$ This stuff needs to go into one of the BarrierSet generator functions. (The particular barrier sets will have to be friends of MacroAssembler, I guess.) */ // %%% Note: The following six instructions have been moved, // They will be refactored at a later date. // if addr of local, do not need to load it else if (a.
hi32() == -
1) {
if ( a.
hi32() &
0x3ff )
// Any bits? or3( a.
base(), a.
hi32() &
0x3ff ,a.
base() );
// High 32 bits are now in low 32 if ( a.
low32() &
0xFFFFFC00 ) {
// done? if( (a.
low32() >>
20) &
0xfff ) {
// Any bits set? if( (a.
low32() >>
10) &
0x3ff ) {
// Pad out the instruction sequence so it can be if (
lo32 &
0xFFFFFC00 ) {
or3(
G0,
value, d);
// setsw (this leaves upper 32 bits sign-extended) // (A negative value could be loaded in 2 insns with sethi/xor, // but it would take a more complex relocation.) // %%% End of moved six set instructions. // (Matcher::isSimpleConstant64 knows about the following optimizations.) // compute size in bytes of sparc frame, given // save_frame: given number of "extra" words in frame, // issue approp. save instruction (p 200, v8 manual) // The trick here is to use precisely the same memory word // that trap handlers also use to save the register. // This word cannot be used for any other purpose, but // it works fine to save the register's value, whether or not // an interrupt flushes register windows at any given moment! for ( j = 0; j <
8; ++j )
if ( j !=
6 ) s->
print_cr(
"i%d = 0x%.16lx", j, i[j]);
else s->
print_cr(
"fp = 0x%.16lx", i[j]);
for ( j = 0; j <
8; ++j )
for ( j = 0; j <
8; ++j )
if ( j !=
6 ) s->
print_cr(
"o%d = 0x%.16lx", j, o[j]);
else s->
print_cr(
"sp = 0x%.16lx", o[j]);
for ( j = 0; j <
8; ++j )
// print out floats with compression // and doubles (evens only) for (i = 0; i <
8; ++i) {
for (i = 0; i <
32; ++i) {
for (
int i =
1; i <
8; ++i) {
for (
int j = 0; j <
32; ++j) {
// pushes double TOS element of FPU stack on CPU stack; pops from FPU stack // %%%%%% need to implement this // pops double TOS element from CPU stack and pushes on FPU stack // %%%%%% need to implement this // %%%%%% need to implement this // plausibility check for oops if (
reg ==
G0)
return;
// always NULL, which is always an oop // Call indirectly to solve generation ordering problem // Make some space on stack above the current register window. // Enough to hold 8 64-bit registers. // Save some 64-bit registers; a normal 'save' chops the heads off // of 64-bit longs in the 32-bit build. mov(
reg,
O0);
// Move arg into O0; arg might be in O7 which is about to be crushed // Load address to call to into O7 // Register call to verify_oop_subroutine // plausibility check for oops // Call indirectly to solve generation ordering problem // Make some space on stack above the current register window. // Enough to hold 8 64-bit registers. // Save some 64-bit registers; a normal 'save' chops the heads off // of 64-bit longs in the 32-bit build. // Load address to call to into O7 // Register call to verify_oop_subroutine // This macro is expanded just once; it creates shared code. Contract: // receives an oop in O0. Must restore O0 & O7 from TLS. Must not smash ANY // registers, including flags. May not use a register 'save', as this blows // the high bits of the O-regs if they contain Long values. Acts as a 'leaf' // O0 and O7 were saved already (O0 in O0's TLS home, O7 in O5's TLS home). // O0 is now the oop to be checked. O7 is the return address. // Save some more registers for temps. {
// count number of verifies // mark lower end of faulting range // We can't check the mark oop because it could be in the process of // locking or unlocking while this is running. // assert((obj & oop_mask) == oop_bits); // the null_or_fail case is useless; must test for null separately // Check the klassOop of this object for being in the right area of memory. // Cannot do the load in the delay above slot in case O0 is null // assert((klass & klass_mask) == klass_bits); // Check the klass's klass // mark upper end of faulting range //----------------------- // Restore prior 64-bit registers retl();
// Leaf return; restore prior O7 in delay slot //----------------------- //----------------------- // stop_subroutine expects message pointer in I1. // Restore prior 64-bit registers // factor long stop-sequence into subroutine to save space // call indirectly to solve generation ordering problem // save frame first to get O7 for return address // add one word to size in case struct is odd number of words long // It must be doubleword-aligned for storing doubles into it. // stop_subroutine expects message pointer in I1. // factor long stop-sequence into subroutine to save space // call indirectly to solve generation ordering problem // unnoticeable results in the output files. // restore(); done in callee to save space! // We must be able to turn interactive prompting off // in order to run automated test scripts on the VM // Use the flag ShowMessageBoxOnError char* b =
new char[
1024];
// for the sake of the debugger, stick a PC on the current frame // (this assumes that the caller has performed an extra "save") save_frame();
// one more save to free up another O7 register mov(
I0,
O1);
// addr of reg save area // We expect pointer to message in I1. Caller must set it up in O1 // In order to get locks work, we need to fake a in_VM state ::
tty->
print_cr(
"=============== DEBUG MESSAGE: %s ================\n",
msg);
// --------------------------------------------------------- // compares register with zero and branches. NOT FOR USE WITH 64-bit POINTERS // Compares a pointer register with zero and branches on null. // Does a test & branch on 32-bit systems and a register-branch on 64-bit. // instruction sequences factored across compiler & interpreter // And, with an unsigned comparison, it does not matter if the numbers // E.g., -2 cmp -1: the low parts are 0xfffffffe and 0xffffffff. // The second one is bigger (unsignedly). // Other notes: The first move in each triplet can be unconditional // (and therefore probably prefetchable). // And the equals case for the high part does not need testing, // since that triplet is reached only after finding the high halves differ. "register alias checks");
// This code can be optimized to use the 64 bit shifts in V9. // Here we use the 32 bit shifts. // shift < 32 bits, Ralt_count = Rcount-31 // We get the transfer bits by shifting right by 32-count the low // register. This is done by shifting right by 31-count and then by one // more to take care of the special (rare) case where count is zero // (shifting by 32 would not work). // The order of the next two instructions is critical in the case where // Rin and Rout are the same and should not be reversed. // shift >= 32 bits, Ralt_count = Rcount-32 "register alias checks");
// This code can be optimized to use the 64 bit shifts in V9. // Here we use the 32 bit shifts. // shift < 32 bits, Ralt_count = Rcount-31 // We get the transfer bits by shifting left by 32-count the high // register. This is done by shifting left by 31-count and then by one // more to take care of the special (rare) case where count is zero // (shifting by 32 would not work). // The order of the next two instructions is critical in the case where // Rin and Rout are the same and should not be reversed. // shift >= 32 bits, Ralt_count = Rcount-32 "register alias checks");
// This code can be optimized to use the 64 bit shifts in V9. // Here we use the 32 bit shifts. // shift < 32 bits, Ralt_count = Rcount-31 // We get the transfer bits by shifting left by 32-count the high // register. This is done by shifting left by 31-count and then by one // more to take care of the special (rare) case where count is zero // (shifting by 32 would not work). // The order of the next two instructions is critical in the case where // Rin and Rout are the same and should not be reversed. // shift >= 32 bits, Ralt_count = Rcount-32 //fb(lt, true, pn, done); delayed()->set( -1, Rresult ); // number() does a sanity check on the alignment. // number() does a sanity check on the alignment. // number() does a sanity check on the alignment. // number() does a sanity check on the alignment. // number() does a sanity check on the alignment. // number() does a sanity check on the alignment. // Use for 64 bit operation. // store ptr_reg as the new top value // [RGV] This routine does not handle 64 bit operations. // use casx_under_lock() or casx directly!!! // store ptr_reg as the new top value // If the register is not an out nor global, it is not visible // after the save. Allocate a register for it, save its // value in the register save area (the save may not flush // registers to the save area). // Initialize yield counter Untested(
"Need to verify global reg consistancy");
// Save the regs and make space for a C call // yes, got lock. do we have the same top? // See whether the lock is currently biased toward our thread and // whether the epoch is still valid // Note that the runtime guarantees sufficient alignment of JavaThread // pointers to allow age to be placed into low bits // Reload mark_reg as we may need it later // At this point we know that the header has the bias pattern and // that we are not the bias owner in the current epoch. We need to // figure out more details about the state of the header in order to // know what operations can be legally performed on the object's // If the low three bits in the xor result aren't clear, that means // the prototype header is no longer biased and we have to revoke // the bias on this object. // Biasing is still enabled for this data type. See whether the // epoch of the current bias is still valid, meaning that the epoch // bits of the mark word are equal to the epoch bits of the // prototype header. (Note that the prototype header's epoch bits // only change at a safepoint.) If not, attempt to rebias the object // toward the current thread. Note that we must be absolutely sure // that the current epoch is invalid in order to do this because // otherwise the manipulations it performs on the mark word are // The epoch of the current bias is still valid but we know nothing // about the owner; it might be set or it might be clear. Try to // acquire the bias of the object using an atomic operation. If this // fails we will go in to the runtime to revoke the object's bias. // Note that we first construct the presumed unbiased header so we // don't accidentally blow away another thread's valid bias. // If the biasing toward our thread failed, this means that // another thread succeeded in biasing it toward itself and we // need to revoke that bias. The revocation will occur in the // interpreter runtime in the slow case. 
// At this point we know the epoch has expired, meaning that the // current "bias owner", if any, is actually invalid. Under these // circumstances _only_, we are allowed to use the current header's // value as the comparison value when doing the cas to acquire the // bias in the current epoch. In other words, we allow transfer of // the bias from one thread to another directly in this situation. // FIXME: due to a lack of registers we currently blow away the age // bits in this situation. Should attempt to preserve them. // If the biasing toward our thread failed, this means that // another thread succeeded in biasing it toward itself and we // need to revoke that bias. The revocation will occur in the // interpreter runtime in the slow case. // The prototype mark in the klass doesn't have the bias bit set any // more, indicating that objects of this data type are not supposed // to be biased any more. We are going to try to reset the mark of // this object to the prototype value and fall through to the // CAS-based locking scheme. Note that if our CAS fails, it means // that another thread raced us for the privilege of revoking the // bias of this particular object, so it's okay to continue in the // FIXME: due to a lack of registers we currently blow away the age // bits in this situation. Should attempt to preserve them. // Fall through to the normal CAS-based lock, because no matter what // the result of the above CAS, some thread must have succeeded in // removing the bias bit from the object's header. // Check for biased locking unlock case, which is a no-op // Note: we do not have to check the thread ID for two reasons. // First, the interpreter checks for IllegalMonitorStateException at // a higher level. Second, if the bias was revoked while we held the // lock, the object could not be rebiased toward another thread, so // the bias bit would be clear. // CASN -- 32-64 bit switch hitter similar to the synthetic CASN provided by // Solaris/SPARC's "as". 
Another apt name would be cas_ptr() // compiler_lock_object() and compiler_unlock_object() are direct transliterations // of i486.ad fast_lock() and fast_unlock(). See those methods for detailed comments. // The code could be tightened up considerably. // box->dhw disposition - post-conditions at DONE_LABEL. // - Successful inflated lock: box->dhw != 0. // Any non-zero value suffices. // Consider G2_thread, rsp, boxReg, or unused_mark() // - Successful Stack-lock: box->dhw == mark. // box->dhw must contain the displaced mark word value // - Failure -- icc.ZFlag == 0 and box->dhw is undefined. // The slow-path fast_enter() and slow_enter() operators // are responsible for setting box->dhw = NonZero (typically ::unused_mark). // - Biased: box->dhw is undefined // SPARC refworkload performance - specifically jetstream and scimark - are // extremely sensitive to the size of the code emitted by compiler_lock_object // and compiler_unlock_object. Critically, the key factor is code size, not path // length. (Simply experiments to pad CLO with unexecuted NOPs demonstrte the // Fetch object's markword // Save Rbox in Rscratch to be used for the cas operation // set Rmark to markOop | markOopDesc::unlocked_value // Initialize the box. (Must happen before we update the object mark!) // compare object markOop with Rmark and if equal exchange Rscratch with object markOop // if compare/exchange succeeded we found an unlocked object and we now have locked it // we did not find an unlocked object so see if this is a recursive case // sub(Rscratch, SP, Rscratch); // Triage: biased, stack-locked, neutral, inflated // Invariant: if control reaches this point in the emitted stream // then Rmark has not been modified. // Store mark into displaced mark field in the on-stack basic-lock "box" // Critically, this must happen before the CAS // Maximize the ST-CAS distance to minimize the ST-before-CAS penalty. // Try stack-lock acquisition. 
// Beware: the 1st instruction is in a delay slot // Stack-lock attempt failed - check for recursive stack-lock. // See the comments below about how we might remove this case. // If m->owner != null goto IsLocked // Pessimistic form: Test-and-CAS vs CAS // The optimistic form avoids RTS->RTO cache line upgrades. // m->owner == null : it's unlocked. // Try to CAS m->owner from null to Self // Invariant: if we acquire the lock then _recursions should be 0. // Intentional fall-through into done // Aggressively avoid the Store-before-CAS penalty // Defer the store into box->dhw until after the CAS // Anticipate CAS -- Avoid RTS->RTO upgrade // prefetch (mark_addr, Assembler::severalWritesAndPossiblyReads) ; // Triage: biased, stack-locked, neutral, inflated // Invariant: if control reaches this point in the emitted stream // then Rmark has not been modified. delayed()->
// Beware - dangling delay-slot // Try stack-lock acquisition. // Transiently install BUSY (0) encoding in the mark word. // if the CAS of 0 into the mark was successful then we execute: // ST box->dhw = mark -- save fetched mark in on-stack basiclock box // ST obj->mark = box -- overwrite transient 0 value // This presumes TSO, of course. // prefetch (mark_addr, Assembler::severalWritesAndPossiblyReads) ; // Stack-lock attempt failed - check for recursive stack-lock. // Tests show that we can remove the recursive case with no impact // on refworkload 0.83. If we need to reduce the size of the code // emitted by compiler_lock_object() the recursive case is perfect // A more extreme idea is to always inflate on stack-lock recursion. // This lets us eliminate the recursive checks in compiler_lock_object // and compiler_unlock_object and the (box->dhw == 0) encoding. // and showed a performance *increase*. In the same experiment I eliminated // the fast-path stack-lock code from the interpreter and always passed // RScratch contains the fetched obj->mark value from the failed CASN. // Accounting needs the Rscratch register // If m->owner != null goto IsLocked // Pessimistic form avoids futile (doomed) CAS attempts // The optimistic form avoids RTS->RTO cache line upgrades. // m->owner == null : it's unlocked. // Try to CAS m->owner from null to Self // Invariant: if we acquire the lock then _recursions should be 0. // ST box->displaced_header = NonZero. // Any non-zero value suffices: // unused_mark(), G2_thread, RBox, RScratch, rsp, etc. // Intentional fall-through into done // Test first if it is a fast recursive unlock // Check if it is still a light weight lock, this is is true if we see // the stack address of the basicLock in the markOop of the object // Beware ... If the aggregate size of the code emitted by CLO and CUO is // is too large performance rolls abruptly off a cliff. 
// This could be related to inlining policies, code cache management, or // TODO: eliminate redundant LDs of obj->mark delayed()->
nop() ;
// consider: relocate fetch of mark, above, into this DS // Conceptually we need a #loadstore|#storestore "release" MEMBAR before // the ST of 0 into _owner which releases the lock. This prevents loads // and stores within the critical section from reordering (floating) // past the store that releases the lock. But TSO is a strong memory model // and that particular flavor of barrier is a noop, so we can safely elide it. // Note that we use 1-0 locking by default for the inflated case. We // close the resultant (and rare) race by having contented threads in // monitorenter periodically poll _owner. // invert icc.zf and goto done // Consider: we could replace the expensive CAS in the exit // path with a simple ST of the displaced mark value fetched from // the on-stack basiclock box. That admits a race where a thread T2 // in the slow lock path -- inflating with monitor M -- could race a // thread T1 in the fast unlock path, resulting in a missed wakeup for T2. // More precisely T1 in the stack-lock unlock path could "stomp" the // inflated mark value M installed by T2, resulting in an orphan // object monitor M and T2 becoming stranded. We can remedy that situation // by having T2 periodically poll the object's mark word using timed wait // operations. If T2 discovers that a stomp has occurred it vacates // the monitor M and wakes any other threads stranded on the now-orphan M. // In addition the monitor scavenger, which performs deflation, // would also need to check for orpan monitors and stranded threads. // Finally, inflation is also used when T2 needs to assign a hashCode // to O and O is stack-locked by T1. The "stomp" race could cause // an assigned hashCode value to be lost. We can avoid that condition // and provide the necessary hashCode stability invariants by ensuring // that hashCode generation is idempotent between copying GCs. 
// For example we could compute the hashCode of an object O as // O's heap address XOR some high quality RNG value that is refreshed // at GC-time. The monitor scavenger would install the hashCode // found in any orphan monitors. Again, the mechanism admits a // lost-update "stomp" WAW race but detects and recovers as needed. // A prototype implementation showed excellent results, although // the scavenger and timeout code was rather involved. // Intentional fall through into done ... // %%%%% need to implement this // %%%%% need to implement this // %%%%% need to implement this // %%%%% need to implement this // %%%%% need to implement this // %%%%% need to implement this // %%%%% need to implement this // %%%%% need to implement this stop(
"assert(top >= start)");
stop(
"assert(top <= end)");
Register obj,
// result: pointer to object after successful allocation // make sure arguments make sense // note: we need both top & top_addr! // make sure eden top is properly aligned stop(
"eden top is not properly aligned");
// size is unknown at compile time // size is known at compile time // Compare obj with the value at top_addr; if still equal, swap the value of // end with the value at top_addr. If not equal, read the value at top_addr // if someone beat us on the allocation, try again, otherwise continue // make sure eden top is properly aligned stop(
"eden top is not properly aligned");
Register obj,
// result: pointer to object after successful allocation // make sure arguments make sense // calculate amount of free space // calculate the new top pointer // make sure new free pointer is properly aligned stop(
"updated TLAB free is not properly aligned");
// update the tlab top pointer // No allocation in the shared eden. // calculate amount of free space // Retain tlab and allocate object in shared space if // the amount free in the tlab is too large to discard. // increment waste limit to prevent getting stuck on this slow path // increment number of slow_allocations // increment number of refills // if tlab is currently allocated (top or end != null) then // fill [top, end + alignment_reserve) with array object // set klass to intArrayKlass // refill the tlab with an eden allocation // check that tlab_size (t1) is still valid stop(
"assert(t1 == tlab_size)");
// Note some conditions are synonyms for others // Writes to stack successive pages until offset reached to check for // stack overflow + shadow pages. This clobbers tsp and scratch. // Use stack pointer in temp stack pointer // Bang stack for total size given plus stack shadow page size. // Bang one page at a time because a large size can overflow yellow and // red zones (the bang will fail but stack overflow handling can't tell that // it was a stack overflow bang vs a regular segv). // Bang down shadow pages too. // The -1 because we already subtracted 1 page. // The number of bytes in this code is used by // MachCallDynamicJavaNode::ret_addr_offset() // if this changes, change that. // ??? figure out src vs. dst! assert(
s1 != d,
"not enough registers");
// Zero out entire klass field first. assert(
s1 != d &&
s2 != d,
"not enough registers");
assert(
s1 != d,
"not enough registers");
// optimize for frequent case src == dst // could be moved before branch, and annulate delay, // but may add some unneeded work decoding null // call indirectly to solve generation ordering problem