/* ppcasm.h revision 7c478bd95313f5f23a4c958a745db2134aa03244 */
/*
* Copyright (c) 1999 by Sun Microsystems, Inc.
* All rights reserved.
*/
#ifndef PPCASM_H
#define PPCASM_H
/*
* Cylink Corporation (c) 1998
*
* This software is licensed by Cylink to the Internet Software Consortium to
* promote implementation of royalty free public key cryptography within IETF
* standards. Cylink wishes to expressly thank the contributions of Dr.
* Martin Hellman, Whitfield Diffie, Ralph Merkle and Stanford University for
* their contributions to Internet Security. In accordance with the terms of
* this license, ISC is authorized to distribute and sublicense this software
* for the practice of IETF standards.
*
* The software includes BigNum, written by Colin Plumb and licensed by Philip
* R. Zimmermann for royalty free use and distribution with Cylink's
* software. Use of BigNum as a stand alone product or component is
* specifically prohibited.
*
* Disclaimer of All Warranties. THIS SOFTWARE IS BEING PROVIDED "AS IS",
* WITHOUT ANY EXPRESSED OR IMPLIED WARRANTY OF ANY KIND WHATSOEVER. IN
* PARTICULAR, WITHOUT LIMITATION ON THE GENERALITY OF THE FOREGOING, CYLINK
* MAKES NO REPRESENTATION OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR
* PURPOSE.
*
* Cylink or its representatives shall not be liable for tort, indirect,
* special or consequential damages such as loss of profits or loss of
* goodwill from the use or inability to use the software for any purpose or
* for any reason whatsoever.
*
* EXPORT LAW: Export of the Foundations Suite may be subject to compliance
* with the rules and regulations promulgated from time to time by the Bureau
* of Export Administration, United States Department of Commerce, which
* restrict the export and re-export of certain products and technical data.
* If the export of the Foundations Suite is controlled under such rules and
* regulations, then the Foundations Suite shall not be exported or
* re-exported, directly or indirectly, (a) without all export or re-export
* licenses and governmental approvals required by any applicable laws, or (b)
* in violation of any applicable prohibition against the export or re-export
* of any part of the Foundations Suite. All export licenses for software
* containing the Foundations Suite are the sole responsibility of the licensee.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
/*
* A PowerPC assembler in the C preprocessor.
* This assumes that ints are 32 bits, and uses them for the values.
*
* An assembly-language routine is simply an array of unsigned ints,
* initialized with the macros defined here.
*
* In the PowerPC, a generic function pointer does *not* point to the
* first word of code, but to a two (or possibly more) word "transition
* vector." The first word of the TV points to the function's code.
* The second word is the function's TOC (Table Of Contents) pointer,
* which is loaded into r2. The function's global variables are
* accessed via the TOC pointed to by r2. TOC pointers are changed,
* for example, when a dynamically linked library is called, so the
* library can have private global variables.
*
* Saving r2 and reloading r2 each function call is a hassle that
* I'd really rather avoid, since a lot of useful assembly language routines
* can be written without global variables at all, so they don't need a TOC
* pointer. But I haven't figured out how to persuade CodeWarrior 7 to
* generate an intra-TOC call to an array. (CodeWarrior 8 supports
* PowerPC asm, which obviates the need to do the cast-to-function-pointer
* trick, which obviates the need for cross-TOC calls.)
*
* The basic PowerPC calling conventions for integers are:
* r0 - scratch. May be modified by function calls.
* r1 - stack pointer. Must be preserved across function calls.
* See IMPORTANT notes on stack frame format below.
* This must *ALWAYS*, at every instruction boundary, be 16-byte
* aligned and point to a valid stack frame. If a procedure
* needs to create a stack frame, the recommended way is to do:
* stwu r1,-frame_size(r1)
* and on exit, recover with one of:
* addi r1,r1,frame_size, OR
* lwz r1,0(r1)
* r2 - TOC pointer. Points to the current table of contents.
* Must be preserved across function calls.
* r3 - First argument register and return value register.
* Arguments are passed in r3 through r10, and values returned in
* r3 through r6, as needed. (Usually only r3 for single word.)
* r4-r10 - More argument registers
* r11 - Scratch, may be modified by function calls.
* On entry to indirect function calls, this points to the
* transition vector, and additional words may be loaded
* at offsets from it. Some conventions use r12 instead.
* r12 - Scratch, may be modified by function calls.
* r13-r31 - Callee-save registers, may not be modified by function
* calls.
* The LR, CTR and XER may be modified by function calls, as may the MQ
* register, on those processors for which it is implemented.
* CR fields 0, 1, 5, 6 and 7 are scratch and may be modified by function
* calls. CR fields 2, 3 and 4 must be preserved across function calls.
*
* Stack frame format - READ
*
* r1 points to a stack frame, which must *ALWAYS*, meaning after each and
* every instruction, without exception, point to a valid 16-byte-aligned
* stack frame, defined as follows:
* - The 296 bytes below r1 (from -296(r1) to -1(r1)) are the so-called Red
* Zone reserved for leaf procedures, which may use it without allocating
* a stack frame and without decrementing r1. The size comes from the room
* needed to store all the callee-save registers: 19 64-bit integer registers
* and 18 64-bit floating-point registers. (18+19)*8 = 296. So any
* procedure can save all the registers it needs to save before creating
* a stack frame and moving r1.
* The bytes at -297(r1) and below may be used by interrupt and exception
* handlers *at any time*. Anything placed there may disappear before
* the next instruction.
* The word at 0(r1) is the previous r1, and so on in a linked list.
* This is the minimum needed to be a valid stack frame, but some other
* offsets from r1 are preallocated by the calling procedure for the called
* procedure's use. These are:
* Offset 0: Link to previous stack frame - saved r1, if the called
* procedure alters it.
* Offset 4: Saved CR, if the called procedure alters the callee-save
* fields. There's no important reason to save it here,
* but the space is reserved and you might as well use it
* for its intended purpose unless you have good reason to
* do otherwise. (This may help some debuggers.)
* Offset 8: Saved LR, if the called procedure needs to save it for
* later function return. Saving the LR here helps a debugger
* track the chain of return addresses on the stack.
* Note that a called procedure does not need to preserve the
* LR for its caller's sake, but it usually wants to preserve
* the value for its own sake until it finishes and it's
* time to return. At that point, this is usually loaded
* back into the LR and the branch accomplished with BLR.
* However, if you want to be perverse, you could load it
* into the CTR and use BCTR instead.
* Offset 12: Reserved to compiler. I can't find what this is for.
* Offset 16: Reserved to compiler. I can't find what this is for.
* Offset 20: Saved TOC pointer. In a cross-TOC call, the old TOC (r2)
* is saved here before r2 is loaded with the new TOC value.
* Again, it's not important to use this slot for this, but
* you might as well.
* Beginning at offset 24 is the argument area. This area is at least 8 words
* (32 bytes; I don't know what happens with 64 bits) long, and may be longer,
* up to the length of the longest argument list in a function called by
* the function which allocated this stack frame. Generally, arguments
* to functions are passed in registers, but if those functions notice
* the address of the arguments being taken, the registers are stored
* into the space reserved for them in this area and then used from memory.
* Additional arguments that will not fit into registers are also stored
* here. Variadic functions (like printf) generally start by saving
* all the integer argument registers from the "..." onwards to this space.
* For that reason, the space must be large enough to store all the argument
* registers, even if they're never used.
* (It could probably be safely shrunk if you're not calling any variadic
* functions, but be careful!)
*
* Offsets above that are private to the calling function and shouldn't
* be messed with. Generally, what appears there is locals, then saved
* registers.
*
*
* The floating-point instruction set isn't implemented yet (I'm too
* lazy, as I don't need it yet), but for when it is, the register
* usage convention is:
* FPSCR - Scratch, except for floating point exception enable fields,
* which should only be modified by functions defined to do so.
* fr0 - scratch
* fr1 - first floating point parameter and return value, scratch
* fr2 - second floating point parameter and return value (if needed), scratch
* fr3 - third floating point parameter and return value (if needed), scratch
* fr4 - fourth floating point parameter and return value (if needed), scratch
* fr5-fr13 - More floating point argument registers, scratch
* fr14-fr31 - Callee-save registers, may not be modified across a function call
*
* Complex values store the real part in the lower-numbered register of a pair.
* When mixing floating-point and integer arguments, reserve space (one register
* for single-precision, two for double-precision values) in the integer
* argument list for the floating-point values. Those integer registers
* generally have undefined values, UNLESS there is no prototype for the call,
* in which case they should contain a copy of the floating-point value's
* bit pattern to cope with weird software.
* If the floating point arguments go past the end of the integer registers,
* they are stored in the argument area as well as being passed in here.
*
* After the argument area comes the calling function's private storage.
* Typically, there are locals, followed by saved GP registers, followed
* by saved FP registers.
*
* Suggested instruction for allocating a stack frame:
* stwu r1,-frame_size(r1)
* Suggested instructions for deallocating a stack frame:
* addi r1,r1,frame_size
* or
* lwz r1,0(r1)
* If frame_size is too big, you'll have to load the offset into a temp
* register, but be sure that r1 is updated atomically.
*
*
* Basic PowerPC instructions look like this:
*
* 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 3 3
* 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
* | Opcode | | | | | | | | | | | | | | | | | | | | | | | | | | |
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
*
* Branch instructions look like this:
*
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
* | Opcode | Branch offset |A|L|
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
*
* The L, or LK, or Link bit indicates that the return address for the
* branch should be copied to the link register (LR).
* The A, or AA, or absolute address bit, indicates that the address
* of the current instruction (NOTE: not next instruction!) should NOT
* be added to the branch offset; it is relative to address 0.
*
* Conditional branches look like this:
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
* | Opcode | BO | BI | Branch offset |A|L|
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
*
* The BI field specifies the condition bit of interest (from the CR).
* The BO field specifies what's interesting. You can branch on a
* combination of a bit of the condition register and --ctr, the CTR
* register. Two bits encode the branch condition to use:
* BRANCH IF
* 00--- = Bit BI is 0
* 01--- = Bit BI is 1
* 1z--- = don't care about bit BI (always true)
* AND
* --00- = --ctr != 0
* --01- = --ctr == 0
* --1z- = don't decrement ctr (always true)
* The last bit is used as a branch prediction bit. If set, it reverses
* the usual backward-branch-taken heuristic.
*
* y = branch prediction bit. z = unused, must be 0
* 0000y - branch if --ctr != 0 && BI == 0
* don't branch if --ctr == 0 || BI != 0
* 0001y - branch if --ctr == 0 && BI == 0
* don't branch if --ctr != 0 || BI != 0
* 001zy - branch if BI == 0
* don't branch if BI != 0
* 0100y - branch if --ctr != 0 && BI != 0
* don't branch if --ctr == 0 || BI == 0
* 0101y - branch if --ctr == 0 && BI != 0
* don't branch if --ctr != 0 || BI == 0
* 011zy - branch if BI != 0
* don't branch if BI == 0
* 1z00y - branch if --ctr != 0
* don't branch if --ctr == 0
* 1z01y - branch if --ctr == 0
* don't branch if --ctr != 0
* 1z1zz - branch always
* If y is 1, the usual branch prediction (usually not taken, taken for
* backwards branches with immediate offsets) is reversed.
*
* Instructions with 2 operands and a 16-bit immediate field look like this:
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
* | Opcode | D | A | 16-bit immediate value |
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
*
* Now, there are three variations of note. In some instructions, the 16-bit
* value is sign-extended. In others, it's zero-extended. These are noted
* below as "simm" (signed immediate) and "uimm", respectively. Also, which
* field is the destination and which is the source sometimes switches.
* Sometimes it's d = a OP imm, and sometimes it's a = s OP imm. In the
* latter cases, the "d" field is referred to as "s" ("source" instead of
* "destination"). These are logical and shift instructions. (Store also
* refers to the s register, but that's the source of the value to be stored.)
* The assembly mnemonics, however, always list the destination first,
* swapping the order in the instruction if necessary.
* Third, quite often, if r0 is specified for the source a, then the constant
* value 0 is used instead. Thus, r0 is of limited use - it can be used for
* some things, but not all.
*
* Instructions with three register operands look like this:
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
* | Opcode | D | A | B | Subopcode |C|
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
*
* For most of the instructions of interest the Opcode is 31 and the subopcode
* determines what the instruction does. For a few instructions (mostly loads
* and stores), if the A field is 0, the constant 0 is used. The "C"
* bit (also known as the "RC" bit) controls whether or not the condition
* codes are updated. If it is set (indicated by a "." suffix on the official
* PowerPC opcodes, and a "_" suffix on these macros), condition code register
* field 0 (for integer instructions; field 1 for floating point) is updated
* to reflect the result of the operation.
* Some arithmetic instructions use the most significant bit of the subopcode
* field as an overflow enable bit (o suffix).
*
* Then there are the rotate and mask instructions, which have 5 operands, and
* fill the subopcode field with 2 more 5-bit fields. See below for them.
*
* NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE
* These macros fully parenthesize their arguments, but are not themselves
* fully parenthesized. They are intended to be used for initializer lists,
* and if you want to do tricks with their numeric values, wrap them in
* parentheses.
*/
/* Unconditional branch (dest is 26 bits, +/- 2^25 bytes) */
/* Three-operand instructions */
/*
 * Encode an instruction with three register operands under major
 * opcode 31.
 *
 *   minor - subopcode selecting the operation
 *   d     - destination (or source "s") register number
 *   a, b  - operand register numbers
 *
 * Field placement follows the "| Opcode | D | A | B | Subopcode |C|"
 * layout described in the header comment of this file: 6-bit opcode in
 * the top bits, then three 5-bit register fields, then the subopcode
 * sitting just above the Rc ("C") bit, which is left clear here.
 *
 * NOTE(review): the replacement list was missing in this copy of the
 * file (the line continuation ran straight into the next #define, which
 * is not a valid macro body and left PPC_SUBO_ undefined).  It has been
 * reconstructed from the documented layout - confirm against upstream.
 * The expansion is parenthesized so it can also be used in expressions.
 */
#define PPC_TYPE31(minor,d,a,b) \
	(31u<<26 | (d)<<21 | (a)<<16 | (b)<<11 | (minor)<<1)

/*
 * Subtract spelled in terms of subtract-from: same destination, the two
 * source operands handed to PPC_SUBFO_ in exchanged order.
 */
#define PPC_SUBO_(d,b,a) PPC_SUBFO_(d,a,b)
/* Immediate-operand instructions. Take a 16-bit immediate operand */
/* Trap word immediate */
/* Integer arithmetic */
/* Conditional branch (dest is 16 bits, +/- 2^15 bytes) */
/* Logical operations */
/* Major number = 19 - condition register operations. d, a and b are CR bits */
/*
 * Encode a major-opcode-19 condition-register operation.
 *
 *   minor   - subopcode selecting the operation
 *   d, a, b - condition-register bit numbers (destination and the two
 *             source bits)
 *
 * NOTE(review): the replacement list was missing in this copy of the
 * file; the dangling line continuation swallowed the following comment,
 * so the macro silently expanded to nothing.  The body below reuses the
 * three-operand field layout described in the header comment of this
 * file with major opcode 19 - confirm against upstream.  The expansion
 * is parenthesized so it can also be used in expressions.
 */
#define PPC_TYPE19(minor,d,a,b) \
	(19u<<26 | (d)<<21 | (a)<<16 | (b)<<11 | (minor)<<1)
/* Indirect conditional branch */
/* Other */
/* Major number = 63 Floating-point operations (not implemented for now) */
/* Simplified Mnemonics */
/* Fabricate immediate subtract out of add negative */
/* Fabricate subtract out of subtract from */
/*
 * "Subtract" spellings layered on the matching "subtract from" macros.
 * Each one keeps the destination first and forwards the two source
 * operands in exchanged order; the "O" and "_" suffixes are simply
 * carried through to the PPC_SUBFC* macro of the same suffix.
 */
#define PPC_SUBC_(dst,first,second) PPC_SUBFC_(dst,second,first)
#define PPC_SUBCO(dst,first,second) PPC_SUBFCO(dst,second,first)
#define PPC_SUBCO_(dst,first,second) PPC_SUBFCO_(dst,second,first)
/* Messy compare bits omitted */
/* Shift and rotate omitted */
/* Branch coding omitted */
/*
 * Two-operand convenience form of PPC_CROR: the single source operand
 * is supplied as both inputs, so `from` appears twice in the expansion.
 */
#define PPC_CRMOVE(to,from) PPC_CROR(to,from,from)
/* Trap mnemonics omitted */
/* Mnemonics for user-accessible SPRs */
/* Recommended mnemonics */
#endif /* PPCASM_H */
/* 45678901234567890123456789012345678901234567890123456789012345678901234567 */