/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
#ifndef __R300_REG_H_
#define __R300_REG_H_
#ifdef __cplusplus
extern "C" {
#endif
/*
* Copyright (C) 2004-2005 Nicolai Haehnle et al.
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
/*
 * Bit offsets of two *_INIT_LAT fields; both are 0, i.e. each field starts
 * at the least significant bit of its register.
 * NOTE(review): the MC_MISC prefix suggests a memory controller register,
 * but the register address and field widths are not visible here - confirm.
 */
#define R300_MC_MISC__MC_CPR_INIT_LAT_SHIFT 0
#define R300_MC_MISC__MC_G3D0R_INIT_LAT_SHIFT 0
/*
* This file contains registers and constants for the R300. They have been
* found mostly by examining command buffers captured using glxtest, as well
* as by extrapolating some known registers and constants from the R200.
*
* I am fairly certain that they are correct unless stated otherwise in
* comments.
*/
// This register is written directly and also starts data
// section in many 3d CP_PACKET3's
/* Bit offset of the PRIM_TYPE field; the field starts at bit 0. */
#define R300_VAP_VF_CNTL__PRIM_TYPE__SHIFT 0
/* PRIM_TYPE encoding 0, already shifted into place (no primitive, per name). */
#define R300_VAP_VF_CNTL__PRIM_NONE (0<<0)
/* State based - direct writes to registers trigger vertex generation */
/* I don't think I saw these three used.. */
/* index size - when not set the indices are assumed to be 16 bit */
/* number of vertices */
/* BEGIN: Wild guesses */
/* END */
/* BEGIN: Vertex data assembly - lots of uncertainties */
/* gap */
// Where do we get our vertex data?
//
// Vertex data comes either from immediate mode registers or from
// vertex arrays.
// There appears to be no mixed mode (though we can force the pitch of
// vertex arrays to 0, effectively reusing the same element over and over
// again).
//
// Immediate mode is controlled by the INPUT_CNTL registers. I am not sure
// if these registers influence vertex array processing.
//
// Vertex arrays are controlled via the 3D_LOAD_VBPNTR packet3.
//
// In both cases, vertex attributes are then passed through INPUT_ROUTE.
// Beginning with INPUT_ROUTE_0_0 is a list of WORDs that route vertex data
// into the vertex processor's input registers.
// The first word routes the first input, the second word the second, etc.
// The corresponding input is routed into the register with the given index.
// The list is ended by a word with INPUT_ROUTE_END set.
//
// Always set COMPONENTS_4 in immediate mode.
/*
 * Component-count encoding for an INPUT_ROUTE word: value 0 at bit 0
 * (one component, per the name).
 */
#define R300_INPUT_ROUTE_COMPONENTS_1 (0 << 0)
/* gap */
// Notes:
// - always set up to produce at least two attributes:
// if vertex program uses only position, fglrx will set normal, too
// - INPUT_CNTL_0_COLOR and INPUT_CNTL_COLOR bits are always equal
/* gap */
// Words parallel to INPUT_ROUTE_0; All words that are active in INPUT_ROUTE_0
// are set to a swizzling bit pattern, other words are 0.
//
// In immediate mode, the pattern is always set to xyzw. In vertex array
// mode, the swizzling pattern is e.g. used to set zw components in texture
// coordinates with only two components
/* Swizzle selector encoding 0 (selects the X component, per the name). */
#define R300_INPUT_ROUTE_SELECT_X 0
/* Bit offset of the X swizzle field; the field starts at bit 0. */
#define R300_INPUT_ROUTE_X_SHIFT 0
/* END */
/* gap */
// BEGIN: Upload vertex program and data
// The programmable vertex shader unit has a memory bank of unknown size
// that can be written to in 16 byte units by writing the address into
// UPLOAD_ADDRESS, followed by data in UPLOAD_DATA (multiples of 4 DWORDs).
//
// Pointers into the memory bank are always in multiples of 16 bytes.
//
// The memory bank is divided into areas with fixed meaning.
//
// Starting at address UPLOAD_PROGRAM: Vertex program instructions.
// Native limits reported by drivers from ATI suggest size 256 (i.e. 4KB),
// whereas the difference between known addresses suggests size 512.
//
// Starting at address UPLOAD_PARAMETERS: Vertex program parameters.
// Native reported limits and the VPI layout suggest size 256, whereas
// difference between known addresses suggests size 512.
//
// At address UPLOAD_POINTSIZE is a vector (0, 0, ps, 0), where ps is the
// floating point pointsize. The exact purpose of this state is uncertain,
// as there is also the R300_RE_POINTSIZE register.
//
// Multiple vertex programs and parameter sets can be loaded at once,
// which could explain the size discrepancy.
/* gap */
/* END */
/* gap */
/*
* I do not know the purpose of this register. However, I do know that
* it is set to 221C_CLEAR for clear operations and to 221C_NORMAL
* for normal rendering.
*/
/* gap */
/*
* Sometimes, END_OF_PKT and 0x2284=0 are the only commands sent between
* rendering commands and overwriting vertex program parameters.
* Therefore, I suspect writing zero to 0x2284 synchronizes the engine and
* avoids bugs caused by still running shaders reading bad data from memory.
*/
/* Absolutely no clue what this register is about. */
/* gap */
/*
* Addresses are relative to the vertex program instruction area of the
* memory bank. PROGRAM_END points to the last instruction of the active
* program
*
* The meaning of the two UNKNOWN fields is obviously not known. However,
* experiments so far have shown that both *must* point to an instruction
* inside the vertex program, otherwise the GPU locks up.
* fglrx usually sets CNTL_3_UNKNOWN to the end of the program and
* CNTL_1_UNKNOWN points to the instruction where the last write to position
* takes place. Most likely this is used to ignore the rest of the program
* in cases where a group of verts isn't visible.
* For some reason this "section" sometimes also accepts other instructions
* that have no relationship with position calculations.
*/
/* Bit offset of the PROGRAM_START field; the field starts at bit 0. */
#define R300_PVS_CNTL_1_PROGRAM_START_SHIFT 0
/* Addresses are relative to the vertex program parameters area. */
/* Bit offset of the PARAM_OFFSET field; the field starts at bit 0. */
#define R300_PVS_CNTL_2_PARAM_OFFSET_SHIFT 0
/* Bit offset of the PROGRAM_UNKNOWN2 field; the field starts at bit 0. */
#define R300_PVS_CNTL_3_PROGRAM_UNKNOWN2_SHIFT 0
// The entire range from 0x2300 to 0x2AC inclusive seems to be used for
// immediate vertices
/* gap */
/*
* These are values from r300_reg/r300_reg.h - they are known to
* be correct and are here so we can use one register file instead
* of several
* - Vladimir
*/
// each of the following is 3 bits wide, specifies number
// of components
/*
* UNK30 seems to enable point to quad transformation on
* textures (or something closely related to that). This bit
* is rather fatal for the time being due to shortcomings on the pixel
* shader side
*/
/* each of the following is 2 bits wide */
/*
 * Field encoding 0.
 * NOTE(review): the name suggests a texture-coordinate "replicate" mode, but
 * the owning register and bit position are not visible here - confirm.
 */
#define R300_GB_TEX_REPLICATE 0
/* MSPOS - positions for multisample antialiasing (?) */
/* shifts - each of the fields is 4 bits */
/* Bit offset of the MS_X0 field; the field starts at bit 0. */
#define R300_GB_MSPOS0__MS_X0_SHIFT 0
/* Bit offset of the MS_X3 field; the field starts at bit 0. */
#define R300_GB_MSPOS1__MS_X3_SHIFT 0
/*
 * Zero-valued field encodings.
 * NOTE(review): the owning registers and bit positions are not visible
 * here; the meanings (pipe count for RV300, tile size 8, super tile A,
 * 1/12 subpixel precision) are read off the names only - confirm.
 */
#define R300_GB_TILE_PIPE_COUNT_RV300 0
#define R300_GB_TILE_SIZE_8 0
#define R300_GB_SUPER_TILE_A 0
#define R300_GB_SUBPIXEL_1_12 0
/* each of the following is 2 bits wide */
/* FIFO size encoding 0 (the name suggests a size of 32). */
#define R300_GB_FIFO_SIZE_32 0
/* Bit offset of the SC_IFIFO size field; the field starts at bit 0. */
#define R300_SC_IFIFO_SIZE_SHIFT 0
// the following use the same constants as above, but the meaning
// is times 2 (i.e. instead of 32 words it means 64)
/* watermarks, 3 bits wide */
/*
 * Zero-valued selector encodings.
 * NOTE(review): owning registers and bit positions are not visible here;
 * the meanings (fog source, depth source, W source, 2 AA subsamples) are
 * read off the names only - confirm.
 */
#define R300_GB_FOG_SELECT_C0A 0
#define R300_GB_DEPTH_SELECT_Z 0
#define R300_GB_W_SELECT_1_W 0
#define R300_AA_SUBSAMPLES_2 0
/* END */
/* gap */
/* Zero to flush caches. */
/* The upper enable bits are guessed, based on fglrx reported limits. */
// The pointsize is given in multiples of 6. The pointsize can be
// enormous: Clear() renders a single point that fills the entire
// framebuffer.
/* Bit offset of the Y point size field; the field starts at bit 0. */
#define R300_POINTSIZE_Y_SHIFT 0
/*
* The line width is given in multiples of 6.
* In default mode lines are classified as vertical lines.
* HO: horizontal
* VE: vertical or horizontal
* HO & VE: no classification
*/
/* Bit offset of the line width field; the field starts at bit 0. */
#define R300_LINESIZE_SHIFT 0
/* Some sort of scale or clamp value for texcoordless textures. */
/* Dangerous */
/*
 * Encoding 0 at bit 0 for the front and back variants.
 * NOTE(review): presumably the "point" polygon mode for front/back faces;
 * both expand to the same value here - confirm against the register layout.
 */
#define R300_PM_FRONT_POINT (0 << 0)
#define R300_PM_BACK_POINT (0 << 0)
/*
* Not sure why there are duplicate of factor and constant values.
* My best guess so far is that there are separate zbiases for test
* and write.
* Ordering might be wrong.
* Some of the tests indicate that fgl has a fallback implementation
* of zbias via pixel shaders.
*/
/*
* This register needs to be set to (1<<1) for RV350 to correctly
* perform depth test (see --vb-triangles in r300_demo)
* Don't know about other chips. - Vladimir
* This is set to 3 when GL_POLYGON_OFFSET_FILL is on.
* My guess is that there are two bits for each zbias
* primitive (FILL, LINE, POINT).
* One to enable depth test and one for depth write.
* Yet this doesn't explain why depth writes work ...
*/
// BEGIN: Rasterization / Interpolators - many guesses
// 0_UNKNOWN_18 has always been set except for clear operations.
// TC_CNT is the number of incoming texture coordinate sets (i.e. it depends
// on the vertex program, *not* the fragment program)
/* number of color interpolators used */
/* Guess: RS_CNTL_1 holds the index of the highest used RS_ROUTE_n register. */
/* gap */
// Only used for texture coordinates.
// Use the source field to route texture coordinate input from the
// vertex program to the desired interpolator. Note that the source
// field is relative to the outputs the vertex program *actually*
// writes. If a vertex program only writes texcoord[1], this will
// be source index 0. Set INTERP_USED on all interpolators that
// produce data used by the fragment program. INTERP_USED looks
// like a swizzling mask, but I haven't seen it used that way.
//
// Note: The _UNKNOWN constants are always set in their respective register.
// I don't know if this is necessary.
// These DWORDs control how vertex data is routed into fragment program
// registers, after interpolators.
/* Route source encoding 0 (interpolator 0, per the name). */
#define R300_RS_ROUTE_SOURCE_INTERP_0 0
// Special handling for color: When the fragment program uses color,
// the ROUTE_0_COLOR bit is set and ROUTE_0_COLOR_DEST contains the
// color register index.
/* As above, but for secondary color */
/* END */
// BEGIN: Scissors and cliprects
// There are four clipping rectangles. Their corner coordinates are inclusive.
// Every pixel is assigned a number from 0 to 15 by setting bits 0-3 depending
// on whether the pixel is inside cliprects 0-3, respectively. For example,
// if a pixel is inside cliprects 0 and 1, but outside 2 and 3, it is assigned
// the number 3 (binary 0011).
// Iff the bit corresponding to the pixel's number in RE_CLIPRECT_CNTL is set,
// the pixel is rasterized.
//
// In addition to this, there is a scissors rectangle. Only pixels inside the
// scissors rectangle are drawn. (coordinates are inclusive)
//
// For some reason, the top-left corner of the framebuffer is at (1440, 1440)
// for the purpose of clipping and scissors.
/* Bit offset of the X coordinate in a cliprect corner; starts at bit 0. */
#define R300_CLIPRECT_X_SHIFT 0
/* gap */
/* Bit offset of the X coordinate in a scissors corner; starts at bit 0. */
#define R300_SCISSORS_X_SHIFT 0
/* END */
// BEGIN: Texture specification
// The texture specification dwords are grouped by meaning and not
// by texture unit. This means that e.g. the offset for texture
// image unit N is found in register TX_OFFSET_0 + (4*N)
/* Texture wrap mode encoding 0 (repeat, per the name). */
#define R300_TX_REPEAT 0
/* Bit offset of the S-coordinate wrap field; the field starts at bit 0. */
#define R300_TX_WRAP_S_SHIFT 0
/*
* NOTE: NEAREST doesn't seem to exist
* I'm not setting MAG_FILTER_MASK and (3 << 11) on for all
* anisotropy modes because that would void selected mag filter
*/
/* Chroma key mode encoding 0 (disabled, per the name). */
#define R300_CHROMA_KEY_MODE_DISABLE 0
/* Bit offset of the texture width mask field; the field starts at bit 0. */
#define R300_TX_WIDTHMASK_SHIFT 0
/* The interpretation of the format word by Wladimir van der Laan */
/*
* The X, Y, Z and W refer to the layout of the components.
* They are given meanings as R, G, B and Alpha by the swizzle
* specification
*/
/* 0x16 - some 16 bit green format.. ?? */
/* gap */
/* Floating point formats */
/* Note - hardware supports both 16 and 32 bit floating point */
/* alpha modes, convenience mostly */
// if you have alpha, pick constant appropriate to the
// number of channels (1 for I8, 2 for I8A8, 4 for R8G8B8A8, etc
/* Swizzling */
/* constants */
/* Texture swizzle constant 0 (selects the X component, per the name). */
#define R300_TX_FORMAT_X 0
/* 2.0*Z, everything above 1.0 is set to 0.0 */
/* 2.0*W, everything above 1.0 is set to 0.0 */
/*
 * Convenience macro to take care of layout and swizzling.
 *
 * Builds a texture format word from four swizzle selectors and a layout.
 * Every argument is token-pasted onto the R300_TX_FORMAT_ prefix, so callers
 * pass bare suffixes (for example X, Y, Z, W and a layout name), never
 * numeric values or expressions.
 *
 *   B, G, R, A - suffix of the swizzle constant for that channel; each is
 *                shifted by the matching R300_TX_FORMAT_<channel>_SHIFT
 *   FMT        - suffix of the layout constant, ORed in unshifted
 *
 * NOTE(review): the "#define" head line was missing, leaving the body below
 * as stray tokens at file scope. The name comes from the comment that
 * follows this macro; the parameter order (B, G, R, A, FMT) is reconstructed
 * from the order of use in the body - confirm against callers.
 */
#define R300_EASY_TX_FORMAT(B, G, R, A, FMT) ( \
((R300_TX_FORMAT_##B)<<R300_TX_FORMAT_B_SHIFT) \
| ((R300_TX_FORMAT_##G)<<R300_TX_FORMAT_G_SHIFT) \
| ((R300_TX_FORMAT_##R)<<R300_TX_FORMAT_R_SHIFT) \
| ((R300_TX_FORMAT_##A)<<R300_TX_FORMAT_A_SHIFT) \
| (R300_TX_FORMAT_##FMT))
/* These can be ORed with result of R300_EASY_TX_FORMAT() */
/* We don't really know what they do. Take values from a constant color ? */
/* obvious missing in gap */
/* BEGIN: Guess from R200 */
/* Endian field encoding 0 at bit 0 (no byte swapping, per the name). */
#define R300_TXO_ENDIAN_NO_SWAP (0 << 0)
/* END */
/* 32 bit chroma key */
/* ff00ff00 == { 0, 1.0, 0, 1.0 } */
/* END */
// BEGIN: Fragment program instruction set
// Fragment programs are written directly into register space.
// There are separate instruction streams for texture instructions and ALU
// instructions.
// In order to synchronize these streams, the program is divided into up
// to 4 nodes. Each node begins with a number of TEX operations, followed
// by a number of ALU operations.
// The first node can have zero TEX ops, all subsequent nodes must have at least
// one TEX op.
// All nodes must have at least one ALU op.
//
// The index of the last node is stored in PFS_CNTL_0: A value of 0 means
// 1 node, a value of 3 means 4 nodes.
// The total amount of instructions is defined in PFS_CNTL_2. The offsets are
// offsets into the respective instruction streams, while *_END points to the
// last instruction relative to this offset.
/* Bit offset of the last-node index field; the field starts at bit 0. */
#define R300_PFS_CNTL_LAST_NODES_SHIFT 0
// There is an unshifted value here which has so far always been equal to the
// index of the highest used temporary register.
/* Bit offset of the ALU offset field; the field starts at bit 0. */
#define R300_PFS_CNTL_ALU_OFFSET_SHIFT 0
/* gap */
// Nodes are stored backwards. The last active node is always stored in
// PFS_NODE_3.
// Example: In a 2-node program, NODE_0 and NODE_1 are set to 0. The
// first node is stored in NODE_2, the second node is stored in NODE_3.
//
// Offsets are relative to the master offset from PFS_CNTL_2.
// LAST_NODE is set for the last node, and only for the last node.
/* Bit offset of a node's ALU offset field; the field starts at bit 0. */
#define R300_PFS_NODE_ALU_OFFSET_SHIFT 0
/* Disabled definition of the LAST_NODE flag (bit 22), kept for reference. */
/* #define R300_PFS_NODE_LAST_NODE (1 << 22) */
// TEX
// As far as I can tell, texture instructions cannot write into output
// registers directly. A subsequent ALU instruction is always necessary,
// even if it's just MAD o0, r0, 1, 0
/*
 * Bit offset of the source field of a texture instruction; the field
 * starts at bit 0.
 */
#define R300_FPITX_SRC_SHIFT 0
/* GUESS based on layout and native limits */
/*
* Unsure if these are opcodes, or some kind of bitfield, but this is how
* they were set when I checked
*/
// ALU
// The ALU instructions register blocks are enumerated according to the order
// in which fglrx writes them. I assume there is space for 64 instructions, since
// each block has space for a maximum of 64 DWORDs, and this matches reported
// native limits.
//
// The basic functional block seems to be one MAD for each color and alpha,
// and an adder that adds all components after the MUL.
// - ADD, MUL, MAD etc.: use MAD with appropriate neutral operands
// - DP4: Use OUTC_DP4, OUTA_DP4
// - DP3: Use OUTC_DP3, OUTA_DP4, appropriate alpha operands
// - DPH: Use OUTC_DP4, OUTA_DP4, appropriate alpha operands
// - CMP: If ARG2 < 0, return ARG1, else return ARG0
// - FLR: use FRC+MAD
// - XPD: use MAD+MAD
// - SGE, SLT: use MAD+CMP
// - RSQ: use ABS modifier for argument
// - Use OUTC_REPL_ALPHA to write results of an alpha-only operation (e.g. RCP)
// into color register
// - apparently, there's no quick DST operation
// - fglrx set FPI2_UNKNOWN_31 on a "MAD fragment.color, tmp0, tmp1, tmp2"
// - fglrx set FPI2_UNKNOWN_31 on a "MAX r2, r1, c0"
// - fglrx once set FPI0_UNKNOWN_31 on a "FRC r1, r1"
//
// Operand selection
// First stage selects three sources from the available registers and
// constant parameters. This is defined in INSTR1 (color) and INSTR3 (alpha).
// fglrx sorts the three source fields: Registers before constants,
// lower indices before higher indices; I do not know whether this is necessary.
// fglrx fills unused sources with "read constant 0"
// According to specs, you cannot select more than two different constants.
//
// Second stage selects the operands from the sources. This is defined in
// INSTR0 (color) and INSTR2 (alpha). You can also select the special constants
// zero and one.
// Swizzling and negation happens in this stage, as well.
//
// Important: Color and alpha seem to be mostly separate, i.e. their sources
// selection appears to be fully independent (the register storage is probably
// physically split into a color and an alpha section).
// However (because of the apparent physical split), there is some interaction
// WRT swizzling. If, for example, you want to load an R component into an
// Alpha operand, this R component is taken from a *color* source, not from
// an alpha source. The corresponding register doesn't even have to appear in
// the alpha sources list. (I hope this all makes sense to you)
//
// Destination selection
// The destination register index is in FPI1 (color) and FPI3 (alpha) together
// with enable bits.
// There are separate enable bits for writing into temporary registers
// (DSTC_REG_* /DSTA_REG) and program output registers
// (DSTC_OUTPUT_* /DSTA_OUTPUT).
// You can write to both at once, or not write at all (the same index
// must be used for both).
//
// Note: There is a special form for LRP
// - Argument order is the same as in ARB_fragment_program.
// - Operation is MAD
// - ARG1 is set to ARGC_SRC1C_LRP/ARGC_SRC1A_LRP
// - Set FPI0/FPI2_SPECIAL_LRP
// Arbitrary LRP (including support for swizzling) requires vanilla MAD+MAD
/* Bit offset of the first color source field (SRC0C); starts at bit 0. */
#define R300_FPI1_SRC0C_SHIFT 0
/* Bit offset of the first alpha source field (SRC0A); starts at bit 0. */
#define R300_FPI3_SRC0A_SHIFT 0
/* Color operand selector encoding 0 (source 0 color, xyz, per the name). */
#define R300_FPI0_ARGC_SRC0C_XYZ 0
/* Bit offset of the first color operand field (ARG0C); starts at bit 0. */
#define R300_FPI0_ARG0C_SHIFT 0
/* Alpha operand selector encoding 0 (source 0 color, x, per the name). */
#define R300_FPI2_ARGA_SRC0C_X 0
/* Bit offset of the first alpha operand field (ARG0A); starts at bit 0. */
#define R300_FPI2_ARG0A_SHIFT 0
/* END */
/* gap */
/* gap */
/* Fragment program parameters in 7.16 floating point */
/* GUESS: PARAM_31 is last, based on native limits reported by fglrx */
// Notes:
// - AFAIK fglrx always sets BLEND_UNKNOWN when blending is used
// in the application
// - AFAIK fglrx always sets BLEND_NO_SEPARATE when CBLEND and
// ABLEND are set to the same
// function (both registers are always set up completely in any case)
// - Most blend flags are simply copied from R200 and not tested yet
/* the following only appear in CBLEND */
/* the following are shared between CBLEND and ABLEND */
/* gap */
/* gap */
// Bit 16: Larger tiles
// Bit 17: 4x2 tiles
// Bit 18: Extremely weird tile like, but some pixels duplicated?
/* gap */
/*
* Guess by Vladimir.
* Set to 0A before 3D operations, set to 02 afterwards.
*/
/* gap */
/*
* There seems to be no "write only" setting, so use
* Z-test = ALWAYS for this. Bit (1<<8) is the "test"
* bit. so plain write is 6 - vd
*/
/* functions */
/* Compare function encoding 0 (never pass, per the name). */
#define R300_ZS_NEVER 0
/* operations */
/* Operation encoding 0 (keep the existing value, per the name). */
#define R300_ZS_KEEP 0
/*
* front and back refer to operations done for front
* and back faces, i.e. separate stencil function support
*/
/* Bit offset of the depth function field; the field starts at bit 0. */
#define R300_RB3D_ZS1_DEPTH_FUNC_SHIFT 0
/* Bit offset of the stencil reference field; the field starts at bit 0. */
#define R300_RB3D_ZS2_STENCIL_REF_SHIFT 0
/* gap */
/* Depth format encoding 0 at bit 0 (16-bit integer Z, per the name). */
#define R300_DEPTH_FORMAT_16BIT_INT_Z (0 << 0)
/* gap */
/*
* BEGIN: Vertex program instruction set
* Every instruction is four dwords long:
* DWORD 0: output and opcode
* DWORD 1: first argument
* DWORD 2: second argument
* DWORD 3: third argument
*
* Notes:
* - ABS r, a is implemented as MAX r, a, -a
* - MOV is implemented as ADD to zero
* - XPD is implemented as MUL + MAD
* - FLR is implemented as FRC + ADD
* - apparently, fglrx tries to schedule instructions so that there
* is at least one instruction between the write to a temporary
* and the first read from said temporary; however, violations
* of this scheduling are allowed
* - register indices seem to be unrelated with OpenGL aliasing to
* conventional state
* - only one attribute and one parameter can be loaded at a time;
* however, the same attribute/parameter can be used for more
* than one argument
* - the second software argument for POW is the third hardware
* argument (no idea why)
* - MAD with only temporaries as input seems to use VPI_OUT_SELECT_MAD_2
*
* There is some magic surrounding LIT:
* The single argument is replicated across all three inputs, but swizzled:
* First argument: xyzy
* Second argument: xyzx
* Third argument: xyzw
* Whenever the result is used later in the fragment program, fglrx forces
* x and w to be 1.0 in the input selection; I don't know whether this is
* strictly necessary
*/
/*
* Used in GL_POINT_DISTANCE_ATTENUATION_ARB,
* vector(scalar, vector)
*/
/* Used in fog computations, scalar(scalar) */
/*
* Used in GL_POINT_DISTANCE_ATTENUATION_ARB,
* scalar(scalar)
*/
/* all temps, vector(scalar, vector, vector) */
/* GUESS based on fglrx native limits */
/*
 * Input register class encoding 0 at bit 0 (temporary register, per the
 * name).
 */
#define R300_VPI_IN_REG_CLASS_TEMPORARY (0 << 0)
/* GUESS based on fglrx native limits */
/*
* The R300 can select components from the input register arbitrarily.
* Use the following constants, shifted by the component shift you
* want to select
*/
/*
 * Component selector encoding 0 (X, per the name); unshifted, so shift it
 * by the component shift of the slot being filled.
 */
#define R300_VPI_IN_SELECT_X 0
/* END */
/* BEGIN: Packet 3 commands */
// A primitive emission dword.
/* Primitive type encoding 0 at bit 0 (no primitive, per the name). */
#define R300_PRIM_TYPE_NONE (0 << 0)
// GUESS (based on r200)
// GUESS (based on r200)
// Draw a primitive from vertex data in arrays loaded via 3D_LOAD_VBPNTR.
// Two parameter dwords:
// 0. The first parameter appears to be always 0
// 1. The second parameter is a standard primitive emission dword.
// Specify the full set of vertex arrays as (address, stride).
// The first parameter is the number of vertex arrays specified.
// The rest of the command is a variable length list of blocks, where
// each block is three dwords long and specifies two arrays.
// The first dword of a block is split into two words, the lower significant
// word refers to the first array, the more significant word to the second
// array in the block.
// The low byte of each word contains the size of an array entry in dwords,
// the high byte contains the stride of the array.
// The second dword of a block contains the pointer to the first array,
// the third dword of a block contains the pointer to the second array.
// Note that if the total number of arrays is odd, the third dword of
// the last block is omitted.
#ifdef __cplusplus
}
#endif
#endif /* __R300_REG_H_ */