/*
* Pixel and vertex shaders implementation using ARB_vertex_program
* and ARB_fragment_program GL extensions.
*
* Copyright 2002-2003 Jason Edmeades
* Copyright 2002-2003 Raphael Junqueira
* Copyright 2004 Christian Costa
* Copyright 2005 Oliver Stieber
* Copyright 2006 Ivan Gyurdiev
* Copyright 2006 Jason Green
* Copyright 2006 Henri Verbeet
* Copyright 2007-2008 Stefan Dösinger for CodeWeavers
* Copyright 2009 Henri Verbeet for CodeWeavers
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
*/
/*
* Oracle LGPL Disclaimer: For the avoidance of doubt, except that if any license choice
* other than GPL or LGPL is available it will apply instead, Oracle elects to use only
* the Lesser General Public License version 2.1 (LGPLv2) at this time for any software where
* a choice of LGPL license versions is made available with the language indicating
* that LGPLv2 or any later version may be used, or where a choice of which version
* of the LGPL is applied is otherwise unspecified.
*/
#include "config.h"
#include <math.h>
#include <stdio.h>
#include "wined3d_private.h"
/* Extract a line. Note that this modifies the source string. */
{
char *p, *q;
p = *ptr;
if (!(q = strstr(p, "\n")))
{
if (!*p) return NULL;
return p;
}
*q = '\0';
*ptr = q + 1;
return p;
}
{
unsigned long source_size;
if (!tmp)
{
return;
}
FIXME("\n");
}
/* GL locking for state handlers is done by the caller. */
{
}
/* Returns TRUE if result.clip from GL_NV_vertex_program2 should be used and FALSE otherwise */
{
}
{
{
return TRUE;
}
return FALSE;
}
/* Judging by the name and the comment in the body, reports how many vertex
 * shader constants this backend reserves for its own use.
 *
 * shader:  shader being examined; not referenced by the statements visible here.
 * gl_info: GL capability information; not referenced by the statements visible here.
 *
 * Returns the value of `ret`.
 */
static unsigned int reserved_vs_const(IWineD3DBaseShader *shader, const struct wined3d_gl_info *gl_info)
{
/* We use one PARAM for the pos fixup, and in some cases one to load
* some immediate values into the shader
*/
/* NOTE(review): `ret` has no visible declaration or assignment in this
 * function -- confirm where it is computed. */
return ret;
}
{
}
/* ARB_program_shader private data */
/* Describes one flow-control construct (if / ifc / loop). Presumably one
 * frame is kept per currently open construct -- confirm against the users. */
struct control_frame
{
/* Kind of construct this frame describes. */
enum
{
IF,
IFC,
LOOP,
} type;
/* Number attached to the construct. Both members share storage; presumably
 * `type` tells which one is in use -- confirm. */
union
{
unsigned int loop;
unsigned int ifc;
} no;
};
/* NP2 texcoord fixup information for ARB pixel shaders. This definition
 * declares no members; only the rationale for the offset is recorded here. */
struct arb_ps_np2fixup_info
{
/* For ARB we need an offset value:
* With both GLSL and ARB mode the NP2 fixup information (the texture dimensions) is stored in a
* consecutive way (GLSL uses a uniform array). Since ARB doesn't know the notion of a "standalone"
* array we need an offset to the index inside the program local parameter array. */
};
/* Judging by the name, the arguments an ARB pixel shader variant is compiled
 * with. This definition declares no members. */
struct arb_ps_compile_args
{
};
/* Descriptor for a shader constant that is tied to a texture unit. */
struct stb_const_desc
{
/* Texture unit the constant belongs to (presumably a stage/sampler index -- confirm). */
unsigned char texunit;
};
/* Judging by the name, bookkeeping for one compiled ARB pixel shader variant. */
struct arb_ps_compiled_shader
{
/* Number of bump environment matrix constants; presumably the loop bound
 * used when those constants are uploaded -- confirm. */
unsigned char numbumpenvmatconsts;
/* Number of integer constants; presumably 0 means there is nothing to
 * upload -- confirm. */
char num_int_consts;
};
/* Judging by the name, the arguments an ARB vertex shader variant is compiled with. */
struct arb_vs_compile_args
{
/* Clipping related arguments. */
union
{
struct
{
/* presumably identifies the texcoord used for clipping -- TODO confirm */
char clip_texcoord;
/* presumably a bitmask of clip planes -- TODO confirm */
char clipplane_mask;
} boolclip;
} clip;
/* This union declares no members. */
union
{
} vertex;
};
/* Judging by the name, bookkeeping for one compiled ARB vertex shader variant. */
struct arb_vs_compiled_shader
{
/* Number of integer constants; presumably 0 means there is nothing to
 * upload -- confirm. */
char num_int_consts;
/* presumably a flag telling whether color outputs must be unclamped -- TODO confirm */
char need_color_unclamp;
};
/* Judging by the name, one shader instruction that was recorded for later
 * processing. This definition declares no members. */
struct recorded_instruction
{
};
/* Private per-compilation context of the ARB shader backend, judging by the name.
 *
 * NOTE(review): the enum below is never closed and the enumerator described by
 * its third comment is absent, so `aL` and `vs_clipplanes` end up inside the
 * enum braces as written -- confirm against the complete definition. */
struct shader_arb_ctx_priv
{
/* Program flavour being targeted. */
enum
{
/* plain GL_ARB_vertex_program or GL_ARB_fragment_program */
ARB,
/* GL_NV_vertex_program2_option or GL_NV_fragment_program_option */
NV2,
/* GL_NV_vertex_program3 or GL_NV_fragment_program2 */
/* presumably the emulated loop counter (aL) value -- TODO confirm */
int aL;
/* presumably the number of clip planes available to the vertex shader -- TODO confirm */
unsigned int vs_clipplanes;
/* For 3.0 vertex shaders */
/* For 2.x and earlier vertex shaders */
/* 3.0 pshader input for compatibility with fixed function */
};
/* The four structures below declare no members in this definition; the
 * descriptions are derived from their names only. */

/* Pixel shader input signature (presumably -- confirm). */
struct ps_signature
{
};
/* Backend-private data attached to a pixel shader (presumably -- confirm). */
struct arb_pshader_private {
};
/* Backend-private data attached to a vertex shader (presumably -- confirm). */
struct arb_vshader_private {
};
/* Backend-private data of the ARB shader backend; reached through a device's
 * shader_priv pointer elsewhere in this file. */
struct shader_arb_priv
{
};
/********************************************************
********************************************************/
/* Loads floating point constants into the currently set ARB_vertex/fragment_program.
* When constant_list == NULL, it will load all the constants.
*
* @target_type should be either GL_VERTEX_PROGRAM_ARB (for vertex shaders)
* or GL_FRAGMENT_PROGRAM_ARB (for pixel shaders)
*/
/* GL locking is done by the caller */
static unsigned int shader_arb_load_constantsF(IWineD3DBaseShaderImpl *This, const struct wined3d_gl_info *gl_info,
{
DWORD i, j;
unsigned int ret;
if (TRACE_ON(d3d_constants))
{
for(i = 0; i < max_constants; i++) {
if(!dirty_consts[i]) continue;
}
}
i = 0;
/* In 1.X pixel shaders constants are implicitly clamped in the range [-1;1] */
{
/* ps 1.x supports only 8 constants, clamp only those. When switching between 1.x and higher
* shaders, the first 8 constants are marked dirty for reload
*/
if(!dirty_consts[i]) continue;
dirty_consts[i] = 0;
j = 4 * i;
}
/* If further constants are dirty, reload them without clamping.
*
* The alternative is not to touch them, but then we cannot reset the dirty constant count
* to zero. That's bad for apps that only use PS 1.x shaders, because in that case the code
* above would always re-check the first 8 constants since max_constant remains at the init
* value
*/
}
{
/* TODO: Benchmark if we're better off with finding the dirty constants ourselves,
* or just reloading *all* constants at once
*
GL_EXTCALL(glProgramEnvParameters4fvEXT(target_type, i, max_constants, constants + (i * 4)));
*/
for(; i < max_constants; i++) {
if(!dirty_consts[i]) continue;
/* Find the next block of dirty constants */
dirty_consts[i] = 0;
j = i;
for(i++; (i < max_constants) && dirty_consts[i]; i++) {
dirty_consts[i] = 0;
}
}
} else {
for(; i < max_constants; i++) {
if(dirty_consts[i]) {
dirty_consts[i] = 0;
}
}
}
checkGLcall("glProgramEnvParameter4fvARB()");
/* Load immediate constants */
if (TRACE_ON(d3d_shader)) {
}
}
/* Immediate constants are clamped for 1.X shaders at loading times */
ret = 0;
}
checkGLcall("glProgramEnvParameter4fvARB()");
return ret; /* The loaded immediate constants need reloading for the next shader */
} else {
return 0; /* No constants are dirty now */
}
}
/**
 * Loads the NP2 texcoord fixup constants.
 *
 * Returns immediately when usePixelShader is zero, because the fixup is only
 * applied to pixel shaders. useVertexShader is not referenced by the
 * statements visible here.
 *
 * NOTE(review): `deviceImpl`, `active`, `tex` and `idx` have no visible
 * declaration, the `continue` statements are not inside a visible loop and
 * the closing braces outnumber the opening ones -- confirm against the
 * complete function.
 */
static void shader_arb_load_np2fixup_constants(
char usePixelShader,
char useVertexShader) {
const struct shader_arb_priv* const priv = (const struct shader_arb_priv *) deviceImpl->shader_priv;
if (!usePixelShader) {
/* NP2 texcoord fixup is (currently) only done for pixelshaders. */
return;
}
UINT i;
/* Skip entries whose lowest `active` bit is clear. */
if (!(active & 1)) continue;
/* An entry flagged for fixup without a texture is reported and skipped. */
if (!tex) {
FIXME("Nonexistent texture is flagged for NP2 texcoord fixup\n");
continue;
}
/* Odd and even indices are handled by separate branches. */
if (idx % 2) {
} else {
}
}
}
}
}
/* GL locking is done by the caller. */
{
unsigned char i;
for(i = 0; i < gl_shader->numbumpenvmatconsts; i++)
{
/* The state manager takes care that this function is always called if the bump env matrix changes */
GL_EXTCALL(glProgramLocalParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, gl_shader->bumpenvmatconst[i].const_num, data));
{
/* WINED3DTSS_BUMPENVLSCALE and WINED3DTSS_BUMPENVLOFFSET are next to each other.
* point gl to the scale, and load 4 floats. x = scale, y = offset, z and w are junk, we
* don't care about them. The pointers are valid for sure because the stateblock is bigger.
* (they're WINED3DTSS_TEXTURETRANSFORMFLAGS and WINED3DTSS_ADDRESSW, so most likely 0 or NaN)
*/
GL_EXTCALL(glProgramLocalParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, gl_shader->luminanceconst[i].const_num, scale));
}
}
checkGLcall("Load bumpmap consts");
{
/* ycorrection.x: Backbuffer height(onscreen) or 0(offscreen).
* ycorrection.y: -1.0(onscreen), 1.0(offscreen)
* ycorrection.z: 1.0
* ycorrection.w: 0.0
*/
checkGLcall("y correction loading");
}
if(gl_shader->num_int_consts == 0) return;
for(i = 0; i < MAX_CONST_I; i++)
{
{
}
}
checkGLcall("Load ps int consts");
}
/* GL locking is done by the caller. */
{
unsigned char i;
/* Upload the position fixup */
GL_EXTCALL(glProgramLocalParameter4fvARB(GL_VERTEX_PROGRAM_ARB, gl_shader->pos_fixup, deviceImpl->posFixup));
if(gl_shader->num_int_consts == 0) return;
for(i = 0; i < MAX_CONST_I; i++)
{
{
}
}
checkGLcall("Load vs int consts");
}
/**
 * Loads the float constants for the shader stages that are in use.
 *
 * We only support float constants in ARB at the moment, so don't
 * worry about the Integers or Booleans
 *
 * For every enabled stage shader_arb_load_constantsF() is called and its
 * return value is stored in device->highest_dirty_vs_const or
 * device->highest_dirty_ps_const respectively.
 *
 * context:         not referenced by the statements visible here.
 * usePixelShader:  non-zero to load the pixel shader constants.
 * useVertexShader: non-zero to load the vertex shader constants.
 *
 * NOTE(review): both shader_arb_load_constantsF() calls are cut off after
 * their third argument, and `device`, `vshader`, `pshader` and `gl_info` have
 * no visible declaration -- confirm against the complete function.
 */
/* GL locking is done by the caller (state handler) */
static void shader_arb_load_constants(const struct wined3d_context *context, char usePixelShader, char useVertexShader)
{
if (useVertexShader) {
/* Load DirectX 9 float constants for vertex shader */
device->highest_dirty_vs_const = shader_arb_load_constantsF(vshader, gl_info, GL_VERTEX_PROGRAM_ARB,
}
if (usePixelShader) {
/* Load DirectX 9 float constants for pixel shader */
device->highest_dirty_ps_const = shader_arb_load_constantsF(pshader, gl_info, GL_FRAGMENT_PROGRAM_ARB,
}
}
{
/* We don't want shader constant dirtification to be an O(contexts), so just dirtify the active
* context. On a context switch the old context will be fully dirtified */
}
{
/* We don't want shader constant dirtification to be an O(contexts), so just dirtify the active
* context. On a context switch the old context will be fully dirtified */
}
{
if(!ret) {
ERR("Out of memory\n");
return NULL;
}
}
return ret;
}
/* Generate the variable & register declarations for the ARB_vertex_program output target */
static DWORD shader_generate_arb_declarations(IWineD3DBaseShader *iface, const shader_reg_maps *reg_maps,
{
unsigned max_constantsF;
/* In pixel shaders, all private constants are program local, we don't need anything
* from program.env. Thus we can advertise the full set of constants in pixel shaders.
* If we need a private constant the GL implementation will squeeze it in somewhere
*
* With vertex shaders we need the posFixup and on some GL implementations 4 helper
* immediate values. The posFixup is loaded using program.env for now, so always
* subtract one from the number of constants. If the shader uses indirect addressing,
* account for the helper const too because we have to declare all available d3d constants
* and don't know which are actually used.
*/
if (pshader)
{
}
else
{
{
}
{
else
clip_limit = 0;
}
else
{
}
if(*num_clipplanes < clip_limit)
{
}
}
else
{
}
}
{
}
{
}
{
{
}
}
/* Load local constants using the program-local space,
* this avoids reloading them each time the shader is used
*/
if(lconst_map) {
}
}
/* After subtracting privately used constants from the hardware limit(they are loaded as
* local constants), make sure the shader doesn't violate the env constant limit
*/
if(pshader)
{
}
else
{
}
/* Avoid declaring more constants than needed */
/* we use the array-based constants array if the local constants are marked for loading,
* because then we use indirect addressing, or when the local constant list is empty,
* because then we don't know if we're using indirect addressing or not. If we're hardcoding
* local constants do not declare the loaded constants as an array because ARB compilers usually
* do not optimize unused constants away
*/
/* Need to PARAM the environment parameters (constants) so we can use relative addressing */
} else {
for(i = 0; i < max_constantsF; i++) {
idx = i >> 5;
}
}
}
return next_local;
}
/* Coefficient names indexed by a 4-bit shift value (0-15); the comment beside
 * each entry gives the index and the scale it stands for. x2..x16 map to
 * coefmul.x..w and d2..d16 map to coefdiv.x..w. Every other slot, including
 * 0 (no shift), holds the placeholder "dummy". Presumably selected by an
 * instruction's destination shift -- confirm against the users. */
static const char * const shift_tab[] = {
"dummy", /* 0 (none) */
"coefmul.x", /* 1 (x2) */
"coefmul.y", /* 2 (x4) */
"coefmul.z", /* 3 (x8) */
"coefmul.w", /* 4 (x16) */
"dummy", /* 5 (x32) */
"dummy", /* 6 (x64) */
"dummy", /* 7 (x128) */
"dummy", /* 8 (d256) */
"dummy", /* 9 (d128) */
"dummy", /* 10 (d64) */
"dummy", /* 11 (d32) */
"coefdiv.w", /* 12 (d16) */
"coefdiv.z", /* 13 (d8) */
"coefdiv.y", /* 14 (d4) */
"coefdiv.x" /* 15 (d2) */
};
{
{
*ptr++ = '.';
}
*ptr = '\0';
}
/* Builds the textual swizzle for a source parameter.
 *
 * param:       source parameter whose swizzle is rendered.
 * fixup:       per the comment below, requests the x/z swap needed for
 *              D3DCOLOR typed registers (presumably -- the test itself is
 *              not visible here).
 * swizzle_str: output buffer; presumably what `ptr` walks over -- confirm.
 *
 * The string starts with '.' when a swizzle is emitted and is always
 * terminated with '\0' through `ptr`.
 *
 * NOTE(review): `ptr` has no visible declaration and the braces below do not
 * balance -- confirm against the complete function.
 */
static void shader_arb_get_swizzle(const struct wined3d_shader_src_param *param, BOOL fixup, char *swizzle_str)
{
/* For registers of type WINED3DDECLTYPE_D3DCOLOR, data is stored as "bgra",
* but addressed as "rgba". To fix this we need to swap the register's x
* and z components. */
/* swizzle bits fields: wwzzyyxx */
/* If the swizzle is the default swizzle (i.e. "xyzw"), we don't need to
* generate a swizzle string, unless we need to do our own swizzling. */
{
*ptr++ = '.';
} else {
}
}
*ptr = '\0';
}
{
}
{
/* oPos, oFog and oPts in D3D */
{
case WINED3DSPR_TEMP:
break;
case WINED3DSPR_INPUT:
if (pshader)
{
{
}
else
{
{
{
if(idx < MAX_REG_INPUT)
{
}
else
{
}
}
{
/* There are two ways basically:
*
* 1) Use the unrolling code that is used for loop emulation and unroll the loop.
* That means trouble if the loop also contains a breakc or if the control values
* aren't local constants.
* 2) Generate an if block that checks if aL.y < 8, == 8 or == 9 and selects the
* source dynamically. The trouble is that we cannot simply read aL.y because it
* is an ADDRESS register. We could however push it, load .zw with a value and use
* ADAC to load the condition code register and pop it again afterwards
*/
FIXME("Relative input register addressing with more than 8 registers\n");
/* This is better than nothing for now */
}
{
/* This is problematic because we'd have to consult the ctx->ps_input strings
* for where to find the varying. Some may be "0.0", others can be texcoords or
* colors. This needs either a pipeline replacement to make the vertex shader feed
* proper varyings, or loop unrolling
*
* For now use the texcoords and hope for the best
*/
FIXME("Non-vertex shader varying input with indirect addressing\n");
}
else
{
/* D3D supports indirect addressing only with aL in loop registers. The loop instruction
* pulls GL_NV_fragment_program2 in
*/
}
}
else
{
{
}
else
{
}
}
}
}
else
{
}
break;
case WINED3DSPR_CONST:
{
} else {
} else {
}
}
}
if(aL)
else
}
else
{
else
}
break;
case WINED3DSPR_TEXTURE: /* case WINED3DSPR_ADDR: */
if (pshader) {
/* In ps <= 1.3, Tx is a temporary register as destination to all instructions,
* and as source to most instructions. For some instructions it is the texcoord
* input. Those instructions know about the special use
*/
} else {
/* in ps 1.4 and 2.x Tx is always a (read-only) varying */
}
}
else
{
{
}
else
{
}
}
break;
case WINED3DSPR_COLOROUT:
{
}
else
{
{
}
else
{
}
}
break;
case WINED3DSPR_RASTOUT:
break;
case WINED3DSPR_DEPTHOUT:
break;
case WINED3DSPR_ATTROUT:
/* case WINED3DSPR_OUTPUT: */
break;
case WINED3DSPR_TEXCRDOUT:
if (pshader)
{
}
else
{
{
}
else
{
}
}
break;
case WINED3DSPR_LOOP:
{
/* Pshader has an implicitly declared loop index counter A0.x that cannot be renamed */
}
else
{
/* Unfortunately this code cannot return the value of ctx->aL here. An immediate value
* would be valid, but if aL is used for indexing(its only use), there's likely an offset,
* thus the result would be something like C[15 + 30], which is not valid in the ARB program
* grammar. So return a marker for the emulated aL and intercept it in constant and varying
* indexing
*/
}
break;
case WINED3DSPR_CONSTINT:
break;
case WINED3DSPR_MISCTYPE:
{
}
{
}
else
{
}
break;
default:
break;
}
}
{
}
{
switch(channel_source)
{
case CHANNEL_SOURCE_ZERO: return "0";
case CHANNEL_SOURCE_ONE: return "1";
case CHANNEL_SOURCE_X: return "x";
case CHANNEL_SOURCE_Y: return "y";
case CHANNEL_SOURCE_Z: return "z";
case CHANNEL_SOURCE_W: return "w";
default:
return "undefined";
}
}
{
if (is_complex_fixup(fixup))
{
return;
}
mask = 0;
if (mask)
{
}
mask = 0;
if (mask)
{
if (mask != WINED3DSP_WRITEMASK_ALL)
{
*ptr++ = '.';
}
*ptr = '\0';
}
}
{
/* Silently ignore PARTIALPRECISION if it's not supported */
if(mod & WINED3DSPDM_MSAMPCENTROID)
{
FIXME("Unhandled modifier WINED3DSPDM_MSAMPCENTROID\n");
}
switch(mod)
{
return "H_SAT";
case WINED3DSPDM_SATURATE:
return "_SAT";
return "H";
case 0:
return "";
default:
return "";
}
}
{
const char *tex_type;
const char *mod;
/* D3D vertex shader sampler IDs are vertex samplers(0-3), not global d3d samplers */
switch(sampler_type) {
case WINED3DSTT_1D:
tex_type = "1D";
break;
case WINED3DSTT_2D:
IWineD3DBaseTexture_GetTextureDimensions(device->stateBlock->textures[sampler_idx]) == GL_TEXTURE_RECTANGLE_ARB) {
tex_type = "RECT";
} else {
tex_type = "2D";
}
{
{
}
}
break;
case WINED3DSTT_VOLUME:
tex_type = "3D";
break;
case WINED3DSTT_CUBE:
tex_type = "CUBE";
break;
default:
tex_type = "";
}
/* TEX, TXL, TXD and TXP do not support the "H" modifier,
* so don't use shader_arb_get_modifier
*/
else mod = "";
/* Fragment samplers always have identity mapping */
if(sampler_idx >= MAX_FRAGMENT_SAMPLERS)
{
}
{
}
{
}
{
/* Shouldn't be possible, but let's check for it */
/* TXB takes the 4th component of the source vector automatically, as d3d. Nothing more to do */
shader_addline(buffer, "TXB%s %s, %s, texture[%u], %s;\n", mod, dst_str, coord_reg, sampler_idx, tex_type);
}
{
shader_addline(buffer, "TXP%s %s, %s, texture[%u], %s;\n", mod, dst_str, coord_reg, sampler_idx, tex_type);
}
else
{
if (np2_fixup)
{
}
else
shader_addline(buffer, "TEX%s %s, %s, texture[%u], %s;\n", mod, dst_str, coord_reg, sampler_idx, tex_type);
}
if (pshader)
{
}
}
{
/* Generate a line that does the input modifier computation and return the input register to use */
int insert_line;
/* Assume a new line will be added */
insert_line = 1;
/* Get register name */
{
case WINED3DSPSM_NONE:
insert_line = 0;
break;
case WINED3DSPSM_NEG:
insert_line = 0;
break;
case WINED3DSPSM_BIAS:
break;
case WINED3DSPSM_BIASNEG:
break;
case WINED3DSPSM_SIGN:
break;
case WINED3DSPSM_SIGNNEG:
break;
case WINED3DSPSM_COMP:
break;
case WINED3DSPSM_X2:
break;
case WINED3DSPSM_X2NEG:
break;
case WINED3DSPSM_DZ:
break;
case WINED3DSPSM_DW:
break;
case WINED3DSPSM_ABS:
insert_line = 0;
} else {
}
break;
case WINED3DSPSM_ABSNEG:
} else {
}
insert_line = 0;
break;
default:
insert_line = 0;
}
/* Return modified or original register, with swizzle */
if (insert_line)
}
{
/* Sampling the perturbation map in Tsrc was done already, including the signedness correction if needed
*
* Keep in mind that src_name[1] can be "TB" and src_name[0] can be "TA" because modifiers like _x2 are valid
* with bem. So delay loading the first parameter until after the perturbation calculation which needs two
* temps is done.
*/
}
{
*extra_char = ' ';
switch(mod)
{
case WINED3DSPSM_NONE: return WINED3DSPSM_NEG;
case WINED3DSPSM_NEG: return WINED3DSPSM_NONE;
case WINED3DSPSM_BIAS: return WINED3DSPSM_BIASNEG;
case WINED3DSPSM_BIASNEG: return WINED3DSPSM_BIAS;
case WINED3DSPSM_SIGN: return WINED3DSPSM_SIGNNEG;
case WINED3DSPSM_SIGNNEG: return WINED3DSPSM_SIGN;
case WINED3DSPSM_X2: return WINED3DSPSM_X2NEG;
case WINED3DSPSM_X2NEG: return WINED3DSPSM_X2;
case WINED3DSPSM_ABS: return WINED3DSPSM_ABSNEG;
case WINED3DSPSM_ABSNEG: return WINED3DSPSM_ABS;
}
return mod;
}
{
/* The coissue flag changes the semantic of the cnd instruction in <= 1.3 shaders */
{
} else {
char extra_neg;
/* src0 may have a negate srcmod set, so we can't blindly add "-" to the name */
/* No modifiers supported on CMP */
/* _SAT on CMP doesn't make much sense, but it is not a pure NOP */
{
}
}
}
{
/* Generate input register names (with modifiers) */
/* No modifiers are supported on CMP */
{
}
}
/** Process the WINED3DSIO_DP2ADD instruction in ARB.
* dst = dot2(src0, src1) + src2 */
{
{
/* GL_NV_fragment_program2 has a 1:1 matching instruction */
}
{
/* dst.x = src2.?, src0.x, src1.x + src0.y * src1.y
* dst.y = src2.?, src0.x, src1.z + src0.y * src1.w
* dst.z = src2.?, src0.x, src1.x + src0.y * src1.y
* dst.w = src2.?, src0.x, src1.z + src0.y * src1.w
*
* Make sure that src1.zw = src1.xy, then we get a classic dp2add
*
* .xyxy and other swizzles that we could get with this are not valid in
* plain ARBfp, but luckily the NV extension grammar lifts this limitation.
*/
}
else
{
/* Emulate a DP2 with a DP3 and 0.0. Don't use the dest as temp register, it could be src[1] or src[2]
* src_name[0] can be TA, but TA is a private temp for modifiers, so it is safe to overwrite
*/
}
}
/* Map the opcode 1-to-1 to the GL code */
{
const char *instruction;
unsigned int i;
switch (ins->handler_idx)
{
default: instruction = "";
break;
}
/* Note that shader_arb_add_dst_param() adds spaces. */
arguments[0] = '\0';
{
}
shader_addline(buffer, "%s%s %s%s;\n", instruction, shader_arb_get_modifier(ins), dst_str, arguments);
}
{
}
{
return;
}
/* This implements the mova formula used in GLSL. The first two instructions
* prepare the sign() part. Note that it is fine to have my_sign(0.0) = 1.0
* in this case:
* mova A0.x, 0.0
*
* A0.x = arl(floor(abs(0.0) + 0.5) * 1.0) = floor(0.5) = 0.0 since arl does a floor
*
* The ARL is performed when A0 is used - the requested component is read from A0_SHADOW into
* A0.x. We can use the overwritten component of A0_shadow as temporary storage for the sign.
*/
{
}
{
src0_param[0] = '\0';
{
}
else
{
/* Apple's ARB_vertex_program implementation does not accept an ARL source argument
* with more than one component. Thus replicate the first source argument over all
* 4 components. For example, .xyzw -> .x (or better: .xxxx), .zwxy -> .z, etc) */
}
}
{
{
return;
}
}
else
{
}
}
{
/* No swizzles are allowed in d3d's texkill. PS 1.x ignores the 4th component as documented,
* but >= 2.0 honors it (undocumented, but tested by the d3d9 testsuite)
*/
{
{
}
else
{
/* Sigh. KIL doesn't support swizzles/writemasks. KIL passes a writemask, but ".xy" for example
* is not valid as a swizzle in ARB (needs ".xyyy"). Use SWZ to load the register properly, and set
* masked out components to 0(won't kill)
*/
char x = '0', y = '0', z = '0', w = '0';
}
} else {
/* ARB fp doesn't like swizzles on the parameter of the KIL instruction. To mask the 4th component,
* copy the register into our general purpose TMP variable, overwrite .w and pass TMP to KIL
*
* ps_1_3 shaders use the texcoord incarnation of the Tx register. ps_1_4 shaders can use the same,
* or pass in any temporary register(in shader phase 2)
*/
} else {
}
}
}
{
/* All versions have a destination register */
/* 1.0-1.4: Use destination register number as texture code.
2.0+: Use provided sampler number as texture code. */
else
/* 1.0-1.3: Use the texcoord varying.
1.4+: Use provided coordinate source register. */
else {
/* TEX is the only instruction that can handle DW and DZ natively */
}
/* projection flag:
* 1.1, 1.2, 1.3: Use WINED3DTSS_TEXTURETRANSFORMFLAGS
* 1.4: Use WINED3DSPSM_DZ or WINED3DSPSM_DW on src[0]
* 2.0+: Use WINED3DSI_TEXLD_PROJECT on the opcode
*/
{
if(reg_sampler_code < MAX_TEXTURES) {
flags = priv->cur_ps_args->super.tex_transform >> (reg_sampler_code*WINED3D_PSARGS_TEXTRANSFORM_SHIFT);
}
if (flags & WINED3D_PSARGS_PROJECTED) {
}
}
{
if (src_mod == WINED3DSPSM_DZ) {
/* TXP cannot handle DZ natively, so move the z coordinate to .w. reg_coord is a read-only
* varying register, so we need a temp reg
*/
} else if(src_mod == WINED3DSPSM_DW) {
}
} else {
}
}
{
{
} else {
}
}
{
/* Note that texreg2ar treats Tx as a temporary register, not as a varying */
/* Move .x first in case src_str is "TA" */
if (reg1 < MAX_TEXTURES)
{
}
shader_hw_sample(ins, reg1, dst_str, "TA", flags & WINED3D_PSARGS_PROJECTED ? TEX_PROJ : 0, NULL, NULL);
}
{
/* Note that texreg2gb treats Tx as a temporary register, not as a varying */
}
{
/* Note that texreg2rg treats Tx as a temporary register, not as a varying */
}
{
/* All versions have a destination register. The Tx where the texture coordinates come
* from is the varying incarnation of the texture register
*/
/* Sampling the perturbation map in Tsrc was done already, including the signedness correction if needed
* The Tx in which the perturbation map is stored is the tempreg incarnation of the texture register
*
* GL_NV_fragment_program_option could handle this in one instruction via X2D:
* X2D TA.xy, fragment.texcoord, T%u, bumpenvmat%u.xzyw
*
* However, the NV extensions are never enabled for <= 2.0 shaders because of the performance penalty that
* comes with it, and texbem is an 1.x only instruction. No 1.x instruction forces us to enable the NV
* extension.
*/
/* with projective textures, texbem only divides the static texture coord, not the displacement,
* so we can't let the GL handle this.
*/
flags = priv->cur_ps_args->super.tex_transform >> (reg_dest_code * WINED3D_PSARGS_TEXTRANSFORM_SHIFT);
if (flags & WINED3D_PSARGS_PROJECTED)
{
} else {
}
{
/* No src swizzles are allowed, so this is ok */
}
}
{
/* The next instruction will be a texm3x2tex or texm3x2depth that writes to the uninitialized
* T<reg+1> register. Use this register to store the calculated vector
*/
}
{
/* We know that we're writing to the uninitialized T<reg> register, so use it for temporary storage */
flags = reg < MAX_TEXTURES ? deviceImpl->stateBlock->textureState[reg][WINED3DTSS_TEXTURETRANSFORMFLAGS] : 0;
shader_hw_sample(ins, reg, dst_str, dst_reg, flags & WINED3DTTFF_PROJECTED ? TEX_PROJ : 0, NULL, NULL);
}
{
/* There are always 2 texm3x3pad instructions followed by one texm3x3[tex,vspec, ...] instruction, with
* incrementing ins->dst[0].register_idx numbers. So the pad instruction already knows the final destination
* register, and this register is uninitialized(otherwise the assembler complains that it is 'redeclared')
*/
}
{
/* Sample the texture using the calculated coordinates */
flags = reg < MAX_TEXTURES ? deviceImpl->stateBlock->textureState[reg][WINED3DTSS_TEXTURETRANSFORMFLAGS] : 0;
shader_hw_sample(ins, reg, dst_str, dst_name, flags & WINED3DTTFF_PROJECTED ? TEX_PROJ : 0, NULL, NULL);
current_state->current_row = 0;
}
{
/* Get the dst reg without writemask strings. We know this register is uninitialized, so we can use all
* components for temporary data storage
*/
/* Construct the eye-ray vector from w coordinates */
/* Calculate reflection vector
*/
/* The .w is ignored when sampling, so I can use TB.w to calculate dot(N, N) */
/* Sample the texture using the calculated coordinates */
flags = reg < MAX_TEXTURES ? deviceImpl->stateBlock->textureState[reg][WINED3DTSS_TEXTURETRANSFORMFLAGS] : 0;
shader_hw_sample(ins, reg, dst_str, dst_reg, flags & WINED3DTTFF_PROJECTED ? TEX_PROJ : 0, NULL, NULL);
current_state->current_row = 0;
}
{
/* Note: dst_reg.xy is input here, generated by two texm3x3pad instructions */
/* Calculate reflection vector.
*
* dot(N, E)
* dst_reg.xyz = 2 * --------- * N - E
* dot(N, N)
*
* Which normalizes the normal vector
*/
/* Sample the texture using the calculated coordinates */
flags = reg < MAX_TEXTURES ? deviceImpl->stateBlock->textureState[reg][WINED3DTSS_TEXTURETRANSFORMFLAGS] : 0;
shader_hw_sample(ins, reg, dst_str, dst_reg, flags & WINED3DTTFF_PROJECTED ? TEX_PROJ : 0, NULL, NULL);
current_state->current_row = 0;
}
{
/* texdepth has an implicit destination, the fragment depth value. Its only parameter,
* which is essentially an input, is the destination register because it is the first
* parameter. According to the msdn, this must be register r5, but let's keep it more flexible
* here(writemasks/swizzles are not valid on texdepth)
*/
/* According to the msdn, the source register(must be r5) is unusable after
* the texdepth instruction, so we're free to modify it
*/
/* How to deal with the special case dst_name.g == 0? if r != 0, then
* the r * (1 / 0) will give infinity, which is clamped to 1.0, the correct
* result. But if r = 0.0, then 0 * inf = 0, which is incorrect.
*/
}
/** Process the WINED3DSIO_TEXDP3TEX instruction in ARB:
* Take a 3-component dot product of the TexCoord[dstreg] and src,
* then perform a 1D texture lookup from stage dstregnum, place into dst. */
{
shader_hw_sample(ins, sampler_idx, dst_str, "TB", 0 /* Only one coord, can't be projected */, NULL, NULL);
}
/** Process the WINED3DSIO_TEXDP3 instruction in ARB:
* Take a 3-component dot product of the TexCoord[dstreg] and src. */
{
/* Handle output register */
}
/** Process the WINED3DSIO_TEXM3X3 instruction in ARB
* Perform the 3rd row of a 3x3 matrix multiply */
{
}
/** Process the WINED3DSIO_TEXM3X2DEPTH instruction in ARB:
* Last row of a 3x2 matrix multiply, use the result to calculate the depth:
* Calculate tmp0.y = TexCoord[dstreg] . src.xyz; (tmp0.x has already been calculated)
* depth = (tmp0.y == 0.0) ? 1.0 : tmp0.x / tmp0.y
*/
{
/* How to deal with the special case dst_name.g == 0? if r != 0, then
* the r * (1 / 0) will give infinity, which is clamped to 1.0, the correct
* result. But if r = 0.0, then 0 * inf = 0, which is incorrect.
*/
}
/** Handles transforming all WINED3DSIO_M?x? opcodes for
 * vertex/pixel shaders to ARB_vertex_program codes. */
{
int i;
int nComponents = 0;
/* Set constants for the temporary argument */
switch(ins->handler_idx)
{
case WINED3DSIH_M4x4:
nComponents = 4;
break;
case WINED3DSIH_M4x3:
nComponents = 3;
break;
case WINED3DSIH_M3x4:
nComponents = 4;
break;
case WINED3DSIH_M3x3:
nComponents = 3;
break;
case WINED3DSIH_M3x2:
nComponents = 2;
break;
default:
break;
}
for (i = 0; i < nComponents; i++) {
}
}
{
const char *instruction;
switch(ins->handler_idx)
{
default: instruction = "";
break;
}
{
/* Dx sdk says .x is used if no swizzle is given, but our test shows that
* .w is used
*/
}
}
{
{
}
else
{
/* dst.w = src[0].w * 1 / (src.x^2 + src.y^2 + src.z^2)^(1/2) according to msdn*/
src_name);
}
}
{
/* ARB_fragment_program has a convenient LRP instruction */
return;
}
}
{
/* This instruction exists in ARB, but the d3d instruction takes two extra parameters which
* must contain fixed constants. So we need a separate function to filter those constants and
* can't use map2gl
*/
/* No modifiers are supported on SCS */
{
}
/* Sincos writemask must be .x, .y or .xy */
} else {
/* Approximate sine and cosine with a taylor series, as per math textbook. The application passes 8
* helper constants(D3DSINCOSCONST1 and D3DSINCOSCONST2) in src1 and src2.
*
* sin(x) = x - x^3/3! + x^5/5! - x^7/7! + ...
* cos(x) = 1 - x^2/2! + x^4/4! - x^6/6! + ...
*
* The constants we get are:
*
* +1 +1, -1 -1 +1 +1 -1 -1
* ---- , ---- , ---- , ----- , ----- , ----- , ------
* 1!*2 2!*4 3!*8 4!*16 5!*32 6!*64 7!*128
*
* If used with x^2, x^3, x^4 etc they calculate sin(x/2) and cos(x/2):
*
* (x/2)^2 = x^2 / 4
* (x/2)^3 = x^3 / 8
* (x/2)^4 = x^4 / 16
* (x/2)^5 = x^5 / 32
* etc
*
* To get the final result:
* sin(x) = 2 * sin(x/2) * cos(x/2)
* cos(x) = cos(x/2)^2 - sin(x/2)^2
* (from sin(x+y) and cos(x+y) rules)
*
* As per MSDN, dst.z is undefined after the operation, and so is
* dst.x and dst.y if they're masked out by the writemask. Ie
* sincos dst.y, src1, c0, c1
* returns the sine in dst.y. dst.x and dst.z are undefined, dst.w is not touched. The assembler
* vsa.exe also stops with an error if the dest register is the same register as the source
* register. This means we can use dest.xyz as temporary storage. The assembler vsa.exe output also
* indicates that sincos consumes 8 instruction slots in vs_2_0(and, strangely, in vs_3_0).
*/
/* sin(x/2)
*
* Unfortunately we don't get the constants in a DP4-capable form. Is there a way to
* properly merge that with MULs in the code above?
* The swizzles .yz and xw however fit into the .yzxw swizzle added to ps_2_0. Maybe
* we can merge the sine and cosine MAD rows to calculate them together.
*/
/* cos(x/2) */
shader_addline(buffer, "MAD TA.y, %s.x, %s.y, %s.z;\n", dst_name, src_name2, src_name2); /* -1/(2!*4), +1.0 */
/* cos x */
}
/* sin x */
}
}
}
{
/* SGN is only valid in vertex shaders */
return;
}
/* If SRC > 0.0, -SRC < SRC = TRUE, otherwise false.
* if SRC < 0.0, SRC < -SRC = TRUE. If neither is true, src = 0.0
*/
} else {
/* src contains TA? Write to the dest first. This won't overwrite our destination.
* Then use TA, and calculate the final result
*
* Not reading from TA? Store the first result in TA to avoid overwriting the
* destination if src reg = dst reg
*/
{
}
else
{
}
}
}
{
shader_addline(buffer, "MUL%s %s, %s, ycorrection.y;\n", shader_arb_get_modifier(ins), dst, dst_name);
}
{
switch(mod)
{
case WINED3DSPSM_NONE: return WINED3DSPSM_ABS;
case WINED3DSPSM_NEG: return WINED3DSPSM_ABS;
case WINED3DSPSM_ABS: return WINED3DSPSM_ABS;
case WINED3DSPSM_ABSNEG: return WINED3DSPSM_ABS;
}
return mod;
}
{
const char *instr;
switch(ins->handler_idx)
{
default:
return;
}
/* LOG, LOGP and POW operate on the absolute value of the input */
if(need_abs)
{
if(arg2)
{
}
else
{
}
}
else if(arg2)
{
}
else
{
}
}
{
/* src0 is aL */
if(vshader)
{
/* The constant loader makes sure to load -1 into iX.w */
}
else
{
}
}
{
/* The constant loader makes sure to load -1 into iX.w */
if(vshader)
{
}
else
{
}
}
{
if(vshader)
{
}
else
{
}
}
{
if(vshader)
{
}
else
{
}
}
{
{
}
ERR("Could not find loop for break\n");
return NULL;
}
{
if(vshader)
{
}
else
{
}
}
{
/* Translate a COMPARISON_* flag into its two-letter mnemonic string.
 * Flags without a mapping yield the placeholder "(??)".
 */
switch (flags)
{
case COMPARISON_GT: return "GT";
case COMPARISON_EQ: return "EQ";
case COMPARISON_GE: return "GE";
case COMPARISON_LT: return "LT";
case COMPARISON_NE: return "NE";
case COMPARISON_LE: return "LE";
default:
/* The question marks are escaped so that "??)" is not read as a trigraph. */
return "(\?\?)";
}
}
{
/* Return the logical negation of a COMPARISON_* flag:
 * GT <-> LE, EQ <-> NE, GE <-> LT. Unknown flags return -1.
 */
switch (flags)
{
case COMPARISON_GT: return COMPARISON_LE;
case COMPARISON_EQ: return COMPARISON_NE;
case COMPARISON_GE: return COMPARISON_LT;
case COMPARISON_LT: return COMPARISON_GE;
case COMPARISON_NE: return COMPARISON_EQ;
case COMPARISON_LE: return COMPARISON_GT;
default:
return -1;
}
}
{
if(vshader)
{
/* SUBC CC, src0, src1" works only in pixel shaders, so use TA to throw
* away the subtraction result
*/
}
else
{
}
}
{
const char *comp;
if(vshader)
{
/* Invert the flag. We jump to the else label if the condition is NOT true */
}
else
{
}
}
{
if(vshader)
{
}
else
{
}
}
{
if(vshader)
{
if(control_frame->had_else)
{
}
else
{
}
}
else
{
}
}
{
}
{
}
{
/* Call instructions activate the NV extensions, not labels and rets. If there is an uncalled
* subroutine, don't generate a label that will make GL complain
*/
}
static void vshader_add_footer(IWineD3DVertexShaderImpl *This, struct wined3d_shader_buffer *buffer,
{
unsigned int i;
/* The D3DRS_FOGTABLEMODE render state defines if the shader-generated fog coord is used
* or if the fragment depth is used. If the fragment depth is used(FOGTABLEMODE != NONE),
* the fog frag coord is thrown away. If the fog frag coord is used, but not written by
* the shader, it is set to 0.0(fully fogged, since start = 1.0, end = 0.0)
*/
/* posFixup.x is always 1.0, so we can safely use it */
}
/* Write the final position.
*
* OpenGL coordinates specify the center of the pixel while d3d coords specify
* the corner. The offsets are stored in z and w in posFixup. posFixup.y contains
* 1.0 or -1.0 to turn the rendering upside down for offscreen rendering. PosFixup.x
* contains 1.0 to allow a mad, but arb vs swizzles are too restricted for that.
*/
{
{
for(i = 0; i < priv_ctx->vs_clipplanes; i++)
{
}
}
}
{
unsigned int cur_clip = 0;
{
{
}
}
switch(cur_clip)
{
case 0:
break;
case 1:
break;
case 2:
break;
case 3:
break;
}
}
/* Z coord [0;1]->[-1;1] mapping, see comment in transform_projection in state.c
* and the glsl equivalent
*/
if(need_helper_const(gl_info)) {
} else {
}
}
{
if(vshader)
{
if(priv->in_main_func) vshader_add_footer((IWineD3DVertexShaderImpl *) shader, buffer, priv->cur_vs_args, priv);
}
}
{
}
/* GL locking is done by the caller */
{
/* ARB vertex program ("!!ARBvp1.0") used for blits: the vertex position is passed
 * through unchanged and c[0].x (1) is written to the result color.
 */
const char *blt_vprogram =
"!!ARBvp1.0\n"
"PARAM c[1] = { { 1, 0.5 } };\n"
"MOV result.position, vertex.position;\n"
"MOV result.color, c[0].x;\n"
"END\n";
checkGLcall("glProgramStringARB()");
if (pos != -1)
{
}
else
{
/* blt_vprogram is a vertex program, so the native-limits query has to address
 * GL_VERTEX_PROGRAM_ARB. Asking GL_FRAGMENT_PROGRAM_ARB would report on whatever
 * fragment program happens to be bound instead of the program created here.
 */
GL_EXTCALL(glGetProgramivARB(GL_VERTEX_PROGRAM_ARB, GL_PROGRAM_UNDER_NATIVE_LIMITS_ARB, &native));
checkGLcall("glGetProgramivARB()");
}
return program_id;
}
/* GL locking is done by the caller */
/* Create the ARB fragment program used for blits from a texture of type tex_type.
 *
 * The program text is taken from a table indexed by texture type. Each available
 * program samples texture[0] at fragment.texcoord[0] and moves the sampled x
 * component into result.depth. The tex_1d and tex_3d slots hold no program (NULL).
 *
 * Returns program_id.
 */
static GLuint create_arb_blt_fragment_program(const struct wined3d_gl_info *gl_info, enum tex_types tex_type)
{
{
/* tex_1d */
NULL,
/* tex_2d */
"!!ARBfp1.0\n"
"TEMP R0;\n"
"TEX R0.x, fragment.texcoord[0], texture[0], 2D;\n"
"MOV result.depth.z, R0.x;\n"
"END\n",
/* tex_3d */
NULL,
/* tex_cube */
"!!ARBfp1.0\n"
"TEMP R0;\n"
"TEX R0.x, fragment.texcoord[0], texture[0], CUBE;\n"
"MOV result.depth.z, R0.x;\n"
"END\n",
/* tex_rect */
"!!ARBfp1.0\n"
"TEMP R0;\n"
"TEX R0.x, fragment.texcoord[0], texture[0], RECT;\n"
"MOV result.depth.z, R0.x;\n"
"END\n",
};
/* No program text exists for this texture type */
if (!blt_fprograms[tex_type])
{
}
checkGLcall("glProgramStringARB()");
/* pos == -1 is the "no error" case; anything else takes the error branch */
if (pos != -1)
{
}
else
{
/* Ask GL whether the fragment program stays under the native resource limits */
GL_EXTCALL(glGetProgramivARB(GL_FRAGMENT_PROGRAM_ARB, GL_PROGRAM_UNDER_NATIVE_LIMITS_ARB, &native));
checkGLcall("glGetProgramivARB()");
}
return program_id;
}
{
/* Perform sRGB write correction. See GLX_EXT_framebuffer_sRGB */
if(condcode)
{
/* Sigh. MOVC CC doesn't work, so use one of the temps as dummy dest */
/* Calculate the > 0.0031308 case */
/* Calculate the < case */
}
else
{
/* Calculate the > 0.0031308 case */
/* Calculate the < case */
/* Get 1.0 / 0.0 masks for > 0.0031308 and < 0.0031308 */
/* Store the components > 0.0031308 in the destination */
/* Add the components that are < 0.0031308 */
/* Move everything into result.color at once. Nvidia hardware cannot handle partial
* result.color writes(.rgb first, then .a), or handle overwriting already written
* components. The assembler uses a temporary register in this case, which is usually
* not allocated from one of our registers that were used earlier.
*/
}
/* [0.0;1.0] clamping. Not needed, this is done implicitly */
}
{
{
{
}
}
return NULL;
}
static void init_ps_input(const IWineD3DPixelShaderImpl *This, const struct arb_ps_compile_args *args,
struct shader_arb_ctx_priv *priv)
{
{
};
unsigned int i;
const char *semantic_name;
{
case pretransformed:
case fixedfunction:
/* The pixelshader has to collect the varyings on its own. In any case properly load
* color0 and color1. In the case of pretransformed vertices also load texcoords. Set
* other attribs to 0.0.
*
* For fixedfunction this behavior is correct, according to the tests. For pretransformed
* we'd either need a replacement shader that can load other attribs like BINORMAL, or
* load the texcoord attrib pointers to match the pixel shader signature
*/
for(i = 0; i < MAX_REG_INPUT; i++)
{
if(semantic_name == NULL) continue;
{
}
{
}
{
}
{
}
else
{
}
}
break;
case vertexshader:
/* That one is easy. The vertex shaders provide v0-v7 in fragment.texcoord and v8 and v9 in
* fragment.color
*/
for(i = 0; i < 8; i++)
{
}
break;
}
}
/* GL locking is done by the caller */
static GLuint shader_arb_generate_pshader(IWineD3DPixelShaderImpl *This, struct wined3d_shader_buffer *buffer,
{
const struct wined3d_gl_info *gl_info = &((IWineD3DDeviceImpl *)This->baseShader.device)->adapter->gl_info;
unsigned int i, found = 0;
{
if (!(map & 1)
continue;
++found;
if (found == 4) break;
}
switch(found) {
case 0:
break;
case 1:
break;
case 2:
break;
case 3:
break;
}
/* Create the hw ARB shader */
/* Avoid enabling NV_fragment_program* if we do not need it.
*
* Enabling GL_NV_fragment_program_option causes the driver to occupy a temporary register,
* and it slows down the shader execution noticeably(about 5%). Usually our instruction emulation
* is faster than what we gain from using higher native instructions. There are some things though
* that cannot be emulated. In that case enable the extensions.
* If the extension is enabled, instruction handlers that support both ways will use it.
*
* Testing shows no performance difference between OPTION NV_fragment_program2 and NV_fragment_program.
* So enable the best we can get.
*/
{
want_nv_prog = TRUE;
}
{
}
{
} else {
if(want_nv_prog)
{
/* This is an error - either we're advertising the wrong shader version, or aren't enforcing some
* limits properly
*/
ERR("The shader requires instructions that are not available in plain GL_ARB_fragment_program\n");
ERR("Try GLSL\n");
}
}
{
}
{
case FOG_OFF:
break;
case FOG_LINEAR:
break;
case FOG_EXP:
break;
case FOG_EXP2:
break;
}
}
/* For now always declare the temps. At least the Nvidia assembler optimizes completely
* unused temps away(but occupies them for the whole shader if they're used once). Always
* declaring them avoids tricky bookkeeping work
*/
{
} else {
if(This->color0_mov) {
} else {
}
} else {
}
}
}
/* Base Declarations */
{
if (!(map & 1)) continue;
/* We can fit the constants into the constant limit for sure because texbem, texbeml, bem and beml are only supported
* in 1.x shaders, and GL_ARB_fragment_program has a constant limit of 24 constants. So in the worst case we're loading
* 8 shader constants, 8 bump matrices and 8 luminance parameters and are perfectly fine. (No NP2 fixup on bumpmapped
* textures due to conditional NP2 restrictions)
*
* Use local constants to load the bump env parameters, not program.env. This avoids collisions with d3d constants of
* shaders in newer shader models. Since the bump env parameters have to share their space with NP2 fixup constants,
* their location is shader dependent anyway and they cannot be loaded globally.
*/
}
for(i = 0; i < MAX_CONST_I; i++)
{
{
if(control_values)
{
}
else
{
}
}
}
{
{
/* ycorrection.x: Backbuffer height(onscreen) or 0(offscreen).
* ycorrection.y: -1.0(onscreen), 1.0(offscreen)
* ycorrection.z: 1.0
* ycorrection.w: 0.0
*/
}
}
else
{
}
/* Load constants to fixup NP2 texcoords if there are still free constants left:
* Constants (texture dimensions) for the NP2 fixup are loaded as local program parameters. This will consume
* at most 8 (MAX_FRAGMENT_SAMPLERS / 2) parameters, which is highly unlikely, since the application had to
* use 16 NP2 textures at the same time. In case that we run out of constants the fixup is simply not
* applied / activated. This will probably result in wrong rendering of the texture, but will save us from
* shader compilation errors and the subsequent errors when drawing with this shader. */
cur = 0;
for (i = 0; i < MAX_FRAGMENT_SAMPLERS; ++i) {
if (!(map & (1 << i))) continue;
} else {
FIXME("No free constant found to load NP2 fixup data into shader. "
"Sampling from this texture will probably look wrong.\n");
break;
}
}
}
}
{
}
/* Base Shader Body */
}
}
/* TODO: change to resource.glObjectHandle or something like that */
/* Create the program and check for errors */
checkGLcall("glProgramStringARB()");
if (errPos != -1)
{
FIXME("HW PixelShader Error at position %d: %s\n\n",
retval = 0;
}
else
{
GL_EXTCALL(glGetProgramivARB(GL_FRAGMENT_PROGRAM_ARB, GL_PROGRAM_UNDER_NATIVE_LIMITS_ARB, &native));
checkGLcall("glGetProgramivARB()");
}
/* Load immediate constants */
if(lconst_map) {
checkGLcall("glProgramLocalParameter4fvARB");
}
}
return retval;
}
/* Three-way comparison of two input signatures.
 *
 * Both arrays are walked element by element over MAX_REG_INPUT entries. Returns 0
 * when every element matches, a negative value when sig1 orders before sig2 and a
 * positive value when it orders after it.
 */
static int compare_sig(const struct wined3d_shader_signature_element *sig1, const struct wined3d_shader_signature_element *sig2)
{
unsigned int i;
int ret;
for(i = 0; i < MAX_REG_INPUT; i++)
{
if(sig1[i].semantic_name == NULL || sig2[i].semantic_name == NULL)
{
/* At least one element does not exist. If both are missing they are equal; otherwise
 * the signature with the missing (NULL) element orders first. Deciding on NULL-ness
 * avoids a relational comparison between unrelated pointers.
 */
if(sig1[i].semantic_name != sig2[i].semantic_name) return sig1[i].semantic_name ? 1 : -1;
continue;
}
/* Both elements exist: order by the semantic name contents first */
if((ret = strcmp(sig1[i].semantic_name, sig2[i].semantic_name))) return ret;
if(sig1[i].semantic_idx != sig2[i].semantic_idx) return sig1[i].semantic_idx < sig2[i].semantic_idx ? -1 : 1;
if(sig1[i].sysval_semantic != sig2[i].sysval_semantic) return sig1[i].sysval_semantic < sig2[i].sysval_semantic ? -1 : 1;
/* Compare component_type with component_type; mixing in sysval_semantic here gives an inconsistent ordering */
if(sig1[i].component_type != sig2[i].component_type) return sig1[i].component_type < sig2[i].component_type ? -1 : 1;
if(sig1[i].register_idx != sig2[i].register_idx) return sig1[i].register_idx < sig2[i].register_idx ? -1 : 1;
}
return 0;
}
static struct wined3d_shader_signature_element *clone_sig(const struct wined3d_shader_signature_element *sig)
{
int i;
char *name;
for(i = 0; i < MAX_REG_INPUT; i++)
{
{
continue;
}
/* Clone the semantic string */
}
return new;
}
static DWORD find_input_signature(struct shader_arb_priv *priv, const struct wined3d_shader_signature_element *sig)
{
{
}
{
ERR("Failed to insert program entry.\n");
}
}
static void init_output_registers(IWineD3DVertexShaderImpl *shader, DWORD sig_num, struct shader_arb_ctx_priv *priv_ctx,
struct arb_vs_compiled_shader *compiled)
{
unsigned int i, j;
{
};
const char *semantic_name;
/* Varyings 8 and 9 go to the color outputs: varying 8 to result.color.primary
* and varying 9 to result.color.secondary
*/
{
"result.color.primary", "result.color.secondary"
};
if(sig_num == ~0)
{
TRACE("Pixel shader uses builtin varyings\n");
/* Map builtins to builtins */
for(i = 0; i < 8; i++)
{
}
for (i = 0; i < (sizeof(baseshader->output_signature) / sizeof(*baseshader->output_signature)); ++i)
{
if(semantic_name == NULL) continue;
{
TRACE("o%u is TMP_OUT\n", i);
}
{
TRACE("o%u is result.pointsize\n", i);
}
{
}
{
}
{
TRACE("o%u is result.fogcoord\n", i);
}
else
{
}
}
return;
}
/* Instead of searching for the signature in the signature list, read the one from the current pixel shader.
* It may not be the shader where the signature came from, but it is the same signature and faster to find
*/
TRACE("Pixel shader uses declared varyings\n");
for(i = 0; i < 8; i++)
{
}
for(i = 0; i < MAX_REG_INPUT; i++)
{
if(semantic_name == NULL) continue;
/* If a declared input register is not written by builtin arguments, don't write to it.
* GL_NV_vertex_program makes sure the input defaults to 0.0, which is correct with D3D
*
* Don't care about POSITION and PSIZE here - this is a builtin vertex shader, position goes
* to TMP_OUT in any case
*/
{
}
{
}
{
}
else
{
continue;
}
{
}
}
/* Map declared to declared */
for (i = 0; i < (sizeof(baseshader->output_signature) / sizeof(*baseshader->output_signature)); ++i)
{
/* Write unread output to TA to throw them away */
if(semantic_name == NULL)
{
continue;
}
{
continue;
}
{
continue;
}
for(j = 0; j < MAX_REG_INPUT; j++)
{
{
continue;
}
{
{
}
}
}
}
}
/* GL locking is done by the caller */
/* Generate the ARB vertex program for a vertex shader.
 *
 * Returns ret, which is set to -1 when GL reports an error position for the
 * program string.
 */
static GLuint shader_arb_generate_vshader(IWineD3DVertexShaderImpl *This, struct wined3d_shader_buffer *buffer,
{
unsigned int i;
/* Create the hw ARB shader */
/* Always enable the NV extension if available. Unlike fragment shaders, there is no
* measurable performance penalty, and we can always make use of it for clipplanes.
*/
{
}
{
} else {
}
if(need_helper_const(gl_info)) {
}
}
/* Base Declarations */
for(i = 0; i < MAX_CONST_I; i++)
{
{
if(control_values)
{
}
else
{
}
}
}
/* We need a constant to fixup the final position */
/* Initialize output parameters. GL_ARB_vertex_program does not require special initialization values
* for output parameters. D3D in theory does not do that either, but some applications depend on a
* proper initialization of the secondary color, and programs using the fixed function pipeline without
* a replacement shader depend on the texcoord.w being set properly.
*
* GL_NV_vertex_program defines that all output values are initialized to {0.0, 0.0, 0.0, 1.0}. This
* assertion is in effect even when using GL_ARB_vertex_program without any NV specific additions. So
* skip this if NV_vertex_program is supported. Otherwise, initialize the secondary color. For the tex-
* coords, we have a flag in the opengl caps. Many cards do not require the texcoord being set, and
* this can eat a number of instructions, so skip it unless this cap is set as well
*/
{
{
int i;
}
}
}
}
/* The shader starts with the main function */
/* Base Shader Body */
/* TODO: change to resource.glObjectHandle or something like that */
/* Create the program and check for errors */
checkGLcall("glProgramStringARB()");
if (errPos != -1)
{
FIXME("HW VertexShader Error at position %d: %s\n\n",
ret = -1;
}
else
{
/* This is a vertex program, so the native-limits query has to address
 * GL_VERTEX_PROGRAM_ARB. Querying GL_FRAGMENT_PROGRAM_ARB would report on the
 * currently bound fragment program rather than on the program built here.
 */
GL_EXTCALL(glGetProgramivARB(GL_VERTEX_PROGRAM_ARB, GL_PROGRAM_UNDER_NATIVE_LIMITS_ARB, &native));
checkGLcall("glGetProgramivARB()");
/* Load immediate constants */
if(lconst_map) {
}
}
}
return ret;
}
/* GL locking is done by the caller */
{
UINT i;
{
shader->baseShader.backend_data = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*shader_data));
else shader_data->input_signature_idx = find_input_signature(priv, shader->baseShader.input_signature);
if (!device->vs_clipping)
else
shader_data->clipplane_emulation = ~0U;
}
/* Usually we have very few GL shaders for each d3d shader(just 1 or maybe 2),
* so a linear search is more performant than a hashmap or a binary search
* (cache coherency etc)
*/
for(i = 0; i < shader_data->num_gl_shaders; i++) {
return &shader_data->gl_shaders[i];
}
}
TRACE("No matching GL shader found, compiling a new shader\n");
if (shader_data->num_gl_shaders)
{
} else {
new_size = 1;
}
if(!new_array) {
ERR("Out of memory\n");
return 0;
}
}
if (!shader_buffer_init(&buffer))
{
ERR("Failed to initialize shader buffer.\n");
return 0;
}
}
static inline BOOL vs_args_equal(const struct arb_vs_compile_args *stored, const struct arb_vs_compile_args *new,
}
{
UINT i;
const struct wined3d_gl_info *gl_info = &((IWineD3DDeviceImpl *)shader->baseShader.device)->adapter->gl_info;
{
shader->baseShader.backend_data = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*shader_data));
}
/* Usually we have very few GL shaders for each d3d shader(just 1 or maybe 2),
* so a linear search is more performant than a hashmap or a binary search
* (cache coherency etc)
*/
for(i = 0; i < shader_data->num_gl_shaders; i++) {
{
return &shader_data->gl_shaders[i];
}
}
TRACE("No matching GL shader found, compiling a new shader\n");
if (shader_data->num_gl_shaders)
{
} else {
new_size = 1;
}
if(!new_array) {
ERR("Out of memory\n");
return 0;
}
}
if (!shader_buffer_init(&buffer))
{
ERR("Failed to initialize shader buffer.\n");
return 0;
}
}
static inline void find_arb_ps_compile_args(IWineD3DPixelShaderImpl *shader, IWineD3DStateBlockImpl *stateblock,
struct arb_ps_compile_args *args)
{
int i;
const struct wined3d_gl_info *gl_info = &((IWineD3DDeviceImpl *)shader->baseShader.device)->adapter->gl_info;
/* This forces all local boolean constants to 1 to make them stateblock independent */
for(i = 0; i < MAX_CONST_B; i++)
{
}
/* Only enable the clip plane emulation KIL if at least one clipplane is enabled. The KIL instruction
* is quite expensive because it forces the driver to disable early Z discards. It is cheaper to
* duplicate the shader than have a no-op KIL instruction in every shader
*/
{
}
else
{
}
/* Skip if unused or local, or supported natively */
int_skip = ~shader->baseShader.reg_maps.integer_constants | shader->baseShader.reg_maps.local_int_consts;
{
return;
}
for(i = 0; i < MAX_CONST_I; i++)
{
if(int_skip & (1 << i))
{
}
else
{
}
}
}
static inline void find_arb_vs_compile_args(IWineD3DVertexShaderImpl *shader, IWineD3DStateBlockImpl *stateblock,
struct arb_vs_compile_args *args)
{
int i;
if(use_ps(stateblock))
{
}
else
{
args->ps_signature = ~0;
if(!dev->vs_clipping)
{
}
/* Otherwise: Setting boolclip_compare set clip_texcoord to 0 */
}
{
{
}
/* clipplane_mask was set to 0 by setting boolclip_compare to 0 */
}
/* This forces all local boolean constants to 1 to make them stateblock independent */
/* TODO: Figure out if it would be better to store bool constants as bitmasks in the stateblock */
for(i = 0; i < MAX_CONST_B; i++)
{
}
/* Skip if unused or local */
int_skip = ~shader->baseShader.reg_maps.integer_constants | shader->baseShader.reg_maps.local_int_consts;
/* This is about flow control, not clipping. */
{
return;
}
for(i = 0; i < MAX_CONST_I; i++)
{
if(int_skip & (1 << i))
{
}
else
{
}
}
}
/* GL locking is done by the caller */
{
int i;
/* Deal with pixel shaders first so the vertex shader arg function has the input signature ready */
if (usePS) {
/* Bind the fragment program */
checkGLcall("glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, priv->current_fprogram_id);");
if(!priv->use_arbfp_fixed_func) {
/* Enable OpenGL fragment programs */
checkGLcall("glEnable(GL_FRAGMENT_PROGRAM_ARB);");
}
TRACE("(%p) : Bound fragment program %u and enabled GL_FRAGMENT_PROGRAM_ARB\n", This, priv->current_fprogram_id);
/* Pixel Shader 1.x constants are clamped to [-1;1], Pixel Shader 2.0 constants are not. If switching between
* a 1.x and newer shader, reload the first 8 constants
*/
if(priv->last_ps_const_clamped != ((struct arb_pshader_private *)ps->baseShader.backend_data)->clamp_consts)
{
priv->last_ps_const_clamped = ((struct arb_pshader_private *)ps->baseShader.backend_data)->clamp_consts;
for(i = 0; i < 8; i++)
{
}
/* Also takes care of loading local constants */
}
else
{
}
/* Force constant reloading for the NP2 fixup (see comment in shader_glsl_select for more info) */
}
{
/* Disable only if we're not using arbfp fixed function fragment processing. If this is used,
* keep GL_FRAGMENT_PROGRAM_ARB enabled, and the fixed function pipeline will bind the fixed function
* replacement shader
*/
checkGLcall("glDisable(GL_FRAGMENT_PROGRAM_ARB)");
priv->current_fprogram_id = 0;
}
if (useVS) {
/* Bind the vertex program */
checkGLcall("glBindProgramARB(GL_VERTEX_PROGRAM_ARB, priv->current_vprogram_id);");
/* Enable OpenGL vertex programs */
checkGLcall("glEnable(GL_VERTEX_PROGRAM_ARB);");
TRACE("(%p) : Bound vertex program %u and enabled GL_VERTEX_PROGRAM_ARB\n", This, priv->current_vprogram_id);
{
checkGLcall("glClampColorARB");
} else {
FIXME("vertex color clamp needs to be changed, but extension not supported.\n");
}
}
}
{
priv->current_vprogram_id = 0;
checkGLcall("glDisable(GL_VERTEX_PROGRAM_ARB)");
}
}
/* GL locking is done by the caller */
if (!priv->depth_blt_vprogram_id) priv->depth_blt_vprogram_id = create_arb_blt_vertex_program(gl_info);
}
/* GL locking is done by the caller */
if (priv->current_vprogram_id) {
checkGLcall("glBindProgramARB(GL_VERTEX_PROGRAM_ARB, vertexShader->prgId);");
TRACE("(%p) : Bound vertex program %u and enabled GL_VERTEX_PROGRAM_ARB\n", This, priv->current_vprogram_id);
} else {
checkGLcall("glDisable(GL_VERTEX_PROGRAM_ARB)");
}
if (priv->current_fprogram_id) {
checkGLcall("glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, pixelShader->prgId);");
TRACE("(%p) : Bound fragment program %u and enabled GL_FRAGMENT_PROGRAM_ARB\n", This, priv->current_fprogram_id);
} else if(!priv->use_arbfp_fixed_func) {
checkGLcall("glDisable(GL_FRAGMENT_PROGRAM_ARB)");
}
}
{
UINT i;
if(!shader_data) return; /* This can happen if a shader was never compiled */
if (shader_data->num_gl_shaders)
{
ENTER_GL();
for (i = 0; i < shader_data->num_gl_shaders; ++i)
{
{
checkGLcall("GL_EXTCALL(glDeleteProgramsARB(1, &shader_data->gl_shaders[i].prgId))");
}
else
{
WARN("Attempting to delete fprog %u created in ctx %p from ctx %p\n",
}
}
LEAVE_GL();
}
}
else
{
UINT i;
if(!shader_data) return; /* This can happen if a shader was never compiled */
if (shader_data->num_gl_shaders)
{
ENTER_GL();
for (i = 0; i < shader_data->num_gl_shaders; ++i)
{
{
checkGLcall("GL_EXTCALL(glDeleteProgramsARB(1, &shader_data->gl_shaders[i].prgId))");
}
else
{
WARN("Attempting to delete vprog %u created in ctx %p from ctx %p\n",
}
}
LEAVE_GL();
}
}
}
{
}
{
};
{
ERR("RB tree init failed\n");
return E_OUTOFMEMORY;
}
return WINED3D_OK;
}
{
int i;
for(i = 0; i < MAX_REG_INPUT; i++)
{
}
}
/* Context activation is done by the caller. */
int i;
ENTER_GL();
if(priv->depth_blt_vprogram_id) {
}
for (i = 0; i < tex_type_count; ++i) {
if (priv->depth_blt_fprogram_id[i]) {
}
}
LEAVE_GL();
}
return TRUE;
}
{
DWORD vs_consts = min(gl_info->limits.arb_vs_float_constants, gl_info->limits.arb_vs_native_constants);
DWORD ps_consts = min(gl_info->limits.arb_ps_float_constants, gl_info->limits.arb_ps_native_constants);
/* We don't have an ARB fixed function pipeline yet, so let the none backend set its caps,
* then overwrite the shader specific ones
*/
{
{
}
else if (vs_consts >= 256)
{
/* Shader Model 2.0 requires at least 256 vertex shader constants */
}
else
{
}
}
{
{
}
else if (ps_consts >= 32)
{
/* Shader Model 2.0 requires at least 32 pixel shader constants */
}
else
{
}
}
}
{
{
TRACE("Checking support for color_fixup:\n");
}
/* We support everything except complex conversions. */
if (!is_complex_fixup(fixup))
{
TRACE("[OK]\n");
return TRUE;
}
TRACE("[FAILED]\n");
return FALSE;
}
if(shift == 0) return; /* Saturate alone is handled by the instructions */
/* Generate a line that does the output modifier computation
* FIXME: _SAT vs shift? _SAT alone is already handled in the instructions; if this
* causes problems in e.g. _d4_sat, modify shader_arb_get_modifier
*/
}
{
/* WINED3DSIH_ABS */ shader_hw_map2gl,
/* WINED3DSIH_ADD */ shader_hw_map2gl,
/* WINED3DSIH_BEM */ pshader_hw_bem,
/* WINED3DSIH_BREAK */ shader_hw_break,
/* WINED3DSIH_BREAKC */ shader_hw_breakc,
/* WINED3DSIH_BREAKP */ NULL,
/* WINED3DSIH_CALL */ shader_hw_call,
/* WINED3DSIH_CALLNZ */ NULL,
/* WINED3DSIH_CMP */ pshader_hw_cmp,
/* WINED3DSIH_CND */ pshader_hw_cnd,
/* WINED3DSIH_CRS */ shader_hw_map2gl,
/* WINED3DSIH_CUT */ NULL,
/* WINED3DSIH_DCL */ NULL,
/* WINED3DSIH_DEF */ NULL,
/* WINED3DSIH_DEFB */ NULL,
/* WINED3DSIH_DEFI */ NULL,
/* WINED3DSIH_DP2ADD */ pshader_hw_dp2add,
/* WINED3DSIH_DP3 */ shader_hw_map2gl,
/* WINED3DSIH_DP4 */ shader_hw_map2gl,
/* WINED3DSIH_DST */ shader_hw_map2gl,
/* WINED3DSIH_DSX */ shader_hw_map2gl,
/* WINED3DSIH_DSY */ shader_hw_dsy,
/* WINED3DSIH_ELSE */ shader_hw_else,
/* WINED3DSIH_EMIT */ NULL,
/* WINED3DSIH_ENDIF */ shader_hw_endif,
/* WINED3DSIH_ENDLOOP */ shader_hw_endloop,
/* WINED3DSIH_ENDREP */ shader_hw_endrep,
/* WINED3DSIH_EXP */ shader_hw_scalar_op,
/* WINED3DSIH_EXPP */ shader_hw_scalar_op,
/* WINED3DSIH_FRC */ shader_hw_map2gl,
/* WINED3DSIH_IADD */ NULL,
/* WINED3DSIH_IFC */ shader_hw_ifc,
/* WINED3DSIH_IGE */ NULL,
/* WINED3DSIH_LABEL */ shader_hw_label,
/* WINED3DSIH_LIT */ shader_hw_map2gl,
/* WINED3DSIH_LOG */ shader_hw_log_pow,
/* WINED3DSIH_LOGP */ shader_hw_log_pow,
/* WINED3DSIH_LOOP */ shader_hw_loop,
/* WINED3DSIH_LRP */ shader_hw_lrp,
/* WINED3DSIH_LT */ NULL,
/* WINED3DSIH_M3x2 */ shader_hw_mnxn,
/* WINED3DSIH_M3x3 */ shader_hw_mnxn,
/* WINED3DSIH_M3x4 */ shader_hw_mnxn,
/* WINED3DSIH_M4x3 */ shader_hw_mnxn,
/* WINED3DSIH_M4x4 */ shader_hw_mnxn,
/* WINED3DSIH_MAD */ shader_hw_map2gl,
/* WINED3DSIH_MAX */ shader_hw_map2gl,
/* WINED3DSIH_MIN */ shader_hw_map2gl,
/* WINED3DSIH_MOV */ shader_hw_mov,
/* WINED3DSIH_MOVA */ shader_hw_mov,
/* WINED3DSIH_MUL */ shader_hw_map2gl,
/* WINED3DSIH_NOP */ shader_hw_nop,
/* WINED3DSIH_NRM */ shader_hw_nrm,
/* WINED3DSIH_PHASE */ NULL,
/* WINED3DSIH_POW */ shader_hw_log_pow,
/* WINED3DSIH_RCP */ shader_hw_scalar_op,
/* WINED3DSIH_REP */ shader_hw_rep,
/* WINED3DSIH_RET */ shader_hw_ret,
/* WINED3DSIH_RSQ */ shader_hw_scalar_op,
/* WINED3DSIH_SETP */ NULL,
/* WINED3DSIH_SGE */ shader_hw_map2gl,
/* WINED3DSIH_SGN */ shader_hw_sgn,
/* WINED3DSIH_SINCOS */ shader_hw_sincos,
/* WINED3DSIH_SLT */ shader_hw_map2gl,
/* WINED3DSIH_SUB */ shader_hw_map2gl,
/* WINED3DSIH_TEX */ pshader_hw_tex,
/* WINED3DSIH_TEXBEM */ pshader_hw_texbem,
/* WINED3DSIH_TEXBEML */ pshader_hw_texbem,
/* WINED3DSIH_TEXCOORD */ pshader_hw_texcoord,
/* WINED3DSIH_TEXDEPTH */ pshader_hw_texdepth,
/* WINED3DSIH_TEXDP3 */ pshader_hw_texdp3,
/* WINED3DSIH_TEXDP3TEX */ pshader_hw_texdp3tex,
/* WINED3DSIH_TEXKILL */ pshader_hw_texkill,
/* WINED3DSIH_TEXLDD */ shader_hw_texldd,
/* WINED3DSIH_TEXLDL */ shader_hw_texldl,
/* WINED3DSIH_TEXM3x2DEPTH */ pshader_hw_texm3x2depth,
/* WINED3DSIH_TEXM3x2PAD */ pshader_hw_texm3x2pad,
/* WINED3DSIH_TEXM3x2TEX */ pshader_hw_texm3x2tex,
/* WINED3DSIH_TEXM3x3 */ pshader_hw_texm3x3,
/* WINED3DSIH_TEXM3x3DIFF */ NULL,
/* WINED3DSIH_TEXM3x3PAD */ pshader_hw_texm3x3pad,
/* WINED3DSIH_TEXM3x3SPEC */ pshader_hw_texm3x3spec,
/* WINED3DSIH_TEXM3x3TEX */ pshader_hw_texm3x3tex,
/* WINED3DSIH_TEXM3x3VSPEC */ pshader_hw_texm3x3vspec,
/* WINED3DSIH_TEXREG2AR */ pshader_hw_texreg2ar,
/* WINED3DSIH_TEXREG2GB */ pshader_hw_texreg2gb,
/* WINED3DSIH_TEXREG2RGB */ pshader_hw_texreg2rgb,
};
static inline BOOL get_bool_const(const struct wined3d_shader_instruction *ins, IWineD3DBaseShaderImpl *This, DWORD idx)
{
{
/* What good is an if(bool) with a hardcoded local constant? I don't know, but handle it */
{
{
}
}
ERR("Local constant not found\n");
return FALSE;
}
else
{
}
}
{
/* Integer constants can either be a local constant, or they can be stored in the shader
* type specific compile args. */
{
{
{
/* Step is signed. */
return;
}
}
/* If this happens the flag was set incorrectly */
ERR("Local constant not found\n");
loop_control->count = 0;
loop_control->start = 0;
loop_control->step = 0;
return;
}
{
/* Count and aL start value are unsigned */
/* Step is signed. */
break;
break;
default:
break;
}
}
{
unsigned int i;
if(!rec)
{
ERR("Out of memory\n");
return;
}
{
}
{
{
}
}
return;
free:
ERR("Out of memory\n");
if(dst_param)
{
}
if(src_param)
{
{
}
}
}
{
unsigned int i;
{
{
}
{
{
}
}
}
}
{
{
priv->loop_depth++;
}
else
{
/* Don't bother recording when we're in a not used if branch */
{
return;
}
{
return; /* Instruction is handled */
}
/* Record this loop in the outer loop's recording */
}
}
{
{
/* Nothing to do. The control frame is popped after the HW instr handler */
}
else
{
if(control_frame->outer_loop)
{
/* Turn off recording before playback */
/* Move the recorded instructions to a separate list and get them out of the private data
* structure. If there are nested loops, the shader_arb_handle_instruction below will
* be recorded again, thus priv->record might be overwritten
*/
{
}
else
{
}
{
{
}
else
{
}
{
}
{
}
}
return; /* Instruction is handled */
}
else
{
/* This is a nested loop. Proceed to the normal recording function */
}
}
}
{
return;
}
/* boolean if */
{
{
}
return; /* Instruction is handled */
}
{
/* IF(bool) and if_cond(a, b) use the same ELSE and ENDIF tokens */
}
{
{
{
}
return; /* Instruction is handled. */
}
/* In case of an ifc, generate a HW shader instruction */
}
{
{
return; /* Instruction is handled */
}
}
/* Select handler */
/* Unhandled opcode */
if (!hw_fct)
{
return;
}
{
priv->loop_depth--;
}
{
/* Non-ifc ENDIFs don't reach that place because of the return in the if block above */
}
}
};
/* ARB_fragment_program fixed function pipeline replacement definitions */
#define ARB_FFP_CONST_TFACTOR 0
struct arbfp_ffp_desc
{
unsigned int num_textures_used;
};
/* Context activation is done by the caller. */
ENTER_GL();
if(enable) {
checkGLcall("glEnable(GL_FRAGMENT_PROGRAM_ARB)");
} else {
checkGLcall("glDisable(GL_FRAGMENT_PROGRAM_ARB)");
}
LEAVE_GL();
}
/* Share private data between the shader backend and the pipeline replacement, if both
* are the arb implementation. This is needed to figure out whether ARBfp should be disabled
* if no pixel shader is bound or not
*/
} else {
This->fragment_priv = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(struct shader_arb_priv));
}
{
ERR("Failed to initialize rbtree.\n");
return E_OUTOFMEMORY;
}
return WINED3D_OK;
}
/* Context activation is done by the caller. */
{
ENTER_GL();
checkGLcall("glDeleteProgramsARB(1, &entry_arb->shader)");
LEAVE_GL();
}
/* Context activation is done by the caller. */
}
}
{
/* TODO: Implement WINED3DTEXOPCAPS_PREMODULATE */
}
static void state_texfactor_arbfp(DWORD state, IWineD3DStateBlockImpl *stateblock, struct wined3d_context *context)
{
/* Don't load the parameter if we're using an arbfp pixel shader, otherwise we'll overwrite
* application provided constants
*/
if (use_ps(stateblock)) return;
}
checkGLcall("glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, ARB_FFP_CONST_TFACTOR, col)");
}
static void state_arb_specularenable(DWORD state, IWineD3DStateBlockImpl *stateblock, struct wined3d_context *context)
{
/* Don't load the parameter if we're using an arbfp pixel shader, otherwise we'll overwrite
* application provided constants
*/
if (use_ps(stateblock)) return;
device->highest_dirty_ps_const = max(device->highest_dirty_ps_const, ARB_FFP_CONST_SPECULAR_ENABLE + 1);
}
/* The specular color has no alpha */
} else {
}
GL_EXTCALL(glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, ARB_FFP_CONST_SPECULAR_ENABLE, col));
checkGLcall("glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, ARB_FFP_CONST_SPECULAR_ENABLE, col)");
}
static void set_bumpmat_arbfp(DWORD state, IWineD3DStateBlockImpl *stateblock, struct wined3d_context *context)
{
if (use_ps(stateblock))
{
if (stage != 0
&& (((IWineD3DPixelShaderImpl *)stateblock->pixelShader)->baseShader.reg_maps.bumpmat & (1 << stage)))
{
/* The pixel shader has to know the bump env matrix. Do a constants update if it isn't scheduled
* anyway
*/
}
/* Exit now, don't set the bumpmat below, otherwise we may overwrite pixel shader constants */
return;
}
device->highest_dirty_ps_const = max(device->highest_dirty_ps_const, ARB_FFP_CONST_BUMPMAT(stage) + 1);
}
GL_EXTCALL(glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, ARB_FFP_CONST_BUMPMAT(stage), &mat[0][0]));
checkGLcall("glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, ARB_FFP_CONST_BUMPMAT(stage), &mat[0][0])");
}
/* State handler for the bump environment luminance parameters of a texture stage.
* With a pixel shader in use nothing is uploaded here (it would overwrite pixel
* shader constants); without one the parameters are written to the
* ARB_FFP_CONST_LUMINANCE(stage) environment parameter of the fixed function
* replacement program.
*/
static void tex_bumpenvlum_arbfp(DWORD state, IWineD3DStateBlockImpl *stateblock, struct wined3d_context *context)
{
if (use_ps(stateblock))
{
/* reg_maps.luminanceparams is tested as a bitmask with one bit per stage. */
if (stage != 0
&& (((IWineD3DPixelShaderImpl *)stateblock->pixelShader)->baseShader.reg_maps.luminanceparams & (1 << stage)))
{
/* The pixel shader has to know the luminance offset. Do a constants update if it
* isn't scheduled anyway.
*/
}
/* Exit now, don't set the luminance parameters below, otherwise we may overwrite pixel shader constants */
return;
}
/* Raise the dirty pixel shader constant watermark so that it covers the slot written below. */
device->highest_dirty_ps_const = max(device->highest_dirty_ps_const, ARB_FFP_CONST_LUMINANCE(stage) + 1);
}
/* Upload the parameters into the fragment program environment and check for GL errors. */
GL_EXTCALL(glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, ARB_FFP_CONST_LUMINANCE(stage), param));
checkGLcall("glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, ARB_FFP_CONST_LUMINANCE(stage), param)");
}
/* Translates a texture stage argument into the name of the fragment program
* register or input that holds it.
*
* The source is selected by (arg & WINED3DTA_SELECTMASK); stage picks the
* per-stage texture / constant register. The WINED3DTA_COMPLEMENT and
* WINED3DTA_ALPHAREPLICATE modifier bits are examined after the source has
* been selected.
*
* Returns a string naming the register, or "unknown" for an unrecognised
* source selector.
*/
static const char *get_argreg(struct wined3d_shader_buffer *buffer, DWORD argnum, unsigned int stage, DWORD arg)
{
const char *ret;
switch(arg & WINED3DTA_SELECTMASK) {
case WINED3DTA_DIFFUSE:
/* Interpolated primary (diffuse) vertex color */
ret = "fragment.color.primary"; break;
case WINED3DTA_CURRENT:
else ret = "ret";
break;
case WINED3DTA_TEXTURE:
/* Sampled texel of this stage */
switch(stage) {
case 0: ret = "tex0"; break;
default: ret = "unknown texture";
}
break;
case WINED3DTA_TFACTOR:
ret = "tfactor"; break;
case WINED3DTA_SPECULAR:
/* Interpolated secondary (specular) vertex color */
ret = "fragment.color.secondary"; break;
case WINED3DTA_TEMP:
ret = "tempreg"; break;
case WINED3DTA_CONSTANT:
FIXME("Implement perstage constants\n");
switch(stage) {
case 0: ret = "const0"; break;
default: ret = "unknown constant";
}
break;
default:
return "unknown";
}
/* Argument modifiers */
if(arg & WINED3DTA_COMPLEMENT) {
}
if(arg & WINED3DTA_ALPHAREPLICATE) {
}
return ret;
}
{
else dstmask = ".w";
else dstreg = "ret";
switch(op) {
case WINED3DTOP_DISABLE:
break;
case WINED3DTOP_SELECTARG2:
case WINED3DTOP_SELECTARG1:
break;
case WINED3DTOP_MODULATE4X:
mul = 2;
case WINED3DTOP_MODULATE2X:
mul *= 2;
dstreg = "ret";
}
case WINED3DTOP_MODULATE:
break;
case WINED3DTOP_ADDSIGNED2X:
mul = 2;
dstreg = "ret";
}
case WINED3DTOP_ADDSIGNED:
arg2 = "arg2";
case WINED3DTOP_ADD:
break;
case WINED3DTOP_SUBTRACT:
break;
case WINED3DTOP_ADDSMOOTH:
break;
break;
break;
break;
break;
break;
/* D3DTOP_PREMODULATE ???? */
break;
break;
break;
break;
case WINED3DTOP_DOTPRODUCT3:
mul = 4;
dstreg = "ret";
}
break;
case WINED3DTOP_MULTIPLYADD:
break;
case WINED3DTOP_LERP:
/* The msdn is not quite right here */
break;
case WINED3DTOP_BUMPENVMAP:
/* Those are handled in the first pass of the shader(generation pass 1 and 2) already */
break;
default:
}
if(mul == 2) {
shader_addline(buffer, "MUL_SAT %s%s, %s, const.y;\n", mul_final_dest ? "result.color" : dstreg, dstmask, dstreg);
} else if(mul == 4) {
shader_addline(buffer, "MUL_SAT %s%s, %s, const.z;\n", mul_final_dest ? "result.color" : dstreg, dstmask, dstreg);
}
}
/* Generates the ARB fragment program that replaces the fixed function fragment
* pipeline for the given settings.
*
* settings   - fixed function fragment pipeline description to implement
* stateblock - passed for GLINFO_LOCATION
*
* Returns the value of ret after the program string has been submitted, or 0
* if the shader buffer could not be initialized.
*/
static GLuint gen_arbfp_ffp_shader(const struct ffp_frag_settings *settings, IWineD3DStateBlockImpl *stateblock)
{
unsigned int stage;
const char *textype;
/* Find out which textures are read */
}
tfactor_used = TRUE;
}
tfactor_used = TRUE;
}
tempreg_used = TRUE;
}
tempreg_used = TRUE;
}
tfactor_used = TRUE;
}
}
/* Shader header */
if (!shader_buffer_init(&buffer))
{
ERR("Failed to initialize shader buffer.\n");
return 0;
}
case FOG_OFF: break;
}
/* Bind the per-stage bump matrix and luminance constants to their program.env slots */
shader_addline(&buffer, "PARAM bumpmat%u = program.env[%u];\n", stage, ARB_FFP_CONST_BUMPMAT(stage));
if(!luminance_used[stage]) continue;
shader_addline(&buffer, "PARAM luminance%u = program.env[%u];\n", stage, ARB_FFP_CONST_LUMINANCE(stage));
}
if(tfactor_used) {
}
shader_addline(&buffer, "PARAM specular_enable = program.env[%u];\n", ARB_FFP_CONST_SPECULAR_ENABLE);
if(settings->sRGB_write) {
}
/* Emulated clip planes: kill the fragment based on texture coordinate 7 */
if(ffp_clip_emul(stateblock) && settings->emul_clipplanes) shader_addline(&buffer, "KIL fragment.texcoord[7];\n");
/* Generate texture sampling instructions */
default: textype = "unexpected_textype"; break;
}
sat = "";
} else {
sat = "_SAT";
}
instr = "TEX";
instr = "TXP";
} else {
instr = "TXP";
}
if(stage > 0 &&
/* with projective textures, texbem only divides the static texture coord, not the displacement,
* so multiply the displacement with the dividing parameter before passing it to TXP
*
* Both variants need a multiply-add (displacement * divisor + static coordinate). MUL only
* accepts two source operands in ARB_fragment_program, so MAD is used for the .w divisor
* as well as for the .z divisor.
*/
shader_addline(&buffer, "MAD ret.xyz, ret, fragment.texcoord[%u].w, fragment.texcoord[%u];\n", stage, stage);
} else {
shader_addline(&buffer, "MAD ret.xyz, ret, fragment.texcoord[%u].z, fragment.texcoord[%u];\n", stage, stage);
}
} else {
}
}
} else {
}
}
/* Generate the main shader */
if(stage == 0) {
final_combiner_src = "fragment.color.primary";
}
break;
}
} else {
}
if(stage == 0) {
}
} else if(op_equal) {
} else {
}
}
/* Add the specular color, scaled by specular_enable, to the combiner result. With sRGB
* writes the sum goes to the ret temporary instead of directly to result.color.
*/
if(settings->sRGB_write) {
shader_addline(&buffer, "MAD ret, fragment.color.secondary, specular_enable, %s;\n", final_combiner_src);
} else {
shader_addline(&buffer, "MAD result.color, fragment.color.secondary, specular_enable, %s;\n", final_combiner_src);
}
/* Footer */
/* Generate the shader */
checkGLcall("glProgramStringARB()");
if (pos != -1)
{
}
else
{
/* Query whether the program fits within the native limits of the implementation */
GL_EXTCALL(glGetProgramivARB(GL_FRAGMENT_PROGRAM_ARB, GL_PROGRAM_UNDER_NATIVE_LIMITS_ARB, &native));
checkGLcall("glGetProgramivARB()");
}
return ret;
}
/* State handler that activates fragment processing. Without a pixel shader it
* finds or creates the fixed function replacement program, binds it and reloads
* the fixed function constants; with a pixel shader the selection is left to
* the shader backend. Returns early, after logging an error, if a new program
* description cannot be allocated.
*/
static void fragment_prog_arbfp(DWORD state, IWineD3DStateBlockImpl *stateblock, struct wined3d_context *context)
{
unsigned int i;
if(!use_pshader && device->shader_backend == &arb_program_shader_backend && context->last_was_pshader) {
/* Reload fixed function constants since they collide with the pixel shader constants */
for(i = 0; i < MAX_TEXTURES; i++) {
}
} else if(use_pshader && !isStateDirty(context, device->StateTable[STATE_VSHADER].representative)) {
}
return;
}
if(!use_pshader) {
/* Find or create a shader implementing the fixed function pipeline settings, then activate it */
if(!desc) {
if (!new_desc)
{
ERR("Out of memory\n");
return;
}
new_desc->num_textures_used = 0;
{
new_desc->num_textures_used = i;
}
}
/* Now activate the replacement program. GL_FRAGMENT_PROGRAM_ARB is already active (however, note the
* comment above the shader_select call below). If e.g. GLSL is active, the shader_select call will
* deactivate it.
*/
checkGLcall("glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, desc->shader)");
/* Reload fixed function constants since they collide with the pixel shader constants */
for(i = 0; i < MAX_TEXTURES; i++) {
}
}
} else {
}
/* Finally, select the shader. If a pixel shader is used, it will be set and enabled by the shader backend.
* If this shader backend is arbfp (most likely), then it will simply overwrite the last fixed function
* replacement shader. If the shader backend is not ARB, it currently is important that the opengl
* implementation type overwrites GL_ARB_fragment_program. This is currently the case with GLSL. If we
* really want to use atifs or nvrc pixel shaders with arb fragment programs we'd have to disable
* GL_FRAGMENT_PROGRAM_ARB here.
*
* Don't call shader_select if the vertex shader is dirty, because it will be called later on by the vertex
* shader handler.
*/
}
}
/* Fog state handler of the arbfp fragment pipeline.
*
* We can't link the fog states to the fragment state directly since the vertex pipeline links them
* to FOGENABLE. A different linking in different pipeline parts can't be expressed in the combined
* state table, so we need to handle that with a forwarding function. The other invisible side effect
* is that changing the fog start and fog end (which links to FOGENABLE in vertex) results in the
* fragment_prog_arbfp function being called because FOGENABLE is dirty, which calls this function here.
*/
static void state_arbfp_fog(DWORD state, IWineD3DStateBlockImpl *stateblock, struct wined3d_context *context)
{
}
/* The handling differs depending on whether a vertex shader is in use. */
if(use_vs(stateblock)) {
} else {
} else {
}
}
} else {
}
}
}
/* State handler registered for the per-stage WINED3DTSS_TEXTURETRANSFORMFLAGS
* states in the fragment state table.
*/
static void textransform(DWORD state, IWineD3DStateBlockImpl *stateblock, struct wined3d_context *context)
{
}
}
{STATE_RENDER(WINED3DRS_TEXTUREFACTOR), { STATE_RENDER(WINED3DRS_TEXTUREFACTOR), state_texfactor_arbfp }, WINED3D_GL_EXT_NONE },
{STATE_TEXTURESTAGE(0, WINED3DTSS_BUMPENVMAT00), { STATE_TEXTURESTAGE(0, WINED3DTSS_BUMPENVMAT00), set_bumpmat_arbfp }, WINED3D_GL_EXT_NONE },
{STATE_TEXTURESTAGE(0, WINED3DTSS_BUMPENVMAT01), { STATE_TEXTURESTAGE(0, WINED3DTSS_BUMPENVMAT00), NULL }, WINED3D_GL_EXT_NONE },
{STATE_TEXTURESTAGE(0, WINED3DTSS_BUMPENVMAT10), { STATE_TEXTURESTAGE(0, WINED3DTSS_BUMPENVMAT00), NULL }, WINED3D_GL_EXT_NONE },
{STATE_TEXTURESTAGE(0, WINED3DTSS_BUMPENVMAT11), { STATE_TEXTURESTAGE(0, WINED3DTSS_BUMPENVMAT00), NULL }, WINED3D_GL_EXT_NONE },
{STATE_TEXTURESTAGE(0, WINED3DTSS_BUMPENVLSCALE), { STATE_TEXTURESTAGE(0, WINED3DTSS_BUMPENVLSCALE), tex_bumpenvlum_arbfp }, WINED3D_GL_EXT_NONE },
{STATE_TEXTURESTAGE(0, WINED3DTSS_BUMPENVLOFFSET), { STATE_TEXTURESTAGE(0, WINED3DTSS_BUMPENVLSCALE), NULL }, WINED3D_GL_EXT_NONE },
{STATE_TEXTURESTAGE(1, WINED3DTSS_BUMPENVMAT00), { STATE_TEXTURESTAGE(1, WINED3DTSS_BUMPENVMAT00), set_bumpmat_arbfp }, WINED3D_GL_EXT_NONE },
{STATE_TEXTURESTAGE(1, WINED3DTSS_BUMPENVMAT01), { STATE_TEXTURESTAGE(1, WINED3DTSS_BUMPENVMAT00), NULL }, WINED3D_GL_EXT_NONE },
{STATE_TEXTURESTAGE(1, WINED3DTSS_BUMPENVMAT10), { STATE_TEXTURESTAGE(1, WINED3DTSS_BUMPENVMAT00), NULL }, WINED3D_GL_EXT_NONE },
{STATE_TEXTURESTAGE(1, WINED3DTSS_BUMPENVMAT11), { STATE_TEXTURESTAGE(1, WINED3DTSS_BUMPENVMAT00), NULL }, WINED3D_GL_EXT_NONE },
{STATE_TEXTURESTAGE(1, WINED3DTSS_BUMPENVLSCALE), { STATE_TEXTURESTAGE(1, WINED3DTSS_BUMPENVLSCALE), tex_bumpenvlum_arbfp }, WINED3D_GL_EXT_NONE },
{STATE_TEXTURESTAGE(1, WINED3DTSS_BUMPENVLOFFSET), { STATE_TEXTURESTAGE(1, WINED3DTSS_BUMPENVLSCALE), NULL }, WINED3D_GL_EXT_NONE },
{STATE_TEXTURESTAGE(2, WINED3DTSS_BUMPENVMAT00), { STATE_TEXTURESTAGE(2, WINED3DTSS_BUMPENVMAT00), set_bumpmat_arbfp }, WINED3D_GL_EXT_NONE },
{STATE_TEXTURESTAGE(2, WINED3DTSS_BUMPENVMAT01), { STATE_TEXTURESTAGE(2, WINED3DTSS_BUMPENVMAT00), NULL }, WINED3D_GL_EXT_NONE },
{STATE_TEXTURESTAGE(2, WINED3DTSS_BUMPENVMAT10), { STATE_TEXTURESTAGE(2, WINED3DTSS_BUMPENVMAT00), NULL }, WINED3D_GL_EXT_NONE },
{STATE_TEXTURESTAGE(2, WINED3DTSS_BUMPENVMAT11), { STATE_TEXTURESTAGE(2, WINED3DTSS_BUMPENVMAT00), NULL }, WINED3D_GL_EXT_NONE },
{STATE_TEXTURESTAGE(2, WINED3DTSS_BUMPENVLSCALE), { STATE_TEXTURESTAGE(2, WINED3DTSS_BUMPENVLSCALE), tex_bumpenvlum_arbfp }, WINED3D_GL_EXT_NONE },
{STATE_TEXTURESTAGE(2, WINED3DTSS_BUMPENVLOFFSET), { STATE_TEXTURESTAGE(2, WINED3DTSS_BUMPENVLSCALE), NULL }, WINED3D_GL_EXT_NONE },
{STATE_TEXTURESTAGE(3, WINED3DTSS_BUMPENVMAT00), { STATE_TEXTURESTAGE(3, WINED3DTSS_BUMPENVMAT00), set_bumpmat_arbfp }, WINED3D_GL_EXT_NONE },
{STATE_TEXTURESTAGE(3, WINED3DTSS_BUMPENVMAT01), { STATE_TEXTURESTAGE(3, WINED3DTSS_BUMPENVMAT00), NULL }, WINED3D_GL_EXT_NONE },
{STATE_TEXTURESTAGE(3, WINED3DTSS_BUMPENVMAT10), { STATE_TEXTURESTAGE(3, WINED3DTSS_BUMPENVMAT00), NULL }, WINED3D_GL_EXT_NONE },
{STATE_TEXTURESTAGE(3, WINED3DTSS_BUMPENVMAT11), { STATE_TEXTURESTAGE(3, WINED3DTSS_BUMPENVMAT00), NULL }, WINED3D_GL_EXT_NONE },
{STATE_TEXTURESTAGE(3, WINED3DTSS_BUMPENVLSCALE), { STATE_TEXTURESTAGE(3, WINED3DTSS_BUMPENVLSCALE), tex_bumpenvlum_arbfp }, WINED3D_GL_EXT_NONE },
{STATE_TEXTURESTAGE(3, WINED3DTSS_BUMPENVLOFFSET), { STATE_TEXTURESTAGE(3, WINED3DTSS_BUMPENVLSCALE), NULL }, WINED3D_GL_EXT_NONE },
{STATE_TEXTURESTAGE(4, WINED3DTSS_BUMPENVMAT00), { STATE_TEXTURESTAGE(4, WINED3DTSS_BUMPENVMAT00), set_bumpmat_arbfp }, WINED3D_GL_EXT_NONE },
{STATE_TEXTURESTAGE(4, WINED3DTSS_BUMPENVMAT01), { STATE_TEXTURESTAGE(4, WINED3DTSS_BUMPENVMAT00), NULL }, WINED3D_GL_EXT_NONE },
{STATE_TEXTURESTAGE(4, WINED3DTSS_BUMPENVMAT10), { STATE_TEXTURESTAGE(4, WINED3DTSS_BUMPENVMAT00), NULL }, WINED3D_GL_EXT_NONE },
{STATE_TEXTURESTAGE(4, WINED3DTSS_BUMPENVMAT11), { STATE_TEXTURESTAGE(4, WINED3DTSS_BUMPENVMAT00), NULL }, WINED3D_GL_EXT_NONE },
{STATE_TEXTURESTAGE(4, WINED3DTSS_BUMPENVLSCALE), { STATE_TEXTURESTAGE(4, WINED3DTSS_BUMPENVLSCALE), tex_bumpenvlum_arbfp }, WINED3D_GL_EXT_NONE },
{STATE_TEXTURESTAGE(4, WINED3DTSS_BUMPENVLOFFSET), { STATE_TEXTURESTAGE(4, WINED3DTSS_BUMPENVLSCALE), NULL }, WINED3D_GL_EXT_NONE },
{STATE_TEXTURESTAGE(5, WINED3DTSS_BUMPENVMAT00), { STATE_TEXTURESTAGE(5, WINED3DTSS_BUMPENVMAT00), set_bumpmat_arbfp }, WINED3D_GL_EXT_NONE },
{STATE_TEXTURESTAGE(5, WINED3DTSS_BUMPENVMAT01), { STATE_TEXTURESTAGE(5, WINED3DTSS_BUMPENVMAT00), NULL }, WINED3D_GL_EXT_NONE },
{STATE_TEXTURESTAGE(5, WINED3DTSS_BUMPENVMAT10), { STATE_TEXTURESTAGE(5, WINED3DTSS_BUMPENVMAT00), NULL }, WINED3D_GL_EXT_NONE },
{STATE_TEXTURESTAGE(5, WINED3DTSS_BUMPENVMAT11), { STATE_TEXTURESTAGE(5, WINED3DTSS_BUMPENVMAT00), NULL }, WINED3D_GL_EXT_NONE },
{STATE_TEXTURESTAGE(5, WINED3DTSS_BUMPENVLSCALE), { STATE_TEXTURESTAGE(5, WINED3DTSS_BUMPENVLSCALE), tex_bumpenvlum_arbfp }, WINED3D_GL_EXT_NONE },
{STATE_TEXTURESTAGE(5, WINED3DTSS_BUMPENVLOFFSET), { STATE_TEXTURESTAGE(5, WINED3DTSS_BUMPENVLSCALE), NULL }, WINED3D_GL_EXT_NONE },
{STATE_TEXTURESTAGE(6, WINED3DTSS_BUMPENVMAT00), { STATE_TEXTURESTAGE(6, WINED3DTSS_BUMPENVMAT00), set_bumpmat_arbfp }, WINED3D_GL_EXT_NONE },
{STATE_TEXTURESTAGE(6, WINED3DTSS_BUMPENVMAT01), { STATE_TEXTURESTAGE(6, WINED3DTSS_BUMPENVMAT00), NULL }, WINED3D_GL_EXT_NONE },
{STATE_TEXTURESTAGE(6, WINED3DTSS_BUMPENVMAT10), { STATE_TEXTURESTAGE(6, WINED3DTSS_BUMPENVMAT00), NULL }, WINED3D_GL_EXT_NONE },
{STATE_TEXTURESTAGE(6, WINED3DTSS_BUMPENVMAT11), { STATE_TEXTURESTAGE(6, WINED3DTSS_BUMPENVMAT00), NULL }, WINED3D_GL_EXT_NONE },
{STATE_TEXTURESTAGE(6, WINED3DTSS_BUMPENVLSCALE), { STATE_TEXTURESTAGE(6, WINED3DTSS_BUMPENVLSCALE), tex_bumpenvlum_arbfp }, WINED3D_GL_EXT_NONE },
{STATE_TEXTURESTAGE(6, WINED3DTSS_BUMPENVLOFFSET), { STATE_TEXTURESTAGE(6, WINED3DTSS_BUMPENVLSCALE), NULL }, WINED3D_GL_EXT_NONE },
{STATE_TEXTURESTAGE(7, WINED3DTSS_BUMPENVMAT00), { STATE_TEXTURESTAGE(7, WINED3DTSS_BUMPENVMAT00), set_bumpmat_arbfp }, WINED3D_GL_EXT_NONE },
{STATE_TEXTURESTAGE(7, WINED3DTSS_BUMPENVMAT01), { STATE_TEXTURESTAGE(7, WINED3DTSS_BUMPENVMAT00), NULL }, WINED3D_GL_EXT_NONE },
{STATE_TEXTURESTAGE(7, WINED3DTSS_BUMPENVMAT10), { STATE_TEXTURESTAGE(7, WINED3DTSS_BUMPENVMAT00), NULL }, WINED3D_GL_EXT_NONE },
{STATE_TEXTURESTAGE(7, WINED3DTSS_BUMPENVMAT11), { STATE_TEXTURESTAGE(7, WINED3DTSS_BUMPENVMAT00), NULL }, WINED3D_GL_EXT_NONE },
{STATE_TEXTURESTAGE(7, WINED3DTSS_BUMPENVLSCALE), { STATE_TEXTURESTAGE(7, WINED3DTSS_BUMPENVLSCALE), tex_bumpenvlum_arbfp }, WINED3D_GL_EXT_NONE },
{STATE_TEXTURESTAGE(7, WINED3DTSS_BUMPENVLOFFSET), { STATE_TEXTURESTAGE(7, WINED3DTSS_BUMPENVLSCALE), NULL }, WINED3D_GL_EXT_NONE },
{STATE_RENDER(WINED3DRS_FOGENABLE), { STATE_RENDER(WINED3DRS_FOGENABLE), state_arbfp_fog }, WINED3D_GL_EXT_NONE },
{STATE_RENDER(WINED3DRS_FOGTABLEMODE), { STATE_RENDER(WINED3DRS_FOGENABLE), NULL }, WINED3D_GL_EXT_NONE },
{STATE_RENDER(WINED3DRS_FOGVERTEXMODE), { STATE_RENDER(WINED3DRS_FOGENABLE), NULL }, WINED3D_GL_EXT_NONE },
{STATE_RENDER(WINED3DRS_FOGSTART), { STATE_RENDER(WINED3DRS_FOGSTART), state_fogstartend }, WINED3D_GL_EXT_NONE },
{STATE_RENDER(WINED3DRS_FOGCOLOR), { STATE_RENDER(WINED3DRS_FOGCOLOR), state_fogcolor }, WINED3D_GL_EXT_NONE },
{STATE_RENDER(WINED3DRS_FOGDENSITY), { STATE_RENDER(WINED3DRS_FOGDENSITY), state_fogdensity }, WINED3D_GL_EXT_NONE },
{STATE_TEXTURESTAGE(0,WINED3DTSS_TEXTURETRANSFORMFLAGS),{STATE_TEXTURESTAGE(0, WINED3DTSS_TEXTURETRANSFORMFLAGS), textransform }, WINED3D_GL_EXT_NONE },
{STATE_TEXTURESTAGE(1,WINED3DTSS_TEXTURETRANSFORMFLAGS),{STATE_TEXTURESTAGE(1, WINED3DTSS_TEXTURETRANSFORMFLAGS), textransform }, WINED3D_GL_EXT_NONE },
{STATE_TEXTURESTAGE(2,WINED3DTSS_TEXTURETRANSFORMFLAGS),{STATE_TEXTURESTAGE(2, WINED3DTSS_TEXTURETRANSFORMFLAGS), textransform }, WINED3D_GL_EXT_NONE },
{STATE_TEXTURESTAGE(3,WINED3DTSS_TEXTURETRANSFORMFLAGS),{STATE_TEXTURESTAGE(3, WINED3DTSS_TEXTURETRANSFORMFLAGS), textransform }, WINED3D_GL_EXT_NONE },
{STATE_TEXTURESTAGE(4,WINED3DTSS_TEXTURETRANSFORMFLAGS),{STATE_TEXTURESTAGE(4, WINED3DTSS_TEXTURETRANSFORMFLAGS), textransform }, WINED3D_GL_EXT_NONE },
{STATE_TEXTURESTAGE(5,WINED3DTSS_TEXTURETRANSFORMFLAGS),{STATE_TEXTURESTAGE(5, WINED3DTSS_TEXTURETRANSFORMFLAGS), textransform }, WINED3D_GL_EXT_NONE },
{STATE_TEXTURESTAGE(6,WINED3DTSS_TEXTURETRANSFORMFLAGS),{STATE_TEXTURESTAGE(6, WINED3DTSS_TEXTURETRANSFORMFLAGS), textransform }, WINED3D_GL_EXT_NONE },
{STATE_TEXTURESTAGE(7,WINED3DTSS_TEXTURETRANSFORMFLAGS),{STATE_TEXTURESTAGE(7, WINED3DTSS_TEXTURETRANSFORMFLAGS), textransform }, WINED3D_GL_EXT_NONE },
{STATE_RENDER(WINED3DRS_SPECULARENABLE), { STATE_RENDER(WINED3DRS_SPECULARENABLE), state_arb_specularenable}, WINED3D_GL_EXT_NONE },
{0 /* Terminate */, { 0, 0 }, WINED3D_GL_EXT_NONE },
};
TRUE /* We can disable projected textures */
};
struct arbfp_blit_priv {
};
ERR("Out of memory\n");
return E_OUTOFMEMORY;
}
return WINED3D_OK;
}
/* Context activation is done by the caller. */
ENTER_GL();
checkGLcall("Delete yuv and p8 programs");
LEAVE_GL();
}
{
char chroma;
if (fixup == COMPLEX_FIXUP_UYVY) {
chroma = 'x';
*luminance = 'w';
} else {
chroma = 'w';
*luminance = 'x';
}
switch(textype) {
default:
/* This is more tricky than just replacing the texture type - we have to navigate
* properly in the texture to find the correct chroma values
*/
FIXME("Implement yuv correction for non-2d, non-rect textures\n");
return FALSE;
}
/* First we have to read the chroma values. This means we need at least two pixels(no filtering),
* or 4 pixels(with filtering). To get the unmodified chromas, we have to rid ourselves of the
* filtering when we sample the texture.
*
* These are the rules for reading the chroma:
*
* Even pixel: Cr
* Even pixel: U
* Odd pixel: V
*
* So we have to get the sampling x position in non-normalized coordinates in integers
*/
if(textype != GL_TEXTURE_RECTANGLE_ARB) {
} else {
}
/* We must not allow filtering between pixel x and x+1, this would mix U and V
* Vertical filtering is ok. However, bear in mind that the pixel center is at
* 0.5, so add 0.5.
*/
/* Multiply the x coordinate by 0.5 (i.e. divide it by 2) and get the fraction. This gives 0.25 and
* 0.75 for the even and odd pixels respectively
*/
/* Sample Pixel 1 */
/* Put the value into either of the chroma values */
/* Sample pixel 2. If we read an even pixel(SLT above returned 1), sample
* the pixel right to the current one. Otherwise, sample the left pixel.
* Bias and scale the SLT result to -1;1 and add it to the texcrd.x.
*/
/* Put the value into the other chroma */
/* TODO: If filtering is enabled, sample a 2nd pair of pixels left or right of
* the current one and lerp the two U and V values
*/
/* This gives the correctly filtered luminance value */
return TRUE;
}
{
const char *tex;
switch(textype) {
default:
FIXME("Implement yv12 correction for non-2d, non-rect textures\n");
return FALSE;
}
/* YV12 surfaces contain a WxH sized luminance plane, followed by a (W/2)x(H/2)
* V and a (W/2)x(H/2) U plane, each with 8 bit per pixel. So the effective
* bitdepth is 12 bits per pixel. Since the U and V planes have only half the
* pitch of the luminance plane, the packing into the gl texture is a bit
* unfortunate. If the whole texture is interpreted as luminance data it looks
* approximately like this:
*
* +----------------------------------+----
* | |
* | |
* | |
* | |
* | | 2
* | LUMINANCE | -
* | | 3
* | |
* | |
* | |
* | |
* +----------------+-----------------+----
* | | |
* | U even rows | U odd rows |
* | | | 1
* +----------------+------------------ -
* | | | 3
* | V even rows | V odd rows |
* | | |
* +----------------+-----------------+----
* | | |
* | 0.5 | 0.5 |
*
* So it appears as if there are 4 chroma images, but in fact the odd rows
* in the chroma images are in the same row as the even ones. So it is
* kinda tricky to read
*
* When reading from rectangle textures, keep in mind that the input y coordinates
* go from 0 to d3d_height, whereas the opengl texture height is 1.5 * d3d_height
*/
2.0f / 3.0f, 1.0f / 6.0f, (2.0f / 3.0f) + (1.0f / 6.0f), 1.0f / 3.0f);
/* the chroma planes have only half the width */
/* The first value is between 2/3 and 5/6th of the texture's height, so scale+bias
* the coordinate. Also read the right side of the image when reading odd lines
*
* Don't forget to clamp the y values into the range, otherwise we'll get filtering
* bleeding
*/
if(textype == GL_TEXTURE_2D) {
/* Read odd lines from the right side (add size * 0.5 to the x coordinate) */
shader_addline(buffer, "ADD texcrd2.x, texcrd2.y, yv12_coef.y;\n"); /* To avoid 0.5 == 0.5 comparisons */
/* clamp, keep the half pixel origin in mind */
} else {
/* Read from [size - size+size/4] */
/* Read odd lines from the right side (add size * 0.5 to the x coordinate) */
shader_addline(buffer, "ADD texcrd2.x, texcrd.y, yv12_coef.y;\n"); /* To avoid 0.5 == 0.5 comparisons */
/* Make sure to read exactly from the pixel center */
/* Clamp */
}
/* Read the texture, put the result into the output register */
/* The other chroma value is 1/6th of the texture lower, from 5/6th to 6/6th
* No need to clamp because we're just reusing the already clamped value from above
*/
if(textype == GL_TEXTURE_2D) {
} else {
}
/* Sample the luminance value. It is in the top 2/3rd of the texture, so scale the y coordinate.
* Clamp the y coordinate to prevent the chroma values from bleeding into the sampled luminance
* values due to filtering
*/
if(textype == GL_TEXTURE_2D) {
/* Multiply the y coordinate by 2/3 and clamp it */
} else {
/* Reading from texture_rectangles is pretty straightforward, just use the unmodified
* texture coordinate. It is still a good idea to clamp it though, since the opengl texture
* is bigger
*/
}
*luminance = 'a';
return TRUE;
}
{
/* Shader header */
if (!shader_buffer_init(&buffer))
{
ERR("Failed to initialize shader buffer.\n");
return 0;
}
ENTER_GL();
LEAVE_GL();
if(!shader) {
return 0;
}
/* { 255/256, 0.5/255*255/256, 0, 0 } */
/* The alpha-component contains the palette index */
if(textype == GL_TEXTURE_RECTANGLE_ARB)
else
/* Scale the index by 255/256 and add a bias of '0.5' in order to sample in the middle */
/* Use the alpha-component as an index in the palette to get the final color */
ENTER_GL();
checkGLcall("glProgramStringARB()");
if (pos != -1)
{
}
if (textype == GL_TEXTURE_RECTANGLE_ARB)
else
LEAVE_GL();
return shader;
}
/* Context activation is done by the caller. */
{
ENTER_GL();
if (!priv->palette_texture)
/* Make sure we have discrete color levels. */
/* Upload the palette */
/* TODO: avoid unneeded uploads in the future by adding some SFLAG_PALETTE_DIRTY mechanism */
/* Switch back to unit 0 in which the 2D texture will be stored. */
LEAVE_GL();
}
/* Builds the ARB fragment program that converts a YUV source (YUY2, UYVY or
* YV12 fixup) to RGB.
*
* Returns the GL program name in shader, or 0 if the shader buffer cannot be
* initialized, no program name is available, or the fixup is not handled.
*
* Context activation is done by the caller.
*/
static GLuint gen_yuv_shader(IWineD3DDeviceImpl *device, enum complex_fixup yuv_fixup, GLenum textype)
{
/* Component of the "luminance" register that is read in the final MAD instructions */
char luminance_component;
/* Shader header */
if (!shader_buffer_init(&buffer))
{
ERR("Failed to initialize shader buffer.\n");
return 0;
}
ENTER_GL();
checkGLcall("GL_EXTCALL(glGenProgramsARB(1, &shader))");
checkGLcall("glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, shader)");
LEAVE_GL();
if(!shader) {
return 0;
}
/* The YUY2 and UYVY formats contain two pixels packed into a 32 bit macropixel,
* giving effectively 16 bit per pixel. The color consists of a luminance(Y) and
* two chroma(U and V) values. Each macropixel has two luminance values, one for
* each single pixel it contains, and one U and one V value shared between both
* pixels.
*
* The data is loaded into an A8L8 texture. With YUY2, the luminance component
* contains the luminance and alpha the chroma. With UYVY it is vice versa. Thus
* take the format into account when generating the read swizzles
*
* Reading the Y value is straightforward - just sample the texture. The hardware
* takes care of filtering in the horizontal and vertical direction.
*
* Reading the U and V values is harder. We have to avoid filtering horizontally,
* because that would mix the U and V values of one pixel or two adjacent pixels.
* Thus floor the texture coordinate and add 0.5 to get an unfiltered read,
* regardless of the filtering setting. Vertical filtering works automatically
* though - the U and V values of two rows are mixed nicely.
*
* Apart from avoiding filtering issues, the code has to know which value it just
* read, and where it can find the other one. To determine this, it checks if
* it sampled an even or odd pixel, and shifts the 2nd read accordingly.
*
* Handling horizontal filtering of U and V values requires reading a 2nd pair
* of pixels, extracting U and V and mixing them. This is not implemented yet.
*
* An alternative implementation idea is to load the texture as A8R8G8B8 texture,
* with width / 2. This way one read gives all 3 values, finding U and V is easy
* in an unfiltered situation. Finding the luminance on the other hand requires
* finding out if it is an odd or even pixel. The real drawback of this approach
* is filtering. This would have to be emulated completely in the shader, reading
* up to 2 packed pixels in up to 2 rows and interpolating both horizontally and
* vertically. Beyond that it would require adjustments to the texture handling
* code to deal with the width scaling
*/
/* Emit the format specific read code; any fixup other than the YUV ones is rejected */
switch (yuv_fixup)
{
case COMPLEX_FIXUP_UYVY:
case COMPLEX_FIXUP_YUY2:
{
return 0;
}
break;
case COMPLEX_FIXUP_YV12:
{
return 0;
}
break;
default:
return 0;
}
/* Calculate the final result. Formula is taken from
* http://www.fourcc.org/fccyvrgb.php. Note that the chroma
* ranges from -0.5 to 0.5
*/
shader_addline(&buffer, "MAD result.color.x, chroma.x, yuv_coef.x, luminance.%c;\n", luminance_component);
shader_addline(&buffer, "MAD result.color.z, chroma.y, yuv_coef.w, luminance.%c;\n", luminance_component);
ENTER_GL();
checkGLcall("glProgramStringARB()");
if (pos != -1)
{
}
else
{
/* Query whether the program fits within the native limits of the implementation */
GL_EXTCALL(glGetProgramivARB(GL_FRAGMENT_PROGRAM_ARB, GL_PROGRAM_UNDER_NATIVE_LIMITS_ARB, &native));
checkGLcall("glGetProgramivARB()");
}
LEAVE_GL();
/* Per-format handling of the finished program */
switch (yuv_fixup)
{
case COMPLEX_FIXUP_YUY2:
break;
case COMPLEX_FIXUP_UYVY:
break;
case COMPLEX_FIXUP_YV12:
break;
default:
}
return shader;
}
/* Context activation is done by the caller. */
{
{
TRACE("Fixup:\n");
/* Don't bother setting up a shader for unconverted formats */
ENTER_GL();
checkGLcall("glEnable(textype)");
LEAVE_GL();
return WINED3D_OK;
}
switch(fixup)
{
case COMPLEX_FIXUP_YUY2:
break;
case COMPLEX_FIXUP_UYVY:
break;
case COMPLEX_FIXUP_YV12:
break;
case COMPLEX_FIXUP_P8:
break;
default:
ENTER_GL();
checkGLcall("glEnable(textype)");
LEAVE_GL();
return E_NOTIMPL;
}
ENTER_GL();
checkGLcall("glEnable(GL_FRAGMENT_PROGRAM_ARB)");
checkGLcall("glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, shader)");
checkGLcall("glProgramLocalParameter4fvARB");
LEAVE_GL();
return WINED3D_OK;
}
/* Context activation is done by the caller. */
ENTER_GL();
checkGLcall("glDisable(GL_FRAGMENT_PROGRAM_ARB)");
checkGLcall("glDisable(GL_TEXTURE_2D)");
{
checkGLcall("glDisable(GL_TEXTURE_CUBE_MAP_ARB)");
}
{
checkGLcall("glDisable(GL_TEXTURE_RECTANGLE_ARB)");
}
LEAVE_GL();
}
/* Reports whether the arbfp blitter can perform the requested blit.
*
* Only BLIT_OP_BLIT operations are accepted. According to the trace output,
* fixups on the destination format are rejected. Of the complex source
* fixups, YUY2, UYVY, YV12 and P8 are accepted.
*
* Returns TRUE if the blit is supported, FALSE otherwise.
*/
static BOOL arbfp_blit_supported(const struct wined3d_gl_info *gl_info, enum blit_operation blit_op,
const struct wined3d_format_desc *src_format_desc,
const struct wined3d_format_desc *dst_format_desc)
{
/* Anything but a plain blit is not handled by this blitter. */
if (blit_op != BLIT_OP_BLIT)
{
return FALSE;
}
{
TRACE("Checking support for fixup:\n");
}
{
TRACE("Destination fixups are not supported\n");
return FALSE;
}
{
TRACE("[OK]\n");
return TRUE;
}
/* Beyond that, only the conversions listed in the switch below (YUV formats and P8) are supported. */
{
TRACE("[FAILED]\n");
return FALSE;
}
switch(src_fixup)
{
case COMPLEX_FIXUP_YUY2:
case COMPLEX_FIXUP_UYVY:
case COMPLEX_FIXUP_YV12:
case COMPLEX_FIXUP_P8:
TRACE("[OK]\n");
return TRUE;
default:
TRACE("[FAILED]\n");
return FALSE;
}
}
/* Blits src_surface to the destination surface by drawing a textured quad.
* Returns WINED3D_OK.
*/
HRESULT arbfp_blit_surface(IWineD3DDeviceImpl *device, IWineD3DSurfaceImpl *src_surface, const RECT *src_rect,
{
/* Now load the surface */
/* Activate the destination context, set it up for blitting */
/* The coordinates of the ddraw front buffer are always fullscreen ('screen coordinates'),
* while OpenGL coordinates are window relative.
* Also beware of the origin difference (top left vs bottom left).
* Also beware that the front buffer's surface size is screen width x screen height,
* whereas the real gl drawable size is the size of the window. */
/* The destination's swapchain, or NULL if the destination is not part of a swapchain */
dst_swapchain = (dst_surface->Flags & SFLAG_SWAPCHAIN) ? (IWineD3DSwapChainImpl *)dst_surface->container : NULL;
{
#ifndef VBOX_WITH_WDDM
UINT h;
# ifdef VBOX_WITH_WDDM
# else
# endif
/* Shift the destination rectangle vertically by the difference between the surface height and h */
dst_rect.top += dst_surface->currentDesc.Height - h; dst_rect.bottom += dst_surface->currentDesc.Height - h;
#endif
}
{
}
ENTER_GL();
/* Draw a textured quad */
LEAVE_GL();
/* Leave the opengl state valid for blitting */
#ifdef VBOX_WITH_WDDM
#else
#endif
)))
wglFlush(); /* Flush to ensure ordering across contexts. */
return WINED3D_OK;
}
/* Color fill entry point of the arbfp blitter. Color filling is not implemented
 * here: a FIXME is logged and WINED3DERR_INVALIDCALL is reported to the caller.
 */
static HRESULT arbfp_blit_color_fill(IWineD3DDeviceImpl *device, IWineD3DSurfaceImpl *dst_surface, const RECT *dst_rect, DWORD fill_color)
{
    const HRESULT hr = WINED3DERR_INVALIDCALL;

    FIXME("Color filling not implemented by arbfp_blit\n");

    return hr;
}
};