/*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation. Oracle designates this
* particular file as subject to the "Classpath" exception as provided
* by Oracle in the LICENSE file that accompanied this code.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
#include <stdlib.h>
#include "jni_util.h"
#include "math.h"
#include "GraphicsPrimitiveMgr.h"
#include "Region.h"
#include "sun_java2d_loops_TransformHelper.h"
#include "java_awt_image_AffineTransformOp.h"
/*
* The stub functions replace the bilinear and bicubic interpolation
* functions with NOP versions so that the performance of the helper
* functions that fetch the data can be more directly tested. They
* are not compiled or enabled by default. Change the following
* #undef to a #define to build the stub functions.
*
* When compiled, they are enabled by the environment variable TXSTUB.
* When compiled, there is also code to disable the VIS versions and
* use the C versions in this file in their place by defining the TXNOVIS
* environment variable.
*/
/* The number of IntArgbPre samples to store in the temporary buffer. */
/* The size of a stack allocated buffer to hold edge coordinates (see below). */
/* Declare the software interpolation functions. */
#ifdef MAKE_STUBS
/* Optionally Declare the stub interpolation functions. */
#endif /* MAKE_STUBS */
/*
* Initially choose the software interpolation functions.
* These choices can be overridden by platform code that runs during the
* primitive registration phase of initialization by storing pointers to
* better functions in these pointers.
* Compiling the stubs also turns on code below that can re-install the
* software functions or stub functions on the first call to this primitive.
*/
/*
* The dxydxy parameters of the inverse transform determine how
* quickly we step through the source image. For tiny scale
* factors (on the order of 1E-16 or so) the stepping distances
* are huge. The image has been scaled so small that stepping
* a single pixel in device space moves the sampling point by
* billions (or more) pixels in the source image space. These
* huge stepping values can overflow the whole part of the longs
* we use for the fixed point stepping equations and so we need
* a more robust solution. We could simply iterate over every
* device pixel, use the inverse transform to transform it back
* into the source image coordinate system and then test it for
* being in range and sample pixel-by-pixel, but that is quite
* a bit more expensive. Fortunately, if the scale factors are
* so tiny that we overflow our long values then the number of
* pixels we are planning to visit should be very tiny. The only
* exception to that rule is if the scale factor along one
* dimension is tiny (creating the huge stepping values), and
* the scale factor along the other dimension is fairly regular
* or an up-scale. In that case we have a lot of pixels along
* the direction of the larger axis to sample, but few along the
* smaller axis. Though, pessimally, with an added shear factor
* such a linearly tiny image could have bounds that cover a large
* number of pixels. Such odd transformations should be very
* rare and the absolute limit on calculations would involve a
* single reverse transform of every pixel in the output image
* which is not fast, but it should not cause an undue stall
* of the rendering software.
*
* The specific test we will use is to calculate the inverse
* transformed values of every corner of the destination bounds
* (in order to be user-clip independent) and if we can
* perform a fixed-point-long inverse transform of all of
* those points without overflowing we will use the fast
* fixed point algorithm. Otherwise we will use the safe
* per-pixel transform algorithm.
* The 4 corners are (0,0), (0,dsth), (dstw,0) and (dstw,dsth).
* Transformed (writing W for dstw and H for dsth) they are:
* (tx, ty)
* (tx + dxdy*H, ty + dydy*H)
* (tx + dxdx*W, ty + dydx*W)
* (tx + dxdx*W + dxdy*H, ty + dydx*W + dydy*H)
*/
/* We reject coordinates that are not less than 1<<30 so that the distance */
/* between any two of them stays below 1<<31; a distance that large would */
/* overflow into the sign bit of the signed long value used to represent */
/* fixed point coordinates. */
/*
* Overflow probe used to decide whether the fixed point fast path is safe.
* Runs Transform_transform on a point four times and returns JNI_TRUE as
* soon as either resulting coordinate satisfies TX_FIXED_UNSAFE; returns
* JNI_FALSE when none of the four results does.
* The result of the first transform is also stored through retx/rety.
*
* NOTE(review): the function name, its parameter list and the statements
* that load x and y before each Transform_transform call are not present
* in this view. As shown, x and y reach the first call without having
* been assigned - confirm against the complete file.
*/
static jboolean
{
jdouble x, y;
/* First point: its transformed position is handed back to the caller. */
Transform_transform(pItxInfo, &x, &y);
*retx = x;
*rety = y;
if (TX_FIXED_UNSAFE(x) || TX_FIXED_UNSAFE(y)) {
return JNI_TRUE;
}
/* Second point (presumably another corner of the destination bounds). */
Transform_transform(pItxInfo, &x, &y);
if (TX_FIXED_UNSAFE(x) || TX_FIXED_UNSAFE(y)) {
return JNI_TRUE;
}
/* Third point. */
Transform_transform(pItxInfo, &x, &y);
if (TX_FIXED_UNSAFE(x) || TX_FIXED_UNSAFE(y)) {
return JNI_TRUE;
}
/* Fourth point. */
Transform_transform(pItxInfo, &x, &y);
if (TX_FIXED_UNSAFE(x) || TX_FIXED_UNSAFE(y)) {
return JNI_TRUE;
}
/* None of the tested points was flagged as unsafe. */
return JNI_FALSE;
}
/*
* Fill the edge buffer with pairs of coordinates representing the maximum
* left and right pixels of the destination surface that should be processed
* on each scanline, clipped to the bounds parameter.
* The number of scanlines to calculate is implied by the bounds parameter.
* Only pixels that map back through the specified (inverse) transform to a
* source coordinate that falls within the (0, 0, sw, sh) bounds of the
* source image should be processed.
* pEdges points to an array of jints that holds 2 + numedges*2 values where
* numedges should match (pBounds->y2 - pBounds->y1).
* The first two jints in pEdges should be set to y1 and y2, and every pair
* of jints after that represents the xmin,xmax of all pixels in range of
* the transformed blit for the corresponding scanline.
*/
static void
{
/*
* NOTE(review): only the skeleton of this routine is present in this view.
* The name, parameter list, loop headers and the conditions guarding the
* two adjustments below are not shown, so the exact trimming rule cannot
* be confirmed from here.
*/
{
/* Advance dx1 by one pixel (presumably trimming the left end of the span). */
dx1++;
}
{
/* Pull dx2 back by one pixel (presumably trimming the right end of the span). */
dx2--;
}
/* Increment to next scanline */
dy1++;
}
}
/*
* NOTE(review): whatever followed this "static void" is not present in this
* view, and neither is the signature of the native method whose body
* follows the JNI header comment below. Most variables used in the body
* are therefore declared outside what can be seen here.
*/
static void
/*
* Class: sun_java2d_loops_TransformHelper
* Method: Transform
* Signature: (Lsun/java2d/loops/MaskBlit;Lsun/java2d/SurfaceData;Lsun/java2d/SurfaceData;Ljava/awt/Composite;Lsun/java2d/pipe/Region;Ljava/awt/geom/AffineTransform;IIIIIIIII[I)V
*/
{
/* Member declarations of this union are not visible in this view. */
union {
} rgb;
#ifdef MAKE_STUBS
/* Set to 1 after the first pass so the getenv checks run only once. */
static int th_initialized;
/* For debugging only - used to swap in alternate funcs for perf testing */
if (!th_initialized) {
/* The bodies of both branches are not visible in this view. */
if (getenv("TXSTUB") != 0) {
} else if (getenv("TXNOVIS") != 0) {
}
th_initialized = 1;
}
#endif /* MAKE_STUBS */
if (pHelperPrim == NULL) {
/* Should never happen... */
return;
}
if (pMaskBlitPrim == NULL) {
/* Exception was thrown by GetNativePrim */
return;
}
/* The conditions guarding the next two early returns are not visible. */
}
return;
}
return;
}
/*
* Grab the appropriate pointer to the helper and interpolation
* routines and calculate the maximum number of destination pixels
* that can be processed in one intermediate buffer based on the
* size of the buffer and the number of samples needed per pixel.
*/
switch (txtype) {
/* A NULL pInterpFunc makes the interpolation step further down a no-op. */
pInterpFunc = NULL;
break;
break;
break;
}
/* Bail out when the (not visible) call does not report SD_SUCCESS. */
!= SD_SUCCESS)
{
return;
}
/* Same failure handling for a second (not visible) call. */
!= SD_SUCCESS)
{
return;
}
if (numedges <= 0) {
/*
* Ideally Java should allocate an array large enough, but if
* we ever have a miscommunication about the number of edge
* lines, or if the Java array calculation should overflow to
* a positive number and succeed in allocating an array that
* is too small, we need to verify that it can still hold the
* number of integers that we plan to store to be safe.
*/
/* (edgesize/2 - 1) should avoid any overflow or underflow. */
: NULL;
/* numedges variable (jlong) can be at most ((1<<32)-1) */
/* memsize can overflow a jint, but not a jlong */
: NULL;
} else {
}
if (numedges > 0) {
}
return;
}
/* The work below is skipped entirely when clipInfo is empty. */
if (!Region_IsEmpty(&clipInfo)) {
{
} else {
void *pDst;
/* Note - process at most one scanline at a time. */
/* All pixels from dx1 to dx2 have centers in bounds */
/* Can process at most one buffer full at a time */
if (numpix > maxlinepix) {
numpix = maxlinepix;
}
xlong =
ylong =
/* Get IntArgbPre pixel data from source */
(*pHelperFunc)(&srcInfo,
/* Interpolate result pixels if needed */
if (pInterpFunc) {
}
0, 0, 0,
numpix, 1,
&compInfo);
/* Increment to next buffer worth of input pixels */
dx1 += maxlinepix;
}
/* Increment to next scanline */
dy1++;
}
}
}
} else {
}
}
}
/*
* Per-pixel path: transforms one point at a time with Transform_transform
* and handles a single pixel per helper call, instead of stepping in
* fixed point along a scanline.
*
* NOTE(review): the function name, parameter list, loop headers and the
* in-bounds test are not present in this view; the comments below only
* describe the statements that are visible.
*/
static void
{
/* row spans are set to max,min until we find a pixel in range below */
}
jdouble x, y;
/* Map the current point through pItxInfo (the loads of x and y are not visible here). */
Transform_transform(pItxInfo, &x, &y);
/* Process only pixels with centers in bounds
* Test double values to avoid overflow in conversion
* to long values and then also test the long values
* in case they rounded up and out of bounds during
* the conversion.
*/
{
void *pDst;
}
}
/* Get IntArgbPre pixel data from source */
/* Presumably fetches one pixel with zero stepping in x and y - TODO confirm. */
(*pHelperFunc)(pSrcInfo,
pData, 1,
xlong, 0,
ylong, 0);
/* Interpolate result pixels if needed */
if (pInterpFunc) {
}
0, 0, 0,
1, 1,
}
/* Increment to next input pixel */
dx1++;
}
/* Increment to next scanline */
dy1++;
}
}
}
/*
* NOTE(review): the next two lines are the remains of a multi-statement
* macro (presumably BL_ACCUM, which is invoked below); its #define line
* and body are not present in this view.
*/
do { \
} while (0)
/*
* Interpolation loop that produces one result per group of 4 input
* samples: for each of numpix pixels it invokes BL_ACCUM with the
* arguments 0..3, then advances pRes by one and pRGB by four.
*
* NOTE(review): the function name and parameter list are not visible, so
* the types of pRes, pRGB and numpix cannot be confirmed from here.
*/
static void
{
jint j;
for (j = 0; j < numpix; j++) {
/* One invocation per index 0..3 (presumably one per colour component). */
BL_ACCUM(0);
BL_ACCUM(1);
BL_ACCUM(2);
BL_ACCUM(3);
/* Step to the next output pixel and the next group of 4 inputs. */
pRes++;
pRGB += 4;
}
}
/*
* NOTE(review): remains of a multi-statement macro whose #define line and
* body are not present in this view.
*/
do { \
} while (0)
/*
* Select which bicubic arithmetic variant is compiled. As visible here,
* __sparc selects BICUBIC_USE_DBL_LUT and every other target selects
* BICUBIC_USE_INT_MATH; nothing in this view defines BICUBIC_USE_DBL_CAST.
*/
#ifdef __sparc
/* For sparc, floating point multiplies are faster than integer */
#define BICUBIC_USE_DBL_LUT
#else
/* For x86, integer multiplies are faster than floating point */
/* Note that on x86 Linux the choice of best algorithm varies
* depending on the compiler optimization and the processor type;
* all the variations produce mediocre performance.
* For now we will use the choice that works best for the Windows
* build until the (lack of) optimization issues on Linux are resolved.
*/
#define BICUBIC_USE_INT_MATH
#endif
#ifdef BICUBIC_USE_DBL_CAST
/* Coefficients are used as computed (identity conversion). */
#define BC_DblToCoeff(v) (v)
/*
* Remains of a macro (its #define line is not visible) that applies SAT to
* a with limit 255 and then to r, g and b with limit a. SAT itself is not
* defined in this view; presumably it saturates its first argument.
*/
do { \
SAT(a, 255); \
SAT(r, a); \
SAT(g, a); \
SAT(b, a); \
} while (0)
#endif /* BICUBIC_USE_DBL_CAST */
#ifdef BICUBIC_USE_DBL_LUT
/* Closing brace of an initializer whose beginning is not visible in this view. */
};
/* Coefficients are used as computed (identity conversion). */
#define BC_DblToCoeff(v) (v)
/* Same SAT sequence as in the BICUBIC_USE_DBL_CAST variant. */
do { \
SAT(a, 255); \
SAT(r, a); \
SAT(g, a); \
SAT(b, a); \
} while (0)
#endif /* BICUBIC_USE_DBL_LUT */
#ifdef BICUBIC_USE_INT_MATH
/*
* Remains of a macro (its #define line is not visible) that shifts the four
* accumulators right by 16 bits (presumably dropping 16 fractional bits of
* a fixed point sum - TODO confirm). The BC_DblToCoeff definition for this
* variant is not present in this view.
*/
do { \
accumA >>= 16; \
accumR >>= 16; \
accumG >>= 16; \
accumB >>= 16; \
} while (0)
#endif /* BICUBIC_USE_INT_MATH */
/*
* Remains of a macro that declares a local "int rgb"; the rest of its body
* is not present in this view.
*/
do { \
int rgb; \
} while (0)
/*
* Fill bicubic_coeff with samples of a cubic kernel r(x) taken at
* x = i/256, where A is the kernel's shape parameter.
*
* NOTE(review): the name and parameter list are not visible on these
* lines; the name is taken from the call init_bicubic_table(-0.5) elsewhere
* in this file. The declaration of bicubic_coeff is not visible either.
*/
static void
{
/*
* The following formulas are designed to give smooth
* results when 'A' is -0.5 or -1.0.
*/
int i;
/* Entries 0..255: the |x| < 1 branch, evaluated in nested (Horner) form. */
for (i = 0; i < 256; i++) {
/* r(x) = (A + 2)|x|^3 - (A + 3)|x|^2 + 1 , 0 <= |x| < 1 */
jdouble x = i / 256.0;
x = ((A+2)*x - (A+3))*x*x + 1;
bicubic_coeff[i] = BC_DblToCoeff(x);
}
/* Entries 256..383 (x in [1, 1.5)): the 1 <= |x| < 2 branch, computed directly. */
for (; i < 384; i++) {
/* r(x) = A|x|^3 - 5A|x|^2 + 8A|x| - 4A , 1 <= |x| < 2 */
jdouble x = i / 256.0;
x = ((A*x - 5*A)*x + 8*A)*x - 4*A;
bicubic_coeff[i] = BC_DblToCoeff(x);
}
/*
* The loop below starts at i == 385 (the init expression skips 384) and
* runs through 512, reading the earlier entries [i-256] and [768-i].
* NOTE(review): the head of the assignment, and any separate store to
* entry 384, are not present in this view.
*/
for (i++; i <= 512; i++) {
bicubic_coeff[i-256] +
bicubic_coeff[768-i]);
}
}
/*
* Interpolation loop that consumes 16 input samples per result: for each
* of numpix pixels it advances pRes by one and pRGB by sixteen. The
* coefficient table is built on demand with A = -0.5.
*
* NOTE(review): the function name, parameter list and the per-pixel
* computation are not present in this view.
*/
static void
{
jint i;
/* Build the coefficient table the first time it is needed. */
if (!bicubictableinited) {
init_bicubic_table(-0.5);
}
for (i = 0; i < numpix; i++) {
/* Step to the next output pixel and the next group of 16 inputs. */
pRes++;
pRGB += 16;
}
}
#ifdef MAKE_STUBS
/*
* Stub (compiled only under MAKE_STUBS) that walks the buffers the way a
* 4-samples-per-pixel interpolator would: the loop body runs numpix times
* when numpix is positive, advancing pRGBbase by one and pRGB by four each
* time.
*
* NOTE(review): the function name, parameter list and any statement that
* stores a result are not present in this view.
*/
static void
{
while (--numpix >= 0) {
pRGBbase += 1;
pRGB += 4;
}
}
/*
* Stub (compiled only under MAKE_STUBS) that walks the buffers the way a
* 16-samples-per-pixel interpolator would: the loop body runs numpix times
* when numpix is positive, advancing pRGBbase by one and pRGB by sixteen
* each time.
*
* NOTE(review): the function name, parameter list and any statement that
* stores a result are not present in this view.
*/
static void
{
while (--numpix >= 0) {
pRGBbase += 1;
pRGB += 16;
}
}
#endif /* MAKE_STUBS */