0N/A/*
2362N/A * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved.
0N/A * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
0N/A *
0N/A * This code is free software; you can redistribute it and/or modify it
0N/A * under the terms of the GNU General Public License version 2 only, as
2362N/A * published by the Free Software Foundation. Oracle designates this
0N/A * particular file as subject to the "Classpath" exception as provided
2362N/A * by Oracle in the LICENSE file that accompanied this code.
0N/A *
0N/A * This code is distributed in the hope that it will be useful, but WITHOUT
0N/A * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
0N/A * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
0N/A * version 2 for more details (a copy is included in the LICENSE file that
0N/A * accompanied this code).
0N/A *
0N/A * You should have received a copy of the GNU General Public License version
0N/A * 2 along with this work; if not, write to the Free Software Foundation,
0N/A * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
0N/A *
2362N/A * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
2362N/A * or visit www.oracle.com if you need additional information or have any
2362N/A * questions.
0N/A */
0N/A
0N/A
/*
 * FUNCTION
 *      Image affine transformation with Bicubic filtering
 * SYNOPSIS
 *      mlib_status mlib_ImageAffine_[u8|s16|u16]_?ch_bc(mlib_s32 *leftEdges,
 *                                                       mlib_s32 *rightEdges,
 *                                                       mlib_s32 *xStarts,
 *                                                       mlib_s32 *yStarts,
 *                                                       mlib_s32 *sides,
 *                                                       mlib_u8  *dstData,
 *                                                       mlib_u8  **lineAddr,
 *                                                       mlib_s32 dstYStride,
 *                                                       mlib_s32 is_affine,
 *                                                       mlib_s32 srcYStride,
 *                                                       mlib_filter filter)
 *
 * ARGUMENTS
 *      leftEdges  array[dstHeight] of xLeft coordinates
 *      rightEdges array[dstHeight] of xRight coordinates
 *      xStarts    array[dstHeight] of xStart * 65536 coordinates
 *      yStarts    array[dstHeight] of yStart * 65536 coordinates
 *      sides      output array[4]. sides[0] is yStart, sides[1] is yFinish,
 *                 sides[2] is dx * 65536, sides[3] is dy * 65536
 *      dstData    pointer to the first pixel on (yStart - 1) line
 *      lineAddr   array[srcHeight] of pointers to the first pixel on
 *                 the corresponding lines
 *      dstYStride stride of destination image
 *      is_affine  indicator (Affine - GridWarp)
 *      srcYStride stride of source image
 *      filter     type of resampling filter
 *
 * DESCRIPTION
 *      The functions step along the lines from xLeft to xRight and apply
 *      the bicubic filtering.
 *
 */
0N/A
0N/A#include "mlib_ImageAffine.h"
0N/A
/* Kernel configuration for the signed 16-bit bicubic variants. */

/* Builds the exported kernel name, e.g. FUN_NAME(1ch) -> mlib_ImageAffine_s16_1ch_bc. */
#define FUN_NAME(CHAN)  mlib_ImageAffine_s16_##CHAN##_bc

/* Sample type read from the source image and written to the destination. */
#define DTYPE           mlib_s16

/* NOTE(review): presumably consumed by the FILTER_SHIFT / FILTER_MASK macros
 * that the kernels use -- confirm in mlib_ImageAffine.h. */
#define FILTER_BITS     9
0N/A
0N/A/***************************************************************/
0N/A#ifdef __sparc /* for SPARC, using floating-point multiplies is faster */
0N/A
/* The floating-point kernels override FILTER_ELEM_BITS with 4.
 * NOTE(review): presumably this matches the byte layout of the mlib_f32
 * coefficient tables -- confirm in mlib_ImageAffine.h. */
#undef  FILTER_ELEM_BITS
#define FILTER_ELEM_BITS  4

/*
 * SAT16(DST): store the caller's floating-point accumulator `val0` into DST,
 * keeping the upper 16 bits of its 32-bit integer conversion.  The macro
 * reads a variable literally named `val0` from the calling scope.
 */
#ifndef MLIB_USE_FTOI_CLAMPING

/* Explicit clamp: accumulators at or beyond the mlib_s32 limits saturate. */
#define SAT16(DST)                                              \
  do {                                                          \
    if (val0 >= MLIB_S32_MAX) {                                 \
      DST = MLIB_S16_MAX;                                       \
    } else if (val0 <= MLIB_S32_MIN) {                          \
      DST = MLIB_S16_MIN;                                       \
    } else {                                                    \
      DST = ((mlib_s32)val0) >> 16;                             \
    }                                                           \
  } while (0)

#else

/* No explicit range test; the conversion itself is relied on for clamping. */
#define SAT16(DST)                                              \
  DST = ((mlib_s32)val0) >> 16

#endif /* MLIB_USE_FTOI_CLAMPING */
0N/A
0N/Amlib_status FUN_NAME(1ch)(mlib_affine_param *param)
0N/A{
0N/A DECLAREVAR_BC();
0N/A DTYPE *dstLineEnd;
0N/A const mlib_f32 *mlib_filters_table;
0N/A
0N/A if (filter == MLIB_BICUBIC) {
0N/A mlib_filters_table = mlib_filters_s16f_bc;
0N/A }
0N/A else {
0N/A mlib_filters_table = mlib_filters_s16f_bc2;
0N/A }
0N/A
0N/A for (j = yStart; j <= yFinish; j++) {
0N/A mlib_d64 xf0, xf1, xf2, xf3;
0N/A mlib_d64 yf0, yf1, yf2, yf3;
0N/A mlib_d64 c0, c1, c2, c3, val0;
0N/A mlib_s32 filterpos;
0N/A mlib_f32 *fptr;
0N/A mlib_s32 s0, s1, s2, s3;
0N/A mlib_s32 s4, s5, s6, s7;
0N/A
0N/A CLIP(1);
0N/A dstLineEnd = (DTYPE *) dstData + xRight;
0N/A
0N/A filterpos = (X >> FILTER_SHIFT) & FILTER_MASK;
0N/A fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
0N/A
0N/A xf0 = fptr[0];
0N/A xf1 = fptr[1];
0N/A xf2 = fptr[2];
0N/A xf3 = fptr[3];
0N/A
0N/A filterpos = (Y >> FILTER_SHIFT) & FILTER_MASK;
0N/A fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
0N/A
0N/A yf0 = fptr[0];
0N/A yf1 = fptr[1];
0N/A yf2 = fptr[2];
0N/A yf3 = fptr[3];
0N/A
0N/A xSrc = (X >> MLIB_SHIFT) - 1;
0N/A ySrc = (Y >> MLIB_SHIFT) - 1;
0N/A
0N/A srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + xSrc;
0N/A s0 = srcPixelPtr[0];
0N/A s1 = srcPixelPtr[1];
0N/A s2 = srcPixelPtr[2];
0N/A s3 = srcPixelPtr[3];
0N/A
0N/A srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
0N/A s4 = srcPixelPtr[0];
0N/A s5 = srcPixelPtr[1];
0N/A s6 = srcPixelPtr[2];
0N/A s7 = srcPixelPtr[3];
0N/A
0N/A for (; dstPixelPtr <= (dstLineEnd - 1); dstPixelPtr++) {
0N/A
0N/A X += dX;
0N/A Y += dY;
0N/A
0N/A c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3);
0N/A c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3);
0N/A srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
0N/A c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[1] * xf1 +
0N/A srcPixelPtr[2] * xf2 + srcPixelPtr[3] * xf3);
0N/A srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
0N/A c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[1] * xf1 +
0N/A srcPixelPtr[2] * xf2 + srcPixelPtr[3] * xf3);
0N/A
0N/A filterpos = (X >> FILTER_SHIFT) & FILTER_MASK;
0N/A fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
0N/A
0N/A xf0 = fptr[0];
0N/A xf1 = fptr[1];
0N/A xf2 = fptr[2];
0N/A xf3 = fptr[3];
0N/A
0N/A val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3);
0N/A
0N/A filterpos = (Y >> FILTER_SHIFT) & FILTER_MASK;
0N/A fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
0N/A
0N/A yf0 = fptr[0];
0N/A yf1 = fptr[1];
0N/A yf2 = fptr[2];
0N/A yf3 = fptr[3];
0N/A
0N/A SAT16(dstPixelPtr[0]);
0N/A
0N/A xSrc = (X >> MLIB_SHIFT) - 1;
0N/A ySrc = (Y >> MLIB_SHIFT) - 1;
0N/A
0N/A srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + xSrc;
0N/A s0 = srcPixelPtr[0];
0N/A s1 = srcPixelPtr[1];
0N/A s2 = srcPixelPtr[2];
0N/A s3 = srcPixelPtr[3];
0N/A
0N/A srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
0N/A s4 = srcPixelPtr[0];
0N/A s5 = srcPixelPtr[1];
0N/A s6 = srcPixelPtr[2];
0N/A s7 = srcPixelPtr[3];
0N/A }
0N/A
0N/A c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3);
0N/A c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3);
0N/A srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
0N/A c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[1] * xf1 +
0N/A srcPixelPtr[2] * xf2 + srcPixelPtr[3] * xf3);
0N/A srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
0N/A c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[1] * xf1 +
0N/A srcPixelPtr[2] * xf2 + srcPixelPtr[3] * xf3);
0N/A
0N/A val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3);
0N/A SAT16(dstPixelPtr[0]);
0N/A }
0N/A
0N/A return MLIB_SUCCESS;
0N/A}
0N/A
0N/Amlib_status FUN_NAME(2ch)(mlib_affine_param *param)
0N/A{
0N/A DECLAREVAR_BC();
0N/A DTYPE *dstLineEnd;
0N/A const mlib_f32 *mlib_filters_table;
0N/A
0N/A if (filter == MLIB_BICUBIC) {
0N/A mlib_filters_table = mlib_filters_s16f_bc;
0N/A }
0N/A else {
0N/A mlib_filters_table = mlib_filters_s16f_bc2;
0N/A }
0N/A
0N/A for (j = yStart; j <= yFinish; j++) {
0N/A mlib_d64 xf0, xf1, xf2, xf3;
0N/A mlib_d64 yf0, yf1, yf2, yf3;
0N/A mlib_d64 c0, c1, c2, c3, val0;
0N/A mlib_s32 filterpos, k;
0N/A mlib_f32 *fptr;
0N/A mlib_s32 s0, s1, s2, s3;
0N/A mlib_s32 s4, s5, s6, s7;
0N/A
0N/A CLIP(2);
0N/A dstLineEnd = (DTYPE *) dstData + 2 * xRight;
0N/A
0N/A for (k = 0; k < 2; k++) {
0N/A mlib_s32 X1 = X;
0N/A mlib_s32 Y1 = Y;
0N/A DTYPE *dPtr = dstPixelPtr + k;
0N/A
0N/A filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK;
0N/A fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
0N/A
0N/A xf0 = fptr[0];
0N/A xf1 = fptr[1];
0N/A xf2 = fptr[2];
0N/A xf3 = fptr[3];
0N/A
0N/A filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK;
0N/A fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
0N/A
0N/A yf0 = fptr[0];
0N/A yf1 = fptr[1];
0N/A yf2 = fptr[2];
0N/A yf3 = fptr[3];
0N/A
0N/A xSrc = (X1 >> MLIB_SHIFT) - 1;
0N/A ySrc = (Y1 >> MLIB_SHIFT) - 1;
0N/A
0N/A srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 2 * xSrc + k;
0N/A s0 = srcPixelPtr[0];
0N/A s1 = srcPixelPtr[2];
0N/A s2 = srcPixelPtr[4];
0N/A s3 = srcPixelPtr[6];
0N/A
0N/A srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
0N/A s4 = srcPixelPtr[0];
0N/A s5 = srcPixelPtr[2];
0N/A s6 = srcPixelPtr[4];
0N/A s7 = srcPixelPtr[6];
0N/A
0N/A for (; dPtr <= (dstLineEnd - 1); dPtr += 2) {
0N/A
0N/A X1 += dX;
0N/A Y1 += dY;
0N/A
0N/A c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3);
0N/A c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3);
0N/A srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
0N/A c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[2] * xf1 +
0N/A srcPixelPtr[4] * xf2 + srcPixelPtr[6] * xf3);
0N/A srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
0N/A c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[2] * xf1 +
0N/A srcPixelPtr[4] * xf2 + srcPixelPtr[6] * xf3);
0N/A
0N/A filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK;
0N/A fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
0N/A
0N/A xf0 = fptr[0];
0N/A xf1 = fptr[1];
0N/A xf2 = fptr[2];
0N/A xf3 = fptr[3];
0N/A
0N/A val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3);
0N/A
0N/A filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK;
0N/A fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
0N/A
0N/A yf0 = fptr[0];
0N/A yf1 = fptr[1];
0N/A yf2 = fptr[2];
0N/A yf3 = fptr[3];
0N/A
0N/A SAT16(dPtr[0]);
0N/A
0N/A xSrc = (X1 >> MLIB_SHIFT) - 1;
0N/A ySrc = (Y1 >> MLIB_SHIFT) - 1;
0N/A
0N/A srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 2 * xSrc + k;
0N/A s0 = srcPixelPtr[0];
0N/A s1 = srcPixelPtr[2];
0N/A s2 = srcPixelPtr[4];
0N/A s3 = srcPixelPtr[6];
0N/A
0N/A srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
0N/A s4 = srcPixelPtr[0];
0N/A s5 = srcPixelPtr[2];
0N/A s6 = srcPixelPtr[4];
0N/A s7 = srcPixelPtr[6];
0N/A }
0N/A
0N/A c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3);
0N/A c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3);
0N/A srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
0N/A c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[2] * xf1 +
0N/A srcPixelPtr[4] * xf2 + srcPixelPtr[6] * xf3);
0N/A srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
0N/A c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[2] * xf1 +
0N/A srcPixelPtr[4] * xf2 + srcPixelPtr[6] * xf3);
0N/A
0N/A val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3);
0N/A SAT16(dPtr[0]);
0N/A }
0N/A }
0N/A
0N/A return MLIB_SUCCESS;
0N/A}
0N/A
0N/Amlib_status FUN_NAME(3ch)(mlib_affine_param *param)
0N/A{
0N/A DECLAREVAR_BC();
0N/A DTYPE *dstLineEnd;
0N/A const mlib_f32 *mlib_filters_table;
0N/A
0N/A if (filter == MLIB_BICUBIC) {
0N/A mlib_filters_table = mlib_filters_s16f_bc;
0N/A }
0N/A else {
0N/A mlib_filters_table = mlib_filters_s16f_bc2;
0N/A }
0N/A
0N/A for (j = yStart; j <= yFinish; j++) {
0N/A mlib_d64 xf0, xf1, xf2, xf3;
0N/A mlib_d64 yf0, yf1, yf2, yf3;
0N/A mlib_d64 c0, c1, c2, c3, val0;
0N/A mlib_s32 filterpos, k;
0N/A mlib_f32 *fptr;
0N/A mlib_s32 s0, s1, s2, s3;
0N/A mlib_s32 s4, s5, s6, s7;
0N/A
0N/A CLIP(3);
0N/A dstLineEnd = (DTYPE *) dstData + 3 * xRight;
0N/A
0N/A for (k = 0; k < 3; k++) {
0N/A mlib_s32 X1 = X;
0N/A mlib_s32 Y1 = Y;
0N/A DTYPE *dPtr = dstPixelPtr + k;
0N/A
0N/A filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK;
0N/A fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
0N/A
0N/A xf0 = fptr[0];
0N/A xf1 = fptr[1];
0N/A xf2 = fptr[2];
0N/A xf3 = fptr[3];
0N/A
0N/A filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK;
0N/A fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
0N/A
0N/A yf0 = fptr[0];
0N/A yf1 = fptr[1];
0N/A yf2 = fptr[2];
0N/A yf3 = fptr[3];
0N/A
0N/A xSrc = (X1 >> MLIB_SHIFT) - 1;
0N/A ySrc = (Y1 >> MLIB_SHIFT) - 1;
0N/A
0N/A srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 3 * xSrc + k;
0N/A s0 = srcPixelPtr[0];
0N/A s1 = srcPixelPtr[3];
0N/A s2 = srcPixelPtr[6];
0N/A s3 = srcPixelPtr[9];
0N/A
0N/A srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
0N/A s4 = srcPixelPtr[0];
0N/A s5 = srcPixelPtr[3];
0N/A s6 = srcPixelPtr[6];
0N/A s7 = srcPixelPtr[9];
0N/A
0N/A for (; dPtr <= (dstLineEnd - 1); dPtr += 3) {
0N/A
0N/A X1 += dX;
0N/A Y1 += dY;
0N/A
0N/A c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3);
0N/A c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3);
0N/A srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
0N/A c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[3] * xf1 +
0N/A srcPixelPtr[6] * xf2 + srcPixelPtr[9] * xf3);
0N/A srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
0N/A c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[3] * xf1 +
0N/A srcPixelPtr[6] * xf2 + srcPixelPtr[9] * xf3);
0N/A
0N/A filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK;
0N/A fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
0N/A
0N/A xf0 = fptr[0];
0N/A xf1 = fptr[1];
0N/A xf2 = fptr[2];
0N/A xf3 = fptr[3];
0N/A
0N/A val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3);
0N/A
0N/A filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK;
0N/A fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
0N/A
0N/A yf0 = fptr[0];
0N/A yf1 = fptr[1];
0N/A yf2 = fptr[2];
0N/A yf3 = fptr[3];
0N/A
0N/A SAT16(dPtr[0]);
0N/A
0N/A xSrc = (X1 >> MLIB_SHIFT) - 1;
0N/A ySrc = (Y1 >> MLIB_SHIFT) - 1;
0N/A
0N/A srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 3 * xSrc + k;
0N/A s0 = srcPixelPtr[0];
0N/A s1 = srcPixelPtr[3];
0N/A s2 = srcPixelPtr[6];
0N/A s3 = srcPixelPtr[9];
0N/A
0N/A srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
0N/A s4 = srcPixelPtr[0];
0N/A s5 = srcPixelPtr[3];
0N/A s6 = srcPixelPtr[6];
0N/A s7 = srcPixelPtr[9];
0N/A }
0N/A
0N/A c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3);
0N/A c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3);
0N/A srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
0N/A c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[3] * xf1 +
0N/A srcPixelPtr[6] * xf2 + srcPixelPtr[9] * xf3);
0N/A srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
0N/A c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[3] * xf1 +
0N/A srcPixelPtr[6] * xf2 + srcPixelPtr[9] * xf3);
0N/A
0N/A val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3);
0N/A SAT16(dPtr[0]);
0N/A }
0N/A }
0N/A
0N/A return MLIB_SUCCESS;
0N/A}
0N/A
0N/Amlib_status FUN_NAME(4ch)(mlib_affine_param *param)
0N/A{
0N/A DECLAREVAR_BC();
0N/A DTYPE *dstLineEnd;
0N/A const mlib_f32 *mlib_filters_table;
0N/A
0N/A if (filter == MLIB_BICUBIC) {
0N/A mlib_filters_table = mlib_filters_s16f_bc;
0N/A }
0N/A else {
0N/A mlib_filters_table = mlib_filters_s16f_bc2;
0N/A }
0N/A
0N/A for (j = yStart; j <= yFinish; j++) {
0N/A mlib_d64 xf0, xf1, xf2, xf3;
0N/A mlib_d64 yf0, yf1, yf2, yf3;
0N/A mlib_d64 c0, c1, c2, c3, val0;
0N/A mlib_s32 filterpos, k;
0N/A mlib_f32 *fptr;
0N/A mlib_s32 s0, s1, s2, s3;
0N/A mlib_s32 s4, s5, s6, s7;
0N/A
0N/A CLIP(4);
0N/A dstLineEnd = (DTYPE *) dstData + 4 * xRight;
0N/A
0N/A for (k = 0; k < 4; k++) {
0N/A mlib_s32 X1 = X;
0N/A mlib_s32 Y1 = Y;
0N/A DTYPE *dPtr = dstPixelPtr + k;
0N/A
0N/A filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK;
0N/A fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
0N/A
0N/A xf0 = fptr[0];
0N/A xf1 = fptr[1];
0N/A xf2 = fptr[2];
0N/A xf3 = fptr[3];
0N/A
0N/A filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK;
0N/A fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
0N/A
0N/A yf0 = fptr[0];
0N/A yf1 = fptr[1];
0N/A yf2 = fptr[2];
0N/A yf3 = fptr[3];
0N/A
0N/A xSrc = (X1 >> MLIB_SHIFT) - 1;
0N/A ySrc = (Y1 >> MLIB_SHIFT) - 1;
0N/A
0N/A srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 4 * xSrc + k;
0N/A s0 = srcPixelPtr[0];
0N/A s1 = srcPixelPtr[4];
0N/A s2 = srcPixelPtr[8];
0N/A s3 = srcPixelPtr[12];
0N/A
0N/A srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
0N/A s4 = srcPixelPtr[0];
0N/A s5 = srcPixelPtr[4];
0N/A s6 = srcPixelPtr[8];
0N/A s7 = srcPixelPtr[12];
0N/A
0N/A for (; dPtr <= (dstLineEnd - 1); dPtr += 4) {
0N/A
0N/A X1 += dX;
0N/A Y1 += dY;
0N/A
0N/A c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3);
0N/A c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3);
0N/A srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
0N/A c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[4] * xf1 +
0N/A srcPixelPtr[8] * xf2 + srcPixelPtr[12] * xf3);
0N/A srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
0N/A c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[4] * xf1 +
0N/A srcPixelPtr[8] * xf2 + srcPixelPtr[12] * xf3);
0N/A
0N/A filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK;
0N/A fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
0N/A
0N/A xf0 = fptr[0];
0N/A xf1 = fptr[1];
0N/A xf2 = fptr[2];
0N/A xf3 = fptr[3];
0N/A
0N/A val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3);
0N/A
0N/A filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK;
0N/A fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
0N/A
0N/A yf0 = fptr[0];
0N/A yf1 = fptr[1];
0N/A yf2 = fptr[2];
0N/A yf3 = fptr[3];
0N/A
0N/A SAT16(dPtr[0]);
0N/A
0N/A xSrc = (X1 >> MLIB_SHIFT) - 1;
0N/A ySrc = (Y1 >> MLIB_SHIFT) - 1;
0N/A
0N/A srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 4 * xSrc + k;
0N/A s0 = srcPixelPtr[0];
0N/A s1 = srcPixelPtr[4];
0N/A s2 = srcPixelPtr[8];
0N/A s3 = srcPixelPtr[12];
0N/A
0N/A srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
0N/A s4 = srcPixelPtr[0];
0N/A s5 = srcPixelPtr[4];
0N/A s6 = srcPixelPtr[8];
0N/A s7 = srcPixelPtr[12];
0N/A }
0N/A
0N/A c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3);
0N/A c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3);
0N/A srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
0N/A c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[4] * xf1 +
0N/A srcPixelPtr[8] * xf2 + srcPixelPtr[12] * xf3);
0N/A srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
0N/A c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[4] * xf1 +
0N/A srcPixelPtr[8] * xf2 + srcPixelPtr[12] * xf3);
0N/A
0N/A val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3);
0N/A SAT16(dPtr[0]);
0N/A }
0N/A }
0N/A
0N/A return MLIB_SUCCESS;
0N/A}
0N/A
0N/A#else /* for x86, using integer multiplies is faster */
0N/A
/*
 * Fixed-point scaling for the integer kernels.  The horizontal pass shifts
 * each row sum right by SHIFT_X and the vertical pass shifts by SHIFT_Y; the
 * two shifts always add up to 15 + 15 bits.
 */
#define SHIFT_X  15
#define SHIFT_Y  (15 + 15 - SHIFT_X)

/* Horizontal rounding is disabled; the vertical pass rounds to nearest. */
#define ROUND_X  0 /* (1 << (SHIFT_X - 1)) */
#define ROUND_Y  (1 << (SHIFT_Y - 1))

/*
 * S32_TO_S16_SAT(DST): store the caller's integer accumulator `val0` into
 * DST, clamped to [MLIB_S16_MIN, MLIB_S16_MAX].  The macro reads a variable
 * literally named `val0` from the calling scope.
 */
#define S32_TO_S16_SAT(DST)                                     \
  do {                                                          \
    if (val0 >= MLIB_S16_MAX) {                                 \
      DST = MLIB_S16_MAX;                                       \
    } else if (val0 <= MLIB_S16_MIN) {                          \
      DST = MLIB_S16_MIN;                                       \
    } else {                                                    \
      DST = (mlib_s16)val0;                                     \
    }                                                           \
  } while (0)
0N/A
0N/Amlib_status FUN_NAME(1ch)(mlib_affine_param *param)
0N/A{
0N/A DECLAREVAR_BC();
0N/A DTYPE *dstLineEnd;
0N/A const mlib_s16 *mlib_filters_table;
0N/A
0N/A if (filter == MLIB_BICUBIC) {
0N/A mlib_filters_table = (mlib_s16 *) mlib_filters_s16_bc;
0N/A }
0N/A else {
0N/A mlib_filters_table = (mlib_s16 *) mlib_filters_s16_bc2;
0N/A }
0N/A
0N/A for (j = yStart; j <= yFinish; j++) {
0N/A mlib_s32 xf0, xf1, xf2, xf3;
0N/A mlib_s32 yf0, yf1, yf2, yf3;
0N/A mlib_s32 c0, c1, c2, c3, val0;
0N/A mlib_s32 filterpos;
0N/A mlib_s16 *fptr;
0N/A mlib_s32 s0, s1, s2, s3;
0N/A mlib_s32 s4, s5, s6, s7;
0N/A
0N/A CLIP(1);
0N/A dstLineEnd = (DTYPE *) dstData + xRight;
0N/A
0N/A filterpos = (X >> FILTER_SHIFT) & FILTER_MASK;
0N/A fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
0N/A
0N/A xf0 = fptr[0];
0N/A xf1 = fptr[1];
0N/A xf2 = fptr[2];
0N/A xf3 = fptr[3];
0N/A
0N/A filterpos = (Y >> FILTER_SHIFT) & FILTER_MASK;
0N/A fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
0N/A
0N/A yf0 = fptr[0];
0N/A yf1 = fptr[1];
0N/A yf2 = fptr[2];
0N/A yf3 = fptr[3];
0N/A
0N/A xSrc = (X >> MLIB_SHIFT) - 1;
0N/A ySrc = (Y >> MLIB_SHIFT) - 1;
0N/A
0N/A srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + xSrc;
0N/A s0 = srcPixelPtr[0];
0N/A s1 = srcPixelPtr[1];
0N/A s2 = srcPixelPtr[2];
0N/A s3 = srcPixelPtr[3];
0N/A
0N/A srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
0N/A s4 = srcPixelPtr[0];
0N/A s5 = srcPixelPtr[1];
0N/A s6 = srcPixelPtr[2];
0N/A s7 = srcPixelPtr[3];
0N/A
0N/A for (; dstPixelPtr <= (dstLineEnd - 1); dstPixelPtr++) {
0N/A
0N/A X += dX;
0N/A Y += dY;
0N/A
0N/A c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X;
0N/A c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3 + ROUND_X) >> SHIFT_X;
0N/A srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
0N/A c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[1] * xf1 +
0N/A srcPixelPtr[2] * xf2 + srcPixelPtr[3] * xf3 + ROUND_X) >> SHIFT_X;
0N/A srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
0N/A c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[1] * xf1 +
0N/A srcPixelPtr[2] * xf2 + srcPixelPtr[3] * xf3 + ROUND_X) >> SHIFT_X;
0N/A
0N/A filterpos = (X >> FILTER_SHIFT) & FILTER_MASK;
0N/A fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
0N/A
0N/A xf0 = fptr[0];
0N/A xf1 = fptr[1];
0N/A xf2 = fptr[2];
0N/A xf3 = fptr[3];
0N/A
0N/A val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y;
0N/A
0N/A filterpos = (Y >> FILTER_SHIFT) & FILTER_MASK;
0N/A fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
0N/A
0N/A yf0 = fptr[0];
0N/A yf1 = fptr[1];
0N/A yf2 = fptr[2];
0N/A yf3 = fptr[3];
0N/A
0N/A S32_TO_S16_SAT(dstPixelPtr[0]);
0N/A
0N/A xSrc = (X >> MLIB_SHIFT) - 1;
0N/A ySrc = (Y >> MLIB_SHIFT) - 1;
0N/A
0N/A srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + xSrc;
0N/A s0 = srcPixelPtr[0];
0N/A s1 = srcPixelPtr[1];
0N/A s2 = srcPixelPtr[2];
0N/A s3 = srcPixelPtr[3];
0N/A
0N/A srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
0N/A s4 = srcPixelPtr[0];
0N/A s5 = srcPixelPtr[1];
0N/A s6 = srcPixelPtr[2];
0N/A s7 = srcPixelPtr[3];
0N/A }
0N/A
0N/A c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X;
0N/A c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3 + ROUND_X) >> SHIFT_X;
0N/A srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
0N/A c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[1] * xf1 +
0N/A srcPixelPtr[2] * xf2 + srcPixelPtr[3] * xf3 + ROUND_X) >> SHIFT_X;
0N/A srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
0N/A c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[1] * xf1 +
0N/A srcPixelPtr[2] * xf2 + srcPixelPtr[3] * xf3 + ROUND_X) >> SHIFT_X;
0N/A
0N/A val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y;
0N/A S32_TO_S16_SAT(dstPixelPtr[0]);
0N/A }
0N/A
0N/A return MLIB_SUCCESS;
0N/A}
0N/A
0N/Amlib_status FUN_NAME(2ch)(mlib_affine_param *param)
0N/A{
0N/A DECLAREVAR_BC();
0N/A DTYPE *dstLineEnd;
0N/A const mlib_s16 *mlib_filters_table;
0N/A
0N/A if (filter == MLIB_BICUBIC) {
0N/A mlib_filters_table = (mlib_s16 *) mlib_filters_s16_bc;
0N/A }
0N/A else {
0N/A mlib_filters_table = (mlib_s16 *) mlib_filters_s16_bc2;
0N/A }
0N/A
0N/A for (j = yStart; j <= yFinish; j++) {
0N/A mlib_s32 xf0, xf1, xf2, xf3;
0N/A mlib_s32 yf0, yf1, yf2, yf3;
0N/A mlib_s32 c0, c1, c2, c3, val0;
0N/A mlib_s32 filterpos, k;
0N/A mlib_s16 *fptr;
0N/A mlib_s32 s0, s1, s2, s3;
0N/A mlib_s32 s4, s5, s6, s7;
0N/A
0N/A CLIP(2);
0N/A dstLineEnd = (DTYPE *) dstData + 2 * xRight;
0N/A
0N/A for (k = 0; k < 2; k++) {
0N/A mlib_s32 X1 = X;
0N/A mlib_s32 Y1 = Y;
0N/A DTYPE *dPtr = dstPixelPtr + k;
0N/A
0N/A filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK;
0N/A fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
0N/A
0N/A xf0 = fptr[0];
0N/A xf1 = fptr[1];
0N/A xf2 = fptr[2];
0N/A xf3 = fptr[3];
0N/A
0N/A filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK;
0N/A fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
0N/A
0N/A yf0 = fptr[0];
0N/A yf1 = fptr[1];
0N/A yf2 = fptr[2];
0N/A yf3 = fptr[3];
0N/A
0N/A xSrc = (X1 >> MLIB_SHIFT) - 1;
0N/A ySrc = (Y1 >> MLIB_SHIFT) - 1;
0N/A
0N/A srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 2 * xSrc + k;
0N/A s0 = srcPixelPtr[0];
0N/A s1 = srcPixelPtr[2];
0N/A s2 = srcPixelPtr[4];
0N/A s3 = srcPixelPtr[6];
0N/A
0N/A srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
0N/A s4 = srcPixelPtr[0];
0N/A s5 = srcPixelPtr[2];
0N/A s6 = srcPixelPtr[4];
0N/A s7 = srcPixelPtr[6];
0N/A
0N/A for (; dPtr <= (dstLineEnd - 1); dPtr += 2) {
0N/A
0N/A X1 += dX;
0N/A Y1 += dY;
0N/A
0N/A c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X;
0N/A c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3 + ROUND_X) >> SHIFT_X;
0N/A srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
0N/A c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[2] * xf1 +
0N/A srcPixelPtr[4] * xf2 + srcPixelPtr[6] * xf3 + ROUND_X) >> SHIFT_X;
0N/A srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
0N/A c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[2] * xf1 +
0N/A srcPixelPtr[4] * xf2 + srcPixelPtr[6] * xf3 + ROUND_X) >> SHIFT_X;
0N/A
0N/A filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK;
0N/A fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
0N/A
0N/A xf0 = fptr[0];
0N/A xf1 = fptr[1];
0N/A xf2 = fptr[2];
0N/A xf3 = fptr[3];
0N/A
0N/A val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y;
0N/A
0N/A filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK;
0N/A fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
0N/A
0N/A yf0 = fptr[0];
0N/A yf1 = fptr[1];
0N/A yf2 = fptr[2];
0N/A yf3 = fptr[3];
0N/A
0N/A S32_TO_S16_SAT(dPtr[0]);
0N/A
0N/A xSrc = (X1 >> MLIB_SHIFT) - 1;
0N/A ySrc = (Y1 >> MLIB_SHIFT) - 1;
0N/A
0N/A srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 2 * xSrc + k;
0N/A s0 = srcPixelPtr[0];
0N/A s1 = srcPixelPtr[2];
0N/A s2 = srcPixelPtr[4];
0N/A s3 = srcPixelPtr[6];
0N/A
0N/A srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
0N/A s4 = srcPixelPtr[0];
0N/A s5 = srcPixelPtr[2];
0N/A s6 = srcPixelPtr[4];
0N/A s7 = srcPixelPtr[6];
0N/A }
0N/A
0N/A c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X;
0N/A c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3 + ROUND_X) >> SHIFT_X;
0N/A srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
0N/A c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[2] * xf1 +
0N/A srcPixelPtr[4] * xf2 + srcPixelPtr[6] * xf3 + ROUND_X) >> SHIFT_X;
0N/A srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
0N/A c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[2] * xf1 +
0N/A srcPixelPtr[4] * xf2 + srcPixelPtr[6] * xf3 + ROUND_X) >> SHIFT_X;
0N/A
0N/A val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y;
0N/A S32_TO_S16_SAT(dPtr[0]);
0N/A }
0N/A }
0N/A
0N/A return MLIB_SUCCESS;
0N/A}
0N/A
0N/Amlib_status FUN_NAME(3ch)(mlib_affine_param *param)
0N/A{
0N/A DECLAREVAR_BC();
0N/A DTYPE *dstLineEnd;
0N/A const mlib_s16 *mlib_filters_table;
0N/A
0N/A if (filter == MLIB_BICUBIC) {
0N/A mlib_filters_table = (mlib_s16 *) mlib_filters_s16_bc;
0N/A }
0N/A else {
0N/A mlib_filters_table = (mlib_s16 *) mlib_filters_s16_bc2;
0N/A }
0N/A
0N/A for (j = yStart; j <= yFinish; j++) {
0N/A mlib_s32 xf0, xf1, xf2, xf3;
0N/A mlib_s32 yf0, yf1, yf2, yf3;
0N/A mlib_s32 c0, c1, c2, c3, val0;
0N/A mlib_s32 filterpos, k;
0N/A mlib_s16 *fptr;
0N/A mlib_s32 s0, s1, s2, s3;
0N/A mlib_s32 s4, s5, s6, s7;
0N/A
0N/A CLIP(3);
0N/A dstLineEnd = (DTYPE *) dstData + 3 * xRight;
0N/A
0N/A for (k = 0; k < 3; k++) {
0N/A mlib_s32 X1 = X;
0N/A mlib_s32 Y1 = Y;
0N/A DTYPE *dPtr = dstPixelPtr + k;
0N/A
0N/A filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK;
0N/A fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
0N/A
0N/A xf0 = fptr[0];
0N/A xf1 = fptr[1];
0N/A xf2 = fptr[2];
0N/A xf3 = fptr[3];
0N/A
0N/A filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK;
0N/A fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
0N/A
0N/A yf0 = fptr[0];
0N/A yf1 = fptr[1];
0N/A yf2 = fptr[2];
0N/A yf3 = fptr[3];
0N/A
0N/A xSrc = (X1 >> MLIB_SHIFT) - 1;
0N/A ySrc = (Y1 >> MLIB_SHIFT) - 1;
0N/A
0N/A srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 3 * xSrc + k;
0N/A s0 = srcPixelPtr[0];
0N/A s1 = srcPixelPtr[3];
0N/A s2 = srcPixelPtr[6];
0N/A s3 = srcPixelPtr[9];
0N/A
0N/A srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
0N/A s4 = srcPixelPtr[0];
0N/A s5 = srcPixelPtr[3];
0N/A s6 = srcPixelPtr[6];
0N/A s7 = srcPixelPtr[9];
0N/A
0N/A for (; dPtr <= (dstLineEnd - 1); dPtr += 3) {
0N/A
0N/A X1 += dX;
0N/A Y1 += dY;
0N/A
0N/A c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X;
0N/A c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3 + ROUND_X) >> SHIFT_X;
0N/A srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
0N/A c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[3] * xf1 +
0N/A srcPixelPtr[6] * xf2 + srcPixelPtr[9] * xf3 + ROUND_X) >> SHIFT_X;
0N/A srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
0N/A c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[3] * xf1 +
0N/A srcPixelPtr[6] * xf2 + srcPixelPtr[9] * xf3 + ROUND_X) >> SHIFT_X;
0N/A
0N/A filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK;
0N/A fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
0N/A
0N/A xf0 = fptr[0];
0N/A xf1 = fptr[1];
0N/A xf2 = fptr[2];
0N/A xf3 = fptr[3];
0N/A
0N/A val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y;
0N/A
0N/A filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK;
0N/A fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
0N/A
0N/A yf0 = fptr[0];
0N/A yf1 = fptr[1];
0N/A yf2 = fptr[2];
0N/A yf3 = fptr[3];
0N/A
0N/A S32_TO_S16_SAT(dPtr[0]);
0N/A
0N/A xSrc = (X1 >> MLIB_SHIFT) - 1;
0N/A ySrc = (Y1 >> MLIB_SHIFT) - 1;
0N/A
0N/A srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 3 * xSrc + k;
0N/A s0 = srcPixelPtr[0];
0N/A s1 = srcPixelPtr[3];
0N/A s2 = srcPixelPtr[6];
0N/A s3 = srcPixelPtr[9];
0N/A
0N/A srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
0N/A s4 = srcPixelPtr[0];
0N/A s5 = srcPixelPtr[3];
0N/A s6 = srcPixelPtr[6];
0N/A s7 = srcPixelPtr[9];
0N/A }
0N/A
0N/A c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X;
0N/A c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3 + ROUND_X) >> SHIFT_X;
0N/A srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
0N/A c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[3] * xf1 +
0N/A srcPixelPtr[6] * xf2 + srcPixelPtr[9] * xf3 + ROUND_X) >> SHIFT_X;
0N/A srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
0N/A c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[3] * xf1 +
0N/A srcPixelPtr[6] * xf2 + srcPixelPtr[9] * xf3 + ROUND_X) >> SHIFT_X;
0N/A
0N/A val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y;
0N/A S32_TO_S16_SAT(dPtr[0]);
0N/A }
0N/A }
0N/A
0N/A return MLIB_SUCCESS;
0N/A}
0N/A
0N/Amlib_status FUN_NAME(4ch)(mlib_affine_param *param)
0N/A{
0N/A DECLAREVAR_BC();
0N/A DTYPE *dstLineEnd;
0N/A const mlib_s16 *mlib_filters_table;
0N/A
0N/A if (filter == MLIB_BICUBIC) {
0N/A mlib_filters_table = (mlib_s16 *) mlib_filters_s16_bc;
0N/A }
0N/A else {
0N/A mlib_filters_table = (mlib_s16 *) mlib_filters_s16_bc2;
0N/A }
0N/A
0N/A for (j = yStart; j <= yFinish; j++) {
0N/A mlib_s32 xf0, xf1, xf2, xf3;
0N/A mlib_s32 yf0, yf1, yf2, yf3;
0N/A mlib_s32 c0, c1, c2, c3, val0;
0N/A mlib_s32 filterpos, k;
0N/A mlib_s16 *fptr;
0N/A mlib_s32 s0, s1, s2, s3;
0N/A mlib_s32 s4, s5, s6, s7;
0N/A
0N/A CLIP(4);
0N/A dstLineEnd = (DTYPE *) dstData + 4 * xRight;
0N/A
0N/A for (k = 0; k < 4; k++) {
0N/A mlib_s32 X1 = X;
0N/A mlib_s32 Y1 = Y;
0N/A DTYPE *dPtr = dstPixelPtr + k;
0N/A
0N/A filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK;
0N/A fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
0N/A
0N/A xf0 = fptr[0];
0N/A xf1 = fptr[1];
0N/A xf2 = fptr[2];
0N/A xf3 = fptr[3];
0N/A
0N/A filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK;
0N/A fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
0N/A
0N/A yf0 = fptr[0];
0N/A yf1 = fptr[1];
0N/A yf2 = fptr[2];
0N/A yf3 = fptr[3];
0N/A
0N/A xSrc = (X1 >> MLIB_SHIFT) - 1;
0N/A ySrc = (Y1 >> MLIB_SHIFT) - 1;
0N/A
0N/A srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 4 * xSrc + k;
0N/A s0 = srcPixelPtr[0];
0N/A s1 = srcPixelPtr[4];
0N/A s2 = srcPixelPtr[8];
0N/A s3 = srcPixelPtr[12];
0N/A
0N/A srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
0N/A s4 = srcPixelPtr[0];
0N/A s5 = srcPixelPtr[4];
0N/A s6 = srcPixelPtr[8];
0N/A s7 = srcPixelPtr[12];
0N/A
0N/A for (; dPtr <= (dstLineEnd - 1); dPtr += 4) {
0N/A
0N/A X1 += dX;
0N/A Y1 += dY;
0N/A
0N/A c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X;
0N/A c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3 + ROUND_X) >> SHIFT_X;
0N/A srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
0N/A c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[4] * xf1 +
0N/A srcPixelPtr[8] * xf2 + srcPixelPtr[12] * xf3 + ROUND_X) >> SHIFT_X;
0N/A srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
0N/A c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[4] * xf1 +
0N/A srcPixelPtr[8] * xf2 + srcPixelPtr[12] * xf3 + ROUND_X) >> SHIFT_X;
0N/A
0N/A filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK;
0N/A fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
0N/A
0N/A xf0 = fptr[0];
0N/A xf1 = fptr[1];
0N/A xf2 = fptr[2];
0N/A xf3 = fptr[3];
0N/A
0N/A val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y;
0N/A
0N/A filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK;
0N/A fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
0N/A
0N/A yf0 = fptr[0];
0N/A yf1 = fptr[1];
0N/A yf2 = fptr[2];
0N/A yf3 = fptr[3];
0N/A
0N/A S32_TO_S16_SAT(dPtr[0]);
0N/A
0N/A xSrc = (X1 >> MLIB_SHIFT) - 1;
0N/A ySrc = (Y1 >> MLIB_SHIFT) - 1;
0N/A
0N/A srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 4 * xSrc + k;
0N/A s0 = srcPixelPtr[0];
0N/A s1 = srcPixelPtr[4];
0N/A s2 = srcPixelPtr[8];
0N/A s3 = srcPixelPtr[12];
0N/A
0N/A srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
0N/A s4 = srcPixelPtr[0];
0N/A s5 = srcPixelPtr[4];
0N/A s6 = srcPixelPtr[8];
0N/A s7 = srcPixelPtr[12];
0N/A }
0N/A
0N/A c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X;
0N/A c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3 + ROUND_X) >> SHIFT_X;
0N/A srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
0N/A c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[4] * xf1 +
0N/A srcPixelPtr[8] * xf2 + srcPixelPtr[12] * xf3 + ROUND_X) >> SHIFT_X;
0N/A srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
0N/A c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[4] * xf1 +
0N/A srcPixelPtr[8] * xf2 + srcPixelPtr[12] * xf3 + ROUND_X) >> SHIFT_X;
0N/A
0N/A val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y;
0N/A S32_TO_S16_SAT(dPtr[0]);
0N/A }
0N/A }
0N/A
0N/A return MLIB_SUCCESS;
0N/A}
0N/A
0N/A#endif /* __sparc ( for SPARC, using floating-point multiplies is faster ) */
0N/A
0N/A/***************************************************************/