0N/A/*
2362N/A * Copyright (c) 2003, 2008, Oracle and/or its affiliates. All rights reserved.
0N/A * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
0N/A *
0N/A * This code is free software; you can redistribute it and/or modify it
0N/A * under the terms of the GNU General Public License version 2 only, as
2362N/A * published by the Free Software Foundation. Oracle designates this
0N/A * particular file as subject to the "Classpath" exception as provided
2362N/A * by Oracle in the LICENSE file that accompanied this code.
0N/A *
0N/A * This code is distributed in the hope that it will be useful, but WITHOUT
0N/A * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
0N/A * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
0N/A * version 2 for more details (a copy is included in the LICENSE file that
0N/A * accompanied this code).
0N/A *
0N/A * You should have received a copy of the GNU General Public License version
0N/A * 2 along with this work; if not, write to the Free Software Foundation,
0N/A * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
0N/A *
2362N/A * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
2362N/A * or visit www.oracle.com if you need additional information or have any
2362N/A * questions.
0N/A */
0N/A
0N/A#if !defined(JAVA2D_NO_MLIB) || defined(MLIB_ADD_SUFF)
0N/A
0N/A#include "vis_AlphaMacros.h"
0N/A
0N/A/***************************************************************/
0N/A
/*
 * Gray2Argb(x): builds a packed 32-bit value with 0xff in the top byte and
 * the gray level x replicated in the three lower bytes.
 *
 * The argument and the whole expansion are parenthesized so the macro can
 * be used with compound arguments (e.g. a | b) and inside larger
 * expressions.  x is evaluated three times, so it must not have side
 * effects, and it is expected to be in the range 0..255.
 */
#define Gray2Argb(x)                                    \
    (0xff000000 | ((x) << 16) | ((x) << 8) | (x))
0N/A
0N/A/***************************************************************/
0N/A
/*
 * BMASK_FOR_ARGB: when VIS >= 0x200 this expands to a vis_write_bmask()
 * call, including its terminating semicolon; otherwise it expands to
 * nothing.  Because the semicolon is part of the expansion, the functions
 * in this file invoke the macro on a line of its own without one.
 * NOTE(review): the mask constant presumably sets up the byte shuffle used
 * by ARGB2ABGR_DB / ARGB2ABGR_FL -- confirm against vis_AlphaMacros.h.
 */
0N/A#if VIS >= 0x200
0N/A
0N/A#define BMASK_FOR_ARGB \
0N/A vis_write_bmask(0x03214765, 0);
0N/A
0N/A#else
0N/A
0N/A#define BMASK_FOR_ARGB
0N/A
0N/A#endif
0N/A
0N/A/***************************************************************/
0N/A
/*
 * RGB2ABGR_DB(x): combines x with `amask` through vis_for() and stores the
 * result back into x, then applies ARGB2ABGR_DB to x.
 * x must be an assignable mlib_d64, and a variable named `amask` must be in
 * scope at the expansion site (IntRgbToFourByteAbgrConvert sets it to
 * vis_to_double_dup(0xFF000000)).
 * NOTE(review): vis_for is presumably the VIS 64-bit logical OR, which
 * would force the top byte of each 32-bit pixel to 0xFF -- confirm.
 */
0N/A#define RGB2ABGR_DB(x) \
0N/A x = vis_for(x, amask); \
0N/A ARGB2ABGR_DB(x)
0N/A
0N/A/***************************************************************/
0N/A
/*
 * INSERT_U8_34R: consumes the three doublewords sd0..sd2 (24 bytes) and
 * produces the four doublewords dd0..dd3 (32 bytes) through a cascade of
 * vis_fpmerge() byte interleaves, mixing in bytes taken from sFF.
 *
 * The expansion site must declare sd0..sd2, the temporaries sda..sdm, sFF
 * and dd0..dd3.  The expansion has no trailing semicolon.
 * NOTE(review): presumably this turns eight packed 3-byte pixels into
 * eight 4-byte pixels with an sFF byte inserted in front of each, keeping
 * the channel order -- confirm against the mediaLib channel-insert macros.
 */
0N/A#define INSERT_U8_34R \
0N/A sda = vis_fpmerge(vis_read_hi(sd0), vis_read_lo(sd1)); \
0N/A sdb = vis_fpmerge(vis_read_lo(sd0), vis_read_hi(sd2)); \
0N/A sdc = vis_fpmerge(vis_read_hi(sd1), vis_read_lo(sd2)); \
0N/A sdd = vis_fpmerge(vis_read_hi(sda), vis_read_lo(sdb)); \
0N/A sde = vis_fpmerge(vis_read_lo(sda), vis_read_hi(sdc)); \
0N/A sdf = vis_fpmerge(vis_read_hi(sdb), vis_read_lo(sdc)); \
0N/A sdg = vis_fpmerge(vis_read_hi(sdd), vis_read_lo(sde)); \
0N/A sdh = vis_fpmerge(vis_read_lo(sdd), vis_read_hi(sdf)); \
0N/A sdi = vis_fpmerge(vis_read_hi(sde), vis_read_lo(sdf)); \
0N/A sdj = vis_fpmerge(vis_read_hi(sdg), vis_read_hi(sdi)); \
0N/A sdk = vis_fpmerge(vis_read_lo(sdg), vis_read_lo(sdi)); \
0N/A sdl = vis_fpmerge(vis_read_hi(sFF), vis_read_hi(sdh)); \
0N/A sdm = vis_fpmerge(vis_read_lo(sFF), vis_read_lo(sdh)); \
0N/A dd0 = vis_fpmerge(vis_read_hi(sdl), vis_read_hi(sdj)); \
0N/A dd1 = vis_fpmerge(vis_read_lo(sdl), vis_read_lo(sdj)); \
0N/A dd2 = vis_fpmerge(vis_read_hi(sdm), vis_read_hi(sdk)); \
0N/A dd3 = vis_fpmerge(vis_read_lo(sdm), vis_read_lo(sdk))
0N/A
0N/A/***************************************************************/
0N/A
0N/Avoid IntArgbToIntAbgrConvert_line(mlib_s32 *srcBase,
0N/A mlib_s32 *dstBase,
0N/A mlib_s32 width)
0N/A{
0N/A mlib_s32 *dst_end = dstBase + width;
0N/A mlib_d64 dd;
0N/A mlib_f32 ff;
0N/A
0N/A BMASK_FOR_ARGB
0N/A
0N/A if ((mlib_s32)srcBase & 7) {
0N/A ff = *(mlib_f32*)srcBase;
0N/A ARGB2ABGR_FL(ff)
0N/A *(mlib_f32*)dstBase = ff;
0N/A srcBase++;
0N/A dstBase++;
0N/A }
0N/A
0N/A if ((mlib_s32)dstBase & 7) {
0N/A#pragma pipeloop(0)
0N/A for (; dstBase <= (dst_end - 2); dstBase += 2) {
0N/A dd = *(mlib_d64*)srcBase;
0N/A ARGB2ABGR_DB(dd)
0N/A ((mlib_f32*)dstBase)[0] = vis_read_hi(dd);
0N/A ((mlib_f32*)dstBase)[1] = vis_read_lo(dd);
0N/A srcBase += 2;
0N/A }
0N/A } else {
0N/A#pragma pipeloop(0)
0N/A for (; dstBase <= (dst_end - 2); dstBase += 2) {
0N/A dd = *(mlib_d64*)srcBase;
0N/A ARGB2ABGR_DB(dd)
0N/A *(mlib_d64*)dstBase = dd;
0N/A srcBase += 2;
0N/A }
0N/A }
0N/A
0N/A if (dstBase < dst_end) {
0N/A ff = *(mlib_f32*)srcBase;
0N/A ARGB2ABGR_FL(ff)
0N/A *(mlib_f32*)dstBase = ff;
0N/A }
0N/A}
0N/A
0N/A/***************************************************************/
0N/A
0N/Avoid ADD_SUFF(FourByteAbgrToIntArgbConvert)(BLIT_PARAMS)
0N/A{
0N/A mlib_u32 *argb = (mlib_u32 *)dstBase;
0N/A mlib_u8 *pabgr = (mlib_u8 *)srcBase;
0N/A mlib_s32 dstScan = (pDstInfo)->scanStride;
0N/A mlib_s32 srcScan = (pSrcInfo)->scanStride;
0N/A mlib_s32 i, j, count, left;
0N/A mlib_d64 w_abgr;
0N/A
0N/A if (width < 16) {
0N/A for (j = 0; j < height; j++) {
0N/A mlib_u8 *src = srcBase;
0N/A mlib_s32 *dst = dstBase;
0N/A
0N/A for (i = 0; i < width; i++) {
0N/A *dst++ = (src[0] << 24) | (src[3] << 16) |
0N/A (src[2] << 8) | (src[1]);
0N/A src += 4;
0N/A }
0N/A
0N/A PTR_ADD(dstBase, dstScan);
0N/A PTR_ADD(srcBase, srcScan);
0N/A }
0N/A return;
0N/A }
0N/A
0N/A if (dstScan == 4*width && srcScan == dstScan) {
0N/A width *= height;
0N/A height = 1;
0N/A }
0N/A count = width >> 1;
0N/A left = width & 1;
0N/A
0N/A BMASK_FOR_ARGB
0N/A
0N/A if ((((mlib_addr)pabgr & 3) == 0) && ((srcScan & 3) == 0)) {
0N/A mlib_u32 *abgr = (mlib_u32 *)pabgr;
0N/A
0N/A dstScan >>= 2;
0N/A srcScan >>= 2;
0N/A
0N/A for (i = 0; i < height; i++, argb += dstScan, abgr += srcScan) {
0N/A if ((((mlib_addr) argb | (mlib_addr) abgr) & 7) == 0) {
0N/A mlib_d64 *d_abgr = (mlib_d64 *) abgr;
0N/A mlib_d64 *d_argb = (mlib_d64 *) argb;
0N/A
0N/A#pragma pipeloop(0)
0N/A for (j = 0; j < count; j++) {
0N/A w_abgr = d_abgr[j];
0N/A ARGB2ABGR_DB(w_abgr)
0N/A d_argb[j] = w_abgr;
0N/A }
0N/A
0N/A if (left) {
0N/A w_abgr = d_abgr[count];
0N/A ARGB2ABGR_DB(w_abgr)
0N/A ((mlib_f32 *) argb)[2 * count] = vis_read_hi(w_abgr);
0N/A }
0N/A } else {
0N/A mlib_f32 v_abgr0, v_abgr1;
0N/A
0N/A#pragma pipeloop(0)
0N/A for (j = 0; j < count; j++) {
0N/A v_abgr0 = ((mlib_f32 *) abgr)[2 * j];
0N/A v_abgr1 = ((mlib_f32 *) abgr)[2 * j + 1];
0N/A w_abgr = vis_freg_pair(v_abgr0, v_abgr1);
0N/A ARGB2ABGR_DB(w_abgr)
0N/A ((mlib_f32 *) argb)[2 * j] = vis_read_hi(w_abgr);
0N/A ((mlib_f32 *) argb)[2 * j + 1] = vis_read_lo(w_abgr);
0N/A }
0N/A
0N/A if (left) {
0N/A v_abgr0 = ((mlib_f32 *) abgr)[2 * count];
0N/A w_abgr = vis_freg_pair(v_abgr0, 0);
0N/A ARGB2ABGR_DB(w_abgr)
0N/A ((mlib_f32 *) argb)[2 * count] = vis_read_hi(w_abgr);
0N/A }
0N/A }
0N/A }
0N/A } else { /* abgr is not aligned */
0N/A mlib_u8 *abgr = pabgr;
0N/A mlib_d64 *d_abgr, db0, db1;
0N/A
0N/A dstScan >>= 2;
0N/A
0N/A for (i = 0; i < height; i++, argb += dstScan, abgr += srcScan) {
0N/A d_abgr = vis_alignaddr(abgr, 0);
0N/A db0 = *d_abgr++;
0N/A
0N/A if (((mlib_addr) argb & 7) == 0) {
0N/A mlib_d64 *d_argb = (mlib_d64 *) argb;
0N/A
0N/A#pragma pipeloop(0)
0N/A for (j = 0; j < count; j++) {
0N/A db1 = d_abgr[j];
0N/A w_abgr = vis_faligndata(db0, db1);
0N/A db0 = db1;
0N/A ARGB2ABGR_DB(w_abgr)
0N/A d_argb[j] = w_abgr;
0N/A }
0N/A
0N/A if (left) {
0N/A db1 = d_abgr[j];
0N/A w_abgr = vis_faligndata(db0, db1);
0N/A ARGB2ABGR_DB(w_abgr)
0N/A ((mlib_f32 *) argb)[2 * count] = vis_read_hi(w_abgr);
0N/A }
0N/A } else {
0N/A mlib_d64 w_abgr;
0N/A
0N/A db1 = *d_abgr++;
0N/A w_abgr = vis_faligndata(db0, db1);
0N/A db0 = db1;
0N/A#pragma pipeloop(0)
0N/A for (j = 0; j < count; j++) {
0N/A ARGB2ABGR_DB(w_abgr)
0N/A ((mlib_f32 *) argb)[2 * j] = vis_read_hi(w_abgr);
0N/A ((mlib_f32 *) argb)[2 * j + 1] = vis_read_lo(w_abgr);
0N/A db1 = d_abgr[j];
0N/A w_abgr = vis_faligndata(db0, db1);
0N/A db0 = db1;
0N/A }
0N/A
0N/A if (left) {
0N/A ARGB2ABGR_DB(w_abgr)
0N/A ((mlib_f32 *) argb)[2 * count] = vis_read_hi(w_abgr);
0N/A }
0N/A }
0N/A }
0N/A }
0N/A}
0N/A
0N/A/***************************************************************/
0N/A
0N/Avoid ADD_SUFF(IntArgbToFourByteAbgrConvert)(BLIT_PARAMS)
0N/A{
0N/A mlib_u32 *argb = (mlib_u32 *)srcBase;
0N/A mlib_u8 *abgr = (mlib_u8 *)dstBase;
0N/A mlib_s32 dstScan = (pDstInfo)->scanStride;
0N/A mlib_s32 srcScan = (pSrcInfo)->scanStride;
0N/A mlib_s32 i, j, count, left;
0N/A mlib_d64 w_abgr;
0N/A
0N/A if (width < 16) {
0N/A for (j = 0; j < height; j++) {
0N/A mlib_s32 *src = srcBase;
0N/A mlib_u8 *dst = dstBase;
0N/A
0N/A for (i = 0; i < width; i++) {
0N/A mlib_u32 x = *src++;
0N/A dst[0] = x >> 24;
0N/A dst[1] = x;
0N/A dst[2] = x >> 8;
0N/A dst[3] = x >> 16;
0N/A dst += 4;
0N/A }
0N/A
0N/A PTR_ADD(dstBase, dstScan);
0N/A PTR_ADD(srcBase, srcScan);
0N/A }
0N/A return;
0N/A }
0N/A
0N/A if (dstScan == 4*width && srcScan == dstScan) {
0N/A width *= height;
0N/A height = 1;
0N/A }
0N/A count = width >> 1;
0N/A left = width & 1;
0N/A
0N/A BMASK_FOR_ARGB
0N/A
0N/A srcScan >>= 2;
0N/A
0N/A for (i = 0; i < height; i++, argb += srcScan, abgr += dstScan) {
0N/A
0N/A if ((((mlib_addr) abgr | (mlib_addr) argb) & 7) == 0) {
0N/A mlib_d64 *d_argb = (mlib_d64 *) argb;
0N/A mlib_d64 *d_abgr = (mlib_d64 *) abgr;
0N/A
0N/A#pragma pipeloop(0)
0N/A for (j = 0; j < count; j++) {
0N/A w_abgr = d_argb[j];
0N/A ARGB2ABGR_DB(w_abgr)
0N/A d_abgr[j] = w_abgr;
0N/A }
0N/A
0N/A if (left) {
0N/A w_abgr = d_argb[count];
0N/A ARGB2ABGR_DB(w_abgr)
0N/A ((mlib_f32 *) abgr)[2 * count] = vis_read_hi(w_abgr);
0N/A }
0N/A
0N/A } else if (((mlib_addr) abgr & 3) == 0) {
0N/A mlib_f32 v_argb0, v_argb1;
0N/A
0N/A#pragma pipeloop(0)
0N/A for (j = 0; j < count; j++) {
0N/A v_argb0 = ((mlib_f32 *) argb)[2 * j];
0N/A v_argb1 = ((mlib_f32 *) argb)[2 * j + 1];
0N/A w_abgr = vis_freg_pair(v_argb0, v_argb1);
0N/A
0N/A ARGB2ABGR_DB(w_abgr)
0N/A ((mlib_f32 *) abgr)[2 * j] = vis_read_hi(w_abgr);
0N/A ((mlib_f32 *) abgr)[2 * j + 1] = vis_read_lo(w_abgr);
0N/A }
0N/A
0N/A if (left) {
0N/A v_argb0 = ((mlib_f32 *) argb)[2 * count];
0N/A w_abgr = vis_freg_pair(v_argb0, vis_fzeros());
0N/A
0N/A ARGB2ABGR_DB(w_abgr)
0N/A ((mlib_f32 *) abgr)[2 * count] = vis_read_hi(w_abgr);
0N/A }
0N/A
0N/A } else { /* abgr is not aligned */
0N/A
0N/A mlib_u8 *pend = abgr + (width << 2) - 1;
0N/A mlib_d64 *d_abgr, db0, db1;
0N/A mlib_s32 emask, off;
0N/A mlib_f32 *f_argb = (mlib_f32 *) argb;
0N/A
0N/A off = (mlib_addr)abgr & 7;
0N/A vis_alignaddr((void *)(8 - off), 0);
0N/A d_abgr = (mlib_d64 *) (abgr - off);
0N/A
0N/A db1 = vis_freg_pair(*f_argb++, *f_argb++);
0N/A ARGB2ABGR_DB(db1)
0N/A w_abgr = vis_faligndata(db1, db1);
0N/A emask = vis_edge8(abgr, pend);
0N/A vis_pst_8(w_abgr, d_abgr++, emask);
0N/A db0 = db1;
0N/A
0N/A db1 = vis_freg_pair(f_argb[0], f_argb[1]);
0N/A#pragma pipeloop(0)
0N/A for (; (mlib_addr)d_abgr < (mlib_addr)(pend - 6); ) {
0N/A ARGB2ABGR_DB(db1)
0N/A w_abgr = vis_faligndata(db0, db1);
0N/A *d_abgr++ = w_abgr;
0N/A db0 = db1;
0N/A f_argb += 2;
0N/A db1 = vis_freg_pair(f_argb[0], f_argb[1]);
0N/A }
0N/A
0N/A if ((mlib_addr)d_abgr <= (mlib_addr)pend) {
0N/A ARGB2ABGR_DB(db1)
0N/A w_abgr = vis_faligndata(db0, db1);
0N/A emask = vis_edge8(d_abgr, pend);
0N/A vis_pst_8(w_abgr, d_abgr, emask);
0N/A }
0N/A }
0N/A }
0N/A}
0N/A
0N/A/***************************************************************/
0N/A
0N/Avoid ADD_SUFF(IntRgbToFourByteAbgrConvert)(BLIT_PARAMS)
0N/A{
0N/A mlib_u32 *argb = (mlib_u32 *)srcBase;
0N/A mlib_u8 *abgr = (mlib_u8 *)dstBase;
0N/A mlib_s32 dstScan = (pDstInfo)->scanStride;
0N/A mlib_s32 srcScan = (pSrcInfo)->scanStride;
0N/A mlib_s32 i, j, count, left;
0N/A mlib_d64 w_abgr;
0N/A mlib_d64 amask = vis_to_double_dup(0xFF000000);
0N/A
0N/A if (width < 16) {
0N/A for (j = 0; j < height; j++) {
0N/A mlib_s32 *src = srcBase;
0N/A mlib_u8 *dst = dstBase;
0N/A
0N/A for (i = 0; i < width; i++) {
0N/A mlib_u32 x = *src++;
0N/A dst[0] = 0xFF;
0N/A dst[1] = x;
0N/A dst[2] = x >> 8;
0N/A dst[3] = x >> 16;
0N/A dst += 4;
0N/A }
0N/A
0N/A PTR_ADD(dstBase, dstScan);
0N/A PTR_ADD(srcBase, srcScan);
0N/A }
0N/A return;
0N/A }
0N/A
0N/A if (dstScan == 4*width && srcScan == dstScan) {
0N/A width *= height;
0N/A height = 1;
0N/A }
0N/A count = width >> 1;
0N/A left = width & 1;
0N/A
0N/A BMASK_FOR_ARGB
0N/A
0N/A srcScan >>= 2;
0N/A
0N/A for (i = 0; i < height; i++, argb += srcScan, abgr += dstScan) {
0N/A
0N/A if ((((mlib_addr) abgr | (mlib_addr) argb) & 7) == 0) {
0N/A mlib_d64 *d_argb = (mlib_d64 *) argb;
0N/A mlib_d64 *d_abgr = (mlib_d64 *) abgr;
0N/A
0N/A#pragma pipeloop(0)
0N/A for (j = 0; j < count; j++) {
0N/A w_abgr = d_argb[j];
0N/A RGB2ABGR_DB(w_abgr)
0N/A d_abgr[j] = w_abgr;
0N/A }
0N/A
0N/A if (left) {
0N/A w_abgr = d_argb[count];
0N/A RGB2ABGR_DB(w_abgr)
0N/A ((mlib_f32 *) abgr)[2 * count] = vis_read_hi(w_abgr);
0N/A }
0N/A
0N/A } else if (((mlib_addr) abgr & 3) == 0) {
0N/A mlib_f32 v_argb0, v_argb1;
0N/A
0N/A#pragma pipeloop(0)
0N/A for (j = 0; j < count; j++) {
0N/A v_argb0 = ((mlib_f32 *) argb)[2 * j];
0N/A v_argb1 = ((mlib_f32 *) argb)[2 * j + 1];
0N/A w_abgr = vis_freg_pair(v_argb0, v_argb1);
0N/A
0N/A RGB2ABGR_DB(w_abgr)
0N/A ((mlib_f32 *) abgr)[2 * j] = vis_read_hi(w_abgr);
0N/A ((mlib_f32 *) abgr)[2 * j + 1] = vis_read_lo(w_abgr);
0N/A }
0N/A
0N/A if (left) {
0N/A v_argb0 = ((mlib_f32 *) argb)[2 * count];
0N/A w_abgr = vis_freg_pair(v_argb0, vis_fzeros());
0N/A
0N/A RGB2ABGR_DB(w_abgr)
0N/A ((mlib_f32 *) abgr)[2 * count] = vis_read_hi(w_abgr);
0N/A }
0N/A
0N/A } else { /* abgr is not aligned */
0N/A
0N/A mlib_u8 *pend = abgr + (width << 2) - 1;
0N/A mlib_d64 *d_abgr, db0, db1;
0N/A mlib_s32 emask, off;
0N/A mlib_f32 *f_argb = (mlib_f32 *) argb;
0N/A
0N/A off = (mlib_addr)abgr & 7;
0N/A vis_alignaddr((void *)(8 - off), 0);
0N/A d_abgr = (mlib_d64 *) (abgr - off);
0N/A
0N/A db1 = vis_freg_pair(*f_argb++, *f_argb++);
0N/A RGB2ABGR_DB(db1)
0N/A w_abgr = vis_faligndata(db1, db1);
0N/A emask = vis_edge8(abgr, pend);
0N/A vis_pst_8(w_abgr, d_abgr++, emask);
0N/A db0 = db1;
0N/A
0N/A db1 = vis_freg_pair(f_argb[0], f_argb[1]);
0N/A#pragma pipeloop(0)
0N/A for (; (mlib_addr)d_abgr < (mlib_addr)(pend - 6); ) {
0N/A RGB2ABGR_DB(db1)
0N/A w_abgr = vis_faligndata(db0, db1);
0N/A *d_abgr++ = w_abgr;
0N/A db0 = db1;
0N/A f_argb += 2;
0N/A db1 = vis_freg_pair(f_argb[0], f_argb[1]);
0N/A }
0N/A
0N/A if ((mlib_addr)d_abgr <= (mlib_addr)pend) {
0N/A RGB2ABGR_DB(db1)
0N/A w_abgr = vis_faligndata(db0, db1);
0N/A emask = vis_edge8(d_abgr, pend);
0N/A vis_pst_8(w_abgr, d_abgr, emask);
0N/A }
0N/A }
0N/A }
0N/A}
0N/A
0N/A/***************************************************************/
0N/A
0N/Avoid ADD_SUFF(ThreeByteBgrToFourByteAbgrConvert)(BLIT_PARAMS)
0N/A{
0N/A mlib_s32 dstScan = pDstInfo->scanStride;
0N/A mlib_s32 srcScan = pSrcInfo->scanStride;
0N/A mlib_d64 sd0, sd1, sd2;
0N/A mlib_d64 dd0, dd1, dd2, dd3;
0N/A mlib_d64 sda, sdb, sdc, sdd;
0N/A mlib_d64 sde, sdf, sdg, sdh;
0N/A mlib_d64 sdi, sdj, sdk, sdl;
0N/A mlib_d64 sdm;
0N/A mlib_d64 sFF;
0N/A mlib_s32 r, g, b;
0N/A mlib_s32 i, j;
0N/A
0N/A if (width < 16) {
0N/A for (j = 0; j < height; j++) {
0N/A mlib_u8 *src = srcBase;
0N/A mlib_u8 *dst = dstBase;
0N/A
0N/A#pragma pipeloop(0)
0N/A for (i = 0; i < width; i++) {
0N/A dst[0] = 0xFF;
0N/A dst[1] = src[0];
0N/A dst[2] = src[1];
0N/A dst[3] = src[2];
0N/A src += 3;
0N/A dst += 4;
0N/A }
0N/A
0N/A PTR_ADD(dstBase, dstScan);
0N/A PTR_ADD(srcBase, srcScan);
0N/A }
0N/A return;
0N/A }
0N/A
0N/A if (dstScan == 4*width && srcScan == 3*width) {
0N/A width *= height;
0N/A height = 1;
0N/A }
0N/A
0N/A sFF = vis_fone();
0N/A
0N/A for (j = 0; j < height; j++) {
0N/A mlib_u8 *pSrc = srcBase;
0N/A mlib_u8 *pDst = dstBase;
0N/A
0N/A if (!(((mlib_s32)pSrc | (mlib_s32)pDst) & 7)) {
0N/A#pragma pipeloop(0)
0N/A for (i = 0; i <= ((mlib_s32)width - 8); i += 8) {
0N/A sd0 = ((mlib_d64*)pSrc)[0];
0N/A sd1 = ((mlib_d64*)pSrc)[1];
0N/A sd2 = ((mlib_d64*)pSrc)[2];
0N/A pSrc += 3*8;
0N/A INSERT_U8_34R;
0N/A ((mlib_d64*)pDst)[0] = dd0;
0N/A ((mlib_d64*)pDst)[1] = dd1;
0N/A ((mlib_d64*)pDst)[2] = dd2;
0N/A ((mlib_d64*)pDst)[3] = dd3;
0N/A pDst += 4*8;
0N/A }
0N/A
0N/A for (; i < width; i++) {
0N/A b = pSrc[0];
0N/A g = pSrc[1];
0N/A r = pSrc[2];
0N/A ((mlib_u16*)pDst)[0] = 0xff00 | b;
0N/A ((mlib_u16*)pDst)[1] = (g << 8) | r;
0N/A pSrc += 3;
0N/A pDst += 4;
0N/A }
0N/A } else if (!((mlib_s32)pDst & 1)) {
0N/A#pragma pipeloop(0)
0N/A for (i = 0; i < width; i++) {
0N/A b = pSrc[0];
0N/A g = pSrc[1];
0N/A r = pSrc[2];
0N/A ((mlib_u16*)pDst)[0] = 0xff00 | b;
0N/A ((mlib_u16*)pDst)[1] = (g << 8) | r;
0N/A pSrc += 3;
0N/A pDst += 4;
0N/A }
0N/A } else {
0N/A *pDst++ = 0xff;
0N/A#pragma pipeloop(0)
0N/A for (i = 0; i < (mlib_s32)width - 1; i++) {
0N/A b = pSrc[0];
0N/A g = pSrc[1];
0N/A r = pSrc[2];
0N/A ((mlib_u16*)pDst)[0] = (b << 8) | g;
0N/A ((mlib_u16*)pDst)[1] = (r << 8) | 0xff;
0N/A pSrc += 3;
0N/A pDst += 4;
0N/A }
0N/A if (width) {
0N/A pDst[0] = pSrc[0];
0N/A pDst[1] = pSrc[1];
0N/A pDst[2] = pSrc[2];
0N/A }
0N/A }
0N/A
0N/A PTR_ADD(dstBase, dstScan);
0N/A PTR_ADD(srcBase, srcScan);
0N/A }
0N/A}
0N/A
0N/A/***************************************************************/
0N/A
/*
 * LOAD_BGR(dd): fetches the source pixel selected by (tmpsxloc >> shift)
 * into dd and then advances tmpsxloc by sxinc.  The expansion site must
 * provide pSrc, tmpsxloc, shift and sxinc.  Two implementations are kept;
 * the first one is the one compiled in.
 */
0N/A#if 1
0N/A
/*
 * Active variant: sp points one byte BEFORE the selected 3-byte pixel, ap
 * is sp rounded down to an 8-byte boundary, and dd is formed from the two
 * aligned doublewords ap[0] and ap[1] with vis_faligndata().  It calls
 * vis_alignaddr() itself on every use.
 * NOTE(review): presumably the upper four bytes of dd are then
 * <preceding byte>,B,G,R; the caller ORs them with amask -- confirm.
 * The loads can touch bytes just outside the pixel (before it and up to
 * the end of ap[1]).
 */
0N/A#define LOAD_BGR(dd) { \
0N/A mlib_u8 *sp = pSrc - 1 + 3*(tmpsxloc >> shift); \
0N/A mlib_d64 *ap = (void*)((mlib_addr)sp &~ 7); \
0N/A vis_alignaddr(sp, 0); \
0N/A dd = vis_faligndata(ap[0], ap[1]); \
0N/A tmpsxloc += sxinc; \
0N/A}
0N/A
0N/A#else
0N/A
/*
 * Disabled variant: pushes the bytes at sp+2, sp+1 and sp, then amask, into
 * dd with successive vis_faligndata() calls.  It additionally needs amask
 * in scope and relies on an align offset set up by the caller.
 */
0N/A#define LOAD_BGR(dd) { \
0N/A mlib_u8 *sp = pSrc + 3*(tmpsxloc >> shift); \
0N/A dd = vis_faligndata(vis_ld_u8(sp + 2), dd); \
0N/A dd = vis_faligndata(vis_ld_u8(sp + 1), dd); \
0N/A dd = vis_faligndata(vis_ld_u8(sp ), dd); \
0N/A dd = vis_faligndata(amask, dd); \
0N/A tmpsxloc += sxinc; \
0N/A}
0N/A
0N/A#endif
0N/A
0N/A/***************************************************************/
0N/A
0N/Avoid ADD_SUFF(ThreeByteBgrToFourByteAbgrScaleConvert)(SCALE_PARAMS)
0N/A{
0N/A mlib_s32 dstScan = pDstInfo->scanStride;
0N/A mlib_s32 srcScan = pSrcInfo->scanStride;
0N/A mlib_d64 d0;
0N/A mlib_d64 amask;
0N/A mlib_s32 r, g, b;
0N/A mlib_s32 i, j;
0N/A
0N/A if (width < 16 /*|| (((mlib_s32)dstBase | dstScan) & 3)*/) {
0N/A for (j = 0; j < height; j++) {
0N/A mlib_u8 *pSrc = srcBase;
0N/A mlib_u8 *pDst = dstBase;
0N/A mlib_s32 tmpsxloc = sxloc;
0N/A
0N/A PTR_ADD(pSrc, (syloc >> shift) * srcScan);
0N/A
0N/A#pragma pipeloop(0)
0N/A for (i = 0; i < width; i++) {
0N/A mlib_u8 *pp = pSrc + 3*(tmpsxloc >> shift);
0N/A pDst[0] = 0xff;
0N/A pDst[1] = pp[0];
0N/A pDst[2] = pp[1];
0N/A pDst[3] = pp[2];
0N/A tmpsxloc += sxinc;
0N/A pDst += 4;
0N/A }
0N/A
0N/A PTR_ADD(dstBase, dstScan);
0N/A syloc += syinc;
0N/A }
0N/A return;
0N/A }
0N/A
0N/A vis_alignaddr(NULL, 7);
0N/A amask = vis_to_double_dup(0xFF000000);
0N/A
0N/A for (j = 0; j < height; j++) {
0N/A mlib_u8 *pSrc = srcBase;
0N/A mlib_u8 *pDst = dstBase;
0N/A mlib_s32 tmpsxloc = sxloc;
0N/A
0N/A PTR_ADD(pSrc, (syloc >> shift) * srcScan);
0N/A
0N/A if (!((mlib_s32)pDst & 3)) {
0N/A#pragma pipeloop(0)
0N/A for (i = 0; i < width; i++) {
0N/A LOAD_BGR(d0);
0N/A ((mlib_f32*)pDst)[0] = vis_fors(vis_read_hi(d0),
0N/A vis_read_hi(amask));
0N/A pDst += 4;
0N/A }
0N/A } else if (!((mlib_s32)pDst & 1)) {
0N/A#pragma pipeloop(0)
0N/A for (i = 0; i < width; i++) {
0N/A mlib_u8 *pp = pSrc + 3*(tmpsxloc >> shift);
0N/A tmpsxloc += sxinc;
0N/A b = pp[0];
0N/A g = pp[1];
0N/A r = pp[2];
0N/A ((mlib_u16*)pDst)[2*i ] = 0xff00 | b;
0N/A ((mlib_u16*)pDst)[2*i + 1] = (g << 8) | r;
0N/A }
0N/A } else {
0N/A *pDst++ = 0xff;
0N/A#pragma pipeloop(0)
0N/A for (i = 0; i < (mlib_s32)width - 1; i++) {
0N/A mlib_u8 *pp = pSrc + 3*(tmpsxloc >> shift);
0N/A tmpsxloc += sxinc;
0N/A b = pp[0];
0N/A g = pp[1];
0N/A r = pp[2];
0N/A ((mlib_u16*)pDst)[2*i ] = (b << 8) | g;
0N/A ((mlib_u16*)pDst)[2*i + 1] = (r << 8) | 0xff;
0N/A }
0N/A if (width) {
0N/A mlib_u8 *pp = pSrc + 3*(tmpsxloc >> shift);
0N/A tmpsxloc += sxinc;
0N/A pDst[4*i ] = pp[0];
0N/A pDst[4*i+1] = pp[1];
0N/A pDst[4*i+2] = pp[2];
0N/A }
0N/A }
0N/A
0N/A PTR_ADD(dstBase, dstScan);
0N/A syloc += syinc;
0N/A }
0N/A}
0N/A
0N/A/***************************************************************/
0N/A
0N/Avoid ADD_SUFF(ByteGrayToFourByteAbgrConvert)(BLIT_PARAMS)
0N/A{
0N/A mlib_s32 dstScan = pDstInfo->scanStride;
0N/A mlib_s32 srcScan = pSrcInfo->scanStride;
0N/A mlib_d64 d0, d1, d2, d3;
0N/A mlib_f32 ff, aa = vis_fones();
0N/A mlib_s32 i, j, x;
0N/A
0N/A if (!(((mlib_s32)dstBase | dstScan) & 3)) {
0N/A ADD_SUFF(ByteGrayToIntArgbConvert)(BLIT_CALL_PARAMS);
0N/A return;
0N/A }
0N/A
0N/A if (width < 16) {
0N/A for (j = 0; j < height; j++) {
0N/A mlib_u8 *src = srcBase;
0N/A mlib_u8 *dst = dstBase;
0N/A
0N/A for (i = 0; i < width; i++) {
0N/A x = *src++;
0N/A dst[0] = 0xff;
0N/A dst[1] = x;
0N/A dst[2] = x;
0N/A dst[3] = x;
0N/A dst += 4;
0N/A }
0N/A
0N/A PTR_ADD(dstBase, dstScan);
0N/A PTR_ADD(srcBase, srcScan);
0N/A }
0N/A return;
0N/A }
0N/A
0N/A if (srcScan == width && dstScan == 4*width) {
0N/A width *= height;
0N/A height = 1;
0N/A }
0N/A
0N/A for (j = 0; j < height; j++) {
0N/A mlib_u8 *src = srcBase;
0N/A mlib_u8 *dst = dstBase;
0N/A mlib_u8 *dst_end;
0N/A
0N/A dst_end = dst + 4*width;
0N/A
0N/A while (((mlib_s32)src & 3) && dst < dst_end) {
0N/A x = *src++;
0N/A dst[0] = 0xff;
0N/A dst[1] = x;
0N/A dst[2] = x;
0N/A dst[3] = x;
0N/A dst += 4;
0N/A }
0N/A
0N/A if (!((mlib_s32)dst & 3)) {
0N/A#pragma pipeloop(0)
0N/A for (; dst <= (dst_end - 4*4); dst += 4*4) {
0N/A ff = *(mlib_f32*)src;
0N/A d0 = vis_fpmerge(aa, ff);
0N/A d1 = vis_fpmerge(ff, ff);
0N/A d2 = vis_fpmerge(vis_read_hi(d0), vis_read_hi(d1));
0N/A d3 = vis_fpmerge(vis_read_lo(d0), vis_read_lo(d1));
0N/A ((mlib_f32*)dst)[0] = vis_read_hi(d2);
0N/A ((mlib_f32*)dst)[1] = vis_read_lo(d2);
0N/A ((mlib_f32*)dst)[2] = vis_read_hi(d3);
0N/A ((mlib_f32*)dst)[3] = vis_read_lo(d3);
0N/A src += 4;
0N/A }
0N/A } else {
0N/A mlib_d64 *dp;
0N/A
0N/A dp = vis_alignaddr(dst, 0);
0N/A d3 = vis_faligndata(dp[0], dp[0]);
0N/A vis_alignaddrl(dst, 0);
0N/A
0N/A#pragma pipeloop(0)
0N/A for (; dst <= (dst_end - 4*4); dst += 4*4) {
0N/A ff = *(mlib_f32*)src;
0N/A d0 = vis_fpmerge(aa, ff);
0N/A d1 = vis_fpmerge(ff, ff);
0N/A d2 = vis_fpmerge(vis_read_hi(d0), vis_read_hi(d1));
0N/A *dp++ = vis_faligndata(d3, d2);
0N/A d3 = vis_fpmerge(vis_read_lo(d0), vis_read_lo(d1));
0N/A *dp++ = vis_faligndata(d2, d3);
0N/A src += 4;
0N/A }
0N/A
0N/A vis_pst_8(vis_faligndata(d3, d3), dp, vis_edge8(dp, dst - 1));
0N/A }
0N/A
0N/A while (dst < dst_end) {
0N/A x = *src++;
0N/A dst[0] = 0xff;
0N/A dst[1] = x;
0N/A dst[2] = x;
0N/A dst[3] = x;
0N/A dst += 4;
0N/A }
0N/A
0N/A PTR_ADD(dstBase, dstScan);
0N/A PTR_ADD(srcBase, srcScan);
0N/A }
0N/A}
0N/A
0N/A/***************************************************************/
0N/A
0N/Avoid ADD_SUFF(IntArgbToFourByteAbgrXorBlit)(BLIT_PARAMS)
0N/A{
0N/A mlib_s32 dstScan = pDstInfo->scanStride;
0N/A mlib_s32 srcScan = pSrcInfo->scanStride;
0N/A mlib_u32 xorpixel = pCompInfo->details.xorPixel;
0N/A mlib_u32 alphamask = pCompInfo->alphaMask;
0N/A mlib_d64 dd, d_xorpixel, d_alphamask, d_zero;
0N/A mlib_s32 i, j, x, neg_mask;
0N/A
0N/A if (width < 16) {
0N/A xorpixel = (xorpixel << 24) | (xorpixel >> 8);
0N/A alphamask = (alphamask << 24) | (alphamask >> 8);
0N/A
0N/A for (j = 0; j < height; j++) {
0N/A mlib_s32 *src = srcBase;
0N/A mlib_u8 *dst = dstBase;
0N/A
0N/A for (i = 0; i < width; i++) {
0N/A x = src[i];
0N/A neg_mask = x >> 31;
0N/A x = (x ^ xorpixel) & (neg_mask &~ alphamask);
0N/A dst[0] ^= x >> 24;
0N/A dst[1] ^= x;
0N/A dst[2] ^= x >> 8;
0N/A dst[3] ^= x >> 16;
0N/A dst += 4;
0N/A }
0N/A
0N/A PTR_ADD(dstBase, dstScan);
0N/A PTR_ADD(srcBase, srcScan);
0N/A }
0N/A return;
0N/A }
0N/A
0N/A if (srcScan == 4*width && dstScan == 4*width) {
0N/A width *= height;
0N/A height = 1;
0N/A }
0N/A
0N/A d_zero = vis_fzero();
0N/A d_xorpixel = vis_freg_pair(vis_ldfa_ASI_PL(&xorpixel),
0N/A vis_ldfa_ASI_PL(&xorpixel));
0N/A d_alphamask = vis_freg_pair(vis_ldfa_ASI_PL(&alphamask),
0N/A vis_ldfa_ASI_PL(&alphamask));
0N/A
0N/A dd = vis_freg_pair(vis_read_hi(d_xorpixel), vis_read_hi(d_alphamask));
0N/A ARGB2ABGR_DB(dd)
0N/A xorpixel = ((mlib_s32*)&dd)[0];
0N/A alphamask = ((mlib_s32*)&dd)[1];
0N/A
0N/A for (j = 0; j < height; j++) {
0N/A mlib_s32 *src = srcBase;
0N/A mlib_u8 *dst = dstBase;
0N/A mlib_u8 *dst_end;
0N/A
0N/A dst_end = dst + 4*width;
0N/A
0N/A if (!((mlib_s32)dst & 7)) {
0N/A#pragma pipeloop(0)
0N/A for (; dst <= (dst_end - 8); dst += 8) {
0N/A dd = vis_freg_pair(((mlib_f32*)src)[0], ((mlib_f32*)src)[1]);
0N/A src += 2;
0N/A neg_mask = vis_fcmplt32(dd, d_zero);
0N/A ARGB2ABGR_DB(dd)
0N/A dd = vis_fxor(dd, d_xorpixel);
0N/A dd = vis_fandnot(d_alphamask, dd);
0N/A dd = vis_fxor(dd, *(mlib_d64*)dst);
0N/A vis_pst_32(dd, dst, neg_mask);
0N/A }
0N/A }
0N/A
0N/A while (dst < dst_end) {
0N/A x = *src++;
0N/A neg_mask = x >> 31;
0N/A x = (x ^ xorpixel) & (neg_mask &~ alphamask);
0N/A dst[0] ^= x >> 24;
0N/A dst[1] ^= x;
0N/A dst[2] ^= x >> 8;
0N/A dst[3] ^= x >> 16;
0N/A dst += 4;
0N/A }
0N/A
0N/A PTR_ADD(dstBase, dstScan);
0N/A PTR_ADD(srcBase, srcScan);
0N/A }
0N/A}
0N/A
0N/A/***************************************************************/
0N/A
0N/Avoid ADD_SUFF(ByteGrayToFourByteAbgrScaleConvert)(SCALE_PARAMS)
0N/A{
0N/A mlib_s32 dstScan = pDstInfo->scanStride;
0N/A mlib_s32 srcScan = pSrcInfo->scanStride;
0N/A mlib_d64 d0, d1, d2, d3, dd;
0N/A mlib_f32 ff, aa;
0N/A mlib_s32 i, j, x;
0N/A
0N/A/* if (!(((mlib_s32)dstBase | dstScan) & 3)) {
0N/A ADD_SUFF(ByteGrayToIntArgbScaleConvert)(SCALE_CALL_PARAMS);
0N/A return;
0N/A }*/
0N/A
0N/A if (width < 16) {
0N/A for (j = 0; j < height; j++) {
0N/A mlib_u8 *src = srcBase;
0N/A mlib_u8 *dst = dstBase;
0N/A mlib_s32 tmpsxloc = sxloc;
0N/A
0N/A PTR_ADD(src, (syloc >> shift) * srcScan);
0N/A
0N/A for (i = 0; i < width; i++) {
0N/A x = src[tmpsxloc >> shift];
0N/A tmpsxloc += sxinc;
0N/A dst[4*i ] = 0xff;
0N/A dst[4*i + 1] = x;
0N/A dst[4*i + 2] = x;
0N/A dst[4*i + 3] = x;
0N/A }
0N/A
0N/A PTR_ADD(dstBase, dstScan);
0N/A syloc += syinc;
0N/A }
0N/A return;
0N/A }
0N/A
0N/A aa = vis_fones();
0N/A
0N/A for (j = 0; j < height; j++) {
0N/A mlib_u8 *src = srcBase;
0N/A mlib_u8 *dst = dstBase;
0N/A mlib_u8 *dst_end;
0N/A mlib_s32 tmpsxloc = sxloc;
0N/A
0N/A PTR_ADD(src, (syloc >> shift) * srcScan);
0N/A
0N/A dst_end = dst + 4*width;
0N/A
0N/A if (!((mlib_s32)dst & 3)) {
0N/A vis_alignaddr(NULL, 7);
0N/A#pragma pipeloop(0)
0N/A for (; dst <= (dst_end - 4*4); dst += 4*4) {
0N/A LOAD_NEXT_U8(dd, src + ((tmpsxloc + 3*sxinc) >> shift));
0N/A LOAD_NEXT_U8(dd, src + ((tmpsxloc + 2*sxinc) >> shift));
0N/A LOAD_NEXT_U8(dd, src + ((tmpsxloc + sxinc) >> shift));
0N/A LOAD_NEXT_U8(dd, src + ((tmpsxloc ) >> shift));
0N/A tmpsxloc += 4*sxinc;
0N/A ff = vis_read_hi(dd);
0N/A d0 = vis_fpmerge(aa, ff);
0N/A d1 = vis_fpmerge(ff, ff);
0N/A d2 = vis_fpmerge(vis_read_hi(d0), vis_read_hi(d1));
0N/A d3 = vis_fpmerge(vis_read_lo(d0), vis_read_lo(d1));
0N/A ((mlib_f32*)dst)[0] = vis_read_hi(d2);
0N/A ((mlib_f32*)dst)[1] = vis_read_lo(d2);
0N/A ((mlib_f32*)dst)[2] = vis_read_hi(d3);
0N/A ((mlib_f32*)dst)[3] = vis_read_lo(d3);
0N/A }
0N/A } else {
0N/A mlib_d64 *dp;
0N/A
0N/A dp = vis_alignaddr(dst, 0);
0N/A d3 = vis_faligndata(dp[0], dp[0]);
0N/A vis_alignaddrl(dst, 0);
0N/A
0N/A#pragma pipeloop(0)
0N/A for (; dst <= (dst_end - 4*4); dst += 4*4) {
0N/A mlib_d64 s0, s1, s2, s3;
0N/A s0 = vis_ld_u8(src + ((tmpsxloc ) >> shift));
0N/A s1 = vis_ld_u8(src + ((tmpsxloc + sxinc) >> shift));
0N/A s2 = vis_ld_u8(src + ((tmpsxloc + 2*sxinc) >> shift));
0N/A s3 = vis_ld_u8(src + ((tmpsxloc + 3*sxinc) >> shift));
0N/A tmpsxloc += 4*sxinc;
0N/A s0 = vis_fpmerge(vis_read_lo(s0), vis_read_lo(s2));
0N/A s1 = vis_fpmerge(vis_read_lo(s1), vis_read_lo(s3));
0N/A dd = vis_fpmerge(vis_read_lo(s0), vis_read_lo(s1));
0N/A ff = vis_read_lo(dd);
0N/A d0 = vis_fpmerge(aa, ff);
0N/A d1 = vis_fpmerge(ff, ff);
0N/A d2 = vis_fpmerge(vis_read_hi(d0), vis_read_hi(d1));
0N/A *dp++ = vis_faligndata(d3, d2);
0N/A d3 = vis_fpmerge(vis_read_lo(d0), vis_read_lo(d1));
0N/A *dp++ = vis_faligndata(d2, d3);
0N/A }
0N/A
0N/A vis_pst_8(vis_faligndata(d3, d3), dp, vis_edge8(dp, dst - 1));
0N/A }
0N/A
0N/A while (dst < dst_end) {
0N/A x = src[tmpsxloc >> shift];
0N/A tmpsxloc += sxinc;
0N/A dst[0] = 0xff;
0N/A dst[1] = x;
0N/A dst[2] = x;
0N/A dst[3] = x;
0N/A dst += 4;
0N/A }
0N/A
0N/A PTR_ADD(dstBase, dstScan);
0N/A syloc += syinc;
0N/A }
0N/A}
0N/A
0N/A/***************************************************************/
0N/A
0N/Avoid ADD_SUFF(ByteIndexedToFourByteAbgrConvert)(BLIT_PARAMS)
0N/A{
0N/A jint *pixLut = pSrcInfo->lutBase;
0N/A mlib_s32 dstScan = pDstInfo->scanStride;
0N/A mlib_s32 srcScan = pSrcInfo->scanStride;
0N/A mlib_d64 dd, d_old;
0N/A mlib_s32 i, j, x;
0N/A
0N/A/* if (!(((mlib_s32)dstBase | dstScan) & 3)) {
0N/A ADD_SUFF(ByteIndexedToIntAbgrConvert)(BLIT_CALL_PARAMS);
0N/A return;
0N/A }*/
0N/A
0N/A if (width < 8) {
0N/A for (j = 0; j < height; j++) {
0N/A mlib_u8 *src = srcBase;
0N/A mlib_u8 *dst = dstBase;
0N/A
0N/A for (i = 0; i < width; i++) {
0N/A x = pixLut[src[i]];
0N/A dst[4*i ] = x >> 24;
0N/A dst[4*i + 1] = x;
0N/A dst[4*i + 2] = x >> 8;
0N/A dst[4*i + 3] = x >> 16;
0N/A }
0N/A
0N/A PTR_ADD(dstBase, dstScan);
0N/A PTR_ADD(srcBase, srcScan);
0N/A }
0N/A return;
0N/A }
0N/A
0N/A if (srcScan == width && dstScan == 4*width) {
0N/A width *= height;
0N/A height = 1;
0N/A }
0N/A
0N/A BMASK_FOR_ARGB
0N/A
0N/A for (j = 0; j < height; j++) {
0N/A mlib_u8 *src = srcBase;
0N/A mlib_u8 *dst = dstBase;
0N/A mlib_u8 *dst_end;
0N/A
0N/A dst_end = dst + 4*width;
0N/A
0N/A if (!((mlib_s32)dst & 7)) {
0N/A#pragma pipeloop(0)
0N/A for (; dst <= (dst_end - 2*4); dst += 2*4) {
0N/A dd = vis_freg_pair(((mlib_f32*)pixLut)[src[0]],
0N/A ((mlib_f32*)pixLut)[src[1]]);
0N/A ARGB2ABGR_DB(dd)
0N/A *(mlib_d64*)dst = dd;
0N/A src += 2;
0N/A }
0N/A } else {
0N/A mlib_d64 *dp;
0N/A
0N/A dp = vis_alignaddr(dst, 0);
0N/A dd = vis_faligndata(dp[0], dp[0]);
0N/A vis_alignaddrl(dst, 0);
0N/A
0N/A#pragma pipeloop(0)
0N/A for (; dst <= (dst_end - 2*4); dst += 2*4) {
0N/A d_old = dd;
0N/A dd = vis_freg_pair(((mlib_f32*)pixLut)[src[0]],
0N/A ((mlib_f32*)pixLut)[src[1]]);
0N/A ARGB2ABGR_DB(dd)
0N/A *dp++ = vis_faligndata(d_old, dd);
0N/A src += 2;
0N/A }
0N/A
0N/A vis_pst_8(vis_faligndata(dd, dd), dp, vis_edge8(dp, dst - 1));
0N/A }
0N/A
0N/A while (dst < dst_end) {
0N/A x = pixLut[*src++];
0N/A dst[0] = x >> 24;
0N/A dst[1] = x;
0N/A dst[2] = x >> 8;
0N/A dst[3] = x >> 16;
0N/A dst += 4;
0N/A }
0N/A
0N/A PTR_ADD(dstBase, dstScan);
0N/A PTR_ADD(srcBase, srcScan);
0N/A }
0N/A}
0N/A
0N/A/***************************************************************/
0N/A
0N/Avoid ADD_SUFF(ByteIndexedBmToFourByteAbgrXparOver)(BLIT_PARAMS)
0N/A{
0N/A jint *pixLut = pSrcInfo->lutBase;
0N/A mlib_s32 dstScan = pDstInfo->scanStride;
0N/A mlib_s32 srcScan = pSrcInfo->scanStride;
0N/A mlib_d64 dd, dzero;
0N/A mlib_s32 i, j, x, mask;
0N/A
0N/A/* if (!(((mlib_s32)dstBase | dstScan) & 3)) {
0N/A ADD_SUFF(ByteIndexedToIntAbgrConvert)(BLIT_CALL_PARAMS);
0N/A return;
0N/A }*/
0N/A
0N/A if (width < 8) {
0N/A for (j = 0; j < height; j++) {
0N/A mlib_u8 *src = srcBase;
0N/A mlib_u8 *dst = dstBase;
0N/A
0N/A for (i = 0; i < width; i++) {
0N/A x = pixLut[src[i]];
0N/A if (x < 0) {
0N/A dst[4*i ] = x >> 24;
0N/A dst[4*i + 1] = x;
0N/A dst[4*i + 2] = x >> 8;
0N/A dst[4*i + 3] = x >> 16;
0N/A }
0N/A }
0N/A
0N/A PTR_ADD(dstBase, dstScan);
0N/A PTR_ADD(srcBase, srcScan);
0N/A }
0N/A return;
0N/A }
0N/A
0N/A if (srcScan == width && dstScan == 4*width) {
0N/A width *= height;
0N/A height = 1;
0N/A }
0N/A
0N/A BMASK_FOR_ARGB
0N/A
0N/A dzero = vis_fzero();
0N/A
0N/A for (j = 0; j < height; j++) {
0N/A mlib_u8 *src = srcBase;
0N/A mlib_u8 *dst = dstBase;
0N/A mlib_u8 *dst_end;
0N/A
0N/A dst_end = dst + 4*width;
0N/A
0N/A if (!((mlib_s32)dst & 7)) {
0N/A#pragma pipeloop(0)
0N/A for (; dst <= (dst_end - 2*4); dst += 2*4) {
0N/A dd = vis_freg_pair(((mlib_f32*)pixLut)[src[0]],
0N/A ((mlib_f32*)pixLut)[src[1]]);
0N/A mask = vis_fcmplt32(dd, dzero);
0N/A ARGB2ABGR_DB(dd)
0N/A vis_pst_32(dd, dst, mask);
0N/A src += 2;
0N/A }
0N/A }
0N/A
0N/A while (dst < dst_end) {
0N/A x = pixLut[*src++];
0N/A if (x < 0) {
0N/A dst[0] = x >> 24;
0N/A dst[1] = x;
0N/A dst[2] = x >> 8;
0N/A dst[3] = x >> 16;
0N/A }
0N/A dst += 4;
0N/A }
0N/A
0N/A PTR_ADD(dstBase, dstScan);
0N/A PTR_ADD(srcBase, srcScan);
0N/A }
0N/A}
0N/A
0N/A/***************************************************************/
0N/A
/*
 * ByteIndexedBm -> FourByteAbgr blit with background fill.
 *
 * Every source byte indexes pixLut (pSrcInfo->lutBase). A LUT entry with
 * its sign bit set (x < 0) is stored as four bytes taken from bits 31..24,
 * 7..0, 15..8 and 23..16 of the entry, in that order. Any other entry
 * stores the four bytes of bgpixel instead, lowest byte first.
 *
 * srcBase, dstBase, width, height, bgpixel, pSrcInfo and pDstInfo are not
 * declared in this function; presumably they are supplied by the
 * BCOPY_PARAMS macro, which is defined outside this chunk.
 */
0N/Avoid ADD_SUFF(ByteIndexedBmToFourByteAbgrXparBgCopy)(BCOPY_PARAMS)
0N/A{
0N/A jint *pixLut = pSrcInfo->lutBase;
0N/A mlib_s32 dstScan = pDstInfo->scanStride;
0N/A mlib_s32 srcScan = pSrcInfo->scanStride;
0N/A mlib_d64 dd, dzero, d_bgpixel;
0N/A mlib_s32 i, j, x, mask;
    /* Background pixel split into the four bytes stored at dst[0..3]
     * (only the low 8 bits of each survive the byte store). */
0N/A mlib_s32 bgpix0 = bgpixel;
0N/A mlib_s32 bgpix1 = bgpixel >> 8;
0N/A mlib_s32 bgpix2 = bgpixel >> 16;
0N/A mlib_s32 bgpix3 = bgpixel >> 24;
0N/A
    /* Narrow rows: plain per-pixel C loop, no VIS state is set up. */
0N/A if (width < 8) {
0N/A for (j = 0; j < height; j++) {
0N/A mlib_u8 *src = srcBase;
0N/A mlib_u8 *dst = dstBase;
0N/A
0N/A for (i = 0; i < width; i++) {
0N/A x = pixLut[src[i]];
0N/A if (x < 0) {
0N/A dst[4*i ] = x >> 24;
0N/A dst[4*i + 1] = x;
0N/A dst[4*i + 2] = x >> 8;
0N/A dst[4*i + 3] = x >> 16;
0N/A } else {
0N/A dst[4*i ] = bgpix0;
0N/A dst[4*i + 1] = bgpix1;
0N/A dst[4*i + 2] = bgpix2;
0N/A dst[4*i + 3] = bgpix3;
0N/A }
0N/A }
0N/A
0N/A PTR_ADD(dstBase, dstScan);
0N/A PTR_ADD(srcBase, srcScan);
0N/A }
0N/A return;
0N/A }
0N/A
    /* Rows are contiguous in both buffers: treat the image as one long row. */
0N/A if (srcScan == width && dstScan == 4*width) {
0N/A width *= height;
0N/A height = 1;
0N/A }
0N/A
    /* Expands to vis_write_bmask(0x03214765, 0) when VIS >= 0x200 and to
     * nothing otherwise (see the macro near the top of the file). */
0N/A BMASK_FOR_ARGB
0N/A
0N/A dzero = vis_fzero();
    /* Both 32-bit lanes hold bgpixel as loaded by vis_ldfa_ASI_PL.
     * NOTE(review): presumably a little-endian load, so that the lane's
     * byte order matches bgpix0..bgpix3 of the scalar paths - confirm
     * against the VIS headers. */
0N/A d_bgpixel = vis_freg_pair(vis_ldfa_ASI_PL(&bgpixel),
0N/A vis_ldfa_ASI_PL(&bgpixel));
0N/A
0N/A for (j = 0; j < height; j++) {
0N/A mlib_u8 *src = srcBase;
0N/A mlib_u8 *dst = dstBase;
0N/A mlib_u8 *dst_end;
0N/A
0N/A dst_end = dst + 4*width;
0N/A
        /* Two pixels per iteration, but only when dst is 8-byte aligned;
         * otherwise the scalar loop below processes the whole row. */
0N/A if (!((mlib_s32)dst & 7)) {
0N/A#pragma pipeloop(0)
0N/A for (; dst <= (dst_end - 2*4); dst += 2*4) {
0N/A dd = vis_freg_pair(((mlib_f32*)pixLut)[src[0]],
0N/A ((mlib_f32*)pixLut)[src[1]]);
                /* One mask bit per 32-bit lane whose LUT entry is negative. */
0N/A mask = vis_fcmplt32(dd, dzero);
                /* Macro defined outside this chunk; presumably reorders each
                 * lane from ARGB to ABGR byte order. */
0N/A ARGB2ABGR_DB(dd)
                /* Fill both pixels with the background first, then overwrite
                 * only the lanes selected by mask with the converted pixels. */
0N/A *(mlib_d64*)dst = d_bgpixel;
0N/A vis_pst_32(dd, dst, mask);
0N/A src += 2;
0N/A }
0N/A }
0N/A
        /* Remaining pixels: the odd tail, or the entire row if dst was not
         * 8-byte aligned. */
0N/A while (dst < dst_end) {
0N/A x = pixLut[*src++];
0N/A if (x < 0) {
0N/A dst[0] = x >> 24;
0N/A dst[1] = x;
0N/A dst[2] = x >> 8;
0N/A dst[3] = x >> 16;
0N/A } else {
0N/A dst[0] = bgpix0;
0N/A dst[1] = bgpix1;
0N/A dst[2] = bgpix2;
0N/A dst[3] = bgpix3;
0N/A }
0N/A dst += 4;
0N/A }
0N/A
0N/A PTR_ADD(dstBase, dstScan);
0N/A PTR_ADD(srcBase, srcScan);
0N/A }
0N/A}
0N/A
0N/A/***************************************************************/
0N/A
/*
 * Scaled ByteIndexed -> FourByteAbgr conversion.
 *
 * For each destination row the source row is selected by (syloc >> shift)
 * and syloc advances by syinc; within a row the source column is
 * (tmpsxloc >> shift), starting at sxloc and advancing by sxinc per pixel.
 * The selected byte indexes pixLut and the LUT entry is always stored as
 * four bytes taken from bits 31..24, 7..0, 15..8 and 23..16, in that order.
 *
 * srcBase, dstBase, width, height, sxloc, syloc, sxinc, syinc, shift,
 * pSrcInfo and pDstInfo are not declared in this function; presumably they
 * are supplied by the SCALE_PARAMS macro, defined outside this chunk.
 */
0N/Avoid ADD_SUFF(ByteIndexedToFourByteAbgrScaleConvert)(SCALE_PARAMS)
0N/A{
0N/A jint *pixLut = pSrcInfo->lutBase;
0N/A mlib_s32 dstScan = pDstInfo->scanStride;
0N/A mlib_s32 srcScan = pSrcInfo->scanStride;
0N/A mlib_d64 dd, d_old;
0N/A mlib_s32 i, j, x;
0N/A
0N/A/*
0N/A if (!(((mlib_s32)dstBase | dstScan) & 3)) {
0N/A ADD_SUFF(ByteIndexedToIntAbgrScaleConvert)(SCALE_CALL_PARAMS);
0N/A return;
0N/A }
0N/A*/
0N/A
    /* Narrow rows: plain per-pixel C loop, no VIS state is set up. */
0N/A if (width < 8) {
0N/A for (j = 0; j < height; j++) {
0N/A mlib_u8 *src = srcBase;
0N/A mlib_u8 *dst = dstBase;
0N/A mlib_s32 tmpsxloc = sxloc;
0N/A
0N/A PTR_ADD(src, (syloc >> shift) * srcScan);
0N/A
0N/A for (i = 0; i < width; i++) {
0N/A x = pixLut[src[tmpsxloc >> shift]];
0N/A tmpsxloc += sxinc;
0N/A dst[4*i ] = x >> 24;
0N/A dst[4*i + 1] = x;
0N/A dst[4*i + 2] = x >> 8;
0N/A dst[4*i + 3] = x >> 16;
0N/A }
0N/A
0N/A PTR_ADD(dstBase, dstScan);
0N/A syloc += syinc;
0N/A }
0N/A return;
0N/A }
0N/A
    /* Expands to vis_write_bmask(0x03214765, 0) when VIS >= 0x200 and to
     * nothing otherwise (see the macro near the top of the file). */
0N/A BMASK_FOR_ARGB
0N/A
0N/A for (j = 0; j < height; j++) {
0N/A mlib_u8 *src = srcBase;
0N/A mlib_u8 *dst = dstBase;
0N/A mlib_u8 *dst_end;
0N/A mlib_s32 tmpsxloc = sxloc;
0N/A
0N/A PTR_ADD(src, (syloc >> shift) * srcScan);
0N/A
0N/A dst_end = dst + 4*width;
0N/A
        /* 8-byte aligned destination: two pixels per direct 64-bit store.
         * LOAD_2F32 is defined outside this chunk; presumably it pairs the
         * two 32-bit LUT entries at the given indices. */
0N/A if (!((mlib_s32)dst & 7)) {
0N/A#pragma pipeloop(0)
0N/A for (; dst <= (dst_end - 2*4); dst += 2*4) {
0N/A dd = LOAD_2F32(pixLut, src[tmpsxloc >> shift],
0N/A src[(tmpsxloc + sxinc) >> shift]);
0N/A tmpsxloc += 2*sxinc;
0N/A ARGB2ABGR_DB(dd)
0N/A *(mlib_d64*)dst = dd;
0N/A }
0N/A } else {
            /* Unaligned destination: write through the 8-byte aligned
             * pointer dp, shifting each pixel pair with vis_faligndata.
             * dd is seeded from the existing contents of dp[0] so that the
             * first aligned store keeps the bytes that precede dst. */
0N/A mlib_d64 *dp;
0N/A
0N/A dp = vis_alignaddr(dst, 0);
0N/A dd = vis_faligndata(dp[0], dp[0]);
0N/A vis_alignaddrl(dst, 0);
0N/A
0N/A#pragma pipeloop(0)
0N/A for (; dst <= (dst_end - 2*4); dst += 2*4) {
0N/A d_old = dd;
0N/A dd = LOAD_2F32(pixLut, src[tmpsxloc >> shift],
0N/A src[(tmpsxloc + sxinc) >> shift]);
0N/A tmpsxloc += 2*sxinc;
0N/A ARGB2ABGR_DB(dd)
0N/A *dp++ = vis_faligndata(d_old, dd);
0N/A }
0N/A
            /* Flush the bytes of the last pair that did not fit into the
             * last full aligned store; the edge mask limits the write to
             * the bytes from dp up to dst - 1. */
0N/A vis_pst_8(vis_faligndata(dd, dd), dp, vis_edge8(dp, dst - 1));
0N/A }
0N/A
        /* Scalar tail for the pixels the two-at-a-time loops left over. */
0N/A while (dst < dst_end) {
0N/A x = pixLut[src[tmpsxloc >> shift]];
0N/A tmpsxloc += sxinc;
0N/A dst[0] = x >> 24;
0N/A dst[1] = x;
0N/A dst[2] = x >> 8;
0N/A dst[3] = x >> 16;
0N/A dst += 4;
0N/A }
0N/A
0N/A PTR_ADD(dstBase, dstScan);
0N/A syloc += syinc;
0N/A }
0N/A}
0N/A
0N/A/***************************************************************/
0N/A
/*
 * Scaled ByteIndexedBm -> FourByteAbgr "transparent over" blit.
 *
 * For each destination row the source row is selected by (syloc >> shift)
 * and syloc advances by syinc; within a row the source column is
 * (tmpsxloc >> shift), starting at sxloc and advancing by sxinc per pixel.
 * The selected byte indexes pixLut. A LUT entry with its sign bit set
 * (x < 0) is stored as four bytes taken from bits 31..24, 7..0, 15..8 and
 * 23..16, in that order; any other entry leaves the destination untouched.
 *
 * srcBase, dstBase, width, height, sxloc, syloc, sxinc, syinc, shift,
 * pSrcInfo and pDstInfo are not declared in this function; presumably they
 * are supplied by the SCALE_PARAMS macro, defined outside this chunk.
 */
0N/Avoid ADD_SUFF(ByteIndexedBmToFourByteAbgrScaleXparOver)(SCALE_PARAMS)
0N/A{
0N/A jint *pixLut = pSrcInfo->lutBase;
0N/A mlib_s32 dstScan = pDstInfo->scanStride;
0N/A mlib_s32 srcScan = pSrcInfo->scanStride;
0N/A mlib_d64 dd, dzero;
0N/A mlib_s32 i, j, x, mask;
0N/A
0N/A/*
0N/A if (!(((mlib_s32)dstBase | dstScan) & 3)) {
0N/A ADD_SUFF(ByteIndexedToIntAbgrScaleConvert)(SCALE_CALL_PARAMS);
0N/A return;
0N/A }
0N/A*/
0N/A
    /* Narrow rows: plain per-pixel C loop, no VIS state is set up. */
0N/A if (width < 8) {
0N/A for (j = 0; j < height; j++) {
0N/A mlib_u8 *src = srcBase;
0N/A mlib_u8 *dst = dstBase;
0N/A mlib_s32 tmpsxloc = sxloc;
0N/A
0N/A PTR_ADD(src, (syloc >> shift) * srcScan);
0N/A
0N/A for (i = 0; i < width; i++) {
0N/A x = pixLut[src[tmpsxloc >> shift]];
0N/A tmpsxloc += sxinc;
0N/A if (x < 0) {
0N/A dst[4*i ] = x >> 24;
0N/A dst[4*i + 1] = x;
0N/A dst[4*i + 2] = x >> 8;
0N/A dst[4*i + 3] = x >> 16;
0N/A }
0N/A }
0N/A
0N/A PTR_ADD(dstBase, dstScan);
0N/A syloc += syinc;
0N/A }
0N/A return;
0N/A }
0N/A
    /* Expands to vis_write_bmask(0x03214765, 0) when VIS >= 0x200 and to
     * nothing otherwise (see the macro near the top of the file). */
0N/A BMASK_FOR_ARGB
0N/A
0N/A dzero = vis_fzero();
0N/A
0N/A for (j = 0; j < height; j++) {
0N/A mlib_u8 *src = srcBase;
0N/A mlib_u8 *dst = dstBase;
0N/A mlib_u8 *dst_end;
0N/A mlib_s32 tmpsxloc = sxloc;
0N/A
0N/A PTR_ADD(src, (syloc >> shift) * srcScan);
0N/A
0N/A dst_end = dst + 4*width;
0N/A
        /* Two pixels per iteration, but only when dst is 8-byte aligned;
         * otherwise the scalar loop below processes the whole row. */
0N/A if (!((mlib_s32)dst & 7)) {
0N/A#pragma pipeloop(0)
0N/A for (; dst <= (dst_end - 2*4); dst += 2*4) {
0N/A dd = LOAD_2F32(pixLut, src[tmpsxloc >> shift],
0N/A src[(tmpsxloc + sxinc) >> shift]);
0N/A tmpsxloc += 2*sxinc;
                /* One mask bit per 32-bit lane whose LUT entry is negative. */
0N/A mask = vis_fcmplt32(dd, dzero);
0N/A ARGB2ABGR_DB(dd)
                /* Partial store: only the lanes selected by mask are written. */
0N/A vis_pst_32(dd, dst, mask);
0N/A }
0N/A }
0N/A
        /* Remaining pixels: the odd tail, or the entire row if dst was not
         * 8-byte aligned. */
0N/A while (dst < dst_end) {
0N/A x = pixLut[src[tmpsxloc >> shift]];
0N/A tmpsxloc += sxinc;
0N/A if (x < 0) {
0N/A dst[0] = x >> 24;
0N/A dst[1] = x;
0N/A dst[2] = x >> 8;
0N/A dst[3] = x >> 16;
0N/A }
0N/A dst += 4;
0N/A }
0N/A
0N/A PTR_ADD(dstBase, dstScan);
0N/A syloc += syinc;
0N/A }
0N/A}
0N/A
0N/A/***************************************************************/
0N/A
/*
 * Scaled IntArgbBm -> FourByteAbgr "transparent over" blit.
 *
 * Source pixels are 32-bit words selected with the same fixed-point
 * stepping as the other scale loops: row (syloc >> shift), column
 * (tmpsxloc >> shift). A pixel whose top byte is non-zero (x >> 24) is
 * stored as the bytes 0xFF, bits 7..0, bits 15..8, bits 23..16; a pixel
 * whose top byte is zero leaves the destination untouched.
 *
 * srcBase, dstBase, width, height, sxloc, syloc, sxinc, syinc, shift,
 * pSrcInfo and pDstInfo are not declared in this function; presumably they
 * are supplied by the SCALE_PARAMS macro, defined outside this chunk.
 */
0N/Avoid ADD_SUFF(IntArgbBmToFourByteAbgrScaleXparOver)(SCALE_PARAMS)
0N/A{
0N/A mlib_s32 dstScan = pDstInfo->scanStride;
0N/A mlib_s32 srcScan = pSrcInfo->scanStride;
0N/A mlib_d64 dd, amask;
0N/A mlib_s32 i, j, x, mask;
0N/A
    /* Narrow rows: plain per-pixel C loop, no VIS state is set up. */
0N/A if (width < 16) {
0N/A for (j = 0; j < height; j++) {
0N/A mlib_s32 *src = srcBase;
0N/A mlib_u8 *dst = dstBase;
0N/A mlib_s32 tmpsxloc = sxloc;
0N/A
0N/A PTR_ADD(src, (syloc >> shift) * srcScan);
0N/A
0N/A for (i = 0; i < width; i++) {
0N/A x = src[tmpsxloc >> shift];
0N/A tmpsxloc += sxinc;
0N/A if (x >> 24) {
0N/A dst[4*i ] = 0xFF;
0N/A dst[4*i + 1] = x;
0N/A dst[4*i + 2] = x >> 8;
0N/A dst[4*i + 3] = x >> 16;
0N/A }
0N/A }
0N/A
0N/A PTR_ADD(dstBase, dstScan);
0N/A syloc += syinc;
0N/A }
0N/A return;
0N/A }
0N/A
    /* Expands to vis_write_bmask(0x03214765, 0) when VIS >= 0x200 and to
     * nothing otherwise (see the macro near the top of the file). */
0N/A BMASK_FOR_ARGB
0N/A
    /* 0xFF000000 in both 32-bit lanes; OR-ed in below to force the byte
     * that the scalar path writes as 0xFF. */
0N/A amask = vis_to_double_dup(0xFF000000);
0N/A
0N/A for (j = 0; j < height; j++) {
0N/A mlib_s32 *src = srcBase;
0N/A mlib_u8 *dst = dstBase;
0N/A mlib_u8 *dst_end;
0N/A mlib_s32 tmpsxloc = sxloc;
0N/A
0N/A PTR_ADD(src, (syloc >> shift) * srcScan);
0N/A
0N/A dst_end = dst + 4*width;
0N/A
        /* Two pixels per iteration, but only when dst is 8-byte aligned;
         * otherwise the scalar loop below processes the whole row. */
0N/A if (!((mlib_s32)dst & 7)) {
0N/A#pragma pipeloop(0)
0N/A for (; dst <= (dst_end - 2*4); dst += 2*4) {
0N/A mlib_s32 *pp0 = src + (tmpsxloc >> shift);
0N/A mlib_s32 *pp1 = src + ((tmpsxloc + sxinc) >> shift);
0N/A dd = vis_freg_pair(*(mlib_f32*)pp0, *(mlib_f32*)pp1);
0N/A tmpsxloc += 2*sxinc;
0N/A ARGB2ABGR_DB(dd)
0N/A dd = vis_for(dd, amask);
                /* Build the store mask from the first byte in memory of each
                 * source word: negating a non-zero byte gives a negative int
                 * whose >> 31 is all ones (relies on an arithmetic right
                 * shift). Bit 1 (value 2) enables the first pixel, bit 0 the
                 * second.
                 * NOTE(review): the first byte in memory equals the x >> 24
                 * byte of the scalar test only on a big-endian target. */
0N/A mask = (((-*(mlib_u8*)pp0) >> 31) & 2) |
0N/A (((-*(mlib_u8*)pp1) >> 31) & 1);
0N/A vis_pst_32(dd, dst, mask);
0N/A }
0N/A }
0N/A
        /* Remaining pixels: the odd tail, or the entire row if dst was not
         * 8-byte aligned. */
0N/A while (dst < dst_end) {
0N/A x = src[tmpsxloc >> shift];
0N/A tmpsxloc += sxinc;
0N/A if (x >> 24) {
0N/A dst[0] = 0xFF;
0N/A dst[1] = x;
0N/A dst[2] = x >> 8;
0N/A dst[3] = x >> 16;
0N/A }
0N/A dst += 4;
0N/A }
0N/A
0N/A PTR_ADD(dstBase, dstScan);
0N/A syloc += syinc;
0N/A }
0N/A}
0N/A
0N/A/***************************************************************/
0N/A
/*
 * Export IntArgbBmToFourByteAbgrPreScaleXparOver as a weak alias of
 * IntArgbBmToFourByteAbgrScaleXparOver, so both names resolve to the same
 * code. When MLIB_ADD_SUFF is defined, both symbols carry the _F suffix.
 * NOTE(review): presumably valid because that loop only ever writes 0xFF
 * into the alpha byte, which would make the premultiplied and
 * non-premultiplied results identical - confirm.
 */
0N/A#ifdef MLIB_ADD_SUFF
0N/A#pragma weak IntArgbBmToFourByteAbgrPreScaleXparOver_F = \
0N/A IntArgbBmToFourByteAbgrScaleXparOver_F
0N/A#else
0N/A#pragma weak IntArgbBmToFourByteAbgrPreScaleXparOver = \
0N/A IntArgbBmToFourByteAbgrScaleXparOver
0N/A#endif
0N/A
0N/A/***************************************************************/
0N/A
/*
 * Scaled FourByteAbgr -> IntArgb conversion.
 *
 * The source row is selected by (syloc >> shift) and the 4-byte source
 * pixel by (tmpsxloc >> shift), advancing by syinc per row and sxinc per
 * pixel. Each destination int is assembled as
 * (byte0 << 24) | (byte3 << 16) | (byte2 << 8) | byte1 of that pixel.
 *
 * srcBase, dstBase, width, height, sxloc, syloc, sxinc, syinc, shift,
 * pSrcInfo and pDstInfo are not declared in this function; presumably they
 * are supplied by the SCALE_PARAMS macro, defined outside this chunk.
 * The destination is written through mlib_s32 and mlib_d64 pointers, so it
 * is assumed to be at least 4-byte aligned - TODO confirm with callers.
 */
0N/Avoid ADD_SUFF(FourByteAbgrToIntArgbScaleConvert)(SCALE_PARAMS)
0N/A{
0N/A mlib_s32 dstScan = pDstInfo->scanStride;
0N/A mlib_s32 srcScan = pSrcInfo->scanStride;
0N/A mlib_s32 i, j;
0N/A
    /* Narrow rows: plain per-pixel C loop, no VIS state is set up. */
0N/A if (width < 16) {
0N/A for (j = 0; j < height; j++) {
0N/A mlib_u8 *src = srcBase;
0N/A mlib_s32 *dst = dstBase;
0N/A mlib_s32 tmpsxloc = sxloc;
0N/A
0N/A PTR_ADD(src, (syloc >> shift) * srcScan);
0N/A
0N/A for (i = 0; i < width; i++) {
0N/A mlib_u8 *pp = src + 4*(tmpsxloc >> shift);
0N/A *dst++ = (pp[0] << 24) | (pp[3] << 16) | (pp[2] << 8) | pp[1];
0N/A tmpsxloc += sxinc;
0N/A }
0N/A
0N/A PTR_ADD(dstBase, dstScan);
0N/A syloc += syinc;
0N/A }
0N/A return;
0N/A }
0N/A
    /* Expands to vis_write_bmask(0x03214765, 0) when VIS >= 0x200 and to
     * nothing otherwise (see the macro near the top of the file). */
0N/A BMASK_FOR_ARGB
0N/A
0N/A for (j = 0; j < height; j++) {
0N/A mlib_u8 *src = srcBase;
0N/A mlib_s32 *dst = dstBase;
0N/A mlib_s32 *dst_end = dst + width;
0N/A mlib_s32 tmpsxloc = sxloc;
0N/A mlib_s32 off;
0N/A mlib_d64 dd, dd0, dd1;
0N/A mlib_f32 *pp0, *pp1;
0N/A
0N/A PTR_ADD(src, (syloc >> shift) * srcScan);
0N/A
        /* Peel one pixel so that the 64-bit stores below are 8-byte
         * aligned. */
0N/A if ((mlib_s32)dst & 7) {
0N/A mlib_u8 *pp = src + 4*(tmpsxloc >> shift);
0N/A *dst++ = (pp[0] << 24) | (pp[3] << 16) | (pp[2] << 8) | pp[1];
0N/A tmpsxloc += sxinc;
0N/A }
0N/A
        /* off is the misalignment of the source row within a 32-bit word. */
0N/A off = (mlib_s32)src & 3;
0N/A if (!off) {
            /* Word-aligned source: load each pixel as one 32-bit value. */
0N/A#pragma pipeloop(0)
0N/A for (; dst <= (dst_end - 2); dst += 2) {
0N/A pp0 = (mlib_f32*)src + (tmpsxloc >> shift);
0N/A pp1 = (mlib_f32*)src + ((tmpsxloc + sxinc) >> shift);
0N/A tmpsxloc += 2*sxinc;
0N/A dd = vis_freg_pair(pp0[0], pp1[0]);
0N/A ARGB2ABGR_DB(dd)
0N/A *(mlib_d64*)dst = dd;
0N/A }
0N/A } else {
            /* Misaligned source: for each pixel load the two aligned words
             * that straddle it and let vis_faligndata shift the wanted four
             * bytes into the high half, using the offset set here. */
0N/A vis_alignaddr(NULL, off);
0N/A#pragma pipeloop(0)
0N/A for (; dst <= (dst_end - 2); dst += 2) {
0N/A pp0 = (mlib_f32*)(src - off) + (tmpsxloc >> shift);
0N/A pp1 = (mlib_f32*)(src - off) + ((tmpsxloc + sxinc) >> shift);
0N/A tmpsxloc += 2*sxinc;
0N/A dd0 = vis_freg_pair(pp0[0], pp0[1]);
0N/A dd1 = vis_freg_pair(pp1[0], pp1[1]);
0N/A dd0 = vis_faligndata(dd0, dd0);
0N/A dd1 = vis_faligndata(dd1, dd1);
                /* Macro defined outside this chunk; presumably byte-swaps
                 * the two 32-bit pixels and packs them into dd. */
0N/A ARGB2ABGR_FL2(dd, vis_read_hi(dd0), vis_read_hi(dd1))
0N/A *(mlib_d64*)dst = dd;
0N/A }
0N/A }
0N/A
        /* At most one pixel is left after the two-at-a-time loops. */
0N/A if (dst < dst_end) {
0N/A mlib_u8 *pp = src + 4*(tmpsxloc >> shift);
0N/A *dst++ = (pp[0] << 24) | (pp[3] << 16) | (pp[2] << 8) | pp[1];
0N/A tmpsxloc += sxinc;
0N/A }
0N/A
0N/A PTR_ADD(dstBase, dstScan);
0N/A syloc += syinc;
0N/A }
0N/A}
0N/A
0N/A/***************************************************************/
0N/A
/*
 * Scaled IntArgb -> FourByteAbgr conversion.
 *
 * The source row is selected by (syloc >> shift) and the 32-bit source
 * pixel by (tmpsxloc >> shift), advancing by syinc per row and sxinc per
 * pixel. Every pixel is stored as four bytes taken from bits 31..24, 7..0,
 * 15..8 and 23..16 of the source word, in that order.
 *
 * srcBase, dstBase, width, height, sxloc, syloc, sxinc, syinc, shift,
 * pSrcInfo and pDstInfo are not declared in this function; presumably they
 * are supplied by the SCALE_PARAMS macro, defined outside this chunk.
 */
0N/Avoid ADD_SUFF(IntArgbToFourByteAbgrScaleConvert)(SCALE_PARAMS)
0N/A{
0N/A mlib_s32 dstScan = pDstInfo->scanStride;
0N/A mlib_s32 srcScan = pSrcInfo->scanStride;
0N/A mlib_s32 i, j;
0N/A mlib_s32 x;
0N/A
    /* Narrow rows: plain per-pixel C loop, no VIS state is set up. */
0N/A if (width < 16) {
0N/A for (j = 0; j < height; j++) {
0N/A mlib_s32 *src = srcBase;
0N/A mlib_u8 *dst = dstBase;
0N/A mlib_s32 tmpsxloc = sxloc;
0N/A
0N/A PTR_ADD(src, (syloc >> shift) * srcScan);
0N/A
0N/A for (i = 0; i < width; i++) {
0N/A x = src[tmpsxloc >> shift];
0N/A tmpsxloc += sxinc;
0N/A dst[4*i ] = x >> 24;
0N/A dst[4*i + 1] = x;
0N/A dst[4*i + 2] = x >> 8;
0N/A dst[4*i + 3] = x >> 16;
0N/A }
0N/A
0N/A PTR_ADD(dstBase, dstScan);
0N/A syloc += syinc;
0N/A }
0N/A return;
0N/A }
0N/A
    /* Expands to vis_write_bmask(0x03214765, 0) when VIS >= 0x200 and to
     * nothing otherwise (see the macro near the top of the file). */
0N/A BMASK_FOR_ARGB
0N/A
0N/A for (j = 0; j < height; j++) {
0N/A mlib_s32 *src = srcBase;
0N/A mlib_u8 *dst = dstBase;
0N/A mlib_u8 *dst_end = dst + 4*width;
0N/A mlib_s32 tmpsxloc = sxloc;
0N/A mlib_d64 dd, d_old;
0N/A mlib_f32 *pp0, *pp1;
0N/A
0N/A PTR_ADD(src, (syloc >> shift) * srcScan);
0N/A
0N/A if (!((mlib_s32)dst & 3)) {
            /* 4-byte aligned destination: peel one pixel if needed to reach
             * 8-byte alignment, then store two pixels per 64-bit write. */
0N/A if ((mlib_s32)dst & 7) {
0N/A x = src[tmpsxloc >> shift];
0N/A tmpsxloc += sxinc;
0N/A dst[0] = x >> 24;
0N/A dst[1] = x;
0N/A dst[2] = x >> 8;
0N/A dst[3] = x >> 16;
0N/A dst += 4;
0N/A }
0N/A#pragma pipeloop(0)
0N/A for (; dst <= (dst_end - 2*4); dst += 2*4) {
0N/A pp0 = (mlib_f32*)src + (tmpsxloc >> shift);
0N/A pp1 = (mlib_f32*)src + ((tmpsxloc + sxinc) >> shift);
0N/A tmpsxloc += 2*sxinc;
0N/A dd = vis_freg_pair(pp0[0], pp1[0]);
0N/A ARGB2ABGR_DB(dd)
0N/A *(mlib_d64*)dst = dd;
0N/A }
0N/A } else {
            /* Destination not 4-byte aligned: write through the 8-byte
             * aligned pointer dp, shifting each pixel pair with
             * vis_faligndata. dd is seeded from the existing contents of
             * dp[0] so that the first aligned store keeps the bytes that
             * precede dst. */
0N/A mlib_d64 *dp;
0N/A
0N/A dp = vis_alignaddr(dst, 0);
0N/A dd = vis_faligndata(dp[0], dp[0]);
0N/A vis_alignaddrl(dst, 0);
0N/A
0N/A#pragma pipeloop(0)
0N/A for (; dst <= (dst_end - 2*4); dst += 2*4) {
0N/A d_old = dd;
0N/A pp0 = (mlib_f32*)src + (tmpsxloc >> shift);
0N/A pp1 = (mlib_f32*)src + ((tmpsxloc + sxinc) >> shift);
0N/A tmpsxloc += 2*sxinc;
0N/A dd = vis_freg_pair(pp0[0], pp1[0]);
0N/A ARGB2ABGR_DB(dd)
0N/A *dp++ = vis_faligndata(d_old, dd);
0N/A }
0N/A
            /* Flush the bytes of the last pair that did not fit into the
             * last full aligned store; the edge mask limits the write to
             * the bytes from dp up to dst - 1. */
0N/A vis_pst_8(vis_faligndata(dd, dd), dp, vis_edge8(dp, dst - 1));
0N/A }
0N/A
        /* At most one pixel is left after the two-at-a-time loops. */
0N/A if (dst < dst_end) {
0N/A x = src[tmpsxloc >> shift];
0N/A tmpsxloc += sxinc;
0N/A dst[0] = x >> 24;
0N/A dst[1] = x;
0N/A dst[2] = x >> 8;
0N/A dst[3] = x >> 16;
0N/A dst += 4;
0N/A }
0N/A
0N/A PTR_ADD(dstBase, dstScan);
0N/A syloc += syinc;
0N/A }
0N/A}
0N/A
0N/A/***************************************************************/
0N/A
/*
 * Scaled IntRgb -> FourByteAbgr conversion.
 *
 * The source row is selected by (syloc >> shift) and the 32-bit source
 * pixel by (tmpsxloc >> shift), advancing by syinc per row and sxinc per
 * pixel. Every pixel is stored as the bytes 0xFF, bits 7..0, bits 15..8 and
 * bits 23..16 of the source word; the top byte of the source is ignored.
 *
 * srcBase, dstBase, width, height, sxloc, syloc, sxinc, syinc, shift,
 * pSrcInfo and pDstInfo are not declared in this function; presumably they
 * are supplied by the SCALE_PARAMS macro, defined outside this chunk.
 */
0N/Avoid ADD_SUFF(IntRgbToFourByteAbgrScaleConvert)(SCALE_PARAMS)
0N/A{
0N/A mlib_s32 dstScan = pDstInfo->scanStride;
0N/A mlib_s32 srcScan = pSrcInfo->scanStride;
0N/A mlib_s32 i, j;
0N/A mlib_s32 x;
    /* 0xFF000000 in both lanes; RGB2ABGR_DB (defined near the top of the
     * file) ORs it into the pixel pair before calling ARGB2ABGR_DB. */
0N/A mlib_d64 amask = vis_to_double_dup(0xFF000000);
0N/A
    /* Narrow rows: plain per-pixel C loop. */
0N/A if (width < 16) {
0N/A for (j = 0; j < height; j++) {
0N/A mlib_s32 *src = srcBase;
0N/A mlib_u8 *dst = dstBase;
0N/A mlib_s32 tmpsxloc = sxloc;
0N/A
0N/A PTR_ADD(src, (syloc >> shift) * srcScan);
0N/A
0N/A for (i = 0; i < width; i++) {
0N/A x = src[tmpsxloc >> shift];
0N/A tmpsxloc += sxinc;
0N/A dst[4*i ] = 0xFF;
0N/A dst[4*i + 1] = x;
0N/A dst[4*i + 2] = x >> 8;
0N/A dst[4*i + 3] = x >> 16;
0N/A }
0N/A
0N/A PTR_ADD(dstBase, dstScan);
0N/A syloc += syinc;
0N/A }
0N/A return;
0N/A }
0N/A
    /* Expands to vis_write_bmask(0x03214765, 0) when VIS >= 0x200 and to
     * nothing otherwise (see the macro near the top of the file). */
0N/A BMASK_FOR_ARGB
0N/A
0N/A for (j = 0; j < height; j++) {
0N/A mlib_s32 *src = srcBase;
0N/A mlib_u8 *dst = dstBase;
0N/A mlib_u8 *dst_end = dst + 4*width;
0N/A mlib_s32 tmpsxloc = sxloc;
0N/A mlib_d64 dd, d_old;
0N/A mlib_f32 *pp0, *pp1;
0N/A
0N/A PTR_ADD(src, (syloc >> shift) * srcScan);
0N/A
0N/A if (!((mlib_s32)dst & 3)) {
            /* 4-byte aligned destination: peel one pixel if needed to reach
             * 8-byte alignment, then store two pixels per 64-bit write. */
0N/A if ((mlib_s32)dst & 7) {
0N/A x = src[tmpsxloc >> shift];
0N/A tmpsxloc += sxinc;
0N/A dst[0] = 0xFF;
0N/A dst[1] = x;
0N/A dst[2] = x >> 8;
0N/A dst[3] = x >> 16;
0N/A dst += 4;
0N/A }
0N/A#pragma pipeloop(0)
0N/A for (; dst <= (dst_end - 2*4); dst += 2*4) {
0N/A pp0 = (mlib_f32*)src + (tmpsxloc >> shift);
0N/A pp1 = (mlib_f32*)src + ((tmpsxloc + sxinc) >> shift);
0N/A tmpsxloc += 2*sxinc;
0N/A dd = vis_freg_pair(pp0[0], pp1[0]);
0N/A RGB2ABGR_DB(dd)
0N/A *(mlib_d64*)dst = dd;
0N/A }
0N/A } else {
            /* Destination not 4-byte aligned: write through the 8-byte
             * aligned pointer dp, shifting each pixel pair with
             * vis_faligndata. dd is seeded from the existing contents of
             * dp[0] so that the first aligned store keeps the bytes that
             * precede dst. */
0N/A mlib_d64 *dp;
0N/A
0N/A dp = vis_alignaddr(dst, 0);
0N/A dd = vis_faligndata(dp[0], dp[0]);
0N/A vis_alignaddrl(dst, 0);
0N/A
0N/A#pragma pipeloop(0)
0N/A for (; dst <= (dst_end - 2*4); dst += 2*4) {
0N/A d_old = dd;
0N/A pp0 = (mlib_f32*)src + (tmpsxloc >> shift);
0N/A pp1 = (mlib_f32*)src + ((tmpsxloc + sxinc) >> shift);
0N/A tmpsxloc += 2*sxinc;
0N/A dd = vis_freg_pair(pp0[0], pp1[0]);
0N/A RGB2ABGR_DB(dd)
0N/A *dp++ = vis_faligndata(d_old, dd);
0N/A }
0N/A
            /* Flush the bytes of the last pair that did not fit into the
             * last full aligned store; the edge mask limits the write to
             * the bytes from dp up to dst - 1. */
0N/A vis_pst_8(vis_faligndata(dd, dd), dp, vis_edge8(dp, dst - 1));
0N/A }
0N/A
        /* At most one pixel is left after the two-at-a-time loops. */
0N/A if (dst < dst_end) {
0N/A x = src[tmpsxloc >> shift];
0N/A tmpsxloc += sxinc;
0N/A dst[0] = 0xFF;
0N/A dst[1] = x;
0N/A dst[2] = x >> 8;
0N/A dst[3] = x >> 16;
0N/A dst += 4;
0N/A }
0N/A
0N/A PTR_ADD(dstBase, dstScan);
0N/A syloc += syinc;
0N/A }
0N/A}
0N/A
0N/A/***************************************************************/
0N/A
/*
 * Draw a list of antialiased glyphs onto a FourByteAbgr raster.
 *
 * For every glyph with non-NULL pixels, the glyph rectangle is clipped to
 * [clipLeft, clipRight) x [clipTop, clipBottom). Each remaining row is then
 * blended into the raster, using the glyph bytes as per-pixel coverage:
 * coverage 255 stores fgpixel directly, other values mix the colour derived
 * from argbcolor with the existing destination pixel. After blending, each
 * row is passed in place through IntArgbPreToIntArgbConvert.
 *
 * Rows that do not start on a 4-byte boundary are copied with COPY_NA into
 * a temporary buffer, processed there and copied back. The buffer starts
 * as the on-stack array buff and is replaced by an mlib_malloc block when a
 * glyph is wider than the current capacity. If that allocation fails the
 * function returns silently, leaving the remaining glyphs undrawn.
 *
 * pRasInfo     raster description; rasBase and scanStride are read
 * glyphs       array of totalGlyphs glyph images
 * fgpixel      foreground pixel stored for fully covered pixels
 * argbcolor    foreground colour used for blending
 * clip*        clip rectangle; right and bottom are exclusive
 * pPrim, pCompInfo  only forwarded to IntArgbPreToIntArgbConvert
 */
0N/Avoid ADD_SUFF(FourByteAbgrDrawGlyphListAA)(SurfaceDataRasInfo * pRasInfo,
0N/A ImageRef *glyphs,
0N/A jint totalGlyphs,
0N/A jint fgpixel, jint argbcolor,
0N/A jint clipLeft, jint clipTop,
0N/A jint clipRight, jint clipBottom,
0N/A NativePrimitive * pPrim,
0N/A CompositeInfo * pCompInfo)
0N/A{
    /* BUFF_SIZE/2 doubles hold BUFF_SIZE 32-bit pixels, which is the
     * initial value of max_width below. */
0N/A mlib_d64 buff[BUFF_SIZE/2];
0N/A void *pbuff = buff;
0N/A mlib_s32 glyphCounter;
0N/A mlib_s32 scan = pRasInfo->scanStride;
0N/A mlib_u8 *dstBase;
0N/A mlib_s32 i, j;
0N/A mlib_d64 dmix0, dmix1, dd, d0, d1, e0, e1, fgpixel_d;
0N/A mlib_d64 done, done16, d_half;
0N/A mlib_s32 pix, mask;
0N/A mlib_f32 fgpixel_f, srcG_f;
0N/A mlib_s32 max_width = BUFF_SIZE;
0N/A
    /* Blend constants: done is subtracted from to get the complementary
     * weight, done16 is the threshold the per-pixel weights are compared
     * against, d_half is added to each product (presumably for rounding). */
0N/A done = vis_to_double_dup(0x7fff7fff);
0N/A done16 = vis_to_double_dup(0x7fff);
0N/A d_half = vis_to_double_dup((1 << (16 + 6)) | (1 << 6));
0N/A
    /* fgpixel duplicated into both lanes for the two-pixel store; srcG_f
     * is argbcolor run through ARGB2ABGR_FL (macro defined outside this
     * chunk; presumably reorders the bytes to the destination layout). */
0N/A fgpixel_f = vis_ldfa_ASI_PL(&fgpixel);
0N/A fgpixel_d = vis_freg_pair(fgpixel_f, fgpixel_f);
0N/A srcG_f = vis_to_float(argbcolor);
0N/A ARGB2ABGR_FL(srcG_f)
0N/A
0N/A vis_write_gsr(0 << 3);
0N/A
0N/A for (glyphCounter = 0; glyphCounter < totalGlyphs; glyphCounter++) {
0N/A const jubyte *pixels;
0N/A unsigned int rowBytes;
0N/A int left, top;
0N/A int width, height;
0N/A int right, bottom;
0N/A
0N/A pixels = (const jubyte *) glyphs[glyphCounter].pixels;
0N/A
0N/A if (!pixels) continue;
0N/A
0N/A left = glyphs[glyphCounter].x;
0N/A top = glyphs[glyphCounter].y;
0N/A width = glyphs[glyphCounter].width;
0N/A height = glyphs[glyphCounter].height;
0N/A rowBytes = width;
0N/A right = left + width;
0N/A bottom = top + height;
        /* Clip to the clip rectangle, skipping the glyph bytes that fall
         * to the left of or above it. */
0N/A if (left < clipLeft) {
0N/A pixels += clipLeft - left;
0N/A left = clipLeft;
0N/A }
0N/A if (top < clipTop) {
0N/A pixels += (clipTop - top) * rowBytes;
0N/A top = clipTop;
0N/A }
0N/A if (right > clipRight) {
0N/A right = clipRight;
0N/A }
0N/A if (bottom > clipBottom) {
0N/A bottom = clipBottom;
0N/A }
0N/A if (right <= left || bottom <= top) {
0N/A continue;
0N/A }
0N/A width = right - left;
0N/A height = bottom - top;
0N/A
0N/A dstBase = pRasInfo->rasBase;
0N/A PTR_ADD(dstBase, top*scan + 4*left);
0N/A
        /* Some row of this glyph may start off a 4-byte boundary: make
         * sure the bounce buffer can hold one full row of 32-bit pixels. */
0N/A if (((mlib_s32)dstBase | scan) & 3) {
0N/A if (width > max_width) {
0N/A if (pbuff != buff) {
0N/A mlib_free(pbuff);
0N/A }
0N/A pbuff = mlib_malloc(width*sizeof(mlib_s32));
0N/A if (pbuff == NULL) return;
0N/A max_width = width;
0N/A }
0N/A }
0N/A
0N/A for (j = 0; j < height; j++) {
0N/A mlib_u8 *src = (void*)pixels;
0N/A mlib_s32 *dst, *dst_end;
300N/A mlib_u8 *dst_start;
0N/A
            /* Misaligned row: work on a copy in pbuff, otherwise in place. */
0N/A if ((mlib_s32)dstBase & 3) {
0N/A COPY_NA(dstBase, pbuff, width*sizeof(mlib_s32));
0N/A dst = pbuff;
0N/A } else {
0N/A dst = (void*)dstBase;
0N/A }
300N/A dst_start = (void*)dst;
0N/A dst_end = dst + width;
0N/A
300N/A /* Need to reset the GSR from the values set by the
300N/A * convert call near the end of this loop.
300N/A */
300N/A vis_write_gsr(7 << 0);
300N/A
            /* Peel one pixel so that the 64-bit accesses below are 8-byte
             * aligned. MUL8_VIS is defined outside this chunk; presumably
             * it scales the four bytes of its first argument by the second. */
0N/A if ((mlib_s32)dst & 7) {
0N/A pix = *src++;
0N/A dd = vis_fpadd16(MUL8_VIS(srcG_f, pix), d_half);
0N/A dd = vis_fpadd16(MUL8_VIS(*(mlib_f32*)dst, 255 - pix), dd);
0N/A *(mlib_f32*)dst = vis_fpack16(dd);
0N/A if (pix == 255) *(mlib_f32*)dst = vis_read_hi(fgpixel_d);
0N/A dst++;
0N/A }
0N/A
0N/A#pragma pipeloop(0)
0N/A for (; dst <= (dst_end - 2); dst += 2) {
                /* Per-pixel weights looked up in vis_mul8s_tbl (defined
                 * outside this chunk) by the two coverage bytes. mask
                 * selects the lanes whose weight is below done16; dmix1 is
                 * the complementary weight applied to the destination. */
0N/A dmix0 = vis_freg_pair(((mlib_f32 *)vis_mul8s_tbl)[src[0]],
0N/A ((mlib_f32 *)vis_mul8s_tbl)[src[1]]);
0N/A mask = vis_fcmplt32(dmix0, done16);
0N/A dmix1 = vis_fpsub16(done, dmix0);
0N/A src += 2;
0N/A
                /* d0/d1 = colour * weight, e0/e1 = destination *
                 * complementary weight; summed with d_half and packed back
                 * to bytes. */
0N/A dd = *(mlib_d64*)dst;
0N/A d0 = vis_fmul8x16al(srcG_f, vis_read_hi(dmix0));
0N/A d1 = vis_fmul8x16al(srcG_f, vis_read_lo(dmix0));
0N/A e0 = vis_fmul8x16al(vis_read_hi(dd), vis_read_hi(dmix1));
0N/A e1 = vis_fmul8x16al(vis_read_lo(dd), vis_read_lo(dmix1));
0N/A d0 = vis_fpadd16(vis_fpadd16(d0, d_half), e0);
0N/A d1 = vis_fpadd16(vis_fpadd16(d1, d_half), e1);
0N/A dd = vis_fpack16_pair(d0, d1);
0N/A
                /* Store fgpixel in both pixels, then overwrite only the
                 * masked lanes with the blended result. */
0N/A *(mlib_d64*)dst = fgpixel_d;
0N/A vis_pst_32(dd, dst, mask);
0N/A }
0N/A
            /* Scalar tail, same arithmetic as the peeled first pixel. */
0N/A while (dst < dst_end) {
0N/A pix = *src++;
0N/A dd = vis_fpadd16(MUL8_VIS(srcG_f, pix), d_half);
0N/A dd = vis_fpadd16(MUL8_VIS(*(mlib_f32*)dst, 255 - pix), dd);
0N/A *(mlib_f32*)dst = vis_fpack16(dd);
0N/A if (pix == 255) *(mlib_f32*)dst = vis_read_hi(fgpixel_d);
0N/A dst++;
0N/A }
0N/A
            /* Convert the blended row in place (source and destination are
             * both dst_start, one row of width pixels). */
300N/A ADD_SUFF(IntArgbPreToIntArgbConvert)(dst_start, dst_start,
300N/A width, 1,
300N/A pRasInfo, pRasInfo,
300N/A pPrim, pCompInfo);
300N/A
            /* Copy the bounce buffer back to the misaligned raster row. */
0N/A if ((mlib_s32)dstBase & 3) {
300N/A COPY_NA(dst_start, dstBase, width*sizeof(mlib_s32));
0N/A }
0N/A
0N/A PTR_ADD(dstBase, scan);
0N/A pixels += rowBytes;
0N/A }
0N/A }
0N/A
    /* Release the bounce buffer if it was heap-allocated. */
0N/A if (pbuff != buff) {
0N/A mlib_free(pbuff);
0N/A }
0N/A}
0N/A
0N/A/***************************************************************/
0N/A
0N/A#endif /* JAVA2D_NO_MLIB */