/*
 * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */
0N/A
0N/A#if !defined(JAVA2D_NO_MLIB) || defined(MLIB_ADD_SUFF)
0N/A
0N/A#include <vis_proto.h>
0N/A#include "java2d_Mlib.h"
0N/A#include "vis_AlphaMacros.h"
0N/A
0N/A/***************************************************************/
0N/A
0N/Aextern mlib_d64 vis_d64_div_tbl[256];
0N/A
0N/A/***************************************************************/
0N/A
/*
 * Scalar RGB -> gray conversion used by the small-width fallback loops.
 * The integer weights 77/150/29 sum to 256, so the weighted sum is a
 * fixed-point value with 8 fractional bits; adding 128 rounds to nearest
 * before the final shift by 8.  Each argument is evaluated exactly once.
 */
#define RGB2GRAY(r, g, b)           \
    (((77 * (r)) + (150 * (g)) + (29 * (b)) + 128) >> 8)
0N/A
0N/A/***************************************************************/
0N/A
0N/Astatic const mlib_s32 RGB_weight[] = {
0N/A 128*77,
0N/A 128*150,
0N/A 128*29,
0N/A (1 << (16 + 6)) | (1 << 6)
0N/A};
0N/A
0N/A/***************************************************************/
0N/A
/*
 * Declares the working variables shared by every loop in this file and
 * initialises the VIS Graphics Status Register.
 *
 *   r, g, b, ar, gb, s02, s13  scratch vectors for channel extraction
 *   ff                         packed gray result
 *   alpha, beta, gamma         the three RGB_weight words viewed as mlib_f32;
 *                              used as the multipliers for r, g and b
 *   d_half                     RGB_weight[3] duplicated into both halves of
 *                              a mlib_d64 (rounding term)
 *
 * The macro ends with a statement (vis_write_gsr) and has no trailing
 * semicolon, so it must be the LAST item of a declaration list and be
 * followed by ';'.  The GSR value written is (0 << 3) | 6; per the VIS
 * manual that is scale factor 0 and align offset 6.
 */
#define RGB_VARS                                               \
    mlib_d64 r, g, b, ar, gb, s02, s13;                        \
    mlib_f32 ff;                                               \
    mlib_f32 alpha = ((mlib_f32*)RGB_weight)[0];               \
    mlib_f32 beta  = ((mlib_f32*)RGB_weight)[1];               \
    mlib_f32 gamma = ((mlib_f32*)RGB_weight)[2];               \
    mlib_d64 d_half = vis_to_double_dup(RGB_weight[3]);        \
                                                               \
    vis_write_gsr((0 << 3) | 6)
0N/A
0N/A/***************************************************************/
0N/A
/*
 * Compute up to four gray bytes from per-channel byte vectors.
 *
 *   ff       (out) mlib_f32 receiving the packed 8-bit gray values
 *   r, g, b  (in)  mlib_f32 vectors holding the red/green/blue bytes
 *
 * Each channel is multiplied by its weight (alpha/beta/gamma from
 * RGB_VARS), the three products and the rounding term d_half are summed
 * as 16-bit lanes, and vis_fpack16 narrows the result to bytes (the
 * narrowing depends on the GSR scale factor set by the caller).
 *
 * Expands to a brace-enclosed block, not do { } while (0): do not use it
 * as the unbraced body of an if that has an else.
 */
#define GRAY_U8(ff, r, g, b)           \
{                                      \
    mlib_d64 dr, dg, db;               \
    dr = vis_fmul8x16al(r, alpha);     \
    dg = vis_fmul8x16al(g, beta);      \
    db = vis_fmul8x16al(b, gamma);     \
    dr = vis_fpadd16(dr, dg);          \
    db = vis_fpadd16(db, d_half);      \
    dr = vis_fpadd16(dr, db);          \
    ff = vis_fpack16(dr);              \
}
0N/A
0N/A/***************************************************************/
0N/A
/*
 * Same weighted sum as GRAY_U8, but the result is left as four 16-bit
 * lanes in the mlib_d64 `dd` instead of being packed to bytes.  The
 * caller is responsible for any further scaling and packing.
 *
 *   dd       (out) mlib_d64 receiving r*alpha + g*beta + b*gamma + d_half
 *   r, g, b  (in)  mlib_f32 vectors holding the red/green/blue bytes
 *
 * Expands to a brace-enclosed block, not do { } while (0).
 */
#define GRAY_S16(dd, r, g, b)          \
{                                      \
    mlib_d64 dr, dg, db;               \
    dr = vis_fmul8x16al(r, alpha);     \
    dg = vis_fmul8x16al(g, beta);      \
    db = vis_fmul8x16al(b, gamma);     \
    dr = vis_fpadd16(dr, dg);          \
    db = vis_fpadd16(db, d_half);      \
    dd = vis_fpadd16(dr, db);          \
}
0N/A
0N/A/***************************************************************/
0N/A
/*
 * Load one 3-byte B,G,R pixel located `ind` bytes past `src` and push its
 * three bytes into the accumulators b, g and r via vis_faligndata.
 * Callers in this file set the GSR align offset to 7 first
 * (vis_alignaddr(NULL, 7)) and invoke the macro for pixels 3,2,1,0 in that
 * order, then read the four gathered bytes from the high half of r/g/b.
 *
 * Relies on `src`, `r`, `g` and `b` being in scope at the expansion site.
 * The argument is parenthesised so that any expression may be passed.
 */
#define LOAD_BGR(ind)                                            \
    b = vis_faligndata(vis_ld_u8(src + ((ind)    )), b);         \
    g = vis_faligndata(vis_ld_u8(src + ((ind) + 1)), g);         \
    r = vis_faligndata(vis_ld_u8(src + ((ind) + 2)), r)
0N/A
0N/A/***************************************************************/
0N/A
0N/Avoid ADD_SUFF(IntArgbToByteGrayConvert)(BLIT_PARAMS)
0N/A{
0N/A mlib_s32 dstScan = pDstInfo->scanStride;
0N/A mlib_s32 srcScan = pSrcInfo->scanStride;
0N/A mlib_u8 *dst_end;
0N/A mlib_s32 j;
0N/A RGB_VARS;
0N/A
0N/A if (dstScan == width && srcScan == 4*width) {
0N/A width *= height;
0N/A height = 1;
0N/A }
0N/A
0N/A for (j = 0; j < height; j++) {
0N/A mlib_f32 *src = srcBase;
0N/A mlib_u8 *dst = dstBase;
0N/A
0N/A dst_end = dst + width;
0N/A
0N/A while (((mlib_s32)dst & 3) && dst < dst_end) {
0N/A r = vis_ld_u8((mlib_u8*)src + 1);
0N/A g = vis_ld_u8((mlib_u8*)src + 2);
0N/A b = vis_ld_u8((mlib_u8*)src + 3);
0N/A GRAY_U8(ff, vis_read_lo(r), vis_read_lo(g), vis_read_lo(b));
0N/A vis_st_u8(D64_FROM_F32x2(ff), dst);
0N/A dst++;
0N/A src++;
0N/A }
0N/A
0N/A#pragma pipeloop(0)
0N/A for (; dst <= (dst_end - 4); dst += 4) {
0N/A s02 = vis_fpmerge(src[0], src[2]);
0N/A s13 = vis_fpmerge(src[1], src[3]);
0N/A ar = vis_fpmerge(vis_read_hi(s02), vis_read_hi(s13));
0N/A gb = vis_fpmerge(vis_read_lo(s02), vis_read_lo(s13));
0N/A GRAY_U8(ff, vis_read_lo(ar), vis_read_hi(gb), vis_read_lo(gb));
0N/A *(mlib_f32*)dst = ff;
0N/A src += 4;
0N/A }
0N/A
0N/A while (dst < dst_end) {
0N/A r = vis_ld_u8((mlib_u8*)src + 1);
0N/A g = vis_ld_u8((mlib_u8*)src + 2);
0N/A b = vis_ld_u8((mlib_u8*)src + 3);
0N/A GRAY_U8(ff, vis_read_lo(r), vis_read_lo(g), vis_read_lo(b));
0N/A vis_st_u8(D64_FROM_F32x2(ff), dst);
0N/A dst++;
0N/A src++;
0N/A }
0N/A
0N/A PTR_ADD(dstBase, dstScan);
0N/A PTR_ADD(srcBase, srcScan);
0N/A }
0N/A}
0N/A
0N/A/***************************************************************/
0N/A
0N/Avoid ADD_SUFF(ThreeByteBgrToByteGrayConvert)(BLIT_PARAMS)
0N/A{
0N/A mlib_s32 dstScan = pDstInfo->scanStride;
0N/A mlib_s32 srcScan = pSrcInfo->scanStride;
0N/A mlib_u8 *dst_end;
0N/A mlib_s32 j;
0N/A RGB_VARS;
0N/A
0N/A vis_alignaddr(NULL, 7);
0N/A
0N/A if (dstScan == width && srcScan == 3*width) {
0N/A width *= height;
0N/A height = 1;
0N/A }
0N/A
0N/A for (j = 0; j < height; j++) {
0N/A mlib_u8 *src = srcBase;
0N/A mlib_u8 *dst = dstBase;
0N/A
0N/A dst_end = dst + width;
0N/A
0N/A while (((mlib_s32)dst & 3) && dst < dst_end) {
0N/A b = vis_ld_u8(src);
0N/A g = vis_ld_u8(src + 1);
0N/A r = vis_ld_u8(src + 2);
0N/A GRAY_U8(ff, vis_read_lo(r), vis_read_lo(g), vis_read_lo(b));
0N/A vis_st_u8(D64_FROM_F32x2(ff), dst);
0N/A dst++;
0N/A src += 3;
0N/A }
0N/A
0N/A#pragma pipeloop(0)
0N/A for (; dst <= (dst_end - 4); dst += 4) {
0N/A LOAD_BGR(9);
0N/A LOAD_BGR(6);
0N/A LOAD_BGR(3);
0N/A LOAD_BGR(0);
0N/A GRAY_U8(ff, vis_read_hi(r), vis_read_hi(g), vis_read_hi(b));
0N/A *(mlib_f32*)dst = ff;
0N/A src += 3*4;
0N/A }
0N/A
0N/A while (dst < dst_end) {
0N/A b = vis_ld_u8(src);
0N/A g = vis_ld_u8(src + 1);
0N/A r = vis_ld_u8(src + 2);
0N/A GRAY_U8(ff, vis_read_lo(r), vis_read_lo(g), vis_read_lo(b));
0N/A vis_st_u8(D64_FROM_F32x2(ff), dst);
0N/A dst++;
0N/A src += 3;
0N/A }
0N/A
0N/A PTR_ADD(dstBase, dstScan);
0N/A PTR_ADD(srcBase, srcScan);
0N/A }
0N/A}
0N/A
0N/A/***************************************************************/
0N/A
0N/Avoid ADD_SUFF(IntArgbToByteGrayScaleConvert)(SCALE_PARAMS)
0N/A{
0N/A mlib_s32 dstScan = pDstInfo->scanStride;
0N/A mlib_s32 srcScan = pSrcInfo->scanStride;
0N/A mlib_u8 *dst_end;
0N/A mlib_s32 i, j;
0N/A RGB_VARS;
0N/A
0N/A for (j = 0; j < height; j++) {
0N/A mlib_f32 *src = srcBase;
0N/A mlib_u8 *dst = dstBase;
0N/A mlib_s32 tmpsxloc = sxloc;
0N/A
0N/A PTR_ADD(src, (syloc >> shift) * srcScan);
0N/A
0N/A dst_end = dst + width;
0N/A
0N/A while (((mlib_s32)dst & 3) && dst < dst_end) {
0N/A i = tmpsxloc >> shift;
0N/A tmpsxloc += sxinc;
0N/A r = vis_ld_u8((mlib_u8*)(src + i) + 1);
0N/A g = vis_ld_u8((mlib_u8*)(src + i) + 2);
0N/A b = vis_ld_u8((mlib_u8*)(src + i) + 3);
0N/A GRAY_U8(ff, vis_read_lo(r), vis_read_lo(g), vis_read_lo(b));
0N/A vis_st_u8(D64_FROM_F32x2(ff), dst);
0N/A dst++;
0N/A }
0N/A
0N/A#pragma pipeloop(0)
0N/A for (; dst <= (dst_end - 4); dst += 4) {
0N/A s02 = vis_fpmerge(src[(tmpsxloc ) >> shift],
0N/A src[(tmpsxloc + 2*sxinc) >> shift]);
0N/A s13 = vis_fpmerge(src[(tmpsxloc + sxinc) >> shift],
0N/A src[(tmpsxloc + 3*sxinc) >> shift]);
0N/A tmpsxloc += 4*sxinc;
0N/A ar = vis_fpmerge(vis_read_hi(s02), vis_read_hi(s13));
0N/A gb = vis_fpmerge(vis_read_lo(s02), vis_read_lo(s13));
0N/A GRAY_U8(ff, vis_read_lo(ar), vis_read_hi(gb), vis_read_lo(gb));
0N/A *(mlib_f32*)dst = ff;
0N/A }
0N/A
0N/A while (dst < dst_end) {
0N/A i = tmpsxloc >> shift;
0N/A tmpsxloc += sxinc;
0N/A r = vis_ld_u8((mlib_u8*)(src + i) + 1);
0N/A g = vis_ld_u8((mlib_u8*)(src + i) + 2);
0N/A b = vis_ld_u8((mlib_u8*)(src + i) + 3);
0N/A GRAY_U8(ff, vis_read_lo(r), vis_read_lo(g), vis_read_lo(b));
0N/A vis_st_u8(D64_FROM_F32x2(ff), dst);
0N/A dst++;
0N/A }
0N/A
0N/A PTR_ADD(dstBase, dstScan);
0N/A syloc += syinc;
0N/A }
0N/A}
0N/A
0N/A/***************************************************************/
0N/A
0N/Avoid ADD_SUFF(ThreeByteBgrToByteGrayScaleConvert)(SCALE_PARAMS)
0N/A{
0N/A mlib_s32 dstScan = pDstInfo->scanStride;
0N/A mlib_s32 srcScan = pSrcInfo->scanStride;
0N/A mlib_u8 *dst_end;
0N/A mlib_s32 j, i0, i1, i2, i3;
0N/A RGB_VARS;
0N/A
0N/A vis_alignaddr(NULL, 7);
0N/A
0N/A for (j = 0; j < height; j++) {
0N/A mlib_u8 *src = srcBase;
0N/A mlib_u8 *dst = dstBase;
0N/A mlib_s32 tmpsxloc = sxloc;
0N/A
0N/A PTR_ADD(src, (syloc >> shift) * srcScan);
0N/A
0N/A dst_end = dst + width;
0N/A
0N/A while (((mlib_s32)dst & 3) && dst < dst_end) {
0N/A i0 = 3*(tmpsxloc >> shift);
0N/A tmpsxloc += sxinc;
0N/A b = vis_ld_u8(src + i0);
0N/A g = vis_ld_u8(src + i0 + 1);
0N/A r = vis_ld_u8(src + i0 + 2);
0N/A GRAY_U8(ff, vis_read_lo(r), vis_read_lo(g), vis_read_lo(b));
0N/A vis_st_u8(D64_FROM_F32x2(ff), dst);
0N/A dst++;
0N/A }
0N/A
0N/A#pragma pipeloop(0)
0N/A for (; dst <= (dst_end - 4); dst += 4) {
0N/A i0 = 3*(tmpsxloc >> shift);
0N/A tmpsxloc += sxinc;
0N/A i1 = 3*(tmpsxloc >> shift);
0N/A tmpsxloc += sxinc;
0N/A i2 = 3*(tmpsxloc >> shift);
0N/A tmpsxloc += sxinc;
0N/A i3 = 3*(tmpsxloc >> shift);
0N/A tmpsxloc += sxinc;
0N/A LOAD_BGR(i3);
0N/A LOAD_BGR(i2);
0N/A LOAD_BGR(i1);
0N/A LOAD_BGR(i0);
0N/A GRAY_U8(ff, vis_read_hi(r), vis_read_hi(g), vis_read_hi(b));
0N/A *(mlib_f32*)dst = ff;
0N/A }
0N/A
0N/A while (dst < dst_end) {
0N/A i0 = 3*(tmpsxloc >> shift);
0N/A tmpsxloc += sxinc;
0N/A b = vis_ld_u8(src + i0);
0N/A g = vis_ld_u8(src + i0 + 1);
0N/A r = vis_ld_u8(src + i0 + 2);
0N/A GRAY_U8(ff, vis_read_lo(r), vis_read_lo(g), vis_read_lo(b));
0N/A vis_st_u8(D64_FROM_F32x2(ff), dst);
0N/A dst++;
0N/A }
0N/A
0N/A PTR_ADD(dstBase, dstScan);
0N/A syloc += syinc;
0N/A }
0N/A}
0N/A
0N/A/***************************************************************/
0N/A
0N/Avoid ADD_SUFF(IntArgbBmToByteGrayXparOver)(BLIT_PARAMS)
0N/A{
0N/A mlib_s32 dstScan = pDstInfo->scanStride;
0N/A mlib_s32 srcScan = pSrcInfo->scanStride;
0N/A mlib_u8 *dst_end;
0N/A mlib_d64 dzero = vis_fzero();
0N/A mlib_f32 f0, f1;
0N/A mlib_s32 i, j, mask0, mask1;
0N/A RGB_VARS;
0N/A
0N/A if (width < 8) {
0N/A for (j = 0; j < height; j++) {
0N/A mlib_u8 *src = srcBase;
0N/A mlib_u8 *dst = dstBase;
0N/A
0N/A for (i = 0; i < width; i++) {
0N/A if (src[4*i]) {
0N/A dst[i] = RGB2GRAY(src[4*i + 1], src[4*i + 2], src[4*i + 3]);
0N/A }
0N/A }
0N/A
0N/A PTR_ADD(dstBase, dstScan);
0N/A PTR_ADD(srcBase, srcScan);
0N/A }
0N/A return;
0N/A }
0N/A
0N/A for (j = 0; j < height; j++) {
0N/A mlib_f32 *src = srcBase;
0N/A mlib_u8 *dst = dstBase;
0N/A
0N/A dst_end = dst + width;
0N/A
0N/A while (((mlib_s32)dst & 7) && dst < dst_end) {
0N/A if (*(mlib_u8*)src) {
0N/A r = vis_ld_u8((mlib_u8*)src + 1);
0N/A g = vis_ld_u8((mlib_u8*)src + 2);
0N/A b = vis_ld_u8((mlib_u8*)src + 3);
0N/A GRAY_U8(ff, vis_read_lo(r), vis_read_lo(g), vis_read_lo(b));
0N/A vis_st_u8(D64_FROM_F32x2(ff), dst);
0N/A }
0N/A dst++;
0N/A src++;
0N/A }
0N/A
0N/A#pragma pipeloop(0)
0N/A for (; dst <= (dst_end - 8); dst += 8) {
0N/A s02 = vis_fpmerge(src[0], src[2]);
0N/A s13 = vis_fpmerge(src[1], src[3]);
0N/A src += 4;
0N/A ar = vis_fpmerge(vis_read_hi(s02), vis_read_hi(s13));
0N/A gb = vis_fpmerge(vis_read_lo(s02), vis_read_lo(s13));
0N/A mask0 = vis_fcmpne16(vis_fpmerge(vis_read_hi(ar), vis_read_hi(ar)),
0N/A dzero);
0N/A GRAY_U8(f0, vis_read_lo(ar), vis_read_hi(gb), vis_read_lo(gb));
0N/A
0N/A s02 = vis_fpmerge(src[0], src[2]);
0N/A s13 = vis_fpmerge(src[1], src[3]);
0N/A src += 4;
0N/A ar = vis_fpmerge(vis_read_hi(s02), vis_read_hi(s13));
0N/A gb = vis_fpmerge(vis_read_lo(s02), vis_read_lo(s13));
0N/A mask1 = vis_fcmpne16(vis_fpmerge(vis_read_hi(ar), vis_read_hi(ar)),
0N/A dzero);
0N/A GRAY_U8(f1, vis_read_lo(ar), vis_read_hi(gb), vis_read_lo(gb));
0N/A
0N/A vis_pst_8(vis_freg_pair(f0, f1), dst, (mask0 << 4) | mask1);
0N/A }
0N/A
0N/A while (dst < dst_end) {
0N/A if (*(mlib_u8*)src) {
0N/A r = vis_ld_u8((mlib_u8*)src + 1);
0N/A g = vis_ld_u8((mlib_u8*)src + 2);
0N/A b = vis_ld_u8((mlib_u8*)src + 3);
0N/A GRAY_U8(ff, vis_read_lo(r), vis_read_lo(g), vis_read_lo(b));
0N/A vis_st_u8(D64_FROM_F32x2(ff), dst);
0N/A }
0N/A dst++;
0N/A src++;
0N/A }
0N/A
0N/A PTR_ADD(dstBase, dstScan);
0N/A PTR_ADD(srcBase, srcScan);
0N/A }
0N/A}
0N/A
0N/A/***************************************************************/
0N/A
0N/Avoid ADD_SUFF(IntArgbBmToByteGrayXparBgCopy)(BCOPY_PARAMS)
0N/A{
0N/A mlib_s32 dstScan = pDstInfo->scanStride;
0N/A mlib_s32 srcScan = pSrcInfo->scanStride;
0N/A mlib_u8 *dst_end;
0N/A mlib_d64 dzero = vis_fzero(), d_bgpixel;
0N/A mlib_f32 f0, f1;
0N/A mlib_s32 i, j, mask0, mask1;
0N/A RGB_VARS;
0N/A
0N/A if (width < 8) {
0N/A for (j = 0; j < height; j++) {
0N/A mlib_u8 *src = srcBase;
0N/A mlib_u8 *dst = dstBase;
0N/A
0N/A for (i = 0; i < width; i++) {
0N/A if (src[4*i]) {
0N/A dst[i] = RGB2GRAY(src[4*i + 1], src[4*i + 2], src[4*i + 3]);
0N/A } else {
0N/A dst[i] = bgpixel;
0N/A }
0N/A }
0N/A
0N/A PTR_ADD(dstBase, dstScan);
0N/A PTR_ADD(srcBase, srcScan);
0N/A }
0N/A return;
0N/A }
0N/A
0N/A D64_FROM_U8x8(d_bgpixel, bgpixel);
0N/A
0N/A for (j = 0; j < height; j++) {
0N/A mlib_f32 *src = srcBase;
0N/A mlib_u8 *dst = dstBase;
0N/A
0N/A dst_end = dst + width;
0N/A
0N/A while (((mlib_s32)dst & 7) && dst < dst_end) {
0N/A if (*(mlib_u8*)src) {
0N/A r = vis_ld_u8((mlib_u8*)src + 1);
0N/A g = vis_ld_u8((mlib_u8*)src + 2);
0N/A b = vis_ld_u8((mlib_u8*)src + 3);
0N/A GRAY_U8(ff, vis_read_lo(r), vis_read_lo(g), vis_read_lo(b));
0N/A vis_st_u8(D64_FROM_F32x2(ff), dst);
0N/A } else {
0N/A *dst = bgpixel;
0N/A }
0N/A dst++;
0N/A src++;
0N/A }
0N/A
0N/A#pragma pipeloop(0)
0N/A for (; dst <= (dst_end - 8); dst += 8) {
0N/A s02 = vis_fpmerge(src[0], src[2]);
0N/A s13 = vis_fpmerge(src[1], src[3]);
0N/A src += 4;
0N/A ar = vis_fpmerge(vis_read_hi(s02), vis_read_hi(s13));
0N/A gb = vis_fpmerge(vis_read_lo(s02), vis_read_lo(s13));
0N/A mask0 = vis_fcmpne16(vis_fpmerge(vis_read_hi(ar), vis_read_hi(ar)),
0N/A dzero);
0N/A GRAY_U8(f0, vis_read_lo(ar), vis_read_hi(gb), vis_read_lo(gb));
0N/A
0N/A s02 = vis_fpmerge(src[0], src[2]);
0N/A s13 = vis_fpmerge(src[1], src[3]);
0N/A src += 4;
0N/A ar = vis_fpmerge(vis_read_hi(s02), vis_read_hi(s13));
0N/A gb = vis_fpmerge(vis_read_lo(s02), vis_read_lo(s13));
0N/A mask1 = vis_fcmpne16(vis_fpmerge(vis_read_hi(ar), vis_read_hi(ar)),
0N/A dzero);
0N/A GRAY_U8(f1, vis_read_lo(ar), vis_read_hi(gb), vis_read_lo(gb));
0N/A
0N/A *(mlib_d64*)dst = d_bgpixel;
0N/A vis_pst_8(vis_freg_pair(f0, f1), dst, (mask0 << 4) | mask1);
0N/A }
0N/A
0N/A while (dst < dst_end) {
0N/A if (*(mlib_u8*)src) {
0N/A r = vis_ld_u8((mlib_u8*)src + 1);
0N/A g = vis_ld_u8((mlib_u8*)src + 2);
0N/A b = vis_ld_u8((mlib_u8*)src + 3);
0N/A GRAY_U8(ff, vis_read_lo(r), vis_read_lo(g), vis_read_lo(b));
0N/A vis_st_u8(D64_FROM_F32x2(ff), dst);
0N/A } else {
0N/A *dst = bgpixel;
0N/A }
0N/A dst++;
0N/A src++;
0N/A }
0N/A
0N/A PTR_ADD(dstBase, dstScan);
0N/A PTR_ADD(srcBase, srcScan);
0N/A }
0N/A}
0N/A
0N/A/***************************************************************/
0N/A
0N/Avoid ADD_SUFF(IntArgbToByteGrayXorBlit)(BLIT_PARAMS)
0N/A{
0N/A mlib_s32 dstScan = pDstInfo->scanStride;
0N/A mlib_s32 srcScan = pSrcInfo->scanStride;
0N/A mlib_u8 *dst_end;
0N/A mlib_d64 dd, d_xorpixel, d_alphamask, dzero = vis_fzero();
0N/A mlib_f32 f0, f1;
0N/A mlib_s32 i, j, mask0, mask1;
0N/A jint xorpixel = pCompInfo->details.xorPixel;
0N/A juint alphamask = pCompInfo->alphaMask;
0N/A RGB_VARS;
0N/A
0N/A if (width < 8) {
0N/A for (j = 0; j < height; j++) {
0N/A mlib_s32 *src = srcBase;
0N/A mlib_u8 *dst = dstBase;
0N/A mlib_s32 srcpixel, r, g, b;
0N/A
0N/A for (i = 0; i < width; i++) {
0N/A srcpixel = src[i];
0N/A if (srcpixel >= 0) continue;
0N/A b = (srcpixel) & 0xff;
0N/A g = (srcpixel >> 8) & 0xff;
0N/A r = (srcpixel >> 16) & 0xff;
0N/A srcpixel = (77*r + 150*g + 29*b + 128) / 256;
0N/A dst[i] ^= (((srcpixel) ^ (xorpixel)) & ~(alphamask));
0N/A }
0N/A
0N/A PTR_ADD(dstBase, dstScan);
0N/A PTR_ADD(srcBase, srcScan);
0N/A }
0N/A return;
0N/A }
0N/A
0N/A D64_FROM_U8x8(d_xorpixel, xorpixel);
0N/A D64_FROM_U8x8(d_alphamask, alphamask);
0N/A
0N/A for (j = 0; j < height; j++) {
0N/A mlib_f32 *src = srcBase;
0N/A mlib_u8 *dst = dstBase;
0N/A
0N/A dst_end = dst + width;
0N/A
0N/A while (((mlib_s32)dst & 7) && dst < dst_end) {
0N/A if ((*(mlib_u8*)src) & 0x80) {
0N/A r = vis_ld_u8((mlib_u8*)src + 1);
0N/A g = vis_ld_u8((mlib_u8*)src + 2);
0N/A b = vis_ld_u8((mlib_u8*)src + 3);
0N/A GRAY_U8(ff, vis_read_lo(r), vis_read_lo(g), vis_read_lo(b));
0N/A dd = vis_fxor(D64_FROM_F32x2(ff), d_xorpixel);
0N/A dd = vis_fandnot(d_alphamask, dd);
0N/A vis_st_u8(vis_fxor(vis_ld_u8(dst), dd), dst);
0N/A }
0N/A dst++;
0N/A src++;
0N/A }
0N/A
0N/A#pragma pipeloop(0)
0N/A for (; dst <= (dst_end - 8); dst += 8) {
0N/A s02 = vis_fpmerge(src[0], src[2]);
0N/A s13 = vis_fpmerge(src[1], src[3]);
0N/A src += 4;
0N/A ar = vis_fpmerge(vis_read_hi(s02), vis_read_hi(s13));
0N/A gb = vis_fpmerge(vis_read_lo(s02), vis_read_lo(s13));
0N/A mask0 = vis_fcmplt16(vis_fpmerge(vis_read_hi(ar), vis_read_hi(ar)),
0N/A dzero);
0N/A GRAY_U8(f0, vis_read_lo(ar), vis_read_hi(gb), vis_read_lo(gb));
0N/A
0N/A s02 = vis_fpmerge(src[0], src[2]);
0N/A s13 = vis_fpmerge(src[1], src[3]);
0N/A src += 4;
0N/A ar = vis_fpmerge(vis_read_hi(s02), vis_read_hi(s13));
0N/A gb = vis_fpmerge(vis_read_lo(s02), vis_read_lo(s13));
0N/A mask1 = vis_fcmplt16(vis_fpmerge(vis_read_hi(ar), vis_read_hi(ar)),
0N/A dzero);
0N/A GRAY_U8(f1, vis_read_lo(ar), vis_read_hi(gb), vis_read_lo(gb));
0N/A
0N/A dd = vis_freg_pair(f0, f1);
0N/A dd = vis_fandnot(d_alphamask, vis_fxor(dd, d_xorpixel));
0N/A vis_pst_8(vis_fxor(*(mlib_d64*)dst, dd), dst, (mask0 << 4) | mask1);
0N/A }
0N/A
0N/A while (dst < dst_end) {
0N/A if ((*(mlib_u8*)src) & 0x80) {
0N/A r = vis_ld_u8((mlib_u8*)src + 1);
0N/A g = vis_ld_u8((mlib_u8*)src + 2);
0N/A b = vis_ld_u8((mlib_u8*)src + 3);
0N/A GRAY_U8(ff, vis_read_lo(r), vis_read_lo(g), vis_read_lo(b));
0N/A dd = vis_fxor(D64_FROM_F32x2(ff), d_xorpixel);
0N/A dd = vis_fandnot(d_alphamask, dd);
0N/A vis_st_u8(vis_fxor(vis_ld_u8(dst), dd), dst);
0N/A }
0N/A dst++;
0N/A src++;
0N/A }
0N/A
0N/A PTR_ADD(dstBase, dstScan);
0N/A PTR_ADD(srcBase, srcScan);
0N/A }
0N/A}
0N/A
0N/A/***************************************************************/
0N/A
0N/Avoid ADD_SUFF(IntArgbBmToByteGrayScaleXparOver)(SCALE_PARAMS)
0N/A{
0N/A mlib_s32 dstScan = pDstInfo->scanStride;
0N/A mlib_s32 srcScan = pSrcInfo->scanStride;
0N/A mlib_u8 *dst_end;
0N/A mlib_d64 dzero = vis_fzero();
0N/A mlib_f32 f0, f1;
0N/A mlib_s32 i, j, mask0, mask1;
0N/A RGB_VARS;
0N/A
0N/A for (j = 0; j < height; j++) {
0N/A mlib_f32 *src = srcBase;
0N/A mlib_u8 *dst = dstBase;
0N/A mlib_s32 tmpsxloc = sxloc;
0N/A
0N/A PTR_ADD(src, (syloc >> shift) * srcScan);
0N/A
0N/A dst_end = dst + width;
0N/A
0N/A while (((mlib_s32)dst & 7) && dst < dst_end) {
0N/A i = tmpsxloc >> shift;
0N/A tmpsxloc += sxinc;
0N/A if (*(mlib_u8*)(src + i)) {
0N/A r = vis_ld_u8((mlib_u8*)(src + i) + 1);
0N/A g = vis_ld_u8((mlib_u8*)(src + i) + 2);
0N/A b = vis_ld_u8((mlib_u8*)(src + i) + 3);
0N/A GRAY_U8(ff, vis_read_lo(r), vis_read_lo(g), vis_read_lo(b));
0N/A vis_st_u8(D64_FROM_F32x2(ff), dst);
0N/A }
0N/A dst++;
0N/A }
0N/A
0N/A#pragma pipeloop(0)
0N/A for (; dst <= (dst_end - 8); dst += 8) {
0N/A s02 = vis_fpmerge(src[(tmpsxloc ) >> shift],
0N/A src[(tmpsxloc + 2*sxinc) >> shift]);
0N/A s13 = vis_fpmerge(src[(tmpsxloc + sxinc) >> shift],
0N/A src[(tmpsxloc + 3*sxinc) >> shift]);
0N/A tmpsxloc += 4*sxinc;
0N/A ar = vis_fpmerge(vis_read_hi(s02), vis_read_hi(s13));
0N/A gb = vis_fpmerge(vis_read_lo(s02), vis_read_lo(s13));
0N/A mask0 = vis_fcmpne16(vis_fpmerge(vis_read_hi(ar), vis_read_hi(ar)),
0N/A dzero);
0N/A GRAY_U8(f0, vis_read_lo(ar), vis_read_hi(gb), vis_read_lo(gb));
0N/A
0N/A s02 = vis_fpmerge(src[(tmpsxloc ) >> shift],
0N/A src[(tmpsxloc + 2*sxinc) >> shift]);
0N/A s13 = vis_fpmerge(src[(tmpsxloc + sxinc) >> shift],
0N/A src[(tmpsxloc + 3*sxinc) >> shift]);
0N/A tmpsxloc += 4*sxinc;
0N/A ar = vis_fpmerge(vis_read_hi(s02), vis_read_hi(s13));
0N/A gb = vis_fpmerge(vis_read_lo(s02), vis_read_lo(s13));
0N/A mask1 = vis_fcmpne16(vis_fpmerge(vis_read_hi(ar), vis_read_hi(ar)),
0N/A dzero);
0N/A GRAY_U8(f1, vis_read_lo(ar), vis_read_hi(gb), vis_read_lo(gb));
0N/A
0N/A vis_pst_8(vis_freg_pair(f0, f1), dst, (mask0 << 4) | mask1);
0N/A }
0N/A
0N/A while (dst < dst_end) {
0N/A i = tmpsxloc >> shift;
0N/A tmpsxloc += sxinc;
0N/A if (*(mlib_u8*)(src + i)) {
0N/A r = vis_ld_u8((mlib_u8*)(src + i) + 1);
0N/A g = vis_ld_u8((mlib_u8*)(src + i) + 2);
0N/A b = vis_ld_u8((mlib_u8*)(src + i) + 3);
0N/A GRAY_U8(ff, vis_read_lo(r), vis_read_lo(g), vis_read_lo(b));
0N/A vis_st_u8(D64_FROM_F32x2(ff), dst);
0N/A }
0N/A dst++;
0N/A }
0N/A
0N/A PTR_ADD(dstBase, dstScan);
0N/A syloc += syinc;
0N/A }
0N/A}
0N/A
0N/A/***************************************************************/
0N/A
/*
 * Base pointers into the multiply / divide lookup tables, pre-offset for
 * the 16-bit loads (vis_ld_u16) done by the blending loops.  Callers index
 * them as TBL_MUL + 2*alpha (mlib_s16 units) and TBL_DIV + 8*alpha (bytes).
 * NOTE(review): the +1 / +2 offsets presumably select which half-word of
 * each table entry is loaded -- confirm against the layout of
 * vis_mul8s_tbl and vis_div8_tbl, which are declared outside this file.
 */
#define TBL_MUL ((mlib_s16*)vis_mul8s_tbl + 1)
#define TBL_DIV ((mlib_u8*)vis_div8_tbl + 2)
0N/A
0N/Avoid ADD_SUFF(IntArgbToByteGraySrcOverMaskBlit)(MASKBLIT_PARAMS)
0N/A{
0N/A mlib_s32 extraA;
0N/A mlib_s32 dstScan = pDstInfo->scanStride;
0N/A mlib_s32 srcScan = pSrcInfo->scanStride;
0N/A mlib_u8 *mul8_extra;
0N/A mlib_u8 *dst_end;
0N/A mlib_d64 srcAx4, dd, d0, d1;
0N/A mlib_d64 done = vis_to_double_dup(0x7fff7fff);
0N/A mlib_s32 j, srcA0, srcA1, srcA2, srcA3;
0N/A RGB_VARS;
0N/A
0N/A extraA = (mlib_s32)(pCompInfo->details.extraAlpha * 255.0 + 0.5);
0N/A mul8_extra = mul8table[extraA];
0N/A
0N/A if (pMask != NULL) {
0N/A pMask += maskOff;
0N/A
0N/A if (dstScan == width && srcScan == 4*width && maskScan == width) {
0N/A width *= height;
0N/A height = 1;
0N/A }
0N/A
0N/A maskScan -= width;
0N/A
0N/A for (j = 0; j < height; j++) {
0N/A mlib_f32 *src = srcBase;
0N/A mlib_u8 *dst = dstBase;
0N/A
0N/A dst_end = dst + width;
0N/A
0N/A while (((mlib_s32)dst & 3) && dst < dst_end) {
0N/A srcA0 = mul8table[mul8_extra[*pMask++]][*(mlib_u8*)src];
0N/A r = vis_ld_u8((mlib_u8*)src + 1);
0N/A g = vis_ld_u8((mlib_u8*)src + 2);
0N/A b = vis_ld_u8((mlib_u8*)src + 3);
0N/A GRAY_U8(ff, vis_read_lo(r), vis_read_lo(g), vis_read_lo(b));
0N/A d0 = vis_fpadd16(MUL8_VIS(ff, srcA0), d_half);
0N/A d1 = MUL8_VIS(vis_read_lo(vis_ld_u8(dst)), 255 - srcA0);
0N/A dd = vis_fpadd16(d0, d1);
0N/A vis_st_u8(D64_FROM_F32x2(vis_fpack16(dd)), dst);
0N/A dst++;
0N/A src++;
0N/A }
0N/A
0N/A#pragma pipeloop(0)
0N/A for (; dst <= (dst_end - 4); dst += 4) {
0N/A srcA0 = mul8table[mul8_extra[*pMask++]][*(mlib_u8*)src];
0N/A srcA1 = mul8table[mul8_extra[*pMask++]][*(mlib_u8*)(src + 1)];
0N/A srcA2 = mul8table[mul8_extra[*pMask++]][*(mlib_u8*)(src + 2)];
0N/A srcA3 = mul8table[mul8_extra[*pMask++]][*(mlib_u8*)(src + 3)];
0N/A srcAx4 = vis_faligndata(vis_ld_u16(TBL_MUL + 2*srcA3), srcAx4);
0N/A srcAx4 = vis_faligndata(vis_ld_u16(TBL_MUL + 2*srcA2), srcAx4);
0N/A srcAx4 = vis_faligndata(vis_ld_u16(TBL_MUL + 2*srcA1), srcAx4);
0N/A srcAx4 = vis_faligndata(vis_ld_u16(TBL_MUL + 2*srcA0), srcAx4);
0N/A
0N/A s02 = vis_fpmerge(src[0], src[2]);
0N/A s13 = vis_fpmerge(src[1], src[3]);
0N/A ar = vis_fpmerge(vis_read_hi(s02), vis_read_hi(s13));
0N/A gb = vis_fpmerge(vis_read_lo(s02), vis_read_lo(s13));
0N/A GRAY_U8(ff, vis_read_lo(ar), vis_read_hi(gb), vis_read_lo(gb));
0N/A d0 = vis_fpadd16(vis_fmul8x16(ff, srcAx4), d_half);
0N/A d1 = vis_fmul8x16(*(mlib_f32*)dst, vis_fpsub16(done, srcAx4));
0N/A dd = vis_fpadd16(d0, d1);
0N/A *(mlib_f32*)dst = vis_fpack16(dd);
0N/A src += 4;
0N/A }
0N/A
0N/A while (dst < dst_end) {
0N/A srcA0 = mul8table[mul8_extra[*pMask++]][*(mlib_u8*)src];
0N/A r = vis_ld_u8((mlib_u8*)src + 1);
0N/A g = vis_ld_u8((mlib_u8*)src + 2);
0N/A b = vis_ld_u8((mlib_u8*)src + 3);
0N/A GRAY_U8(ff, vis_read_lo(r), vis_read_lo(g), vis_read_lo(b));
0N/A d0 = vis_fpadd16(MUL8_VIS(ff, srcA0), d_half);
0N/A d1 = MUL8_VIS(vis_read_lo(vis_ld_u8(dst)), 255 - srcA0);
0N/A dd = vis_fpadd16(d0, d1);
0N/A vis_st_u8(D64_FROM_F32x2(vis_fpack16(dd)), dst);
0N/A dst++;
0N/A src++;
0N/A }
0N/A
0N/A PTR_ADD(dstBase, dstScan);
0N/A PTR_ADD(srcBase, srcScan);
0N/A PTR_ADD(pMask, maskScan);
0N/A }
0N/A } else {
0N/A
0N/A if (dstScan == width && srcScan == 4*width) {
0N/A width *= height;
0N/A height = 1;
0N/A }
0N/A
0N/A for (j = 0; j < height; j++) {
0N/A mlib_f32 *src = srcBase;
0N/A mlib_u8 *dst = dstBase;
0N/A
0N/A dst_end = dst + width;
0N/A
0N/A while (((mlib_s32)dst & 3) && dst < dst_end) {
0N/A srcA0 = mul8_extra[*(mlib_u8*)src];
0N/A r = vis_ld_u8((mlib_u8*)src + 1);
0N/A g = vis_ld_u8((mlib_u8*)src + 2);
0N/A b = vis_ld_u8((mlib_u8*)src + 3);
0N/A GRAY_U8(ff, vis_read_lo(r), vis_read_lo(g), vis_read_lo(b));
0N/A d0 = vis_fpadd16(MUL8_VIS(ff, srcA0), d_half);
0N/A d1 = MUL8_VIS(vis_read_lo(vis_ld_u8(dst)), 255 - srcA0);
0N/A dd = vis_fpadd16(d0, d1);
0N/A vis_st_u8(D64_FROM_F32x2(vis_fpack16(dd)), dst);
0N/A dst++;
0N/A src++;
0N/A }
0N/A
0N/A#pragma pipeloop(0)
0N/A for (; dst <= (dst_end - 4); dst += 4) {
0N/A srcA0 = mul8_extra[*(mlib_u8*)src];
0N/A srcA1 = mul8_extra[*(mlib_u8*)(src + 1)];
0N/A srcA2 = mul8_extra[*(mlib_u8*)(src + 2)];
0N/A srcA3 = mul8_extra[*(mlib_u8*)(src + 3)];
0N/A srcAx4 = vis_faligndata(vis_ld_u16(TBL_MUL + 2*srcA3), srcAx4);
0N/A srcAx4 = vis_faligndata(vis_ld_u16(TBL_MUL + 2*srcA2), srcAx4);
0N/A srcAx4 = vis_faligndata(vis_ld_u16(TBL_MUL + 2*srcA1), srcAx4);
0N/A srcAx4 = vis_faligndata(vis_ld_u16(TBL_MUL + 2*srcA0), srcAx4);
0N/A
0N/A s02 = vis_fpmerge(src[0], src[2]);
0N/A s13 = vis_fpmerge(src[1], src[3]);
0N/A ar = vis_fpmerge(vis_read_hi(s02), vis_read_hi(s13));
0N/A gb = vis_fpmerge(vis_read_lo(s02), vis_read_lo(s13));
0N/A GRAY_U8(ff, vis_read_lo(ar), vis_read_hi(gb), vis_read_lo(gb));
0N/A d0 = vis_fpadd16(vis_fmul8x16(ff, srcAx4), d_half);
0N/A d1 = vis_fmul8x16(*(mlib_f32*)dst, vis_fpsub16(done, srcAx4));
0N/A dd = vis_fpadd16(d0, d1);
0N/A *(mlib_f32*)dst = vis_fpack16(dd);
0N/A src += 4;
0N/A }
0N/A
0N/A while (dst < dst_end) {
0N/A srcA0 = mul8_extra[*(mlib_u8*)src];
0N/A r = vis_ld_u8((mlib_u8*)src + 1);
0N/A g = vis_ld_u8((mlib_u8*)src + 2);
0N/A b = vis_ld_u8((mlib_u8*)src + 3);
0N/A GRAY_U8(ff, vis_read_lo(r), vis_read_lo(g), vis_read_lo(b));
0N/A d0 = vis_fpadd16(MUL8_VIS(ff, srcA0), d_half);
0N/A d1 = MUL8_VIS(vis_read_lo(vis_ld_u8(dst)), 255 - srcA0);
0N/A dd = vis_fpadd16(d0, d1);
0N/A vis_st_u8(D64_FROM_F32x2(vis_fpack16(dd)), dst);
0N/A dst++;
0N/A src++;
0N/A }
0N/A
0N/A PTR_ADD(dstBase, dstScan);
0N/A PTR_ADD(srcBase, srcScan);
0N/A }
0N/A }
0N/A}
0N/A
0N/A/***************************************************************/
0N/A
/*
 * Compute the blend coefficients for pixel `i` of the current group of
 * four and shift them into the accumulators srcAx4 / divAx4.
 *
 * Reads:   pMask, src, extraA, srcFbase, DstOpAnd, DstOpXor, DstOpAdd
 * Writes:  pathA, srcA, dstF, srcF, dstA, resA, srcAx4, divAx4
 * All of these must be in scope at the expansion site.
 *
 * Steps: scale the source alpha by extraA, derive the destination factor
 * from it, scale both factors by the mask coverage pathA, then push the
 * table entries for srcA and for resA = srcA + dstA into the vectors.
 * Invoke for i = 3, 2, 1, 0 so that pixel 0 ends up in the leading lane.
 * The argument is parenthesised so that any expression may be passed.
 */
#define GET_COEF(i)                                                    \
    pathA = pMask[(i)];                                                \
    srcA = *(mlib_u8*)(src + (i));                                     \
    srcA = mul8table[extraA][srcA];                                    \
    dstF = ((((srcA) & DstOpAnd) ^ DstOpXor) + DstOpAdd);              \
    srcF = mul8table[pathA][srcFbase];                                 \
    dstA = 0xff - pathA + mul8table[pathA][dstF];                      \
    srcA = mul8table[srcF][srcA];                                      \
    resA = srcA + dstA;                                                \
    srcAx4 = vis_faligndata(vis_ld_u16(TBL_MUL + 2*srcA), srcAx4);     \
    divAx4 = vis_faligndata(vis_ld_u16(TBL_DIV + 8*resA), divAx4)
0N/A
0N/A/***************************************************************/
0N/A
0N/Avoid ADD_SUFF(IntArgbToByteGrayAlphaMaskBlit)(MASKBLIT_PARAMS)
0N/A{
0N/A mlib_s32 extraA;
0N/A mlib_s32 dstScan = pDstInfo->scanStride;
0N/A mlib_s32 srcScan = pSrcInfo->scanStride;
0N/A mlib_u8 *dst_end;
0N/A mlib_d64 srcAx4, dstAx4, divAx4, dd, ds;
0N/A mlib_d64 done = vis_to_double_dup(0x01000100);
0N/A mlib_f32 fscale = vis_to_float(0x02020202);
0N/A mlib_s32 j;
0N/A mlib_s32 SrcOpAnd, SrcOpXor, SrcOpAdd;
0N/A mlib_s32 DstOpAnd, DstOpXor, DstOpAdd;
0N/A mlib_s32 pathA, srcFbase, resA, resG, srcF, dstF, srcA, dstA;
0N/A
0N/A RGB_VARS;
0N/A
0N/A SrcOpAnd = (AlphaRules[pCompInfo->rule].srcOps).andval;
0N/A SrcOpXor = (AlphaRules[pCompInfo->rule].srcOps).xorval;
0N/A SrcOpAdd =
0N/A (jint) (AlphaRules[pCompInfo->rule].srcOps).addval - SrcOpXor;
0N/A
0N/A DstOpAnd = (AlphaRules[pCompInfo->rule].dstOps).andval;
0N/A DstOpXor = (AlphaRules[pCompInfo->rule].dstOps).xorval;
0N/A DstOpAdd =
0N/A (jint) (AlphaRules[pCompInfo->rule].dstOps).addval - DstOpXor;
0N/A
0N/A extraA = (mlib_s32)(pCompInfo->details.extraAlpha * 255.0 + 0.5);
0N/A
0N/A srcFbase = ((((0xff) & SrcOpAnd) ^ SrcOpXor) + SrcOpAdd);
0N/A
0N/A vis_write_gsr((7 << 3) | 6);
0N/A
0N/A if (pMask != NULL) {
0N/A pMask += maskOff;
0N/A
0N/A if (dstScan == width && srcScan == 4*width && maskScan == width) {
0N/A width *= height;
0N/A height = 1;
0N/A }
0N/A
0N/A maskScan -= width;
0N/A
0N/A for (j = 0; j < height; j++) {
0N/A mlib_f32 *src = srcBase;
0N/A mlib_u8 *dst = dstBase;
0N/A
0N/A dst_end = dst + width;
0N/A
0N/A while (((mlib_s32)dst & 3) && dst < dst_end) {
0N/A pathA = *pMask++;
0N/A srcA = *(mlib_u8*)src;
0N/A srcA = mul8table[extraA][srcA];
0N/A dstF = ((((srcA) & DstOpAnd) ^ DstOpXor) + DstOpAdd);
0N/A srcF = mul8table[pathA][srcFbase];
0N/A dstA = 0xff - pathA + mul8table[pathA][dstF];
0N/A srcA = mul8table[srcF][srcA];
0N/A resA = srcA + dstA;
0N/A
0N/A r = vis_ld_u8((mlib_u8*)src + 1);
0N/A g = vis_ld_u8((mlib_u8*)src + 2);
0N/A b = vis_ld_u8((mlib_u8*)src + 3);
0N/A GRAY_S16(dd, vis_read_lo(r), vis_read_lo(g), vis_read_lo(b));
0N/A dd = vis_fmul8x16(fscale, dd);
0N/A ff = vis_fpack16(dd);
0N/A
0N/A dd = vis_freg_pair(vis_fzeros(),
0N/A ((mlib_f32*)vis_mul8s_tbl)[dstA]);
0N/A DIV_ALPHA(dd, resA);
0N/A ds = vis_fpsub16(done, dd);
0N/A dd = vis_fmul8x16(vis_read_lo(vis_ld_u8(dst)), dd);
0N/A ds = vis_fmul8x16(ff, ds);
0N/A dd = vis_fpadd16(dd, ds);
0N/A ff = vis_fpack16(dd);
0N/A vis_st_u8(D64_FROM_F32x2(ff), dst);
0N/A
0N/A dst++;
0N/A src++;
0N/A }
0N/A
0N/A#pragma pipeloop(0)
0N/A for (; dst <= (dst_end - 4); dst += 4) {
0N/A GET_COEF(3);
0N/A GET_COEF(2);
0N/A GET_COEF(1);
0N/A GET_COEF(0);
0N/A pMask += 4;
0N/A srcAx4 = FMUL_16x16(srcAx4, divAx4);
0N/A dstAx4 = vis_fpsub16(done, srcAx4);
0N/A
0N/A s02 = vis_fpmerge(src[0], src[2]);
0N/A s13 = vis_fpmerge(src[1], src[3]);
0N/A ar = vis_fpmerge(vis_read_hi(s02), vis_read_hi(s13));
0N/A gb = vis_fpmerge(vis_read_lo(s02), vis_read_lo(s13));
0N/A GRAY_S16(dd, vis_read_lo(ar), vis_read_hi(gb), vis_read_lo(gb));
0N/A dd = vis_fmul8x16(fscale, dd);
0N/A ff = vis_fpack16(dd);
0N/A
0N/A dd = vis_fmul8x16(*(mlib_f32*)dst, dstAx4);
0N/A ds = vis_fmul8x16(ff, srcAx4);
0N/A dd = vis_fpadd16(dd, ds);
0N/A *(mlib_f32*)dst = vis_fpack16(dd);
0N/A
0N/A src += 4;
0N/A }
0N/A
0N/A while (dst < dst_end) {
0N/A pathA = *pMask++;
0N/A srcA = *(mlib_u8*)src;
0N/A srcA = mul8table[extraA][srcA];
0N/A dstF = ((((srcA) & DstOpAnd) ^ DstOpXor) + DstOpAdd);
0N/A srcF = mul8table[pathA][srcFbase];
0N/A dstA = 0xff - pathA + mul8table[pathA][dstF];
0N/A srcA = mul8table[srcF][srcA];
0N/A resA = srcA + dstA;
0N/A
0N/A r = vis_ld_u8((mlib_u8*)src + 1);
0N/A g = vis_ld_u8((mlib_u8*)src + 2);
0N/A b = vis_ld_u8((mlib_u8*)src + 3);
0N/A GRAY_S16(dd, vis_read_lo(r), vis_read_lo(g), vis_read_lo(b));
0N/A dd = vis_fmul8x16(fscale, dd);
0N/A ff = vis_fpack16(dd);
0N/A
0N/A dd = vis_freg_pair(vis_fzeros(),
0N/A ((mlib_f32*)vis_mul8s_tbl)[dstA]);
0N/A DIV_ALPHA(dd, resA);
0N/A ds = vis_fpsub16(done, dd);
0N/A dd = vis_fmul8x16(vis_read_lo(vis_ld_u8(dst)), dd);
0N/A ds = vis_fmul8x16(ff, ds);
0N/A dd = vis_fpadd16(dd, ds);
0N/A ff = vis_fpack16(dd);
0N/A vis_st_u8(D64_FROM_F32x2(ff), dst);
0N/A
0N/A dst++;
0N/A src++;
0N/A }
0N/A
0N/A PTR_ADD(dstBase, dstScan);
0N/A PTR_ADD(srcBase, srcScan);
0N/A PTR_ADD(pMask, maskScan);
0N/A }
0N/A } else {
0N/A
0N/A if (dstScan == width && srcScan == 4*width) {
0N/A width *= height;
0N/A height = 1;
0N/A }
0N/A
0N/A for (j = 0; j < height; j++) {
0N/A mlib_f32 *src = srcBase;
0N/A mlib_u8 *dst = dstBase;
0N/A
0N/A dst_end = dst + width;
0N/A
0N/A while (dst < dst_end) {
0N/A srcA = *(mlib_u8*)src;
0N/A srcA = mul8table[extraA][srcA];
0N/A dstA = ((((srcA) & DstOpAnd) ^ DstOpXor) + DstOpAdd);
0N/A srcA = mul8table[srcFbase][srcA];
0N/A resA = srcA + dstA;
0N/A
0N/A r = vis_ld_u8((mlib_u8*)src + 1);
0N/A g = vis_ld_u8((mlib_u8*)src + 2);
0N/A b = vis_ld_u8((mlib_u8*)src + 3);
0N/A GRAY_S16(dd, vis_read_lo(r), vis_read_lo(g), vis_read_lo(b));
0N/A dd = vis_fmul8x16(fscale, dd);
0N/A ff = vis_fpack16(dd);
0N/A
0N/A resG = mul8table[dstA][*dst] +
0N/A mul8table[srcA][((mlib_u8*)&ff)[3]];
0N/A *dst = div8table[resA][resG];
0N/A
0N/A dst++;
0N/A src++;
0N/A }
0N/A
0N/A PTR_ADD(dstBase, dstScan);
0N/A PTR_ADD(srcBase, srcScan);
0N/A }
0N/A }
0N/A}
0N/A
0N/A/***************************************************************/
0N/A
0N/Avoid ADD_SUFF(IntRgbToByteGrayAlphaMaskBlit)(MASKBLIT_PARAMS)
0N/A{
0N/A mlib_s32 extraA;
0N/A mlib_s32 dstScan = pDstInfo->scanStride;
0N/A mlib_s32 srcScan = pSrcInfo->scanStride;
0N/A mlib_u8 *dst_end;
0N/A mlib_d64 srcA_d, dstA_d, dd, d0, d1;
0N/A mlib_s32 i, j, srcG;
0N/A mlib_s32 SrcOpAnd, SrcOpXor, SrcOpAdd;
0N/A mlib_s32 DstOpAnd, DstOpXor, DstOpAdd;
0N/A mlib_s32 pathA, srcFbase, dstFbase, resA, resG, srcA, dstA;
0N/A
0N/A RGB_VARS;
0N/A
0N/A SrcOpAnd = (AlphaRules[pCompInfo->rule].srcOps).andval;
0N/A SrcOpXor = (AlphaRules[pCompInfo->rule].srcOps).xorval;
0N/A SrcOpAdd =
0N/A (jint) (AlphaRules[pCompInfo->rule].srcOps).addval - SrcOpXor;
0N/A
0N/A DstOpAnd = (AlphaRules[pCompInfo->rule].dstOps).andval;
0N/A DstOpXor = (AlphaRules[pCompInfo->rule].dstOps).xorval;
0N/A DstOpAdd =
0N/A (jint) (AlphaRules[pCompInfo->rule].dstOps).addval - DstOpXor;
0N/A
0N/A extraA = (mlib_s32)(pCompInfo->details.extraAlpha * 255.0 + 0.5);
0N/A
0N/A srcFbase = ((((0xff) & SrcOpAnd) ^ SrcOpXor) + SrcOpAdd);
0N/A dstFbase = (((extraA & DstOpAnd) ^ DstOpXor) + DstOpAdd);
0N/A
0N/A srcFbase = mul8table[srcFbase][extraA];
0N/A
0N/A if (width < 16) {
0N/A if (pMask != NULL) {
0N/A pMask += maskOff;
0N/A
0N/A for (j = 0; j < height; j++) {
0N/A mlib_u8 *dst = dstBase;
0N/A mlib_u8 *src = srcBase;
0N/A
0N/A for (i = 0; i < width; i++) {
0N/A pathA = pMask[i];
0N/A dstA = 0xff - pathA + mul8table[dstFbase][pathA];
0N/A srcA = mul8table[srcFbase][pathA];
0N/A resA = srcA + dstA;
0N/A
0N/A srcG = RGB2GRAY(src[4*i + 1], src[4*i + 2], src[4*i + 3]);
0N/A resG = mul8table[dstA][dst[i]] + mul8table[srcA][srcG];
0N/A resG = div8table[resA][resG];
0N/A dst[i] = resG;
0N/A }
0N/A
0N/A PTR_ADD(dstBase, dstScan);
0N/A PTR_ADD(srcBase, srcScan);
0N/A PTR_ADD(pMask, maskScan);
0N/A }
0N/A } else {
0N/A dstA = dstFbase;
0N/A srcA = srcFbase;
0N/A resA = srcA + dstA;
0N/A
0N/A for (j = 0; j < height; j++) {
0N/A mlib_u8 *dst = dstBase;
0N/A mlib_u8 *src = srcBase;
0N/A
0N/A for (i = 0; i < width; i++) {
0N/A srcG = RGB2GRAY(src[4*i + 1], src[4*i + 2], src[4*i + 3]);
0N/A resG = mul8table[dstA][dst[i]] + mul8table[srcA][srcG];
0N/A resG = div8table[resA][resG];
0N/A dst[i] = resG;
0N/A }
0N/A
0N/A PTR_ADD(dstBase, dstScan);
0N/A PTR_ADD(srcBase, srcScan);
0N/A }
0N/A }
0N/A return;
0N/A }
0N/A
0N/A if (pMask != NULL) {
0N/A mlib_s32 srcA_buff[256];
0N/A mlib_d64 dscale = (mlib_d64)(1 << 15)*(1 << 16), ddiv;
0N/A mlib_d64 d_one = vis_to_double_dup(0x7FFF7FFF);
0N/A
0N/A srcA_buff[0] = 0;
0N/A#pragma pipeloop(0)
0N/A for (pathA = 1; pathA < 256; pathA++) {
0N/A dstA = 0xff - pathA + mul8table[dstFbase][pathA];
0N/A srcA = mul8table[srcFbase][pathA];
0N/A resA = dstA + srcA;
0N/A ddiv = dscale*vis_d64_div_tbl[resA];
0N/A srcA_buff[pathA] = srcA*ddiv + (1 << 15);
0N/A }
0N/A
0N/A pMask += maskOff;
0N/A maskScan -= width;
0N/A
0N/A if (dstScan == width && srcScan == 4*width && maskScan == width) {
0N/A width *= height;
0N/A height = 1;
0N/A }
0N/A
0N/A for (j = 0; j < height; j++) {
0N/A mlib_f32 *src = srcBase;
0N/A mlib_u8 *dst = dstBase;
0N/A
0N/A dst_end = dst + width;
0N/A
0N/A while (((mlib_s32)dst & 3) && dst < dst_end) {
0N/A pathA = *pMask++;
0N/A srcA_d = vis_ld_u16(srcA_buff + pathA);
0N/A dstA_d = vis_fpsub16(d_one, srcA_d);
0N/A r = vis_ld_u8((mlib_u8*)src + 1);
0N/A g = vis_ld_u8((mlib_u8*)src + 2);
0N/A b = vis_ld_u8((mlib_u8*)src + 3);
0N/A GRAY_U8(ff, vis_read_lo(r), vis_read_lo(g), vis_read_lo(b));
0N/A d0 = vis_fpadd16(vis_fmul8x16(ff, srcA_d), d_half);
0N/A d1 = vis_fmul8x16(vis_read_lo(vis_ld_u8(dst)), dstA_d);
0N/A dd = vis_fpadd16(d0, d1);
0N/A vis_st_u8(D64_FROM_F32x2(vis_fpack16(dd)), dst);
0N/A dst++;
0N/A src++;
0N/A }
0N/A
0N/A#pragma pipeloop(0)
0N/A for (; dst <= (dst_end - 4); dst += 4) {
0N/A LOAD_NEXT_U16(srcA_d, srcA_buff + pMask[3]);
0N/A LOAD_NEXT_U16(srcA_d, srcA_buff + pMask[2]);
0N/A LOAD_NEXT_U16(srcA_d, srcA_buff + pMask[1]);
0N/A LOAD_NEXT_U16(srcA_d, srcA_buff + pMask[0]);
0N/A dstA_d = vis_fpsub16(d_one, srcA_d);
0N/A pMask += 4;
0N/A
0N/A s02 = vis_fpmerge(src[0], src[2]);
0N/A s13 = vis_fpmerge(src[1], src[3]);
0N/A ar = vis_fpmerge(vis_read_hi(s02), vis_read_hi(s13));
0N/A gb = vis_fpmerge(vis_read_lo(s02), vis_read_lo(s13));
0N/A GRAY_U8(ff, vis_read_lo(ar), vis_read_hi(gb), vis_read_lo(gb));
0N/A dd = vis_fpadd16(vis_fmul8x16(ff, srcA_d), d_half);
0N/A dd = vis_fpadd16(vis_fmul8x16(*(mlib_f32*)dst, dstA_d), dd);
0N/A *(mlib_f32*)dst = vis_fpack16(dd);
0N/A src += 4;
0N/A }
0N/A
0N/A while (dst < dst_end) {
0N/A pathA = *pMask++;
0N/A srcA_d = vis_ld_u16(srcA_buff + pathA);
0N/A dstA_d = vis_fpsub16(d_one, srcA_d);
0N/A r = vis_ld_u8((mlib_u8*)src + 1);
0N/A g = vis_ld_u8((mlib_u8*)src + 2);
0N/A b = vis_ld_u8((mlib_u8*)src + 3);
0N/A GRAY_U8(ff, vis_read_lo(r), vis_read_lo(g), vis_read_lo(b));
0N/A d0 = vis_fpadd16(vis_fmul8x16(ff, srcA_d), d_half);
0N/A d1 = vis_fmul8x16(vis_read_lo(vis_ld_u8(dst)), dstA_d);
0N/A dd = vis_fpadd16(d0, d1);
0N/A ff = vis_fpack16(dd);
0N/A vis_st_u8(D64_FROM_F32x2(ff), dst);
0N/A dst++;
0N/A src++;
0N/A }
0N/A
0N/A PTR_ADD(dstBase, dstScan);
0N/A PTR_ADD(srcBase, srcScan);
0N/A PTR_ADD(pMask, maskScan);
0N/A }
0N/A } else {
0N/A mlib_d64 dscale = (mlib_d64)(1 << 15)*(1 << 16), ddiv;
0N/A mlib_d64 d_one = vis_to_double_dup(0x7FFF7FFF);
0N/A
0N/A dstA = dstFbase;
0N/A srcA = srcFbase;
0N/A resA = dstA + srcA;
0N/A ddiv = dscale*vis_d64_div_tbl[resA];
0N/A srcA = (mlib_s32)(srcA*ddiv + (1 << 15)) >> 16;
0N/A srcA_d = vis_to_double_dup((srcA << 16) | srcA);
0N/A dstA_d = vis_fpsub16(d_one, srcA_d);
0N/A
0N/A if (dstScan == width && srcScan == 4*width) {
0N/A width *= height;
0N/A height = 1;
0N/A }
0N/A
0N/A for (j = 0; j < height; j++) {
0N/A mlib_f32 *src = srcBase;
0N/A mlib_u8 *dst = dstBase;
0N/A
0N/A dst_end = dst + width;
0N/A
0N/A while (((mlib_s32)dst & 3) && dst < dst_end) {
0N/A r = vis_ld_u8((mlib_u8*)src + 1);
0N/A g = vis_ld_u8((mlib_u8*)src + 2);
0N/A b = vis_ld_u8((mlib_u8*)src + 3);
0N/A GRAY_U8(ff, vis_read_lo(r), vis_read_lo(g), vis_read_lo(b));
0N/A d0 = vis_fpadd16(vis_fmul8x16(ff, srcA_d), d_half);
0N/A d1 = vis_fmul8x16(vis_read_lo(vis_ld_u8(dst)), dstA_d);
0N/A dd = vis_fpadd16(d0, d1);
0N/A vis_st_u8(D64_FROM_F32x2(vis_fpack16(dd)), dst);
0N/A dst++;
0N/A src++;
0N/A }
0N/A
0N/A#pragma pipeloop(0)
0N/A for (; dst <= (dst_end - 4); dst += 4) {
0N/A s02 = vis_fpmerge(src[0], src[2]);
0N/A s13 = vis_fpmerge(src[1], src[3]);
0N/A ar = vis_fpmerge(vis_read_hi(s02), vis_read_hi(s13));
0N/A gb = vis_fpmerge(vis_read_lo(s02), vis_read_lo(s13));
0N/A GRAY_U8(ff, vis_read_lo(ar), vis_read_hi(gb), vis_read_lo(gb));
0N/A dd = vis_fpadd16(vis_fmul8x16(ff, srcA_d), d_half);
0N/A dd = vis_fpadd16(vis_fmul8x16(*(mlib_f32*)dst, dstA_d), dd);
0N/A *(mlib_f32*)dst = vis_fpack16(dd);
0N/A src += 4;
0N/A }
0N/A
0N/A while (dst < dst_end) {
0N/A r = vis_ld_u8((mlib_u8*)src + 1);
0N/A g = vis_ld_u8((mlib_u8*)src + 2);
0N/A b = vis_ld_u8((mlib_u8*)src + 3);
0N/A GRAY_U8(ff, vis_read_lo(r), vis_read_lo(g), vis_read_lo(b));
0N/A d0 = vis_fpadd16(vis_fmul8x16(ff, srcA_d), d_half);
0N/A d1 = vis_fmul8x16(vis_read_lo(vis_ld_u8(dst)), dstA_d);
0N/A dd = vis_fpadd16(d0, d1);
0N/A ff = vis_fpack16(dd);
0N/A vis_st_u8(D64_FROM_F32x2(ff), dst);
0N/A dst++;
0N/A src++;
0N/A }
0N/A
0N/A PTR_ADD(dstBase, dstScan);
0N/A PTR_ADD(srcBase, srcScan);
0N/A }
0N/A }
0N/A}
0N/A
0N/A/***************************************************************/
0N/A
0N/A#endif