nr-filter-gaussian.cpp revision 208e5a33acc4a8ad9d8c0488f047c260346f1258
096dfde2c1cb7bb1e0a4b76e21f2abf548b900d5Campbell Barton * Gaussian blur renderer
096dfde2c1cb7bb1e0a4b76e21f2abf548b900d5Campbell Barton * Niko Kiirala <niko@kiirala.com>
096dfde2c1cb7bb1e0a4b76e21f2abf548b900d5Campbell Barton * Jasper van de Gronde <th.v.d.gronde@hccnet.nl>
096dfde2c1cb7bb1e0a4b76e21f2abf548b900d5Campbell Barton * Copyright (C) 2006 authors
096dfde2c1cb7bb1e0a4b76e21f2abf548b900d5Campbell Barton * Released under GNU GPL, read the file 'COPYING' for more information
874cad03a8450ed3464f6dfae2eb16108bec5bbdCampbell Barton// IIR filtering method based on:
874cad03a8450ed3464f6dfae2eb16108bec5bbdCampbell Barton// L.J. van Vliet, I.T. Young, and P.W. Verbeek, Recursive Gaussian Derivative Filters,
874cad03a8450ed3464f6dfae2eb16108bec5bbdCampbell Barton// in: A.K. Jain, S. Venkatesh, B.C. Lovell (eds.),
874cad03a8450ed3464f6dfae2eb16108bec5bbdCampbell Barton// ICPR'98, Proc. 14th Int. Conference on Pattern Recognition (Brisbane, Aug. 16-20),
874cad03a8450ed3464f6dfae2eb16108bec5bbdCampbell Barton// IEEE Computer Society Press, Los Alamitos, 1998, 509-514.
874cad03a8450ed3464f6dfae2eb16108bec5bbdCampbell Barton// Using the backwards-pass initialization procedure from:
874cad03a8450ed3464f6dfae2eb16108bec5bbdCampbell Barton// Boundary Conditions for Young - van Vliet Recursive Filtering
874cad03a8450ed3464f6dfae2eb16108bec5bbdCampbell Barton// Bill Triggs, Michael Sdika
ccee8a9aa5c7646c5e05f860a0e8221151551c51verbalshadow// IEEE Transactions on Signal Processing, Volume 54, Number 5 - may 2006
874cad03a8450ed3464f6dfae2eb16108bec5bbdCampbell Barton// Number of IIR filter coefficients used. Currently only 3 is supported.
// Type used for IIR filter coefficients (can be 10.21 signed fixed point, see Anisotropic Gaussian Filtering Using Fixed Point Arithmetic, Christoph H. Lampert & Oliver Wirjadi, 2006)
typedef double IIRValue;
// Type used for FIR filter coefficients (can be 16.16 unsigned fixed point, should have 8 or more bits in the fractional part, the integer part should be capable of storing approximately 20*255)
template<typename T> static inline T sqr(T const& v) { return v*v; }
template<typename T> static inline T clip(T const& v, T const& a, T const& b) {
static inline Tt clip_round_cast(Ts const& v, Tt const minval=std::numeric_limits<Tt>::min(), Tt const maxval=std::numeric_limits<Tt>::max()) {
namespace NR {
return new FilterGaussian();
double k[scr_len+1]; // This is only called for small kernel sizes (above approximately 10 coefficients the IIR filter is used)
double sum = 0;
for ( int i = scr_len; i >= 0 ; i-- ) {
if ( i > 0 ) sum += k[i];
// the sum of the complete kernel is twice as large (plus the center element which we skipped above to prevent counting it twice)
double ksum = 0;
int stepsize_l2;
switch (quality) {
case BLUR_QUALITY_WORST:
case BLUR_QUALITY_WORSE:
case BLUR_QUALITY_BETTER:
case BLUR_QUALITY_BEST:
case BLUR_QUALITY_NORMAL:
return stepsize_l2;
double qbeg = 1; // Don't go lower than sigma==2 (we'd probably want a normal convolution in that case anyway)
do { // Binary search for right q (a linear interpolation scheme is suggested, but this should work fine as well)
qbeg = q;
qend = q;
static void calcTriggsSdikaM(double const b[N], double M[N*N]) {
template<unsigned int SIZE>
static void calcTriggsSdikaInitialization(double const M[N*N], IIRValue const uold[N][SIZE], IIRValue const uplus[SIZE], IIRValue const vplus[SIZE], IIRValue const alpha, IIRValue vold[N][SIZE]) {
for(unsigned int c=0; c<SIZE; c++) {
double uminp[N];
double voldf = 0;
// Properly takes care of the scaling coefficient alpha and vplus (which is already appropriately scaled)
// This was arrived at by starting from a version of the blur filter that ignored the scaling coefficient
// (and scaled the final output by alpha^2) and then gradually reintroducing the scaling coefficient.
for(unsigned int c=0; c<PC; c++) u[0][c] *= b[0];
for(unsigned int c=0; c<PC; c++) u[0][c] += u[i][c]*b[i];
if ( PREMULTIPLIED_ALPHA ) {
for(unsigned int c=0; c<PC-1; c++) dstimg[c] = clip_round_cast<PT>(v[0][c], std::numeric_limits<PT>::min(), dstimg[PC-1]);
while(c1-->0) {
for(unsigned int c=0; c<PC; c++) v[0][c] *= b[0];
for(unsigned int c=0; c<PC; c++) v[0][c] += v[i][c]*b[i];
if ( PREMULTIPLIED_ALPHA ) {
for(unsigned int c=0; c<PC-1; c++) dstimg[c] = clip_round_cast<PT>(v[0][c], std::numeric_limits<PT>::min(), dstimg[PC-1]);
int different_count = 0;
for ( int i = 0 ; i <= scr_len ; i++ ) {
pos++;
upsample(PT *const dst, int const dstr1, int const dstr2, unsigned int const dn1, unsigned int const dn2,
PT const *const src, int const sstr1, int const sstr2, unsigned int const sn1, unsigned int const sn2,
assert(((sn1-1)<<step1_l2)>=dn1 && ((sn2-1)<<step2_l2)>=dn2); // The last pixel of the source image should fall outside the destination image
int const width = resampling ? static_cast<int>(ceil(static_cast<double>(width_org)/x_step))+1 : width_org;
int const height = resampling ? static_cast<int>(ceil(static_cast<double>(height_org)/y_step))+1 : height_org;
delete out;
if ( resampling ) {
downsample<unsigned char,1>(NR_PIXBLOCK_PX(out), 1, out->rs, width, height, NR_PIXBLOCK_PX(in), 1, in->rs, width_org, height_org, x_step_l2, y_step_l2);
downsample<unsigned char,3>(NR_PIXBLOCK_PX(out), 3, out->rs, width, height, NR_PIXBLOCK_PX(in), 3, in->rs, width_org, height_org, x_step_l2, y_step_l2);
downsample<unsigned char,4>(NR_PIXBLOCK_PX(out), 4, out->rs, width, height, NR_PIXBLOCK_PX(in), 4, in->rs, width_org, height_org, x_step_l2, y_step_l2);
downsample<unsigned char,4>(NR_PIXBLOCK_PX(out), 4, out->rs, width, height, NR_PIXBLOCK_PX(in), 4, in->rs, width_org, height_org, x_step_l2, y_step_l2);
assert(false);
if (use_IIR_x) {
for(size_t i=0; i<N; i++) {
filter2D_IIR<unsigned char,1,false>(NR_PIXBLOCK_PX(out), 1, out->rs, NR_PIXBLOCK_PX(ssin), 1, ssin->rs, width, height, b, M, tmpdata);
filter2D_IIR<unsigned char,3,false>(NR_PIXBLOCK_PX(out), 3, out->rs, NR_PIXBLOCK_PX(ssin), 3, ssin->rs, width, height, b, M, tmpdata);
filter2D_IIR<unsigned char,4,false>(NR_PIXBLOCK_PX(out), 4, out->rs, NR_PIXBLOCK_PX(ssin), 4, ssin->rs, width, height, b, M, tmpdata);
filter2D_IIR<unsigned char,4,true >(NR_PIXBLOCK_PX(out), 4, out->rs, NR_PIXBLOCK_PX(ssin), 4, ssin->rs, width, height, b, M, tmpdata);
assert(false);
filter2D_FIR<unsigned char,1>(NR_PIXBLOCK_PX(out), 1, out->rs, NR_PIXBLOCK_PX(ssin), 1, ssin->rs, width, height, kernel, scr_len_x);
filter2D_FIR<unsigned char,3>(NR_PIXBLOCK_PX(out), 3, out->rs, NR_PIXBLOCK_PX(ssin), 3, ssin->rs, width, height, kernel, scr_len_x);
filter2D_FIR<unsigned char,4>(NR_PIXBLOCK_PX(out), 4, out->rs, NR_PIXBLOCK_PX(ssin), 4, ssin->rs, width, height, kernel, scr_len_x);
filter2D_FIR<unsigned char,4>(NR_PIXBLOCK_PX(out), 4, out->rs, NR_PIXBLOCK_PX(ssin), 4, ssin->rs, width, height, kernel, scr_len_x);
assert(false);
if (use_IIR_y) {
for(size_t i=0; i<N; i++) {
filter2D_IIR<unsigned char,1,false>(NR_PIXBLOCK_PX(out), out->rs, 1, NR_PIXBLOCK_PX(out), out->rs, 1, height, width, b, M, tmpdata);
filter2D_IIR<unsigned char,3,false>(NR_PIXBLOCK_PX(out), out->rs, 3, NR_PIXBLOCK_PX(out), out->rs, 3, height, width, b, M, tmpdata);
filter2D_IIR<unsigned char,4,false>(NR_PIXBLOCK_PX(out), out->rs, 4, NR_PIXBLOCK_PX(out), out->rs, 4, height, width, b, M, tmpdata);
filter2D_IIR<unsigned char,4,true >(NR_PIXBLOCK_PX(out), out->rs, 4, NR_PIXBLOCK_PX(out), out->rs, 4, height, width, b, M, tmpdata);
assert(false);
filter2D_FIR<unsigned char,1>(NR_PIXBLOCK_PX(out), out->rs, 1, NR_PIXBLOCK_PX(out), out->rs, 1, height, width, kernel, scr_len_y);
filter2D_FIR<unsigned char,3>(NR_PIXBLOCK_PX(out), out->rs, 3, NR_PIXBLOCK_PX(out), out->rs, 3, height, width, kernel, scr_len_y);
filter2D_FIR<unsigned char,4>(NR_PIXBLOCK_PX(out), out->rs, 4, NR_PIXBLOCK_PX(out), out->rs, 4, height, width, kernel, scr_len_y);
filter2D_FIR<unsigned char,4>(NR_PIXBLOCK_PX(out), out->rs, 4, NR_PIXBLOCK_PX(out), out->rs, 4, height, width, kernel, scr_len_y);
assert(false);
if ( !resampling ) {
delete out;
upsample<unsigned char,1>(NR_PIXBLOCK_PX(finalout), 1, finalout->rs, width_org, height_org, NR_PIXBLOCK_PX(out), 1, out->rs, width, height, x_step_l2, y_step_l2);
upsample<unsigned char,3>(NR_PIXBLOCK_PX(finalout), 3, finalout->rs, width_org, height_org, NR_PIXBLOCK_PX(out), 3, out->rs, width, height, x_step_l2, y_step_l2);
upsample<unsigned char,4>(NR_PIXBLOCK_PX(finalout), 4, finalout->rs, width_org, height_org, NR_PIXBLOCK_PX(out), 4, out->rs, width, height, x_step_l2, y_step_l2);
upsample<unsigned char,4>(NR_PIXBLOCK_PX(finalout), 4, finalout->rs, width_org, height_org, NR_PIXBLOCK_PX(out), 4, out->rs, width, height, x_step_l2, y_step_l2);
assert(false);
delete out;
return TRAIT_PARALLER;
_deviation_x = x;
_deviation_y = y;