0% found this document useful (0 votes)
103 views37 pages

Convolution

This C++ code defines functions for performing 1D and 2D convolution on input data with a kernel. It includes functions for unsigned char, unsigned short, signed integer, and float data types. The 2D convolution functions use pointers to efficiently iterate through input and kernel data without copying, compute the valid boundary region for each output element, and accumulate the results into the output array.
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as TXT or PDF, or read online on Scribd
0% found this document useful (0 votes)
103 views37 pages

Convolution

This C++ code defines functions for performing 1D and 2D convolution on input data with a kernel. It includes functions for unsigned char, unsigned short, signed integer, and float data types. The 2D convolution functions use pointers to efficiently iterate through input and kernel data without copying, compute the valid boundary region for each output element, and accumulate the results into the output array.
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as TXT or PDF, or read online on Scribd
You are on page 1/ 37

#include <cmath>
#include <cstddef>
#include <new>

#include "convolution.h"
///////////////////////////////////////////////////////////////////////////////
// 1D convolution
// We assume input and kernel signal start from t=0, i.e.
//     out[i] = sum over k of in[i-k] * kernel[k]
// where terms with i-k < 0 are skipped. Exactly dataSize samples are written.
//
// in      : input samples  (dataSize elements)
// out     : output samples (dataSize elements); must not overlap "in",
//           because outputs are stored while earlier inputs are still read
// kernel  : kernel coefficients (kernelSize elements)
// returns : false if a pointer is null or a size is not positive, else true
///////////////////////////////////////////////////////////////////////////////
bool convolve1D(float* in, float* out, int dataSize, float* kernel, int kernelSize)
{
    int i, j, k;
    int headCount;

    // check validity of params
    if(!in || !out || !kernel) return false;
    if(dataSize <= 0 || kernelSize <= 0) return false;

    // start convolution from out[kernelSize-1] to out[dataSize-1] (last):
    // the whole kernel overlaps the input here
    for(i = kernelSize-1; i < dataSize; ++i)
    {
        out[i] = 0;                             // init to 0 before accumulate
        for(j = i, k = 0; k < kernelSize; --j, ++k)
            out[i] += in[j] * kernel[k];
    }

    // convolution from out[0] to out[kernelSize-2]: only kernel[0..i] overlaps.
    // Clamp to dataSize so a kernel longer than the signal cannot read or
    // write past the end of the arrays.
    headCount = kernelSize - 1;
    if(headCount > dataSize) headCount = dataSize;
    for(i = 0; i < headCount; ++i)
    {
        out[i] = 0;                             // init to 0 before sum
        for(j = i, k = 0; j >= 0; --j, ++k)
            out[i] += in[j] * kernel[k];
    }

    return true;
}
///////////////////////////////////////////////////////////////////////////////
// Simplest 2D convolution routine. It is easy to understand how convolution
// works, but is very slow, because of no optimization.
//
// For every output pixel (i,j):
//     out[i][j] = | sum over (mm,nn) of
//                   kernel[mm][nn] * in[i + kCenterY - mm][j + kCenterX - nn] |
// rounded to nearest, with kCenterX/Y = kernelSize/2. Input samples outside
// the image are ignored.
//
// in, out : row-major images, dataSizeX * dataSizeY elements each
// kernel  : row-major kernel, kernelSizeX * kernelSizeY coefficients
// returns : false if a pointer is null or any size is not positive, else true
// NOTE: the result is not clamped; |sum| is expected to fit in unsigned char.
///////////////////////////////////////////////////////////////////////////////
bool convolve2DSlow(unsigned char* in, unsigned char* out, int dataSizeX, int dataSizeY,
                    float* kernel, int kernelSizeX, int kernelSizeY)
{
    int i, j, m, n, mm, nn;
    int kCenterX, kCenterY;                     // center index of kernel
    float sum;                                  // temp accumulation buffer
    int rowIndex, colIndex;

    // check validity of params
    if(!in || !out || !kernel) return false;
    if(dataSizeX <= 0 || dataSizeY <= 0) return false;
    if(kernelSizeX <= 0 || kernelSizeY <= 0) return false;

    // find center position of kernel (half of kernel size)
    kCenterX = kernelSizeX / 2;
    kCenterY = kernelSizeY / 2;

    for(i = 0; i < dataSizeY; ++i)              // rows
    {
        for(j = 0; j < dataSizeX; ++j)          // columns
        {
            sum = 0;                            // init to 0 before sum
            for(m = 0; m < kernelSizeY; ++m)    // kernel rows
            {
                mm = kernelSizeY - 1 - m;       // row index of flipped kernel

                // input row paired with kernel row mm. Measuring from the
                // kernel center keeps even-sized kernels aligned the same
                // way as odd-sized ones.
                rowIndex = i + (kCenterY - mm);
                if(rowIndex < 0 || rowIndex >= dataSizeY)
                    continue;                   // whole kernel row is out of bound

                for(n = 0; n < kernelSizeX; ++n) // kernel columns
                {
                    nn = kernelSizeX - 1 - n;   // column index of flipped kernel
                    colIndex = j + (kCenterX - nn);

                    // ignore input samples which are out of bound
                    if(colIndex >= 0 && colIndex < dataSizeX)
                        sum += in[dataSizeX * rowIndex + colIndex] * kernel[kernelSizeX * mm + nn];
                }
            }
            // convert negative number to positive and round to nearest
            out[dataSizeX * i + j] = (unsigned char)((float)fabs(sum) + 0.5f);
        }
    }

    return true;
}

///////////////////////////////////////////////////////////////////////////////
// 2D convolution
// 2D data are usually stored in computer memory as contiguous 1D array.
// So, we are using 1D array for 2D data.
// 2D convolution assumes the kernel origin is at its center: for example, a
// kernel of size 3 is indexed as k[-1], k[0], k[1], so the middle index is 0.
// The following programming logics are somewhat complicated because of using
// pointer indexing in order to minimize the number of multiplications.
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// unsigned char version (8bit): Note that the output is always positive number
//
// For every output pixel (i,j):
//     out[i][j] = | sum over (m,n) of
//                   kernel[m][n] * in[i + kCenterY - m][j + kCenterX - n] |
// rounded to nearest, with kCenterX/Y = kernelSize/2. Input samples outside
// the image are skipped.
//
// in, out : row-major images, dataSizeX * dataSizeY elements each
// kernel  : row-major kernel, kernelSizeX * kernelSizeY coefficients
// returns : false if a pointer is null or any size is not positive, else true
// NOTE: the result is not clamped; |sum| is expected to fit in unsigned char.
///////////////////////////////////////////////////////////////////////////////
bool convolve2D(unsigned char* in, unsigned char* out, int dataSizeX, int dataSizeY,
                float* kernel, int kernelSizeX, int kernelSizeY)
{
    int i, j, m, n;
    int kCenterX, kCenterY;                     // center index of kernel
    int mBegin, mEnd;                           // kernel rows that hit valid input rows
    int nBegin, nEnd;                           // kernel columns that hit valid input columns
    const unsigned char* inRow;
    const float* kRow;
    unsigned char* outPtr;
    float sum;                                  // temp accumulation buffer

    // check validity of params
    if(!in || !out || !kernel) return false;
    if(dataSizeX <= 0 || dataSizeY <= 0) return false;
    if(kernelSizeX <= 0 || kernelSizeY <= 0) return false;

    // find center position of kernel (half of kernel size)
    kCenterX = kernelSizeX >> 1;
    kCenterY = kernelSizeY >> 1;

    outPtr = out;

    for(i = 0; i < dataSizeY; ++i)              // number of rows
    {
        // kernel row m reads input row (i + kCenterY - m); keep only the m
        // for which that row lies in [0, dataSizeY)
        mBegin = i + kCenterY - dataSizeY + 1;
        if(mBegin < 0) mBegin = 0;
        mEnd = i + kCenterY + 1;
        if(mEnd > kernelSizeY) mEnd = kernelSizeY;

        for(j = 0; j < dataSizeX; ++j)          // number of columns
        {
            // kernel column n reads input column (j + kCenterX - n)
            nBegin = j + kCenterX - dataSizeX + 1;
            if(nBegin < 0) nBegin = 0;
            nEnd = j + kCenterX + 1;
            if(nEnd > kernelSizeX) nEnd = kernelSizeX;

            sum = 0;                            // set to 0 before accumulate

            // multiply each in-range kernel value with the underlying input
            for(m = mBegin; m < mEnd; ++m)
            {
                inRow = in + dataSizeX * (i + kCenterY - m);
                kRow = kernel + kernelSizeX * m;
                for(n = nBegin; n < nEnd; ++n)
                    sum += inRow[j + kCenterX - n] * kRow[n];
            }

            // convert negative number to positive and round to nearest
            *outPtr = (unsigned char)((float)fabs(sum) + 0.5f);
            ++outPtr;                           // next output
        }
    }

    return true;
}
///////////////////////////////////////////////////////////////////////////////
// unsigned short (16bit)
//
// For every output pixel (i,j):
//     out[i][j] = | sum over (m,n) of
//                   kernel[m][n] * in[i + kCenterY - m][j + kCenterX - n] |
// rounded to nearest, with kCenterX/Y = kernelSize/2. Input samples outside
// the image are skipped.
//
// in, out : row-major images, dataSizeX * dataSizeY elements each
// kernel  : row-major kernel, kernelSizeX * kernelSizeY coefficients
// returns : false if a pointer is null or any size is not positive, else true
// NOTE: the result is not clamped; |sum| is expected to fit in unsigned short.
///////////////////////////////////////////////////////////////////////////////
bool convolve2D(unsigned short* in, unsigned short* out, int dataSizeX, int dataSizeY,
                float* kernel, int kernelSizeX, int kernelSizeY)
{
    int i, j, m, n;
    int kCenterX, kCenterY;                     // center index of kernel
    int mBegin, mEnd;                           // kernel rows that hit valid input rows
    int nBegin, nEnd;                           // kernel columns that hit valid input columns
    const unsigned short* inRow;
    const float* kRow;
    unsigned short* outPtr;
    float sum;                                  // temp accumulation buffer

    // check validity of params
    if(!in || !out || !kernel) return false;
    if(dataSizeX <= 0 || dataSizeY <= 0) return false;
    if(kernelSizeX <= 0 || kernelSizeY <= 0) return false;

    // find center position of kernel (half of kernel size)
    kCenterX = kernelSizeX >> 1;
    kCenterY = kernelSizeY >> 1;

    outPtr = out;

    for(i = 0; i < dataSizeY; ++i)              // number of rows
    {
        // kernel row m reads input row (i + kCenterY - m); keep only the m
        // for which that row lies in [0, dataSizeY)
        mBegin = i + kCenterY - dataSizeY + 1;
        if(mBegin < 0) mBegin = 0;
        mEnd = i + kCenterY + 1;
        if(mEnd > kernelSizeY) mEnd = kernelSizeY;

        for(j = 0; j < dataSizeX; ++j)          // number of columns
        {
            // kernel column n reads input column (j + kCenterX - n)
            nBegin = j + kCenterX - dataSizeX + 1;
            if(nBegin < 0) nBegin = 0;
            nEnd = j + kCenterX + 1;
            if(nEnd > kernelSizeX) nEnd = kernelSizeX;

            sum = 0;                            // set to 0 before accumulate

            // multiply each in-range kernel value with the underlying input
            for(m = mBegin; m < mEnd; ++m)
            {
                inRow = in + dataSizeX * (i + kCenterY - m);
                kRow = kernel + kernelSizeX * m;
                for(n = nBegin; n < nEnd; ++n)
                    sum += inRow[j + kCenterX - n] * kRow[n];
            }

            // convert negative number to positive and round to nearest
            *outPtr = (unsigned short)((float)fabs(sum) + 0.5f);
            ++outPtr;                           // next output
        }
    }

    return true;
}
///////////////////////////////////////////////////////////////////////////////
// signed integer (32bit) version:
//
// For every output pixel (i,j):
//     out[i][j] = sum over (m,n) of
//                 kernel[m][n] * in[i + kCenterY - m][j + kCenterX - n]
// rounded to the nearest integer (halves away from zero; the sign is kept),
// with kCenterX/Y = kernelSize/2. Input samples outside the image are skipped.
//
// in, out : row-major images, dataSizeX * dataSizeY elements each
// kernel  : row-major kernel, kernelSizeX * kernelSizeY coefficients
// returns : false if a pointer is null or any size is not positive, else true
// NOTE: accumulation is done in single-precision float.
///////////////////////////////////////////////////////////////////////////////
bool convolve2D(int* in, int* out, int dataSizeX, int dataSizeY,
                float* kernel, int kernelSizeX, int kernelSizeY)
{
    int i, j, m, n;
    int kCenterX, kCenterY;                     // center index of kernel
    int mBegin, mEnd;                           // kernel rows that hit valid input rows
    int nBegin, nEnd;                           // kernel columns that hit valid input columns
    const int* inRow;
    const float* kRow;
    int* outPtr;
    float sum;                                  // temp accumulation buffer

    // check validity of params
    if(!in || !out || !kernel) return false;
    if(dataSizeX <= 0 || dataSizeY <= 0) return false;
    if(kernelSizeX <= 0 || kernelSizeY <= 0) return false;

    // find center position of kernel (half of kernel size)
    kCenterX = kernelSizeX >> 1;
    kCenterY = kernelSizeY >> 1;

    outPtr = out;

    for(i = 0; i < dataSizeY; ++i)              // number of rows
    {
        // kernel row m reads input row (i + kCenterY - m); keep only the m
        // for which that row lies in [0, dataSizeY)
        mBegin = i + kCenterY - dataSizeY + 1;
        if(mBegin < 0) mBegin = 0;
        mEnd = i + kCenterY + 1;
        if(mEnd > kernelSizeY) mEnd = kernelSizeY;

        for(j = 0; j < dataSizeX; ++j)          // number of columns
        {
            // kernel column n reads input column (j + kCenterX - n)
            nBegin = j + kCenterX - dataSizeX + 1;
            if(nBegin < 0) nBegin = 0;
            nEnd = j + kCenterX + 1;
            if(nEnd > kernelSizeX) nEnd = kernelSizeX;

            sum = 0;                            // set to 0 before accumulate

            // multiply each in-range kernel value with the underlying input
            for(m = mBegin; m < mEnd; ++m)
            {
                inRow = in + dataSizeX * (i + kCenterY - m);
                kRow = kernel + kernelSizeX * m;
                for(n = nBegin; n < nEnd; ++n)
                    sum += inRow[j + kCenterX - n] * kRow[n];
            }

            // convert to integer, rounding half away from zero
            if(sum >= 0) *outPtr = (int)(sum + 0.5f);
            else         *outPtr = (int)(sum - 0.5f);
            ++outPtr;                           // next output
        }
    }

    return true;
}
///////////////////////////////////////////////////////////////////////////////
// single float precision version:
//
// For every output pixel (i,j):
//     out[i][j] = sum over (m,n) of
//                 kernel[m][n] * in[i + kCenterY - m][j + kCenterX - n]
// with kCenterX/Y = kernelSize/2. Input samples outside the image are
// skipped. The sign of the result is kept (no absolute value, no rounding).
//
// in, out : row-major images, dataSizeX * dataSizeY elements each
// kernel  : row-major kernel, kernelSizeX * kernelSizeY coefficients
// returns : false if a pointer is null or any size is not positive, else true
///////////////////////////////////////////////////////////////////////////////
bool convolve2D(float* in, float* out, int dataSizeX, int dataSizeY,
                float* kernel, int kernelSizeX, int kernelSizeY)
{
    int i, j, m, n;
    int kCenterX, kCenterY;                     // center index of kernel
    int mBegin, mEnd;                           // kernel rows that hit valid input rows
    int nBegin, nEnd;                           // kernel columns that hit valid input columns
    const float* inRow;
    const float* kRow;
    float* outPtr;
    float sum;                                  // temp accumulation buffer

    // check validity of params
    if(!in || !out || !kernel) return false;
    if(dataSizeX <= 0 || dataSizeY <= 0) return false;
    if(kernelSizeX <= 0 || kernelSizeY <= 0) return false;

    // find center position of kernel (half of kernel size)
    kCenterX = kernelSizeX >> 1;
    kCenterY = kernelSizeY >> 1;

    outPtr = out;

    for(i = 0; i < dataSizeY; ++i)              // number of rows
    {
        // kernel row m reads input row (i + kCenterY - m); keep only the m
        // for which that row lies in [0, dataSizeY)
        mBegin = i + kCenterY - dataSizeY + 1;
        if(mBegin < 0) mBegin = 0;
        mEnd = i + kCenterY + 1;
        if(mEnd > kernelSizeY) mEnd = kernelSizeY;

        for(j = 0; j < dataSizeX; ++j)          // number of columns
        {
            // kernel column n reads input column (j + kCenterX - n)
            nBegin = j + kCenterX - dataSizeX + 1;
            if(nBegin < 0) nBegin = 0;
            nEnd = j + kCenterX + 1;
            if(nEnd > kernelSizeX) nEnd = kernelSizeX;

            sum = 0;                            // set to 0 before accumulate

            // multiply each in-range kernel value with the underlying input
            for(m = mBegin; m < mEnd; ++m)
            {
                inRow = in + dataSizeX * (i + kCenterY - m);
                kRow = kernel + kernelSizeX * m;
                for(n = nBegin; n < nEnd; ++n)
                    sum += inRow[j + kCenterX - n] * kRow[n];
            }

            *outPtr = sum;
            ++outPtr;                           // next output
        }
    }

    return true;
}
///////////////////////////////////////////////////////////////////////////////
// double float precision version:
//
// For every output pixel (i,j):
//     out[i][j] = sum over (m,n) of
//                 kernel[m][n] * in[i + kCenterY - m][j + kCenterX - n]
// with kCenterX/Y = kernelSize/2. Input samples outside the image are
// skipped. The sign of the result is kept (no absolute value, no rounding).
//
// in, out : row-major images, dataSizeX * dataSizeY elements each
// kernel  : row-major kernel, kernelSizeX * kernelSizeY coefficients
// returns : false if a pointer is null or any size is not positive, else true
///////////////////////////////////////////////////////////////////////////////
bool convolve2D(double* in, double* out, int dataSizeX, int dataSizeY,
                double* kernel, int kernelSizeX, int kernelSizeY)
{
    int i, j, m, n;
    int kCenterX, kCenterY;                     // center index of kernel
    int mBegin, mEnd;                           // kernel rows that hit valid input rows
    int nBegin, nEnd;                           // kernel columns that hit valid input columns
    const double* inRow;
    const double* kRow;
    double* outPtr;
    double sum;                                 // temp accumulation buffer

    // check validity of params
    if(!in || !out || !kernel) return false;
    if(dataSizeX <= 0 || dataSizeY <= 0) return false;
    if(kernelSizeX <= 0 || kernelSizeY <= 0) return false;

    // find center position of kernel (half of kernel size)
    kCenterX = kernelSizeX >> 1;
    kCenterY = kernelSizeY >> 1;

    outPtr = out;

    for(i = 0; i < dataSizeY; ++i)              // number of rows
    {
        // kernel row m reads input row (i + kCenterY - m); keep only the m
        // for which that row lies in [0, dataSizeY)
        mBegin = i + kCenterY - dataSizeY + 1;
        if(mBegin < 0) mBegin = 0;
        mEnd = i + kCenterY + 1;
        if(mEnd > kernelSizeY) mEnd = kernelSizeY;

        for(j = 0; j < dataSizeX; ++j)          // number of columns
        {
            // kernel column n reads input column (j + kCenterX - n)
            nBegin = j + kCenterX - dataSizeX + 1;
            if(nBegin < 0) nBegin = 0;
            nEnd = j + kCenterX + 1;
            if(nEnd > kernelSizeX) nEnd = kernelSizeX;

            sum = 0;                            // set to 0 before accumulate

            // multiply each in-range kernel value with the underlying input
            for(m = mBegin; m < mEnd; ++m)
            {
                inRow = in + dataSizeX * (i + kCenterY - m);
                kRow = kernel + kernelSizeX * m;
                for(n = nBegin; n < nEnd; ++n)
                    sum += inRow[j + kCenterX - n] * kRow[n];
            }

            *outPtr = sum;
            ++outPtr;                           // next output
        }
    }

    return true;
}

///////////////////////////////////////////////////////////////////////////////
// Separable 2D Convolution
// If the MxN kernel can be separated into (Mx1) and (1xN) matrices, the number
// of multiplications per sample is reduced to M+N, compared to MxN in normal
// convolution. For performance reasons, it does not check whether the output
// exceeds the maximum of the output type; we assume the kernel contains
// good (valid) data, so the result cannot be larger than that maximum.
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// unsigned char (8-bit) version
//
// Two-pass separable convolution:
//   pass 1 (x):  tmp[i][j] = sum over k of kernelX[k] * in[i][j + kSizeX/2 - k]
//   pass 2 (y):  out[i][j] = | sum over k of kernelY[k] * tmp[i + kSizeY/2 - k][j] |
// rounded to nearest. Samples outside the image are skipped, so any kernel
// length is safe, including kernels longer than the image. Kernel taps are
// accumulated from the highest index k down to the lowest.
//
// in, out  : row-major images, dataSizeX * dataSizeY elements each
// kernelX  : horizontal kernel, kSizeX coefficients
// kernelY  : vertical kernel,   kSizeY coefficients
// returns  : false if a pointer is null, any size is not positive, or a
//            temporary buffer cannot be allocated; true otherwise
// NOTE: the result is not clamped; it is expected to fit in unsigned char.
///////////////////////////////////////////////////////////////////////////////
bool convolve2DSeparable(unsigned char* in, unsigned char* out, int dataSizeX, int dataSizeY,
                         float* kernelX, int kSizeX, float* kernelY, int kSizeY)
{
    int i, j, k;
    int kCenter, kLow, kHigh;                   // kernel center and clamped tap range
    float *tmp, *sum;                           // intermediate image / one accumulated row
    const unsigned char* inRow;
    const float* tmpRow;
    float* tmpPtr;
    unsigned char* outPtr;
    float acc;

    // check validity of params
    if(!in || !out || !kernelX || !kernelY) return false;
    if(dataSizeX <= 0 || dataSizeY <= 0) return false;
    if(kSizeX <= 0 || kSizeY <= 0) return false;

    // allocate temp storage to keep intermediate result; the element count is
    // computed in size_t so that large images do not overflow int
    tmp = new (std::nothrow) float[(std::size_t)dataSizeX * (std::size_t)dataSizeY];
    if(!tmp) return false;                      // memory allocation error

    // store accumulated sum of one output row
    sum = new (std::nothrow) float[dataSizeX];
    if(!sum)
    {
        delete [] tmp;                          // do not leak the first buffer
        return false;                           // memory allocation error
    }

    // convolve horizontal direction ///////////////////////
    kCenter = kSizeX >> 1;                      // center index of kernel array
    tmpPtr = tmp;
    for(i = 0; i < dataSizeY; ++i)
    {
        inRow = in + (std::size_t)dataSizeX * i;
        for(j = 0; j < dataSizeX; ++j)
        {
            // tap k reads column (j + kCenter - k); keep taps inside the row
            kHigh = j + kCenter;
            if(kHigh > kSizeX - 1) kHigh = kSizeX - 1;
            kLow = j + kCenter - dataSizeX + 1;
            if(kLow < 0) kLow = 0;

            acc = 0;                            // init to 0 before accumulation
            for(k = kHigh; k >= kLow; --k)
                acc += inRow[j + kCenter - k] * kernelX[k];

            *tmpPtr = acc;
            ++tmpPtr;                           // next intermediate output
        }
    }
    // END OF HORIZONTAL CONVOLUTION //////////////////////

    // start vertical direction ///////////////////////////
    kCenter = kSizeY >> 1;                      // center index of vertical kernel
    outPtr = out;
    for(i = 0; i < dataSizeY; ++i)
    {
        // tap k reads intermediate row (i + kCenter - k)
        kHigh = i + kCenter;
        if(kHigh > kSizeY - 1) kHigh = kSizeY - 1;
        kLow = i + kCenter - dataSizeY + 1;
        if(kLow < 0) kLow = 0;

        for(j = 0; j < dataSizeX; ++j)          // clear before accumulation
            sum[j] = 0;

        // accumulate a whole row per tap so memory is walked in row order
        for(k = kHigh; k >= kLow; --k)
        {
            tmpRow = tmp + (std::size_t)dataSizeX * (i + kCenter - k);
            for(j = 0; j < dataSizeX; ++j)
                sum[j] += tmpRow[j] * kernelY[k];
        }

        // convert and copy from sum to out
        for(j = 0; j < dataSizeX; ++j)
        {
            // convert negative to positive and round to nearest
            *outPtr = (unsigned char)((float)fabs(sum[j]) + 0.5f);
            ++outPtr;                           // next output
        }
    }
    // END OF VERTICAL CONVOLUTION ////////////////////////

    // deallocate temp buffers
    delete [] tmp;
    delete [] sum;
    return true;
}
///////////////////////////////////////////////////////////////////////////////
// unsigned short (16-bit) version
//
// Two-pass separable convolution:
//   pass 1 (x):  tmp[i][j] = sum over k of kernelX[k] * in[i][j + kSizeX/2 - k]
//   pass 2 (y):  out[i][j] = | sum over k of kernelY[k] * tmp[i + kSizeY/2 - k][j] |
// rounded to nearest. Samples outside the image are skipped, so any kernel
// length is safe, including kernels longer than the image. Kernel taps are
// accumulated from the highest index k down to the lowest.
//
// in, out  : row-major images, dataSizeX * dataSizeY elements each
// kernelX  : horizontal kernel, kSizeX coefficients
// kernelY  : vertical kernel,   kSizeY coefficients
// returns  : false if a pointer is null, any size is not positive, or a
//            temporary buffer cannot be allocated; true otherwise
// NOTE: the result is not clamped; it is expected to fit in unsigned short.
///////////////////////////////////////////////////////////////////////////////
bool convolve2DSeparable(unsigned short* in, unsigned short* out, int dataSizeX, int dataSizeY,
                         float* kernelX, int kSizeX, float* kernelY, int kSizeY)
{
    int i, j, k;
    int kCenter, kLow, kHigh;                   // kernel center and clamped tap range
    float *tmp, *sum;                           // intermediate image / one accumulated row
    const unsigned short* inRow;
    const float* tmpRow;
    float* tmpPtr;
    unsigned short* outPtr;
    float acc;

    // check validity of params
    if(!in || !out || !kernelX || !kernelY) return false;
    if(dataSizeX <= 0 || dataSizeY <= 0) return false;
    if(kSizeX <= 0 || kSizeY <= 0) return false;

    // allocate temp storage to keep intermediate result; the element count is
    // computed in size_t so that large images do not overflow int
    tmp = new (std::nothrow) float[(std::size_t)dataSizeX * (std::size_t)dataSizeY];
    if(!tmp) return false;                      // memory allocation error

    // store accumulated sum of one output row
    sum = new (std::nothrow) float[dataSizeX];
    if(!sum)
    {
        delete [] tmp;                          // do not leak the first buffer
        return false;                           // memory allocation error
    }

    // convolve horizontal direction ///////////////////////
    kCenter = kSizeX >> 1;                      // center index of kernel array
    tmpPtr = tmp;
    for(i = 0; i < dataSizeY; ++i)
    {
        inRow = in + (std::size_t)dataSizeX * i;
        for(j = 0; j < dataSizeX; ++j)
        {
            // tap k reads column (j + kCenter - k); keep taps inside the row
            kHigh = j + kCenter;
            if(kHigh > kSizeX - 1) kHigh = kSizeX - 1;
            kLow = j + kCenter - dataSizeX + 1;
            if(kLow < 0) kLow = 0;

            acc = 0;                            // init to 0 before accumulation
            for(k = kHigh; k >= kLow; --k)
                acc += inRow[j + kCenter - k] * kernelX[k];

            *tmpPtr = acc;
            ++tmpPtr;                           // next intermediate output
        }
    }
    // END OF HORIZONTAL CONVOLUTION //////////////////////

    // start vertical direction ///////////////////////////
    kCenter = kSizeY >> 1;                      // center index of vertical kernel
    outPtr = out;
    for(i = 0; i < dataSizeY; ++i)
    {
        // tap k reads intermediate row (i + kCenter - k)
        kHigh = i + kCenter;
        if(kHigh > kSizeY - 1) kHigh = kSizeY - 1;
        kLow = i + kCenter - dataSizeY + 1;
        if(kLow < 0) kLow = 0;

        for(j = 0; j < dataSizeX; ++j)          // clear before accumulation
            sum[j] = 0;

        // accumulate a whole row per tap so memory is walked in row order
        for(k = kHigh; k >= kLow; --k)
        {
            tmpRow = tmp + (std::size_t)dataSizeX * (i + kCenter - k);
            for(j = 0; j < dataSizeX; ++j)
                sum[j] += tmpRow[j] * kernelY[k];
        }

        // convert and copy from sum to out
        for(j = 0; j < dataSizeX; ++j)
        {
            // convert negative to positive and round to nearest
            *outPtr = (unsigned short)((float)fabs(sum[j]) + 0.5f);
            ++outPtr;                           // next output
        }
    }
    // END OF VERTICAL CONVOLUTION ////////////////////////

    // deallocate temp buffers
    delete [] tmp;
    delete [] sum;
    return true;
}

///////////////////////////////////////////////////////////////////////////////
// integer (32-bit) version
//
// Two-pass separable convolution:
//   pass 1 (x):  tmp[i][j] = sum over k of kernelX[k] * in[i][j + kSizeX/2 - k]
//   pass 2 (y):  out[i][j] = sum over k of kernelY[k] * tmp[i + kSizeY/2 - k][j]
// rounded to the nearest integer (halves away from zero; the sign is kept).
// Samples outside the image are skipped, so any kernel length is safe,
// including kernels longer than the image. Kernel taps are accumulated from
// the highest index k down to the lowest.
//
// in, out  : row-major images, dataSizeX * dataSizeY elements each
// kernelX  : horizontal kernel, kSizeX coefficients
// kernelY  : vertical kernel,   kSizeY coefficients
// returns  : false if a pointer is null, any size is not positive, or a
//            temporary buffer cannot be allocated; true otherwise
// NOTE: accumulation is done in single-precision float.
///////////////////////////////////////////////////////////////////////////////
bool convolve2DSeparable(int* in, int* out, int dataSizeX, int dataSizeY,
                         float* kernelX, int kSizeX, float* kernelY, int kSizeY)
{
    int i, j, k;
    int kCenter, kLow, kHigh;                   // kernel center and clamped tap range
    float *tmp, *sum;                           // intermediate image / one accumulated row
    const int* inRow;
    const float* tmpRow;
    float* tmpPtr;
    int* outPtr;
    float acc;

    // check validity of params
    if(!in || !out || !kernelX || !kernelY) return false;
    if(dataSizeX <= 0 || dataSizeY <= 0) return false;
    if(kSizeX <= 0 || kSizeY <= 0) return false;

    // allocate temp storage to keep intermediate result; the element count is
    // computed in size_t so that large images do not overflow int
    tmp = new (std::nothrow) float[(std::size_t)dataSizeX * (std::size_t)dataSizeY];
    if(!tmp) return false;                      // memory allocation error

    // store accumulated sum of one output row
    sum = new (std::nothrow) float[dataSizeX];
    if(!sum)
    {
        delete [] tmp;                          // do not leak the first buffer
        return false;                           // memory allocation error
    }

    // convolve horizontal direction ///////////////////////
    kCenter = kSizeX >> 1;                      // center index of kernel array
    tmpPtr = tmp;
    for(i = 0; i < dataSizeY; ++i)
    {
        inRow = in + (std::size_t)dataSizeX * i;
        for(j = 0; j < dataSizeX; ++j)
        {
            // tap k reads column (j + kCenter - k); keep taps inside the row
            kHigh = j + kCenter;
            if(kHigh > kSizeX - 1) kHigh = kSizeX - 1;
            kLow = j + kCenter - dataSizeX + 1;
            if(kLow < 0) kLow = 0;

            acc = 0;                            // init to 0 before accumulation
            for(k = kHigh; k >= kLow; --k)
                acc += inRow[j + kCenter - k] * kernelX[k];

            *tmpPtr = acc;
            ++tmpPtr;                           // next intermediate output
        }
    }
    // END OF HORIZONTAL CONVOLUTION //////////////////////

    // start vertical direction ///////////////////////////
    kCenter = kSizeY >> 1;                      // center index of vertical kernel
    outPtr = out;
    for(i = 0; i < dataSizeY; ++i)
    {
        // tap k reads intermediate row (i + kCenter - k)
        kHigh = i + kCenter;
        if(kHigh > kSizeY - 1) kHigh = kSizeY - 1;
        kLow = i + kCenter - dataSizeY + 1;
        if(kLow < 0) kLow = 0;

        for(j = 0; j < dataSizeX; ++j)          // clear before accumulation
            sum[j] = 0;

        // accumulate a whole row per tap so memory is walked in row order
        for(k = kHigh; k >= kLow; --k)
        {
            tmpRow = tmp + (std::size_t)dataSizeX * (i + kCenter - k);
            for(j = 0; j < dataSizeX; ++j)
                sum[j] += tmpRow[j] * kernelY[k];
        }

        // convert and copy from sum to out, rounding half away from zero
        for(j = 0; j < dataSizeX; ++j)
        {
            if(sum[j] >= 0)
                *outPtr = (int)(sum[j] + 0.5f);
            else
                *outPtr = (int)(sum[j] - 0.5f);
            ++outPtr;                           // next output
        }
    }
    // END OF VERTICAL CONVOLUTION ////////////////////////

    // deallocate temp buffers
    delete [] tmp;
    delete [] sum;
    return true;
}

///////////////////////////////////////////////////////////////////////////////
// single precision float version
///////////////////////////////////////////////////////////////////////////////
// Convolves a dataSizeX x dataSizeY image with a separable kernel.
// Pass 1 convolves every row with kernelX into a temporary image; pass 2
// convolves every column of that image with kernelY and writes to out.
// Near the borders only the part of the kernel that overlaps the image is
// applied, i.e. samples outside the image contribute nothing.
//
// in, out      : row-major images of dataSizeX * dataSizeY samples
// kernelX/Y    : 1D kernels of kSizeX / kSizeY taps, centred at (kSize >> 1)
// returns      : false on a null pointer, a non-positive size, or an image
//                narrower/shorter than 2 * (kSize >> 1) samples (the border
//                handling would otherwise read and write out of bounds);
//                true otherwise.
bool convolve2DSeparable(float* in, float* out, int dataSizeX, int dataSizeY,
                         float* kernelX, int kSizeX, float* kernelY, int kSizeY)
{
    // check validity of params
    if(!in || !out || !kernelX || !kernelY) return false;
    if(dataSizeX <= 0 || dataSizeY <= 0 || kSizeX <= 0 || kSizeY <= 0) return false;

    // center index of each kernel (half of kernel size)
    const int kCenterX = kSizeX >> 1;
    const int kCenterY = kSizeY >> 1;

    // the left/right (top/bottom) border bands must not overlap
    if(dataSizeX < 2 * kCenterX || dataSizeY < 2 * kCenterY) return false;

    // intermediate image holding the result of the horizontal pass
    float* tmp = new float[dataSizeX * dataSizeY];
    if(!tmp) return false;                      // memory allocation error

    // one row of running sums for the vertical pass
    float* sum = new float[dataSizeX];
    if(!sum)
    {
        delete [] tmp;                          // do not leak the first buffer
        return false;
    }

    // horizontal convolution (x-direction) ///////////////////////////////////
    const int endIndexX = dataSizeX - kCenterX; // first column of the right border
    const float* rowPtr = in;
    float* tmpPtr = tmp;
    for(int i = 0; i < dataSizeY; ++i)
    {
        for(int j = 0; j < dataSizeX; ++j)
        {
            // kernel taps kHigh..kLow overlap the row for this column
            const int kHigh = (j < kCenterX) ? kCenterX + j : kSizeX - 1;
            const int kLow  = (j >= endIndexX) ? j - endIndexX + 1 : 0;

            // first input sample that takes part (row start in the left border)
            const float* src = (j < kCenterX) ? rowPtr : rowPtr + (j - kCenterX);

            float acc = 0;
            for(int k = kHigh; k >= kLow; --k, ++src)
                acc += *src * kernelX[k];

            *tmpPtr = acc;
            ++tmpPtr;
        }
        rowPtr += dataSizeX;                    // next row
    }

    // vertical convolution (y-direction) /////////////////////////////////////
    const int endIndexY = dataSizeY - kCenterY; // first row of the bottom border
    for(int j = 0; j < dataSizeX; ++j)
        sum[j] = 0;

    float* outPtr = out;
    for(int i = 0; i < dataSizeY; ++i)
    {
        const int kHigh = (i < kCenterY) ? kCenterY + i : kSizeY - 1;
        const int kLow  = (i >= endIndexY) ? i - endIndexY + 1 : 0;

        // first intermediate row that takes part (row 0 in the top border)
        const float* src = (i < kCenterY) ? tmp : tmp + (i - kCenterY) * dataSizeX;

        // accumulate row by row so the intermediate image is read sequentially
        for(int k = kHigh; k >= kLow; --k)
        {
            for(int j = 0; j < dataSizeX; ++j)
            {
                sum[j] += *src * kernelY[k];
                ++src;
            }
        }

        // copy the finished row to the output and reset the accumulators
        for(int j = 0; j < dataSizeX; ++j)
        {
            *outPtr = sum[j];
            ++outPtr;
            sum[j] = 0;
        }
    }

    // deallocate temp buffers
    delete [] tmp;
    delete [] sum;
    return true;
}
///////////////////////////////////////////////////////////////////////////////
// double precision float version

///////////////////////////////////////////////////////////////////////////////
// Convolves a dataSizeX x dataSizeY image of doubles with a separable kernel.
// Pass 1 convolves every row with kernelX into a temporary image; pass 2
// convolves every column of that image with kernelY and writes to out.
// Near the borders only the part of the kernel that overlaps the image is
// applied, i.e. samples outside the image contribute nothing.
//
// in, out      : row-major images of dataSizeX * dataSizeY samples
// kernelX      : horizontal kernel, kSizeX taps, centred at (kSizeX >> 1)
// kernelY      : vertical kernel, kSizeY taps, centred at (kSizeY >> 1)
//                NOTE(review): this parameter is float* while kernelX is
//                double*; the signature is kept as-is for existing callers.
// returns      : false on a null pointer, a non-positive size, or an image
//                narrower/shorter than 2 * (kSize >> 1) samples (the border
//                handling would otherwise read and write out of bounds);
//                true otherwise.
bool convolve2DSeparable(double* in, double* out, int dataSizeX, int dataSizeY,
                         double* kernelX, int kSizeX, float* kernelY, int kSizeY)
{
    // check validity of params
    if(!in || !out || !kernelX || !kernelY) return false;
    if(dataSizeX <= 0 || dataSizeY <= 0 || kSizeX <= 0 || kSizeY <= 0) return false;

    // center index of each kernel (half of kernel size)
    const int kCenterX = kSizeX >> 1;
    const int kCenterY = kSizeY >> 1;

    // the left/right (top/bottom) border bands must not overlap
    if(dataSizeX < 2 * kCenterX || dataSizeY < 2 * kCenterY) return false;

    // intermediate image holding the result of the horizontal pass
    double* tmp = new double[dataSizeX * dataSizeY];
    if(!tmp) return false;                      // memory allocation error

    // one row of running sums for the vertical pass
    double* sum = new double[dataSizeX];
    if(!sum)
    {
        delete [] tmp;                          // do not leak the first buffer
        return false;
    }

    // horizontal convolution (x-direction) ///////////////////////////////////
    const int endIndexX = dataSizeX - kCenterX; // first column of the right border
    const double* rowPtr = in;
    double* tmpPtr = tmp;
    for(int i = 0; i < dataSizeY; ++i)
    {
        for(int j = 0; j < dataSizeX; ++j)
        {
            // kernel taps kHigh..kLow overlap the row for this column
            const int kHigh = (j < kCenterX) ? kCenterX + j : kSizeX - 1;
            const int kLow  = (j >= endIndexX) ? j - endIndexX + 1 : 0;

            // first input sample that takes part (row start in the left border)
            const double* src = (j < kCenterX) ? rowPtr : rowPtr + (j - kCenterX);

            double acc = 0;
            for(int k = kHigh; k >= kLow; --k, ++src)
                acc += *src * kernelX[k];

            *tmpPtr = acc;
            ++tmpPtr;
        }
        rowPtr += dataSizeX;                    // next row
    }

    // vertical convolution (y-direction) /////////////////////////////////////
    const int endIndexY = dataSizeY - kCenterY; // first row of the bottom border
    for(int j = 0; j < dataSizeX; ++j)
        sum[j] = 0;

    double* outPtr = out;
    for(int i = 0; i < dataSizeY; ++i)
    {
        const int kHigh = (i < kCenterY) ? kCenterY + i : kSizeY - 1;
        const int kLow  = (i >= endIndexY) ? i - endIndexY + 1 : 0;

        // first intermediate row that takes part (row 0 in the top border)
        const double* src = (i < kCenterY) ? tmp : tmp + (i - kCenterY) * dataSizeX;

        // accumulate row by row so the intermediate image is read sequentially
        for(int k = kHigh; k >= kLow; --k)
        {
            for(int j = 0; j < dataSizeX; ++j)
            {
                sum[j] += *src * kernelY[k];
                ++src;
            }
        }

        // copy the finished row to the output and reset the accumulators
        for(int j = 0; j < dataSizeX; ++j)
        {
            *outPtr = sum[j];
            ++outPtr;
            sum[j] = 0;
        }
    }

    // deallocate temp buffers
    delete [] tmp;
    delete [] sum;
    return true;
}

///////////////////////////////////////////////////////////////////////////////
// 2D Convolution Fast
// In order to improve the performance, this function uses multiple cursors of
// input signal. It avoids indexing input array during convolution. And, the
// input signal is partitioned to 9 different sections, so we don't need to
// check the boundary for every sample.
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// unsigned char (8-bit) version
///////////////////////////////////////////////////////////////////////////////
// Sums cursors[t] * kernel[t] over kernel rows [mBegin, mEnd) and kernel
// columns [nBegin, nEnd), with t = m * kernelSizeX + n. Only cursors inside
// that window are dereferenced.
static float convolve2DFastWindowSum(unsigned char* const* cursors, const float* kernel,
                                     int kernelSizeX, int mBegin, int mEnd,
                                     int nBegin, int nEnd)
{
    float sum = 0;
    for(int m = mBegin; m < mEnd; ++m)
    {
        int t = m * kernelSizeX + nBegin;       // first tap of this kernel row
        for(int n = nBegin; n < nEnd; ++n, ++t)
            sum += *cursors[t] * kernel[t];
    }
    return sum;
}

// 2D convolution of an 8-bit image with a float kernel.
// One input cursor is kept per kernel tap; all cursors advance together with
// the output position, so the input is never indexed inside the inner loops.
// For each output sample only the kernel rows/columns that overlap the image
// are used (top/middle/bottom x left/middle/right = 9 cases), so no boundary
// test is needed per tap.
//
// in, out     : row-major images of dataSizeX * dataSizeY bytes
// kernel      : kernelSizeX * kernelSizeY taps, row-major, centred at
//               (kernelSizeX >> 1, kernelSizeY >> 1)
// Each output is (unsigned char)(|sum| + 0.5); no clamping is performed, so
// the kernel should be scaled such that results fit in 0..255.
// returns     : false on a null pointer, a non-positive size, or an image
//               smaller than 2 * (kernelSize >> 1) along either axis (the
//               border bands would overlap and overrun the buffers);
//               true otherwise.
bool convolve2DFast(unsigned char* in, unsigned char* out, int dataSizeX, int dataSizeY,
                    float* kernel, int kernelSizeX, int kernelSizeY)
{
    // check validity of params
    if(!in || !out || !kernel) return false;
    if(dataSizeX <= 0 || dataSizeY <= 0 || kernelSizeX <= 0 || kernelSizeY <= 0) return false;

    // find center position of kernel (half of kernel size)
    const int kCenterX = kernelSizeX >> 1;
    const int kCenterY = kernelSizeY >> 1;
    if(dataSizeX < 2 * kCenterX || dataSizeY < 2 * kCenterY) return false;

    const int kSize = kernelSizeX * kernelSizeY;    // total kernel size

    // allocate memory for multi-cursor
    unsigned char** inPtr = new unsigned char*[kSize];
    if(!inPtr) return false;                        // allocation error

    // set initial position of multi-cursor, NOTE: it is flipped relative to
    // the kernel: tap (m, n) looks at input (kCenterX - n, kCenterY - m)
    unsigned char* ptr = in + (dataSizeX * kCenterY + kCenterX);
    for(int m = 0, t = 0; m < kernelSizeY; ++m)
    {
        for(int n = 0; n < kernelSizeX; ++n, ++t)
            inPtr[t] = ptr - n;
        ptr -= dataSizeX;
    }

    const int rowEnd = dataSizeY - kCenterY;        // first row of the bottom band
    const int colEnd = dataSizeX - kCenterX;        // first column of the right band
    unsigned char* outPtr = out;

    for(int i = 0; i < dataSizeY; ++i)
    {
        // kernel rows that overlap the image for this output row
        const int mBegin = (i >= rowEnd) ? i - rowEnd + 1 : 0;
        const int mEnd   = (i < kCenterY) ? kCenterY + i + 1 : kernelSizeY;

        for(int j = 0; j < dataSizeX; ++j)
        {
            // kernel columns that overlap the image for this output column
            const int nBegin = (j >= colEnd) ? j - colEnd + 1 : 0;
            const int nEnd   = (j < kCenterX) ? kCenterX + j + 1 : kernelSizeX;

            const float sum = convolve2DFastWindowSum(inPtr, kernel, kernelSizeX,
                                                      mBegin, mEnd, nBegin, nEnd);

            // store output
            *outPtr = (unsigned char)((float)fabs(sum) + 0.5f);
            ++outPtr;

            // move all cursors to the next input sample
            for(int k = 0; k < kSize; ++k) ++inPtr[k];
        }
    }

    delete [] inPtr;                                // release the cursor table
    return true;
}

///////////////////////////////////////////////////////////////////////////////
// Fast 2D Convolution using integer multiplication instead of float.
// Multiply coefficient(factor) to accumulated sum at last.
// NOTE: IT IS NOT FASTER THAN FLOAT MULTIPLICATION, TRY YOURSELF!!!
///////////////////////////////////////////////////////////////////////////////
// Sums cursors[t] * kernel[t] in integer arithmetic over kernel rows
// [mBegin, mEnd) and kernel columns [nBegin, nEnd), with
// t = m * kernelSizeX + n. Only cursors inside that window are dereferenced.
static int convolve2DFast2WindowSum(unsigned char* const* cursors, const int* kernel,
                                    int kernelSizeX, int mBegin, int mEnd,
                                    int nBegin, int nEnd)
{
    int sum = 0;
    for(int m = mBegin; m < mEnd; ++m)
    {
        int t = m * kernelSizeX + nBegin;       // first tap of this kernel row
        for(int n = nBegin; n < nEnd; ++n, ++t)
            sum += *cursors[t] * kernel[t];
    }
    return sum;
}

// 2D convolution of an 8-bit image with an integer kernel; the accumulated
// integer sum is multiplied by `factor` once per output sample.
// One input cursor is kept per kernel tap; all cursors advance together with
// the output position. For each output sample only the kernel rows/columns
// that overlap the image are used (top/middle/bottom x left/middle/right).
//
// in, out     : row-major images of dataSizeX * dataSizeY bytes
// kernel      : kernelSizeX * kernelSizeY integer taps, row-major, centred at
//               (kernelSizeX >> 1, kernelSizeY >> 1)
// factor      : scale applied to the integer sum before rounding
// Each output is (unsigned char)(|sum * factor| + 0.5); no clamping is
// performed, so factor should keep results within 0..255.
// returns     : false on a null pointer, a non-positive size, or an image
//               smaller than 2 * (kernelSize >> 1) along either axis (the
//               border bands would overlap and overrun the buffers);
//               true otherwise.
bool convolve2DFast2(unsigned char* in, unsigned char* out, int dataSizeX, int dataSizeY,
                     int* kernel, float factor, int kernelSizeX, int kernelSizeY)
{
    // check validity of params
    if(!in || !out || !kernel) return false;
    if(dataSizeX <= 0 || dataSizeY <= 0 || kernelSizeX <= 0 || kernelSizeY <= 0) return false;

    // find center position of kernel (half of kernel size)
    const int kCenterX = kernelSizeX >> 1;
    const int kCenterY = kernelSizeY >> 1;
    if(dataSizeX < 2 * kCenterX || dataSizeY < 2 * kCenterY) return false;

    const int kSize = kernelSizeX * kernelSizeY;    // total kernel size

    // allocate memory for multi-cursor
    unsigned char** inPtr = new unsigned char*[kSize];
    if(!inPtr) return false;                        // allocation error

    // set initial position of multi-cursor, NOTE: it is flipped relative to
    // the kernel: tap (m, n) looks at input (kCenterX - n, kCenterY - m)
    unsigned char* ptr = in + (dataSizeX * kCenterY + kCenterX);
    for(int m = 0, t = 0; m < kernelSizeY; ++m)
    {
        for(int n = 0; n < kernelSizeX; ++n, ++t)
            inPtr[t] = ptr - n;
        ptr -= dataSizeX;
    }

    const int rowEnd = dataSizeY - kCenterY;        // first row of the bottom band
    const int colEnd = dataSizeX - kCenterX;        // first column of the right band
    unsigned char* outPtr = out;

    for(int i = 0; i < dataSizeY; ++i)
    {
        // kernel rows that overlap the image for this output row
        const int mBegin = (i >= rowEnd) ? i - rowEnd + 1 : 0;
        const int mEnd   = (i < kCenterY) ? kCenterY + i + 1 : kernelSizeY;

        for(int j = 0; j < dataSizeX; ++j)
        {
            // kernel columns that overlap the image for this output column
            const int nBegin = (j >= colEnd) ? j - colEnd + 1 : 0;
            const int nEnd   = (j < kCenterX) ? kCenterX + j + 1 : kernelSizeX;

            const int sum = convolve2DFast2WindowSum(inPtr, kernel, kernelSizeX,
                                                     mBegin, mEnd, nBegin, nEnd);

            // store output
            *outPtr = (unsigned char)(fabs(sum * factor) + 0.5f);
            ++outPtr;

            // move all cursors to the next input sample
            for(int k = 0; k < kSize; ++k) ++inPtr[k];
        }
    }

    delete [] inPtr;                                // release the cursor table
    return true;
}

// the starting row index is

You might also like