gpu::GpuMat I1_2, I2_2, I1_I2;\r
vector<gpu::GpuMat> vI1, vI2;\r
\r
- gpu::GpuMat mu1, mu2; \r
- gpu::GpuMat mu1_2, mu2_2, mu1_mu2; \r
+ gpu::GpuMat mu1, mu2;\r
+ gpu::GpuMat mu1_2, mu2_2, mu1_mu2;\r
\r
- gpu::GpuMat sigma1_2, sigma2_2, sigma12; \r
- gpu::GpuMat t3; \r
+ gpu::GpuMat sigma1_2, sigma2_2, sigma12;\r
+ gpu::GpuMat t3;\r
\r
gpu::GpuMat ssim_map;\r
\r
\r
int main(int argc, char *argv[])\r
{\r
- help(); \r
+ help();\r
Mat I1 = imread(argv[1]); // Read the two images\r
Mat I2 = imread(argv[2]);\r
\r
BufferPSNR bufferPSNR;\r
BufferMSSIM bufferMSSIM;\r
\r
- int TIMES; \r
- stringstream sstr(argv[3]); \r
+ int TIMES;\r
+ stringstream sstr(argv[3]);\r
sstr >> TIMES;\r
double time, result;\r
\r
//------------------------------- PSNR CPU ----------------------------------------------------\r
- time = (double)getTickCount(); \r
+ time = (double)getTickCount();\r
\r
for (int i = 0; i < TIMES; ++i)\r
result = getPSNR(I1,I2);\r
time /= TIMES;\r
\r
cout << "Time of PSNR CPU (averaged for " << TIMES << " runs): " << time << " milliseconds."\r
- << " With result of: " << result << endl; \r
+ << " With result of: " << result << endl;\r
\r
//------------------------------- PSNR GPU ----------------------------------------------------\r
- time = (double)getTickCount(); \r
+ time = (double)getTickCount();\r
\r
for (int i = 0; i < TIMES; ++i)\r
result = getPSNR_GPU(I1,I2);\r
time /= TIMES;\r
\r
cout << "Time of PSNR GPU (averaged for " << TIMES << " runs): " << time << " milliseconds."\r
- << " With result of: " << result << endl; \r
+ << " With result of: " << result << endl;\r
\r
//------------------------------- PSNR GPU Optimized--------------------------------------------\r
time = (double)getTickCount(); // Initial call\r
cout << "Initial call GPU optimized: " << time <<" milliseconds."\r
<< " With result of: " << result << endl;\r
\r
- time = (double)getTickCount(); \r
+ time = (double)getTickCount();\r
for (int i = 0; i < TIMES; ++i)\r
result = getPSNR_GPU_optimized(I1, I2, bufferPSNR);\r
\r
time = 1000*((double)getTickCount() - time)/getTickFrequency();\r
time /= TIMES;\r
\r
- cout << "Time of PSNR GPU OPTIMIZED ( / " << TIMES << " runs): " << time \r
- << " milliseconds." << " With result of: " << result << endl << endl; \r
+ cout << "Time of PSNR GPU OPTIMIZED ( / " << TIMES << " runs): " << time\r
+ << " milliseconds." << " With result of: " << result << endl << endl;\r
\r
\r
//------------------------------- SSIM CPU -----------------------------------------------------\r
Scalar x;\r
- time = (double)getTickCount(); \r
+ time = (double)getTickCount();\r
\r
for (int i = 0; i < TIMES; ++i)\r
x = getMSSIM(I1,I2);\r
time /= TIMES;\r
\r
cout << "Time of MSSIM CPU (averaged for " << TIMES << " runs): " << time << " milliseconds."\r
- << " With result of B" << x.val[0] << " G" << x.val[1] << " R" << x.val[2] << endl; \r
+ << " With result of B" << x.val[0] << " G" << x.val[1] << " R" << x.val[2] << endl;\r
\r
//------------------------------- SSIM GPU -----------------------------------------------------\r
- time = (double)getTickCount(); \r
+ time = (double)getTickCount();\r
\r
for (int i = 0; i < TIMES; ++i)\r
x = getMSSIM_GPU(I1,I2);\r
time /= TIMES;\r
\r
cout << "Time of MSSIM GPU (averaged for " << TIMES << " runs): " << time << " milliseconds."\r
- << " With result of B" << x.val[0] << " G" << x.val[1] << " R" << x.val[2] << endl; \r
+ << " With result of B" << x.val[0] << " G" << x.val[1] << " R" << x.val[2] << endl;\r
\r
//------------------------------- SSIM GPU Optimized--------------------------------------------\r
- time = (double)getTickCount(); \r
+ time = (double)getTickCount();\r
x = getMSSIM_GPU_optimized(I1,I2, bufferMSSIM);\r
time = 1000*((double)getTickCount() - time)/getTickFrequency();\r
cout << "Time of MSSIM GPU Initial Call " << time << " milliseconds."\r
- << " With result of B" << x.val[0] << " G" << x.val[1] << " R" << x.val[2] << endl; \r
+ << " With result of B" << x.val[0] << " G" << x.val[1] << " R" << x.val[2] << endl;\r
\r
- time = (double)getTickCount(); \r
+ time = (double)getTickCount();\r
\r
for (int i = 0; i < TIMES; ++i)\r
x = getMSSIM_GPU_optimized(I1,I2, bufferMSSIM);\r
time /= TIMES;\r
\r
cout << "Time of MSSIM GPU OPTIMIZED ( / " << TIMES << " runs): " << time << " milliseconds."\r
- << " With result of B" << x.val[0] << " G" << x.val[1] << " R" << x.val[2] << endl << endl; \r
+ << " With result of B" << x.val[0] << " G" << x.val[1] << " R" << x.val[2] << endl << endl;\r
return 0;\r
}\r
\r
\r
double getPSNR(const Mat& I1, const Mat& I2)\r
{\r
- Mat s1; \r
+ Mat s1;\r
absdiff(I1, I2, s1); // |I1 - I2|\r
s1.convertTo(s1, CV_32F); // cannot make a square on 8 bits\r
s1 = s1.mul(s1); // |I1 - I2|^2\r
\r
\r
double getPSNR_GPU_optimized(const Mat& I1, const Mat& I2, BufferPSNR& b)\r
-{ \r
+{\r
b.gI1.upload(I1);\r
b.gI2.upload(I2);\r
\r
\r
double getPSNR_GPU(const Mat& I1, const Mat& I2)\r
{\r
- gpu::GpuMat gI1, gI2, gs, t1,t2; \r
+ gpu::GpuMat gI1, gI2, gs, t1,t2;\r
\r
gI1.upload(I1);\r
gI2.upload(I2);\r
gI1.convertTo(t1, CV_32F);\r
gI2.convertTo(t2, CV_32F);\r
\r
- gpu::absdiff(t1.reshape(1), t2.reshape(1), gs); \r
+ gpu::absdiff(t1.reshape(1), t2.reshape(1), gs);\r
gpu::multiply(gs, gs, gs);\r
\r
Scalar s = gpu::sum(gs);\r
}\r
\r
Scalar getMSSIM( const Mat& i1, const Mat& i2)\r
-{ \r
+{\r
const double C1 = 6.5025, C2 = 58.5225;\r
/***************************** INITS **********************************/\r
int d = CV_32F;\r
\r
- Mat I1, I2; \r
+ Mat I1, I2;\r
i1.convertTo(I1, d); // cannot calculate on one byte large values\r
- i2.convertTo(I2, d); \r
+ i2.convertTo(I2, d);\r
\r
Mat I2_2 = I2.mul(I2); // I2^2\r
Mat I1_2 = I1.mul(I1); // I1^2\r
GaussianBlur(I1, mu1, Size(11, 11), 1.5);\r
GaussianBlur(I2, mu2, Size(11, 11), 1.5);\r
\r
- Mat mu1_2 = mu1.mul(mu1); \r
- Mat mu2_2 = mu2.mul(mu2); \r
+ Mat mu1_2 = mu1.mul(mu1);\r
+ Mat mu2_2 = mu2.mul(mu2);\r
Mat mu1_mu2 = mu1.mul(mu2);\r
\r
- Mat sigma1_2, sigma2_2, sigma12; \r
+ Mat sigma1_2, sigma2_2, sigma12;\r
\r
GaussianBlur(I1_2, sigma1_2, Size(11, 11), 1.5);\r
sigma1_2 -= mu1_2;\r
sigma12 -= mu1_mu2;\r
\r
///////////////////////////////// FORMULA ////////////////////////////////\r
- Mat t1, t2, t3; \r
+ Mat t1, t2, t3;\r
\r
- t1 = 2 * mu1_mu2 + C1; \r
- t2 = 2 * sigma12 + C2; \r
+ t1 = 2 * mu1_mu2 + C1;\r
+ t2 = 2 * sigma12 + C2;\r
t3 = t1.mul(t2); // t3 = ((2*mu1_mu2 + C1).*(2*sigma12 + C2))\r
\r
- t1 = mu1_2 + mu2_2 + C1; \r
- t2 = sigma1_2 + sigma2_2 + C2; \r
+ t1 = mu1_2 + mu2_2 + C1;\r
+ t2 = sigma1_2 + sigma2_2 + C2;\r
t1 = t1.mul(t2); // t1 =((mu1_2 + mu2_2 + C1).*(sigma1_2 + sigma2_2 + C2))\r
\r
Mat ssim_map;\r
divide(t3, t1, ssim_map); // ssim_map = t3./t1;\r
\r
Scalar mssim = mean( ssim_map ); // mssim = average of ssim map\r
- return mssim; \r
+ return mssim;\r
}\r
\r
Scalar getMSSIM_GPU( const Mat& i1, const Mat& i2)\r
-{ \r
+{\r
const float C1 = 6.5025f, C2 = 58.5225f;\r
/***************************** INITS **********************************/\r
- gpu::GpuMat gI1, gI2, gs1, t1,t2; \r
+ gpu::GpuMat gI1, gI2, gs1, t1,t2;\r
\r
gI1.upload(i1);\r
gI2.upload(i2);\r
gI1.convertTo(t1, CV_MAKE_TYPE(CV_32F, gI1.channels()));\r
gI2.convertTo(t2, CV_MAKE_TYPE(CV_32F, gI2.channels()));\r
\r
- vector<gpu::GpuMat> vI1, vI2; \r
+ vector<gpu::GpuMat> vI1, vI2;\r
gpu::split(t1, vI1);\r
gpu::split(t2, vI2);\r
Scalar mssim;\r
\r
for( int i = 0; i < gI1.channels(); ++i )\r
{\r
- gpu::GpuMat I2_2, I1_2, I1_I2; \r
+ gpu::GpuMat I2_2, I1_2, I1_I2;\r
\r
gpu::multiply(vI2[i], vI2[i], I2_2); // I2^2\r
gpu::multiply(vI1[i], vI1[i], I1_2); // I1^2\r
gpu::GaussianBlur(vI1[i], mu1, Size(11, 11), 1.5);\r
gpu::GaussianBlur(vI2[i], mu2, Size(11, 11), 1.5);\r
\r
- gpu::GpuMat mu1_2, mu2_2, mu1_mu2; \r
- gpu::multiply(mu1, mu1, mu1_2); \r
- gpu::multiply(mu2, mu2, mu2_2); \r
- gpu::multiply(mu1, mu2, mu1_mu2); \r
+ gpu::GpuMat mu1_2, mu2_2, mu1_mu2;\r
+ gpu::multiply(mu1, mu1, mu1_2);\r
+ gpu::multiply(mu2, mu2, mu2_2);\r
+ gpu::multiply(mu1, mu2, mu1_mu2);\r
\r
- gpu::GpuMat sigma1_2, sigma2_2, sigma12; \r
+ gpu::GpuMat sigma1_2, sigma2_2, sigma12;\r
\r
gpu::GaussianBlur(I1_2, sigma1_2, Size(11, 11), 1.5);\r
- sigma1_2 -= mu1_2;\r
+ gpu::subtract(sigma1_2, mu1_2, sigma1_2); // sigma1_2 -= mu1_2;\r
\r
gpu::GaussianBlur(I2_2, sigma2_2, Size(11, 11), 1.5);\r
- sigma2_2 -= mu2_2;\r
+ gpu::subtract(sigma2_2, mu2_2, sigma2_2); // sigma2_2 -= mu2_2;\r
\r
gpu::GaussianBlur(I1_I2, sigma12, Size(11, 11), 1.5);\r
- sigma12 -= mu1_mu2;\r
+ gpu::subtract(sigma12, mu1_mu2, sigma12); // sigma12 -= mu1_mu2;\r
\r
///////////////////////////////// FORMULA ////////////////////////////////\r
- gpu::GpuMat t1, t2, t3; \r
+ gpu::GpuMat t1, t2, t3;\r
\r
- t1 = 2 * mu1_mu2 + C1; \r
- t2 = 2 * sigma12 + C2; \r
- gpu::multiply(t1, t2, t3); // t3 = ((2*mu1_mu2 + C1).*(2*sigma12 + C2))\r
+ mu1_mu2.convertTo(t1, -1, 2, C1); // t1 = 2 * mu1_mu2 + C1;\r
+ sigma12.convertTo(t2, -1, 2, C2); // t2 = 2 * sigma12 + C2;\r
+ gpu::multiply(t1, t2, t3); // t3 = ((2*mu1_mu2 + C1).*(2*sigma12 + C2))\r
\r
- t1 = mu1_2 + mu2_2 + C1; \r
- t2 = sigma1_2 + sigma2_2 + C2; \r
- gpu::multiply(t1, t2, t1); // t1 =((mu1_2 + mu2_2 + C1).*(sigma1_2 + sigma2_2 + C2))\r
+ gpu::addWeighted(mu1_2, 1.0, mu2_2, 1.0, C1, t1); // t1 = mu1_2 + mu2_2 + C1;\r
+ gpu::addWeighted(sigma1_2, 1.0, sigma2_2, 1.0, C2, t2); // t2 = sigma1_2 + sigma2_2 + C2;\r
+ gpu::multiply(t1, t2, t1); // t1 =((mu1_2 + mu2_2 + C1).*(sigma1_2 + sigma2_2 + C2))\r
\r
gpu::GpuMat ssim_map;\r
gpu::divide(t3, t1, ssim_map); // ssim_map = t3./t1;\r
\r
- Scalar s = gpu::sum(ssim_map); \r
+ Scalar s = gpu::sum(ssim_map);\r
mssim.val[i] = s.val[0] / (ssim_map.rows * ssim_map.cols);\r
\r
}\r
- return mssim; \r
+ return mssim;\r
}\r
\r
Scalar getMSSIM_GPU_optimized( const Mat& i1, const Mat& i2, BufferMSSIM& b)\r
-{ \r
+{\r
int cn = i1.channels();\r
\r
const float C1 = 6.5025f, C2 = 58.5225f;\r
gpu::Stream stream;\r
\r
stream.enqueueConvert(b.gI1, b.t1, CV_32F);\r
- stream.enqueueConvert(b.gI2, b.t2, CV_32F); \r
+ stream.enqueueConvert(b.gI2, b.t2, CV_32F);\r
\r
gpu::split(b.t1, b.vI1, stream);\r
gpu::split(b.t2, b.vI2, stream);\r
Scalar mssim;\r
\r
+ gpu::GpuMat buf;\r
+\r
for( int i = 0; i < b.gI1.channels(); ++i )\r
- { \r
+ {\r
gpu::multiply(b.vI2[i], b.vI2[i], b.I2_2, stream); // I2^2\r
gpu::multiply(b.vI1[i], b.vI1[i], b.I1_2, stream); // I1^2\r
gpu::multiply(b.vI1[i], b.vI2[i], b.I1_I2, stream); // I1 * I2\r
\r
- gpu::GaussianBlur(b.vI1[i], b.mu1, Size(11, 11), 1.5, 0, BORDER_DEFAULT, -1, stream);\r
- gpu::GaussianBlur(b.vI2[i], b.mu2, Size(11, 11), 1.5, 0, BORDER_DEFAULT, -1, stream);\r
+ gpu::GaussianBlur(b.vI1[i], b.mu1, Size(11, 11), buf, 1.5, 0, BORDER_DEFAULT, -1, stream);\r
+ gpu::GaussianBlur(b.vI2[i], b.mu2, Size(11, 11), buf, 1.5, 0, BORDER_DEFAULT, -1, stream);\r
\r
- gpu::multiply(b.mu1, b.mu1, b.mu1_2, stream); \r
- gpu::multiply(b.mu2, b.mu2, b.mu2_2, stream); \r
- gpu::multiply(b.mu1, b.mu2, b.mu1_mu2, stream); \r
+ gpu::multiply(b.mu1, b.mu1, b.mu1_2, stream);\r
+ gpu::multiply(b.mu2, b.mu2, b.mu2_2, stream);\r
+ gpu::multiply(b.mu1, b.mu2, b.mu1_mu2, stream);\r
\r
- gpu::GaussianBlur(b.I1_2, b.sigma1_2, Size(11, 11), 1.5, 0, BORDER_DEFAULT, -1, stream);\r
- gpu::subtract(b.sigma1_2, b.mu1_2, b.sigma1_2, stream);\r
+ gpu::GaussianBlur(b.I1_2, b.sigma1_2, Size(11, 11), buf, 1.5, 0, BORDER_DEFAULT, -1, stream);\r
+ gpu::subtract(b.sigma1_2, b.mu1_2, b.sigma1_2, gpu::GpuMat(), -1, stream);\r
//b.sigma1_2 -= b.mu1_2; - This would result in an extra data transfer operation\r
\r
- gpu::GaussianBlur(b.I2_2, b.sigma2_2, Size(11, 11), 1.5, 0, BORDER_DEFAULT, -1, stream);\r
- gpu::subtract(b.sigma2_2, b.mu2_2, b.sigma2_2, stream);\r
+ gpu::GaussianBlur(b.I2_2, b.sigma2_2, Size(11, 11), buf, 1.5, 0, BORDER_DEFAULT, -1, stream);\r
+ gpu::subtract(b.sigma2_2, b.mu2_2, b.sigma2_2, gpu::GpuMat(), -1, stream);\r
//b.sigma2_2 -= b.mu2_2;\r
\r
- gpu::GaussianBlur(b.I1_I2, b.sigma12, Size(11, 11), 1.5, 0, BORDER_DEFAULT, -1, stream);\r
- gpu::subtract(b.sigma12, b.mu1_mu2, b.sigma12, stream);\r
+ gpu::GaussianBlur(b.I1_I2, b.sigma12, Size(11, 11), buf, 1.5, 0, BORDER_DEFAULT, -1, stream);\r
+ gpu::subtract(b.sigma12, b.mu1_mu2, b.sigma12, gpu::GpuMat(), -1, stream);\r
//b.sigma12 -= b.mu1_mu2;\r
\r
//here too it would be an extra data transfer due to call of operator*(Scalar, Mat)\r
- gpu::multiply(b.mu1_mu2, 2, b.t1, stream); //b.t1 = 2 * b.mu1_mu2 + C1; \r
- gpu::add(b.t1, C1, b.t1, stream);\r
- gpu::multiply(b.sigma12, 2, b.t2, stream); //b.t2 = 2 * b.sigma12 + C2; \r
- gpu::add(b.t2, C2, b.t2, stream); \r
+ gpu::multiply(b.mu1_mu2, 2, b.t1, 1, -1, stream); //b.t1 = 2 * b.mu1_mu2 + C1;
+ gpu::add(b.t1, C1, b.t1, gpu::GpuMat(), -1, stream);
+ gpu::multiply(b.sigma12, 2, b.t2, 1, -1, stream); //b.t2 = 2 * b.sigma12 + C2;
+ gpu::add(b.t2, C2, b.t2, gpu::GpuMat(), -1, stream);
\r
- gpu::multiply(b.t1, b.t2, b.t3, stream); // t3 = ((2*mu1_mu2 + C1).*(2*sigma12 + C2))\r
+ gpu::multiply(b.t1, b.t2, b.t3, 1, -1, stream); // t3 = ((2*mu1_mu2 + C1).*(2*sigma12 + C2))\r
\r
- gpu::add(b.mu1_2, b.mu2_2, b.t1, stream);\r
- gpu::add(b.t1, C1, b.t1, stream);\r
+ gpu::add(b.mu1_2, b.mu2_2, b.t1, gpu::GpuMat(), -1, stream);\r
+ gpu::add(b.t1, C1, b.t1, gpu::GpuMat(), -1, stream);\r
\r
- gpu::add(b.sigma1_2, b.sigma2_2, b.t2, stream);\r
- gpu::add(b.t2, C2, b.t2, stream);\r
+ gpu::add(b.sigma1_2, b.sigma2_2, b.t2, gpu::GpuMat(), -1, stream);\r
+ gpu::add(b.t2, C2, b.t2, gpu::GpuMat(), -1, stream);\r
\r
\r
- gpu::multiply(b.t1, b.t2, b.t1, stream); // t1 =((mu1_2 + mu2_2 + C1).*(sigma1_2 + sigma2_2 + C2)) \r
- gpu::divide(b.t3, b.t1, b.ssim_map, stream); // ssim_map = t3./t1;\r
+ gpu::multiply(b.t1, b.t2, b.t1, 1, -1, stream); // t1 =((mu1_2 + mu2_2 + C1).*(sigma1_2 + sigma2_2 + C2))\r
+ gpu::divide(b.t3, b.t1, b.ssim_map, 1, -1, stream); // ssim_map = t3./t1;\r
\r
stream.waitForCompletion();\r
\r
- Scalar s = gpu::sum(b.ssim_map, b.buf); \r
+ Scalar s = gpu::sum(b.ssim_map, b.buf);\r
mssim.val[i] = s.val[0] / (b.ssim_map.rows * b.ssim_map.cols);\r
\r
}\r
- return mssim; \r
-}
\ No newline at end of file
+ return mssim;\r
+}\r
+\r