let the test OCL_Filter/Bilateral pass
author Tomoaki Teshima <tomoaki.teshima@gmail.com>
Tue, 6 Mar 2018 10:48:04 +0000 (19:48 +0900)
committer Tomoaki Teshima <tomoaki.teshima@gmail.com>
Tue, 6 Mar 2018 10:48:04 +0000 (19:48 +0900)
  * avoid aggressive optimization on Visual Studio 2012 Update 5
  * add code branch to avoid unintended optimization (keep the old code)

modules/imgproc/src/smooth.cpp

index 69ff4fa..aa1bd62 100644 (file)
@@ -4674,11 +4674,17 @@ public:
                                 color_weight[buf[2]],
                                 color_weight[buf[3]]);
                             v_float32x4 _sw = v_load(space_weight+k);
+#if defined(_MSC_VER) && _MSC_VER == 1700/* MSVS 2012 */ && CV_AVX
+                            // details: https://github.com/opencv/opencv/issues/11004
+                            vsumw += _cw * _sw;
+                            vsumc += _cw * _sw * _valF;
+#else
                             v_float32x4 _w = _cw * _sw;
                             _cw = _w * _valF;
 
                             vsumw += _w;
                             vsumc += _cw;
+#endif
                         }
                         float *bufFloat = (float*)buf;
                         v_float32x4 sum4 = v_reduce_sum4(vsumw, vsumc, vsumw, vsumc);
@@ -4743,6 +4749,13 @@ public:
                                                     color_weight[buf[2]],color_weight[buf[3]]);
                             v_float32x4 _sw = v_load(space_weight+k);
 
+#if defined(_MSC_VER) && _MSC_VER == 1700/* MSVS 2012 */ && CV_AVX
+                            // details: https://github.com/opencv/opencv/issues/11004
+                            vsumw += _w * _sw;
+                            vsumb += _w * _sw * _b;
+                            vsumg += _w * _sw * _g;
+                            vsumr += _w * _sw * _r;
+#else
                             _w *= _sw;
                             _b *=  _w;
                             _g *=  _w;
@@ -4752,6 +4765,7 @@ public:
                             vsumb += _b;
                             vsumg += _g;
                             vsumr += _r;
+#endif
                         }
                         float *bufFloat = (float*)buf;
                         v_float32x4 sum4 = v_reduce_sum4(vsumw, vsumb, vsumg, vsumr);