__m128 fhalf = _mm_set1_ps(0.5f), fzero = _mm_setzero_ps();
__m128 _angleScale = _mm_set1_ps(angleScale), fone = _mm_set1_ps(1.0f);
__m128i ione = _mm_set1_epi32(1), _nbins = _mm_set1_epi32(nbins), izero = _mm_setzero_si128();
-
+
for ( ; x <= width - 4; x += 4)
{
int x2 = x << 1;
__m128 _mag = _mm_loadu_ps(dbuf + x + (width << 1));
__m128 _angle = _mm_loadu_ps(dbuf + x + width * 3);
_angle = _mm_sub_ps(_mm_mul_ps(_angleScale, _angle), fhalf);
-
+
__m128 sign = _mm_and_ps(fone, _mm_cmplt_ps(_angle, fzero));
__m128i _hidx = _mm_cvttps_epi32(_angle);
_hidx = _mm_sub_epi32(_hidx, _mm_cvtps_epi32(sign));
_angle = _mm_sub_ps(_angle, _mm_cvtepi32_ps(_hidx));
-
+
__m128 ft0 = _mm_mul_ps(_mag, _mm_sub_ps(fone, _angle));
__m128 ft1 = _mm_mul_ps(_mag, _angle);
__m128 ft2 = _mm_unpacklo_ps(ft0, ft1);
__m128 ft3 = _mm_unpackhi_ps(ft0, ft1);
-
+
_mm_storeu_ps(gradPtr + x2, ft2);
_mm_storeu_ps(gradPtr + x2 + 4, ft3);
-
+
__m128i mask0 = _mm_sub_epi32(izero, _mm_srli_epi32(_hidx, 31));
__m128i it0 = _mm_and_si128(mask0, _nbins);
mask0 = _mm_cmplt_epi32(_hidx, _nbins);
BlockData() :
histOfs(0), imgOffset()
{ }
-
+
int histOfs;
Point imgOffset;
};
const float* getBlock(Point pt, float* buf);
virtual void normalizeBlockHistogram(float* histogram) const;
-
+
std::vector<PixData> pixData;
std::vector<BlockData> blockData;
__m128i idx = _mm_loadu_si128((__m128i*)a);
__m128 _bw = _mm_set1_ps(bw), _bh = _mm_set1_ps(bh);
__m128i ifour = _mm_set1_epi32(4);
-
+
for (; i <= blockSize.height - 4; i += 4)
{
__m128 t = _mm_sub_ps(_mm_cvtepi32_ps(idx), _bh);
int icellX1 = icellX0 + 1, icellY1 = icellY0 + 1;
cellX -= icellX0;
cellY -= icellY0;
-
+
if( (unsigned)icellX0 < (unsigned)ncells.width &&
(unsigned)icellX1 < (unsigned)ncells.width )
{
icellX1 = icellX0;
cellX = 1.f - cellX;
}
-
+
if( (unsigned)icellY0 < (unsigned)ncells.height &&
(unsigned)icellY1 < (unsigned)ncells.height )
{
data->qangleOfs = (qangle.cols*i + j)*2;
data->gradWeight = weights(i,j);
}
-
+
assert( count1 + count2 + count4 == rawBlockSize );
// defragment pixData
for( j = 0; j < count2; j++ )
pixData[j + count1 + count2] = pixData[j + rawBlockSize*2];
count2 += count1;
count4 += count2;
-
+
// initialize blockData
for( j = 0; j < nblocks.width; j++ )
for( i = 0; i < nblocks.height; i++ )
// CV_Assert( blockHist != 0 );
memset(blockHist, 0, sizeof(float) * blockHistogramSize);
-
+
const PixData* _pixData = &pixData[0];
-
+
for( k = 0; k < C1; k++ )
{
const PixData& pk = _pixData[k];
float w = pk.gradWeight*pk.histWeights[0];
const uchar* h = qanglePtr + pk.qangleOfs;
int h0 = h[0], h1 = h[1];
-
+
float* hist = blockHist + pk.histOfs[0];
float t0 = hist[h0] + a[0]*w;
float t1 = hist[h1] + a[1]*w;
const float* const a = gradPtr + pk.gradOfs;
const uchar* const h = qanglePtr + pk.qangleOfs;
int h0 = h[0], h1 = h[1];
-
+
__m128 _a0 = _mm_set1_ps(a[0]), _a1 = _mm_set1_ps(a[1]);
__m128 _w = _mm_mul_ps(_mm_set1_ps(pk.gradWeight), _mm_loadu_ps(pk.histWeights));
__m128 _t0 = _mm_mul_ps(_a0, _w), _t1 = _mm_mul_ps(_a1, _w);
-
+
_mm_storeu_ps(hist0, _t0);
_mm_storeu_ps(hist1, _t1);
-
+
float* hist = blockHist + pk.histOfs[0];
float t0 = hist[h0] + hist0[0];
float t1 = hist[h1] + hist1[0];
hist[h0] = t0; hist[h1] = t1;
-
+
hist = blockHist + pk.histOfs[1];
t0 = hist[h0] + hist0[1];
t1 = hist[h1] + hist1[1];
float w, t0, t1, a0 = a[0], a1 = a[1];
const uchar* const h = qanglePtr + pk.qangleOfs;
int h0 = h[0], h1 = h[1];
-
+
float* hist = blockHist + pk.histOfs[0];
w = pk.gradWeight*pk.histWeights[0];
t0 = hist[h0] + a0*w;
t1 = hist[h1] + a1*w;
hist[h0] = t0; hist[h1] = t1;
-
+
hist = blockHist + pk.histOfs[1];
w = pk.gradWeight*pk.histWeights[1];
t0 = hist[h0] + a0*w;
hist[h0] = t0; hist[h1] = t1;
}
#endif
-
+
#if CV_SSE2
for( ; k < C4; k++ )
{
const float* const a = gradPtr + pk.gradOfs;
const uchar* const h = qanglePtr + pk.qangleOfs;
int h0 = h[0], h1 = h[1];
-
+
__m128 _a0 = _mm_set1_ps(a[0]), _a1 = _mm_set1_ps(a[1]);
__m128 _w = _mm_mul_ps(_mm_set1_ps(pk.gradWeight), _mm_loadu_ps(pk.histWeights));
__m128 _t0 = _mm_mul_ps(_a0, _w), _t1 = _mm_mul_ps(_a1, _w);
-
+
_mm_storeu_ps(hist0, _t0);
_mm_storeu_ps(hist1, _t1);
-
+
float* hist = blockHist + pk.histOfs[0];
float t0 = hist[h0] + hist0[0];
float t1 = hist[h1] + hist1[0];
hist[h0] = t0; hist[h1] = t1;
-
+
hist = blockHist + pk.histOfs[1];
t0 = hist[h0] + hist0[1];
t1 = hist[h1] + hist1[1];
hist[h0] = t0; hist[h1] = t1;
-
+
hist = blockHist + pk.histOfs[2];
t0 = hist[h0] + hist0[2];
t1 = hist[h1] + hist1[2];
hist[h0] = t0; hist[h1] = t1;
-
+
hist = blockHist + pk.histOfs[3];
t0 = hist[h0] + hist0[3];
t1 = hist[h1] + hist1[3];
hist[h0] = t0; hist[h1] = t1;
-
+
// __m128 _hist0 = _mm_set_ps((blockHist + pk.histOfs[3])[h0], (blockHist + pk.histOfs[2])[h0],
// (blockHist + pk.histOfs[1])[h0], (blockHist + pk.histOfs[0])[h0]);
// __m128 _hist1 = _mm_set_ps((blockHist + pk.histOfs[3])[h1], (blockHist + pk.histOfs[2])[h1],
// (pk.histOfs[1] + blockHist)[h0] = hist0[1];
// (pk.histOfs[2] + blockHist)[h0] = hist0[2];
// (pk.histOfs[3] + blockHist)[h0] = hist0[3];
-//
+//
// (pk.histOfs[0] + blockHist)[h1] = hist1[0];
// (pk.histOfs[1] + blockHist)[h1] = hist1[1];
// (pk.histOfs[2] + blockHist)[h1] = hist1[2];
float w, t0, t1, a0 = a[0], a1 = a[1];
const uchar* h = qanglePtr + pk.qangleOfs;
int h0 = h[0], h1 = h[1];
-
+
float* hist = blockHist + pk.histOfs[0];
w = pk.gradWeight*pk.histWeights[0];
t0 = hist[h0] + a0*w;
t1 = hist[h1] + a1*w;
hist[h0] = t0; hist[h1] = t1;
-
+
hist = blockHist + pk.histOfs[1];
w = pk.gradWeight*pk.histWeights[1];
t0 = hist[h0] + a0*w;
t1 = hist[h1] + a1*w;
hist[h0] = t0; hist[h1] = t1;
-
+
hist = blockHist + pk.histOfs[2];
w = pk.gradWeight*pk.histWeights[2];
t0 = hist[h0] + a0*w;
t1 = hist[h1] + a1*w;
hist[h0] = t0; hist[h1] = t1;
-
+
hist = blockHist + pk.histOfs[3];
w = pk.gradWeight*pk.histWeights[3];
t0 = hist[h0] + a0*w;
return Rect( x*winStride.width, y*winStride.height, winSize.width, winSize.height );
}
+// Greatest common divisor of two integers, computed with Euclid's
+// remainder iteration. If the smaller operand is zero or negative the
+// loop never runs and the larger operand is returned unchanged.
+static inline int gcd(int a, int b)
+{
+    // Order the operands once: 'hi' is the larger value, 'lo' the smaller.
+    int hi = a < b ? b : a;
+    int lo = a < b ? a : b;
+
+    // Replace (hi, lo) with (lo, hi mod lo) until the remainder reaches zero.
+    for( ; lo > 0; )
+    {
+        const int rem = hi % lo;
+        hi = lo;
+        lo = rem;
+    }
+    return hi;
+}
+
void HOGDescriptor::compute(const Mat& img, std::vector<float>& descriptors,
Size winStride, Size padding, const std::vector<Point>& locations) const
{