Fixed profiler API
authorArmin Novak <armin.novak@thincast.com>
Thu, 15 Nov 2018 16:52:43 +0000 (17:52 +0100)
committerArmin Novak <armin.novak@thincast.com>
Fri, 16 Nov 2018 08:32:15 +0000 (09:32 +0100)
* Duplicate the name internally to avoid issues with stack
* Made API opaque and removed direct dereferencing of struct.

include/freerdp/utils/profiler.h
libfreerdp/codec/nsc_sse2.c
libfreerdp/codec/rfx_neon.c
libfreerdp/codec/rfx_sse2.c
libfreerdp/utils/profiler.c

index e80ca1c..a0ff766 100644 (file)
 #include <freerdp/api.h>
 #include <freerdp/utils/stopwatch.h>
 
-struct _PROFILER
-{
-       char* name;
-       STOPWATCH* stopwatch;
-};
 typedef struct _PROFILER PROFILER;
 
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-FREERDP_API PROFILER* profiler_create(char* name);
+FREERDP_API PROFILER* profiler_create(const char* name);
 FREERDP_API void profiler_free(PROFILER* profiler);
 
 FREERDP_API void profiler_enter(PROFILER* profiler);
@@ -49,7 +44,7 @@ FREERDP_API void profiler_print_footer(void);
 #endif
 
 #ifdef WITH_PROFILER
-#define IF_PROFILER(then)                      then
+#define PROFILER_RENAME(prof, name)    do { profiler_free(prof); prof = profiler_create(name); } while(0)
 #define PROFILER_DEFINE(prof)          PROFILER* prof;
 #define PROFILER_CREATE(prof,name)     prof = profiler_create(name);
 #define PROFILER_FREE(prof)                    profiler_free(prof);
@@ -59,7 +54,7 @@ FREERDP_API void profiler_print_footer(void);
 #define PROFILER_PRINT(prof)           profiler_print(prof);
 #define PROFILER_PRINT_FOOTER          profiler_print_footer();
 #else
-#define IF_PROFILER(then)              do { } while (0)
+#define PROFILER_RENAME(prof, name)            do { } while (0)
 
 #define PROFILER_DEFINE(prof)
 #define PROFILER_CREATE(prof,name)     do { } while (0);
index 6bd7147..149e80a 100644 (file)
@@ -401,6 +401,6 @@ void nsc_init_sse2(NSC_CONTEXT* context)
        if (!IsProcessorFeaturePresent(PF_XMMI64_INSTRUCTIONS_AVAILABLE))
                return;
 
-       IF_PROFILER(context->priv->prof_nsc_encode->name = "nsc_encode_sse2");
+       PROFILER_RENAME(context->priv->prof_nsc_encode, "nsc_encode_sse2");
        context->encode = nsc_encode_sse2;
 }
index 1632f98..f96f57d 100644 (file)
@@ -35,7 +35,7 @@
 /* rfx_decode_YCbCr_to_RGB_NEON code now resides in the primitives library. */
 
 static __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-rfx_quantization_decode_block_NEON(INT16 * buffer, const int buffer_size, const UINT32 factor)
+rfx_quantization_decode_block_NEON(INT16* buffer, const int buffer_size, const UINT32 factor)
 {
        int16x8_t quantFactors = vdupq_n_s16(factor);
        int16x8_t* buf = (int16x8_t*)buffer;
@@ -48,10 +48,10 @@ rfx_quantization_decode_block_NEON(INT16 * buffer, const int buffer_size, const
                vst1q_s16((INT16*)buf, val);
                buf++;
        }
-       while(buf < buf_end);
+       while (buf < buf_end);
 }
 
-void rfx_quantization_decode_NEON(INT16 * buffer, const UINT32 * quantVals)
+void rfx_quantization_decode_NEON(INT16* buffer, const UINT32* quantVals)
 {
        rfx_quantization_decode_block_NEON(&buffer[0], 1024, quantVals[8] - 1); /* HL1 */
        rfx_quantization_decode_block_NEON(&buffer[1024], 1024, quantVals[7] - 1); /* LH1 */
@@ -68,21 +68,20 @@ void rfx_quantization_decode_NEON(INT16 * buffer, const UINT32 * quantVals)
 
 
 static __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-rfx_dwt_2d_decode_block_horiz_NEON(INT16 * l, INT16 * h, INT16 * dst, int subband_width)
+rfx_dwt_2d_decode_block_horiz_NEON(INT16* l, INT16* h, INT16* dst, int subband_width)
 {
        int y, n;
-       INT16 * l_ptr = l;
-       INT16 * h_ptr = h;
-       INT16 * dst_ptr = dst;
+       INT16* l_ptr = l;
+       INT16* h_ptr = h;
+       INT16* dst_ptr = dst;
 
        for (y = 0; y < subband_width; y++)
        {
                /* Even coefficients */
-               for (n = 0; n < subband_width; n+=8)
+               for (n = 0; n < subband_width; n += 8)
                {
                        // dst[2n] = l[n] - ((h[n-1] + h[n] + 1) >> 1);
                        int16x8_t l_n = vld1q_s16(l_ptr);
-
                        int16x8_t h_n = vld1q_s16(h_ptr);
                        int16x8_t h_n_m = vld1q_s16(h_ptr - 1);
 
@@ -95,29 +94,25 @@ rfx_dwt_2d_decode_block_horiz_NEON(INT16 * l, INT16 * h, INT16 * dst, int subban
                        int16x8_t tmp_n = vaddq_s16(h_n, h_n_m);
                        tmp_n = vaddq_s16(tmp_n, vdupq_n_s16(1));
                        tmp_n = vshrq_n_s16(tmp_n, 1);
-
                        int16x8_t dst_n = vsubq_s16(l_n, tmp_n);
-
                        vst1q_s16(l_ptr, dst_n);
-
-                       l_ptr+=8;
-                       h_ptr+=8;
+                       l_ptr += 8;
+                       h_ptr += 8;
                }
+
                l_ptr -= subband_width;
                h_ptr -= subband_width;
 
                /* Odd coefficients */
-               for (n = 0; n < subband_width; n+=8)
+               for (n = 0; n < subband_width; n += 8)
                {
                        // dst[2n + 1] = (h[n] << 1) + ((dst[2n] + dst[2n + 2]) >> 1);
-
                        int16x8_t h_n = vld1q_s16(h_ptr);
-
                        h_n = vshlq_n_s16(h_n, 1);
-
                        int16x8x2_t dst_n;
                        dst_n.val[0] = vld1q_s16(l_ptr);
                        int16x8_t dst_n_p = vld1q_s16(l_ptr + 1);
+
                        if (n == subband_width - 8)
                        {
                                int16_t last = vgetq_lane_s16(dst_n_p, 6);
@@ -126,39 +121,34 @@ rfx_dwt_2d_decode_block_horiz_NEON(INT16 * l, INT16 * h, INT16 * dst, int subban
 
                        dst_n.val[1] = vaddq_s16(dst_n_p, dst_n.val[0]);
                        dst_n.val[1] = vshrq_n_s16(dst_n.val[1], 1);
-
                        dst_n.val[1] = vaddq_s16(dst_n.val[1], h_n);
-
                        vst2q_s16(dst_ptr, dst_n);
-
-                       l_ptr+=8;
-                       h_ptr+=8;
-                       dst_ptr+=16;
+                       l_ptr += 8;
+                       h_ptr += 8;
+                       dst_ptr += 16;
                }
        }
 }
 
 static __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-rfx_dwt_2d_decode_block_vert_NEON(INT16 * l, INT16 * h, INT16 * dst, int subband_width)
+rfx_dwt_2d_decode_block_vert_NEON(INT16* l, INT16* h, INT16* dst, int subband_width)
 {
        int x, n;
-       INT16 * l_ptr = l;
-       INT16 * h_ptr = h;
-       INT16 * dst_ptr = dst;
-
+       INT16* l_ptr = l;
+       INT16* h_ptr = h;
+       INT16* dst_ptr = dst;
        int total_width = subband_width + subband_width;
 
        /* Even coefficients */
        for (n = 0; n < subband_width; n++)
        {
-               for (x = 0; x < total_width; x+=8)
+               for (x = 0; x < total_width; x += 8)
                {
                        // dst[2n] = l[n] - ((h[n-1] + h[n] + 1) >> 1);
-
                        int16x8_t l_n = vld1q_s16(l_ptr);
                        int16x8_t h_n = vld1q_s16(h_ptr);
-
                        int16x8_t tmp_n = vaddq_s16(h_n, vdupq_n_s16(1));;
+
                        if (n == 0)
                                tmp_n = vaddq_s16(tmp_n, h_n);
                        else
@@ -166,16 +156,16 @@ rfx_dwt_2d_decode_block_vert_NEON(INT16 * l, INT16 * h, INT16 * dst, int subband
                                int16x8_t h_n_m = vld1q_s16((h_ptr - total_width));
                                tmp_n = vaddq_s16(tmp_n, h_n_m);
                        }
-                       tmp_n = vshrq_n_s16(tmp_n, 1);
 
+                       tmp_n = vshrq_n_s16(tmp_n, 1);
                        int16x8_t dst_n = vsubq_s16(l_n, tmp_n);
                        vst1q_s16(dst_ptr, dst_n);
-
-                       l_ptr+=8;
-                       h_ptr+=8;
-                       dst_ptr+=8;
+                       l_ptr += 8;
+                       h_ptr += 8;
+                       dst_ptr += 8;
                }
-               dst_ptr+=total_width;
+
+               dst_ptr += total_width;
        }
 
        h_ptr = h;
@@ -184,78 +174,69 @@ rfx_dwt_2d_decode_block_vert_NEON(INT16 * l, INT16 * h, INT16 * dst, int subband
        /* Odd coefficients */
        for (n = 0; n < subband_width; n++)
        {
-               for (x = 0; x < total_width; x+=8)
+               for (x = 0; x < total_width; x += 8)
                {
-               // dst[2n + 1] = (h[n] << 1) + ((dst[2n] + dst[2n + 2]) >> 1);
-               int16x8_t h_n = vld1q_s16(h_ptr);
-               int16x8_t dst_n_m = vld1q_s16(dst_ptr - total_width);
+                       // dst[2n + 1] = (h[n] << 1) + ((dst[2n] + dst[2n + 2]) >> 1);
+                       int16x8_t h_n = vld1q_s16(h_ptr);
+                       int16x8_t dst_n_m = vld1q_s16(dst_ptr - total_width);
+                       h_n = vshlq_n_s16(h_n, 1);
+                       int16x8_t tmp_n = dst_n_m;
 
-               h_n = vshlq_n_s16(h_n, 1);
+                       if (n == subband_width - 1)
+                               tmp_n = vaddq_s16(tmp_n, dst_n_m);
+                       else
+                       {
+                               int16x8_t dst_n_p = vld1q_s16((dst_ptr + total_width));
+                               tmp_n = vaddq_s16(tmp_n, dst_n_p);
+                       }
 
-               int16x8_t tmp_n = dst_n_m;
-               if (n == subband_width - 1)
-                       tmp_n = vaddq_s16(tmp_n, dst_n_m);
-               else
-               {
-                       int16x8_t dst_n_p = vld1q_s16((dst_ptr + total_width));
-                       tmp_n = vaddq_s16(tmp_n, dst_n_p);
+                       tmp_n = vshrq_n_s16(tmp_n, 1);
+                       int16x8_t dst_n = vaddq_s16(tmp_n, h_n);
+                       vst1q_s16(dst_ptr, dst_n);
+                       h_ptr += 8;
+                       dst_ptr += 8;
                }
-               tmp_n = vshrq_n_s16(tmp_n, 1);
-
-               int16x8_t dst_n = vaddq_s16(tmp_n, h_n);
-               vst1q_s16(dst_ptr, dst_n);
 
-               h_ptr+=8;
-               dst_ptr+=8;
+               dst_ptr += total_width;
        }
-       dst_ptr+=total_width;
-}
 }
 
 static __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-rfx_dwt_2d_decode_block_NEON(INT16 * buffer, INT16 * idwt, int subband_width)
+rfx_dwt_2d_decode_block_NEON(INT16* buffer, INT16* idwt, int subband_width)
 {
-       INT16 * hl, * lh, * hh, * ll;
-       INT16 * l_dst, * h_dst;
-
+       INT16* hl, * lh, * hh, * ll;
+       INT16* l_dst, * h_dst;
        /* Inverse DWT in horizontal direction, results in 2 sub-bands in L, H order in tmp buffer idwt. */
        /* The 4 sub-bands are stored in HL(0), LH(1), HH(2), LL(3) order. */
        /* The lower part L uses LL(3) and HL(0). */
        /* The higher part H uses LH(1) and HH(2). */
-
        ll = buffer + subband_width * subband_width * 3;
        hl = buffer;
        l_dst = idwt;
-
        rfx_dwt_2d_decode_block_horiz_NEON(ll, hl, l_dst, subband_width);
-
        lh = buffer + subband_width * subband_width;
        hh = buffer + subband_width * subband_width * 2;
        h_dst = idwt + subband_width * subband_width * 2;
-
        rfx_dwt_2d_decode_block_horiz_NEON(lh, hh, h_dst, subband_width);
-
        /* Inverse DWT in vertical direction, results are stored in original buffer. */
        rfx_dwt_2d_decode_block_vert_NEON(l_dst, h_dst, buffer, subband_width);
 }
 
-void rfx_dwt_2d_decode_NEON(INT16 * buffer, INT16 * dwt_buffer)
+void rfx_dwt_2d_decode_NEON(INT16* buffer, INT16* dwt_buffer)
 {
        rfx_dwt_2d_decode_block_NEON(buffer + 3840, dwt_buffer, 8);
        rfx_dwt_2d_decode_block_NEON(buffer + 3072, dwt_buffer, 16);
        rfx_dwt_2d_decode_block_NEON(buffer, dwt_buffer, 32);
 }
 
-void rfx_init_neon(RFX_CONTEXT * context)
+void rfx_init_neon(RFX_CONTEXT* context)
 {
        if (IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE))
        {
                DEBUG_RFX("Using NEON optimizations");
-
-               IF_PROFILER(context->priv->prof_rfx_ycbcr_to_rgb->name = "rfx_decode_YCbCr_to_RGB_NEON");
-               IF_PROFILER(context->priv->prof_rfx_quantization_decode->name = "rfx_quantization_decode_NEON");
-               IF_PROFILER(context->priv->prof_rfx_dwt_2d_decode->name = "rfx_dwt_2d_decode_NEON");
-
+               PROFILER_RENAME(context->priv->prof_rfx_ycbcr_to_rgb, "rfx_decode_YCbCr_to_RGB_NEON");
+               PROFILER_RENAME(context->priv->prof_rfx_quantization_decode, "rfx_quantization_decode_NEON");
+               PROFILER_RENAME(context->priv->prof_rfx_dwt_2d_decode, "rfx_dwt_2d_decode_NEON");
                context->quantization_decode = rfx_quantization_decode_NEON;
                context->dwt_2d_decode = rfx_dwt_2d_decode_NEON;
        }
index 94c6b21..0a28087 100644 (file)
@@ -455,10 +455,10 @@ void rfx_init_sse2(RFX_CONTEXT* context)
        if (!IsProcessorFeaturePresent(PF_XMMI64_INSTRUCTIONS_AVAILABLE))
                return;
 
-       IF_PROFILER(context->priv->prof_rfx_quantization_decode->name = "rfx_quantization_decode_sse2");
-       IF_PROFILER(context->priv->prof_rfx_quantization_encode->name = "rfx_quantization_encode_sse2");
-       IF_PROFILER(context->priv->prof_rfx_dwt_2d_decode->name = "rfx_dwt_2d_decode_sse2");
-       IF_PROFILER(context->priv->prof_rfx_dwt_2d_encode->name = "rfx_dwt_2d_encode_sse2");
+       PROFILER_RENAME(context->priv->prof_rfx_quantization_decode, "rfx_quantization_decode_sse2");
+       PROFILER_RENAME(context->priv->prof_rfx_quantization_encode, "rfx_quantization_encode_sse2");
+       PROFILER_RENAME(context->priv->prof_rfx_dwt_2d_decode, "rfx_dwt_2d_decode_sse2");
+       PROFILER_RENAME(context->priv->prof_rfx_dwt_2d_encode, "rfx_dwt_2d_encode_sse2");
        context->quantization_decode = rfx_quantization_decode_sse2;
        context->quantization_encode = rfx_quantization_encode_sse2;
        context->dwt_2d_decode = rfx_dwt_2d_decode_sse2;
index 61b7a52..a240e4d 100644 (file)
 
 #define TAG FREERDP_TAG("utils")
 
-PROFILER* profiler_create(char* name)
+struct _PROFILER
 {
-       PROFILER* profiler;
-       profiler = (PROFILER*) malloc(sizeof(PROFILER));
+       char* name;
+       STOPWATCH* stopwatch;
+};
+
+PROFILER* profiler_create(const char* name)
+{
+       PROFILER* profiler = (PROFILER*) calloc(1, sizeof(PROFILER));
 
        if (!profiler)
                return NULL;
 
-       profiler->name = name;
+       profiler->name = _strdup(name);
        profiler->stopwatch = stopwatch_create();
 
-       if (!profiler->stopwatch)
-       {
-               free(profiler);
-               return NULL;
-       }
+       if (!profiler->name || !profiler->stopwatch)
+               goto fail;
 
        return profiler;
+fail:
+       profiler_free(profiler);
+       return NULL;
 }
 
 void profiler_free(PROFILER* profiler)
 {
        if (profiler)
+       {
+               free(profiler->name);
                stopwatch_free(profiler->stopwatch);
+       }
 
        free(profiler);
 }