vp9/common/vp9_idct.h

   1 /*
   2  *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
   3  *
   4  *  Use of this source code is governed by a BSD-style license
   5  *  that can be found in the LICENSE file in the root of the source
   6  *  tree. An additional intellectual property rights grant can be found
   7  *  in the file PATENTS.  All contributing project authors may
   8  *  be found in the AUTHORS file in the root of the source tree.
   9  */
  10
  11 #ifndef VP9_COMMON_VP9_IDCT_H_
  12 #define VP9_COMMON_VP9_IDCT_H_
  13
  14 #include <assert.h>
  15
  16 #include "./vpx_config.h"
  17 #include "vp9/common/vp9_common.h"
  18 #include "vp9/common/vp9_enums.h"
  19
  20 #ifdef __cplusplus
  21 extern "C" {
  22 #endif
  23
  24 // Constants and Macros used by all idct/dct functions
  25 #define DCT_CONST_BITS 14
  26 #define DCT_CONST_ROUNDING  (1 << (DCT_CONST_BITS - 1))
  27
  28 #define UNIT_QUANT_SHIFT 2
  29 #define UNIT_QUANT_FACTOR (1 << UNIT_QUANT_SHIFT)
  30
  31 #define pair_set_epi16(a, b) \
  32   _mm_set_epi16((int16_t)(b), (int16_t)(a), (int16_t)(b), (int16_t)(a), \
  33                 (int16_t)(b), (int16_t)(a), (int16_t)(b), (int16_t)(a))
  34
  35 #define dual_set_epi16(a, b) \
  36   _mm_set_epi16((int16_t)(b), (int16_t)(b), (int16_t)(b), (int16_t)(b), \
  37                 (int16_t)(a), (int16_t)(a), (int16_t)(a), (int16_t)(a))
  38
  39 // Constants:
  40 //  for (int i = 1; i< 32; ++i)
  41 //    printf("static const int cospi_%d_64 = %.0f;\n", i,
  42 //           round(16384 * cos(i*M_PI/64)));
  43 // Note: sin(k*Pi/64) = cos((32-k)*Pi/64)
  44 static const tran_high_t cospi_1_64  = 16364;
  45 static const tran_high_t cospi_2_64  = 16305;
  46 static const tran_high_t cospi_3_64  = 16207;
  47 static const tran_high_t cospi_4_64  = 16069;
  48 static const tran_high_t cospi_5_64  = 15893;
  49 static const tran_high_t cospi_6_64  = 15679;
  50 static const tran_high_t cospi_7_64  = 15426;
  51 static const tran_high_t cospi_8_64  = 15137;
  52 static const tran_high_t cospi_9_64  = 14811;
  53 static const tran_high_t cospi_10_64 = 14449;
  54 static const tran_high_t cospi_11_64 = 14053;
  55 static const tran_high_t cospi_12_64 = 13623;
  56 static const tran_high_t cospi_13_64 = 13160;
  57 static const tran_high_t cospi_14_64 = 12665;
  58 static const tran_high_t cospi_15_64 = 12140;
  59 static const tran_high_t cospi_16_64 = 11585;
  60 static const tran_high_t cospi_17_64 = 11003;
  61 static const tran_high_t cospi_18_64 = 10394;
  62 static const tran_high_t cospi_19_64 = 9760;
  63 static const tran_high_t cospi_20_64 = 9102;
  64 static const tran_high_t cospi_21_64 = 8423;
  65 static const tran_high_t cospi_22_64 = 7723;
  66 static const tran_high_t cospi_23_64 = 7005;
  67 static const tran_high_t cospi_24_64 = 6270;
  68 static const tran_high_t cospi_25_64 = 5520;
  69 static const tran_high_t cospi_26_64 = 4756;
  70 static const tran_high_t cospi_27_64 = 3981;
  71 static const tran_high_t cospi_28_64 = 3196;
  72 static const tran_high_t cospi_29_64 = 2404;
  73 static const tran_high_t cospi_30_64 = 1606;
  74 static const tran_high_t cospi_31_64 = 804;
  75
  76 //  16384 * sqrt(2) * sin(kPi/9) * 2 / 3
  77 static const tran_high_t sinpi_1_9 = 5283;
  78 static const tran_high_t sinpi_2_9 = 9929;
  79 static const tran_high_t sinpi_3_9 = 13377;
  80 static const tran_high_t sinpi_4_9 = 15212;
  81
  82 static INLINE tran_low_t check_range(tran_high_t input) {
  83 #if CONFIG_VP9_HIGHBITDEPTH
  84   // For valid highbitdepth VP9 streams, intermediate stage coefficients will
  85   // stay within the ranges:
  86   // - 8 bit: signed 16 bit integer
  87   // - 10 bit: signed 18 bit integer
  88   // - 12 bit: signed 20 bit integer
  89 #elif CONFIG_COEFFICIENT_RANGE_CHECKING
  90   // For valid VP9 input streams, intermediate stage coefficients should always
  91   // stay within the range of a signed 16 bit integer. Coefficients can go out
  92   // of this range for invalid/corrupt VP9 streams. However, strictly checking
  93   // this range for every intermediate coefficient can burdensome for a decoder,
  94   // therefore the following assertion is only enabled when configured with
  95   // --enable-coefficient-range-checking.
  96   assert(INT16_MIN <= input);
  97   assert(input <= INT16_MAX);
  98 #endif
  99   return (tran_low_t)input;
 100 }
 101
 102 static INLINE tran_low_t dct_const_round_shift(tran_high_t input) {
 103   tran_high_t rv = ROUND_POWER_OF_TWO(input, DCT_CONST_BITS);
 104   return check_range(rv);
 105 }
 106
 107 typedef void (*transform_1d)(const tran_low_t*, tran_low_t*);
 108
 109 typedef struct {
 110   transform_1d cols, rows;  // vertical and horizontal
 111 } transform_2d;
 112
 113 #if CONFIG_VP9_HIGHBITDEPTH
 114 typedef void (*highbd_transform_1d)(const tran_low_t*, tran_low_t*, int bd);
 115
 116 typedef struct {
 117   highbd_transform_1d cols, rows;  // vertical and horizontal
 118 } highbd_transform_2d;
 119 #endif  // CONFIG_VP9_HIGHBITDEPTH
 120
 121 #if CONFIG_EMULATE_HARDWARE
 122 // When CONFIG_EMULATE_HARDWARE is 1 the transform performs a
 123 // non-normative method to handle overflows. A stream that causes
 124 // overflows  in the inverse transform is considered invalid in VP9,
 125 // and a hardware implementer is free to choose any reasonable
 126 // method to handle overflows. However to aid in hardware
 127 // verification they can use a specific implementation of the
 128 // WRAPLOW() macro below that is identical to their intended
 129 // hardware implementation (and also use configure options to trigger
 130 // the C-implementation of the transform).
 131 //
 132 // The particular WRAPLOW implementation below performs strict
 133 // overflow wrapping to match common hardware implementations.
 134 // bd of 8 uses trans_low with 16bits, need to remove 16bits
 135 // bd of 10 uses trans_low with 18bits, need to remove 14bits
 136 // bd of 12 uses trans_low with 20bits, need to remove 12bits
 137 // bd of x uses trans_low with 8+x bits, need to remove 24-x bits
 138 #define WRAPLOW(x, bd) ((((int32_t)(x)) << (24 - bd)) >> (24 - bd))
 139 #else
 140 #define WRAPLOW(x, bd) (x)
 141 #endif  // CONFIG_EMULATE_HARDWARE
 142
 143 void vp9_iwht4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
 144                      int eob);
 145 void vp9_idct4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
 146                      int eob);
 147 void vp9_idct8x8_add(const tran_low_t *input, uint8_t *dest, int stride,
 148                      int eob);
 149 void vp9_idct16x16_add(const tran_low_t *input, uint8_t *dest, int stride, int
 150                        eob);
 151 void vp9_idct32x32_add(const tran_low_t *input, uint8_t *dest, int stride,
 152                        int eob);
 153
 154 void vp9_iht4x4_add(TX_TYPE tx_type, const tran_low_t *input, uint8_t *dest,
 155                     int stride, int eob);
 156 void vp9_iht8x8_add(TX_TYPE tx_type, const tran_low_t *input, uint8_t *dest,
 157                     int stride, int eob);
 158 void vp9_iht16x16_add(TX_TYPE tx_type, const tran_low_t *input, uint8_t *dest,
 159                       int stride, int eob);
 160
 161 #if CONFIG_VP9_HIGHBITDEPTH
 162 void vp9_highbd_idct4(const tran_low_t *input, tran_low_t *output, int bd);
 163 void vp9_highbd_idct8(const tran_low_t *input, tran_low_t *output, int bd);
 164 void vp9_highbd_idct16(const tran_low_t *input, tran_low_t *output, int bd);
 165 void vp9_highbd_iwht4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
 166                             int eob, int bd);
 167 void vp9_highbd_idct4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
 168                             int eob, int bd);
 169 void vp9_highbd_idct8x8_add(const tran_low_t *input, uint8_t *dest, int stride,
 170                             int eob, int bd);
 171 void vp9_highbd_idct16x16_add(const tran_low_t *input, uint8_t *dest,
 172                               int stride, int eob, int bd);
 173 void vp9_highbd_idct32x32_add(const tran_low_t *input, uint8_t *dest,
 174                               int stride, int eob, int bd);
 175 void vp9_highbd_iht4x4_add(TX_TYPE tx_type, const tran_low_t *input,
 176                            uint8_t *dest, int stride, int eob, int bd);
 177 void vp9_highbd_iht8x8_add(TX_TYPE tx_type, const tran_low_t *input,
 178                            uint8_t *dest, int stride, int eob, int bd);
 179 void vp9_highbd_iht16x16_add(TX_TYPE tx_type, const tran_low_t *input,
 180                              uint8_t *dest, int stride, int eob, int bd);
 181 static INLINE uint16_t highbd_clip_pixel_add(uint16_t dest, tran_high_t trans,
 182                                              int bd) {
 183   trans = WRAPLOW(trans, bd);
 184   return clip_pixel_highbd(WRAPLOW(dest + trans, bd), bd);
 185 }
 186 #endif  // CONFIG_VP9_HIGHBITDEPTH
 187 #ifdef __cplusplus
 188 }  // extern "C"
 189 #endif
 190
 191 #endif  // VP9_COMMON_VP9_IDCT_H_