4 * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
5 * Copyright (C) 2009-2011, 2014, 2016, 2018, 2022, D. R. Commander.
6 * Copyright (C) 2015-2016, 2018, Matthieu Darbois.
8 * Based on the x86 SIMD extension for IJG JPEG library,
9 * Copyright (C) 1999-2006, MIYASAKA Masaru.
10 * For conditions of distribution and use, see copyright notice in jsimdext.inc
12 * This file contains the interface between the "normal" portions
13 * of the library and the SIMD implementations when running on a
14 * 64-bit x86 architecture.
17 #define JPEG_INTERNALS
18 #include "../../jinclude.h"
19 #include "../../jpeglib.h"
20 #include "../../jsimd.h"
21 #include "../../jdct.h"
22 #include "../../jsimddct.h"
24 #include "jconfigint.h"
27 * In the PIC cases, we have no guarantee that constants will keep
28 * their alignment. This macro allows us to verify it at runtime.
/* Evaluates to nonzero when ptr lies on a (1 << order)-byte boundary.
 * Both parameters are parenthesized so that expression arguments expand with
 * the intended precedence, and the mask is built in size_t so that it has
 * the same width as the converted address. */
#define IS_ALIGNED(ptr, order) \
  ((((size_t)(ptr)) & (((size_t)1 << (order)) - 1)) == 0)
32 #define IS_ALIGNED_SSE(ptr) (IS_ALIGNED(ptr, 4)) /* 16 byte alignment */
33 #define IS_ALIGNED_AVX(ptr) (IS_ALIGNED(ptr, 5)) /* 32 byte alignment */
35 static unsigned int simd_support = (unsigned int)(~0);
36 static unsigned int simd_huffman = 1;
39 * Check what SIMD accelerations are supported.
41 * FIXME: This code is racy under a multi-threaded environment.
50 if (simd_support != ~0U)
53 simd_support = jpeg_simd_cpu_support();
56 /* Force different settings through environment variables */
57 if (!GETENV_S(env, 2, "JSIMD_FORCESSE2") && !strcmp(env, "1"))
58 simd_support &= JSIMD_SSE2;
59 if (!GETENV_S(env, 2, "JSIMD_FORCEAVX2") && !strcmp(env, "1"))
60 simd_support &= JSIMD_AVX2;
61 if (!GETENV_S(env, 2, "JSIMD_FORCENONE") && !strcmp(env, "1"))
63 if (!GETENV_S(env, 2, "JSIMD_NOHUFFENC") && !strcmp(env, "1"))
69 jsimd_can_rgb_ycc(void)
73 /* The code is optimised for these values only */
74 if (BITS_IN_JSAMPLE != 8)
76 if (sizeof(JDIMENSION) != 4)
78 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
81 if ((simd_support & JSIMD_AVX2) &&
82 IS_ALIGNED_AVX(jconst_rgb_ycc_convert_avx2))
84 if ((simd_support & JSIMD_SSE2) &&
85 IS_ALIGNED_SSE(jconst_rgb_ycc_convert_sse2))
92 jsimd_can_rgb_gray(void)
96 /* The code is optimised for these values only */
97 if (BITS_IN_JSAMPLE != 8)
99 if (sizeof(JDIMENSION) != 4)
101 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
104 if ((simd_support & JSIMD_AVX2) &&
105 IS_ALIGNED_AVX(jconst_rgb_gray_convert_avx2))
107 if ((simd_support & JSIMD_SSE2) &&
108 IS_ALIGNED_SSE(jconst_rgb_gray_convert_sse2))
115 jsimd_can_ycc_rgb(void)
119 /* The code is optimised for these values only */
120 if (BITS_IN_JSAMPLE != 8)
122 if (sizeof(JDIMENSION) != 4)
124 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
127 if ((simd_support & JSIMD_AVX2) &&
128 IS_ALIGNED_AVX(jconst_ycc_rgb_convert_avx2))
130 if ((simd_support & JSIMD_SSE2) &&
131 IS_ALIGNED_SSE(jconst_ycc_rgb_convert_sse2))
138 jsimd_can_ycc_rgb565(void)
144 jsimd_rgb_ycc_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
145 JSAMPIMAGE output_buf, JDIMENSION output_row,
148 void (*avx2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
149 void (*sse2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
151 switch (cinfo->in_color_space) {
153 avx2fct = jsimd_extrgb_ycc_convert_avx2;
154 sse2fct = jsimd_extrgb_ycc_convert_sse2;
158 avx2fct = jsimd_extrgbx_ycc_convert_avx2;
159 sse2fct = jsimd_extrgbx_ycc_convert_sse2;
162 avx2fct = jsimd_extbgr_ycc_convert_avx2;
163 sse2fct = jsimd_extbgr_ycc_convert_sse2;
167 avx2fct = jsimd_extbgrx_ycc_convert_avx2;
168 sse2fct = jsimd_extbgrx_ycc_convert_sse2;
172 avx2fct = jsimd_extxbgr_ycc_convert_avx2;
173 sse2fct = jsimd_extxbgr_ycc_convert_sse2;
177 avx2fct = jsimd_extxrgb_ycc_convert_avx2;
178 sse2fct = jsimd_extxrgb_ycc_convert_sse2;
181 avx2fct = jsimd_rgb_ycc_convert_avx2;
182 sse2fct = jsimd_rgb_ycc_convert_sse2;
186 if (simd_support & JSIMD_AVX2)
187 avx2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
189 sse2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
193 jsimd_rgb_gray_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
194 JSAMPIMAGE output_buf, JDIMENSION output_row,
197 void (*avx2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
198 void (*sse2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
200 switch (cinfo->in_color_space) {
202 avx2fct = jsimd_extrgb_gray_convert_avx2;
203 sse2fct = jsimd_extrgb_gray_convert_sse2;
207 avx2fct = jsimd_extrgbx_gray_convert_avx2;
208 sse2fct = jsimd_extrgbx_gray_convert_sse2;
211 avx2fct = jsimd_extbgr_gray_convert_avx2;
212 sse2fct = jsimd_extbgr_gray_convert_sse2;
216 avx2fct = jsimd_extbgrx_gray_convert_avx2;
217 sse2fct = jsimd_extbgrx_gray_convert_sse2;
221 avx2fct = jsimd_extxbgr_gray_convert_avx2;
222 sse2fct = jsimd_extxbgr_gray_convert_sse2;
226 avx2fct = jsimd_extxrgb_gray_convert_avx2;
227 sse2fct = jsimd_extxrgb_gray_convert_sse2;
230 avx2fct = jsimd_rgb_gray_convert_avx2;
231 sse2fct = jsimd_rgb_gray_convert_sse2;
235 if (simd_support & JSIMD_AVX2)
236 avx2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
238 sse2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
242 jsimd_ycc_rgb_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
243 JDIMENSION input_row, JSAMPARRAY output_buf,
246 void (*avx2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
247 void (*sse2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
249 switch (cinfo->out_color_space) {
251 avx2fct = jsimd_ycc_extrgb_convert_avx2;
252 sse2fct = jsimd_ycc_extrgb_convert_sse2;
256 avx2fct = jsimd_ycc_extrgbx_convert_avx2;
257 sse2fct = jsimd_ycc_extrgbx_convert_sse2;
260 avx2fct = jsimd_ycc_extbgr_convert_avx2;
261 sse2fct = jsimd_ycc_extbgr_convert_sse2;
265 avx2fct = jsimd_ycc_extbgrx_convert_avx2;
266 sse2fct = jsimd_ycc_extbgrx_convert_sse2;
270 avx2fct = jsimd_ycc_extxbgr_convert_avx2;
271 sse2fct = jsimd_ycc_extxbgr_convert_sse2;
275 avx2fct = jsimd_ycc_extxrgb_convert_avx2;
276 sse2fct = jsimd_ycc_extxrgb_convert_sse2;
279 avx2fct = jsimd_ycc_rgb_convert_avx2;
280 sse2fct = jsimd_ycc_rgb_convert_sse2;
284 if (simd_support & JSIMD_AVX2)
285 avx2fct(cinfo->output_width, input_buf, input_row, output_buf, num_rows);
287 sse2fct(cinfo->output_width, input_buf, input_row, output_buf, num_rows);
291 jsimd_ycc_rgb565_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
292 JDIMENSION input_row, JSAMPARRAY output_buf,
298 jsimd_can_h2v2_downsample(void)
302 /* The code is optimised for these values only */
303 if (BITS_IN_JSAMPLE != 8)
305 if (sizeof(JDIMENSION) != 4)
308 if (simd_support & JSIMD_AVX2)
310 if (simd_support & JSIMD_SSE2)
317 jsimd_can_h2v1_downsample(void)
321 /* The code is optimised for these values only */
322 if (BITS_IN_JSAMPLE != 8)
324 if (sizeof(JDIMENSION) != 4)
327 if (simd_support & JSIMD_AVX2)
329 if (simd_support & JSIMD_SSE2)
336 jsimd_h2v2_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
337 JSAMPARRAY input_data, JSAMPARRAY output_data)
339 if (simd_support & JSIMD_AVX2)
340 jsimd_h2v2_downsample_avx2(cinfo->image_width, cinfo->max_v_samp_factor,
341 compptr->v_samp_factor,
342 compptr->width_in_blocks, input_data,
345 jsimd_h2v2_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor,
346 compptr->v_samp_factor,
347 compptr->width_in_blocks, input_data,
352 jsimd_h2v1_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
353 JSAMPARRAY input_data, JSAMPARRAY output_data)
355 if (simd_support & JSIMD_AVX2)
356 jsimd_h2v1_downsample_avx2(cinfo->image_width, cinfo->max_v_samp_factor,
357 compptr->v_samp_factor,
358 compptr->width_in_blocks, input_data,
361 jsimd_h2v1_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor,
362 compptr->v_samp_factor,
363 compptr->width_in_blocks, input_data,
368 jsimd_can_h2v2_upsample(void)
372 /* The code is optimised for these values only */
373 if (BITS_IN_JSAMPLE != 8)
375 if (sizeof(JDIMENSION) != 4)
378 if (simd_support & JSIMD_AVX2)
380 if (simd_support & JSIMD_SSE2)
387 jsimd_can_h2v1_upsample(void)
391 /* The code is optimised for these values only */
392 if (BITS_IN_JSAMPLE != 8)
394 if (sizeof(JDIMENSION) != 4)
397 if (simd_support & JSIMD_AVX2)
399 if (simd_support & JSIMD_SSE2)
406 jsimd_h2v2_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
407 JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
409 if (simd_support & JSIMD_AVX2)
410 jsimd_h2v2_upsample_avx2(cinfo->max_v_samp_factor, cinfo->output_width,
411 input_data, output_data_ptr);
413 jsimd_h2v2_upsample_sse2(cinfo->max_v_samp_factor, cinfo->output_width,
414 input_data, output_data_ptr);
418 jsimd_h2v1_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
419 JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
421 if (simd_support & JSIMD_AVX2)
422 jsimd_h2v1_upsample_avx2(cinfo->max_v_samp_factor, cinfo->output_width,
423 input_data, output_data_ptr);
425 jsimd_h2v1_upsample_sse2(cinfo->max_v_samp_factor, cinfo->output_width,
426 input_data, output_data_ptr);
430 jsimd_can_h2v2_fancy_upsample(void)
434 /* The code is optimised for these values only */
435 if (BITS_IN_JSAMPLE != 8)
437 if (sizeof(JDIMENSION) != 4)
440 if ((simd_support & JSIMD_AVX2) &&
441 IS_ALIGNED_AVX(jconst_fancy_upsample_avx2))
443 if ((simd_support & JSIMD_SSE2) &&
444 IS_ALIGNED_SSE(jconst_fancy_upsample_sse2))
451 jsimd_can_h2v1_fancy_upsample(void)
455 /* The code is optimised for these values only */
456 if (BITS_IN_JSAMPLE != 8)
458 if (sizeof(JDIMENSION) != 4)
461 if ((simd_support & JSIMD_AVX2) &&
462 IS_ALIGNED_AVX(jconst_fancy_upsample_avx2))
464 if ((simd_support & JSIMD_SSE2) &&
465 IS_ALIGNED_SSE(jconst_fancy_upsample_sse2))
472 jsimd_h2v2_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
473 JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
475 if (simd_support & JSIMD_AVX2)
476 jsimd_h2v2_fancy_upsample_avx2(cinfo->max_v_samp_factor,
477 compptr->downsampled_width, input_data,
480 jsimd_h2v2_fancy_upsample_sse2(cinfo->max_v_samp_factor,
481 compptr->downsampled_width, input_data,
486 jsimd_h2v1_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
487 JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
489 if (simd_support & JSIMD_AVX2)
490 jsimd_h2v1_fancy_upsample_avx2(cinfo->max_v_samp_factor,
491 compptr->downsampled_width, input_data,
494 jsimd_h2v1_fancy_upsample_sse2(cinfo->max_v_samp_factor,
495 compptr->downsampled_width, input_data,
500 jsimd_can_h2v2_merged_upsample(void)
504 /* The code is optimised for these values only */
505 if (BITS_IN_JSAMPLE != 8)
507 if (sizeof(JDIMENSION) != 4)
510 if ((simd_support & JSIMD_AVX2) &&
511 IS_ALIGNED_AVX(jconst_merged_upsample_avx2))
513 if ((simd_support & JSIMD_SSE2) &&
514 IS_ALIGNED_SSE(jconst_merged_upsample_sse2))
521 jsimd_can_h2v1_merged_upsample(void)
525 /* The code is optimised for these values only */
526 if (BITS_IN_JSAMPLE != 8)
528 if (sizeof(JDIMENSION) != 4)
531 if ((simd_support & JSIMD_AVX2) &&
532 IS_ALIGNED_AVX(jconst_merged_upsample_avx2))
534 if ((simd_support & JSIMD_SSE2) &&
535 IS_ALIGNED_SSE(jconst_merged_upsample_sse2))
542 jsimd_h2v2_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
543 JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)
545 void (*avx2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
546 void (*sse2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
548 switch (cinfo->out_color_space) {
550 avx2fct = jsimd_h2v2_extrgb_merged_upsample_avx2;
551 sse2fct = jsimd_h2v2_extrgb_merged_upsample_sse2;
555 avx2fct = jsimd_h2v2_extrgbx_merged_upsample_avx2;
556 sse2fct = jsimd_h2v2_extrgbx_merged_upsample_sse2;
559 avx2fct = jsimd_h2v2_extbgr_merged_upsample_avx2;
560 sse2fct = jsimd_h2v2_extbgr_merged_upsample_sse2;
564 avx2fct = jsimd_h2v2_extbgrx_merged_upsample_avx2;
565 sse2fct = jsimd_h2v2_extbgrx_merged_upsample_sse2;
569 avx2fct = jsimd_h2v2_extxbgr_merged_upsample_avx2;
570 sse2fct = jsimd_h2v2_extxbgr_merged_upsample_sse2;
574 avx2fct = jsimd_h2v2_extxrgb_merged_upsample_avx2;
575 sse2fct = jsimd_h2v2_extxrgb_merged_upsample_sse2;
578 avx2fct = jsimd_h2v2_merged_upsample_avx2;
579 sse2fct = jsimd_h2v2_merged_upsample_sse2;
583 if (simd_support & JSIMD_AVX2)
584 avx2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
586 sse2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
590 jsimd_h2v1_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
591 JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)
593 void (*avx2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
594 void (*sse2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
596 switch (cinfo->out_color_space) {
598 avx2fct = jsimd_h2v1_extrgb_merged_upsample_avx2;
599 sse2fct = jsimd_h2v1_extrgb_merged_upsample_sse2;
603 avx2fct = jsimd_h2v1_extrgbx_merged_upsample_avx2;
604 sse2fct = jsimd_h2v1_extrgbx_merged_upsample_sse2;
607 avx2fct = jsimd_h2v1_extbgr_merged_upsample_avx2;
608 sse2fct = jsimd_h2v1_extbgr_merged_upsample_sse2;
612 avx2fct = jsimd_h2v1_extbgrx_merged_upsample_avx2;
613 sse2fct = jsimd_h2v1_extbgrx_merged_upsample_sse2;
617 avx2fct = jsimd_h2v1_extxbgr_merged_upsample_avx2;
618 sse2fct = jsimd_h2v1_extxbgr_merged_upsample_sse2;
622 avx2fct = jsimd_h2v1_extxrgb_merged_upsample_avx2;
623 sse2fct = jsimd_h2v1_extxrgb_merged_upsample_sse2;
626 avx2fct = jsimd_h2v1_merged_upsample_avx2;
627 sse2fct = jsimd_h2v1_merged_upsample_sse2;
631 if (simd_support & JSIMD_AVX2)
632 avx2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
634 sse2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
638 jsimd_can_convsamp(void)
642 /* The code is optimised for these values only */
645 if (BITS_IN_JSAMPLE != 8)
647 if (sizeof(JDIMENSION) != 4)
649 if (sizeof(DCTELEM) != 2)
652 if (simd_support & JSIMD_AVX2)
654 if (simd_support & JSIMD_SSE2)
661 jsimd_can_convsamp_float(void)
665 /* The code is optimised for these values only */
668 if (BITS_IN_JSAMPLE != 8)
670 if (sizeof(JDIMENSION) != 4)
672 if (sizeof(FAST_FLOAT) != 4)
675 if (simd_support & JSIMD_SSE2)
682 jsimd_convsamp(JSAMPARRAY sample_data, JDIMENSION start_col,
685 if (simd_support & JSIMD_AVX2)
686 jsimd_convsamp_avx2(sample_data, start_col, workspace);
688 jsimd_convsamp_sse2(sample_data, start_col, workspace);
692 jsimd_convsamp_float(JSAMPARRAY sample_data, JDIMENSION start_col,
693 FAST_FLOAT *workspace)
695 jsimd_convsamp_float_sse2(sample_data, start_col, workspace);
699 jsimd_can_fdct_islow(void)
703 /* The code is optimised for these values only */
706 if (sizeof(DCTELEM) != 2)
709 if ((simd_support & JSIMD_AVX2) && IS_ALIGNED_AVX(jconst_fdct_islow_avx2))
711 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_islow_sse2))
718 jsimd_can_fdct_ifast(void)
722 /* The code is optimised for these values only */
725 if (sizeof(DCTELEM) != 2)
728 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_ifast_sse2))
735 jsimd_can_fdct_float(void)
739 /* The code is optimised for these values only */
742 if (sizeof(FAST_FLOAT) != 4)
745 if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_fdct_float_sse))
752 jsimd_fdct_islow(DCTELEM *data)
754 if (simd_support & JSIMD_AVX2)
755 jsimd_fdct_islow_avx2(data);
757 jsimd_fdct_islow_sse2(data);
761 jsimd_fdct_ifast(DCTELEM *data)
763 jsimd_fdct_ifast_sse2(data);
767 jsimd_fdct_float(FAST_FLOAT *data)
769 jsimd_fdct_float_sse(data);
773 jsimd_can_quantize(void)
777 /* The code is optimised for these values only */
780 if (sizeof(JCOEF) != 2)
782 if (sizeof(DCTELEM) != 2)
785 if (simd_support & JSIMD_AVX2)
787 if (simd_support & JSIMD_SSE2)
794 jsimd_can_quantize_float(void)
798 /* The code is optimised for these values only */
801 if (sizeof(JCOEF) != 2)
803 if (sizeof(FAST_FLOAT) != 4)
806 if (simd_support & JSIMD_SSE2)
813 jsimd_quantize(JCOEFPTR coef_block, DCTELEM *divisors, DCTELEM *workspace)
815 if (simd_support & JSIMD_AVX2)
816 jsimd_quantize_avx2(coef_block, divisors, workspace);
818 jsimd_quantize_sse2(coef_block, divisors, workspace);
822 jsimd_quantize_float(JCOEFPTR coef_block, FAST_FLOAT *divisors,
823 FAST_FLOAT *workspace)
825 jsimd_quantize_float_sse2(coef_block, divisors, workspace);
829 jsimd_can_idct_2x2(void)
833 /* The code is optimised for these values only */
836 if (sizeof(JCOEF) != 2)
838 if (BITS_IN_JSAMPLE != 8)
840 if (sizeof(JDIMENSION) != 4)
842 if (sizeof(ISLOW_MULT_TYPE) != 2)
845 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
852 jsimd_can_idct_4x4(void)
856 /* The code is optimised for these values only */
859 if (sizeof(JCOEF) != 2)
861 if (BITS_IN_JSAMPLE != 8)
863 if (sizeof(JDIMENSION) != 4)
865 if (sizeof(ISLOW_MULT_TYPE) != 2)
868 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
875 jsimd_idct_2x2(j_decompress_ptr cinfo, jpeg_component_info *compptr,
876 JCOEFPTR coef_block, JSAMPARRAY output_buf,
877 JDIMENSION output_col)
879 jsimd_idct_2x2_sse2(compptr->dct_table, coef_block, output_buf, output_col);
883 jsimd_idct_4x4(j_decompress_ptr cinfo, jpeg_component_info *compptr,
884 JCOEFPTR coef_block, JSAMPARRAY output_buf,
885 JDIMENSION output_col)
887 jsimd_idct_4x4_sse2(compptr->dct_table, coef_block, output_buf, output_col);
891 jsimd_can_idct_islow(void)
895 /* The code is optimised for these values only */
898 if (sizeof(JCOEF) != 2)
900 if (BITS_IN_JSAMPLE != 8)
902 if (sizeof(JDIMENSION) != 4)
904 if (sizeof(ISLOW_MULT_TYPE) != 2)
907 if ((simd_support & JSIMD_AVX2) && IS_ALIGNED_AVX(jconst_idct_islow_avx2))
909 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_islow_sse2))
916 jsimd_can_idct_ifast(void)
920 /* The code is optimised for these values only */
923 if (sizeof(JCOEF) != 2)
925 if (BITS_IN_JSAMPLE != 8)
927 if (sizeof(JDIMENSION) != 4)
929 if (sizeof(IFAST_MULT_TYPE) != 2)
931 if (IFAST_SCALE_BITS != 2)
934 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_ifast_sse2))
941 jsimd_can_idct_float(void)
947 if (sizeof(JCOEF) != 2)
949 if (BITS_IN_JSAMPLE != 8)
951 if (sizeof(JDIMENSION) != 4)
953 if (sizeof(FAST_FLOAT) != 4)
955 if (sizeof(FLOAT_MULT_TYPE) != 4)
958 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_float_sse2))
965 jsimd_idct_islow(j_decompress_ptr cinfo, jpeg_component_info *compptr,
966 JCOEFPTR coef_block, JSAMPARRAY output_buf,
967 JDIMENSION output_col)
969 if (simd_support & JSIMD_AVX2)
970 jsimd_idct_islow_avx2(compptr->dct_table, coef_block, output_buf,
973 jsimd_idct_islow_sse2(compptr->dct_table, coef_block, output_buf,
978 jsimd_idct_ifast(j_decompress_ptr cinfo, jpeg_component_info *compptr,
979 JCOEFPTR coef_block, JSAMPARRAY output_buf,
980 JDIMENSION output_col)
982 jsimd_idct_ifast_sse2(compptr->dct_table, coef_block, output_buf,
987 jsimd_idct_float(j_decompress_ptr cinfo, jpeg_component_info *compptr,
988 JCOEFPTR coef_block, JSAMPARRAY output_buf,
989 JDIMENSION output_col)
991 jsimd_idct_float_sse2(compptr->dct_table, coef_block, output_buf,
996 jsimd_can_huff_encode_one_block(void)
1002 if (sizeof(JCOEF) != 2)
1005 if ((simd_support & JSIMD_SSE2) && simd_huffman &&
1006 IS_ALIGNED_SSE(jconst_huff_encode_one_block))
1013 jsimd_huff_encode_one_block(void *state, JOCTET *buffer, JCOEFPTR block,
1014 int last_dc_val, c_derived_tbl *dctbl,
1015 c_derived_tbl *actbl)
1017 return jsimd_huff_encode_one_block_sse2(state, buffer, block, last_dc_val,
1022 jsimd_can_encode_mcu_AC_first_prepare(void)
1028 if (sizeof(JCOEF) != 2)
1030 if (simd_support & JSIMD_SSE2)
1037 jsimd_encode_mcu_AC_first_prepare(const JCOEF *block,
1038 const int *jpeg_natural_order_start, int Sl,
1039 int Al, JCOEF *values, size_t *zerobits)
1041 jsimd_encode_mcu_AC_first_prepare_sse2(block, jpeg_natural_order_start,
1042 Sl, Al, values, zerobits);
1046 jsimd_can_encode_mcu_AC_refine_prepare(void)
1052 if (sizeof(JCOEF) != 2)
1054 if (simd_support & JSIMD_SSE2)
1061 jsimd_encode_mcu_AC_refine_prepare(const JCOEF *block,
1062 const int *jpeg_natural_order_start, int Sl,
1063 int Al, JCOEF *absvalues, size_t *bits)
1065 return jsimd_encode_mcu_AC_refine_prepare_sse2(block,
1066 jpeg_natural_order_start,
1067 Sl, Al, absvalues, bits);