4 * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
5 * Copyright (C) 2009-2011, 2014, 2016, 2018, 2022-2023, D. R. Commander.
6 * Copyright (C) 2015-2016, 2018, 2022, Matthieu Darbois.
8 * Based on the x86 SIMD extension for IJG JPEG library,
9 * Copyright (C) 1999-2006, MIYASAKA Masaru.
10 * For conditions of distribution and use, see copyright notice in jsimdext.inc
12 * This file contains the interface between the "normal" portions
13 * of the library and the SIMD implementations when running on a
14 * 64-bit x86 architecture.
17 #define JPEG_INTERNALS
18 #include "../../jinclude.h"
19 #include "../../jpeglib.h"
20 #include "../../jsimd.h"
21 #include "../../jdct.h"
22 #include "../../jsimddct.h"
26 * In the PIC cases, we have no guarantee that constants will keep
27 * their alignment. This macro allows us to verify it at runtime.
/*
 * IS_ALIGNED(ptr, order) evaluates to nonzero when the address held in ptr
 * is a multiple of 2^order bytes, i.e. when its low `order` bits are zero.
 *
 * Both arguments are fully parenthesized so that expression arguments keep
 * their meaning: the cast applies to the whole pointer expression (so
 * IS_ALIGNED(p + 4, 4) tests the address of element 4, not "address + 4"),
 * and `order` may be any integer expression.  The mask is built in size_t
 * so the shift is not limited by the width of int.
 */
#define IS_ALIGNED(ptr, order) \
  ((((size_t)(ptr)) & ((((size_t)1) << (order)) - 1)) == 0)

#define IS_ALIGNED_SSE(ptr)  (IS_ALIGNED((ptr), 4))  /* 16 byte alignment */
#define IS_ALIGNED_AVX(ptr)  (IS_ALIGNED((ptr), 5))  /* 32 byte alignment */
/*
 * Bitmask of SIMD capabilities; this file tests it with `&` against
 * JSIMD_SSE, JSIMD_SSE2 and JSIMD_AVX2.  It starts as all ones, and the
 * rest of the file compares it against ~0U before the result of
 * jpeg_simd_cpu_support() is stored into it, so all-ones appears to serve
 * as the "not probed yet" marker.
 * NOTE(review): THREAD_LOCAL is defined outside this file; presumably it
 * gives each thread its own copy -- confirm.
 */
34 static THREAD_LOCAL unsigned int simd_support = (unsigned int)(~0);
/*
 * Switch for the SIMD Huffman encoder, on (1) by default.
 * jsimd_can_huff_encode_one_block() requires it to be nonzero in addition
 * to the SSE2 bit in simd_support.
 * NOTE(review): a JSIMD_NOHUFFENC environment check is visible in this
 * file but the statement it guards is not; presumably it clears this
 * flag -- confirm.
 */
35 static THREAD_LOCAL unsigned int simd_huffman = 1;
38 * Check what SIMD accelerations are supported.
47 if (simd_support != ~0U)
50 simd_support = jpeg_simd_cpu_support();
53 /* Force different settings through environment variables */
54 if (!GETENV_S(env, 2, "JSIMD_FORCESSE2") && !strcmp(env, "1"))
55 simd_support &= JSIMD_SSE2;
56 if (!GETENV_S(env, 2, "JSIMD_FORCEAVX2") && !strcmp(env, "1"))
57 simd_support &= JSIMD_AVX2;
58 if (!GETENV_S(env, 2, "JSIMD_FORCENONE") && !strcmp(env, "1"))
60 if (!GETENV_S(env, 2, "JSIMD_NOHUFFENC") && !strcmp(env, "1"))
66 jsimd_can_rgb_ycc(void)
70 /* The code is optimised for these values only */
71 if (BITS_IN_JSAMPLE != 8)
73 if (sizeof(JDIMENSION) != 4)
75 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
78 if ((simd_support & JSIMD_AVX2) &&
79 IS_ALIGNED_AVX(jconst_rgb_ycc_convert_avx2))
81 if ((simd_support & JSIMD_SSE2) &&
82 IS_ALIGNED_SSE(jconst_rgb_ycc_convert_sse2))
89 jsimd_can_rgb_gray(void)
93 /* The code is optimised for these values only */
94 if (BITS_IN_JSAMPLE != 8)
96 if (sizeof(JDIMENSION) != 4)
98 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
101 if ((simd_support & JSIMD_AVX2) &&
102 IS_ALIGNED_AVX(jconst_rgb_gray_convert_avx2))
104 if ((simd_support & JSIMD_SSE2) &&
105 IS_ALIGNED_SSE(jconst_rgb_gray_convert_sse2))
112 jsimd_can_ycc_rgb(void)
116 /* The code is optimised for these values only */
117 if (BITS_IN_JSAMPLE != 8)
119 if (sizeof(JDIMENSION) != 4)
121 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
124 if ((simd_support & JSIMD_AVX2) &&
125 IS_ALIGNED_AVX(jconst_ycc_rgb_convert_avx2))
127 if ((simd_support & JSIMD_SSE2) &&
128 IS_ALIGNED_SSE(jconst_ycc_rgb_convert_sse2))
135 jsimd_can_ycc_rgb565(void)
141 jsimd_rgb_ycc_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
142 JSAMPIMAGE output_buf, JDIMENSION output_row,
145 void (*avx2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
146 void (*sse2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
148 if (simd_support == ~0U)
151 switch (cinfo->in_color_space) {
153 avx2fct = jsimd_extrgb_ycc_convert_avx2;
154 sse2fct = jsimd_extrgb_ycc_convert_sse2;
158 avx2fct = jsimd_extrgbx_ycc_convert_avx2;
159 sse2fct = jsimd_extrgbx_ycc_convert_sse2;
162 avx2fct = jsimd_extbgr_ycc_convert_avx2;
163 sse2fct = jsimd_extbgr_ycc_convert_sse2;
167 avx2fct = jsimd_extbgrx_ycc_convert_avx2;
168 sse2fct = jsimd_extbgrx_ycc_convert_sse2;
172 avx2fct = jsimd_extxbgr_ycc_convert_avx2;
173 sse2fct = jsimd_extxbgr_ycc_convert_sse2;
177 avx2fct = jsimd_extxrgb_ycc_convert_avx2;
178 sse2fct = jsimd_extxrgb_ycc_convert_sse2;
181 avx2fct = jsimd_rgb_ycc_convert_avx2;
182 sse2fct = jsimd_rgb_ycc_convert_sse2;
186 if (simd_support & JSIMD_AVX2)
187 avx2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
189 sse2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
193 jsimd_rgb_gray_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
194 JSAMPIMAGE output_buf, JDIMENSION output_row,
197 void (*avx2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
198 void (*sse2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
200 if (simd_support == ~0U)
203 switch (cinfo->in_color_space) {
205 avx2fct = jsimd_extrgb_gray_convert_avx2;
206 sse2fct = jsimd_extrgb_gray_convert_sse2;
210 avx2fct = jsimd_extrgbx_gray_convert_avx2;
211 sse2fct = jsimd_extrgbx_gray_convert_sse2;
214 avx2fct = jsimd_extbgr_gray_convert_avx2;
215 sse2fct = jsimd_extbgr_gray_convert_sse2;
219 avx2fct = jsimd_extbgrx_gray_convert_avx2;
220 sse2fct = jsimd_extbgrx_gray_convert_sse2;
224 avx2fct = jsimd_extxbgr_gray_convert_avx2;
225 sse2fct = jsimd_extxbgr_gray_convert_sse2;
229 avx2fct = jsimd_extxrgb_gray_convert_avx2;
230 sse2fct = jsimd_extxrgb_gray_convert_sse2;
233 avx2fct = jsimd_rgb_gray_convert_avx2;
234 sse2fct = jsimd_rgb_gray_convert_sse2;
238 if (simd_support & JSIMD_AVX2)
239 avx2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
241 sse2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
245 jsimd_ycc_rgb_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
246 JDIMENSION input_row, JSAMPARRAY output_buf,
249 void (*avx2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
250 void (*sse2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
252 if (simd_support == ~0U)
255 switch (cinfo->out_color_space) {
257 avx2fct = jsimd_ycc_extrgb_convert_avx2;
258 sse2fct = jsimd_ycc_extrgb_convert_sse2;
262 avx2fct = jsimd_ycc_extrgbx_convert_avx2;
263 sse2fct = jsimd_ycc_extrgbx_convert_sse2;
266 avx2fct = jsimd_ycc_extbgr_convert_avx2;
267 sse2fct = jsimd_ycc_extbgr_convert_sse2;
271 avx2fct = jsimd_ycc_extbgrx_convert_avx2;
272 sse2fct = jsimd_ycc_extbgrx_convert_sse2;
276 avx2fct = jsimd_ycc_extxbgr_convert_avx2;
277 sse2fct = jsimd_ycc_extxbgr_convert_sse2;
281 avx2fct = jsimd_ycc_extxrgb_convert_avx2;
282 sse2fct = jsimd_ycc_extxrgb_convert_sse2;
285 avx2fct = jsimd_ycc_rgb_convert_avx2;
286 sse2fct = jsimd_ycc_rgb_convert_sse2;
290 if (simd_support & JSIMD_AVX2)
291 avx2fct(cinfo->output_width, input_buf, input_row, output_buf, num_rows);
293 sse2fct(cinfo->output_width, input_buf, input_row, output_buf, num_rows);
297 jsimd_ycc_rgb565_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
298 JDIMENSION input_row, JSAMPARRAY output_buf,
304 jsimd_can_h2v2_downsample(void)
308 /* The code is optimised for these values only */
309 if (BITS_IN_JSAMPLE != 8)
311 if (sizeof(JDIMENSION) != 4)
314 if (simd_support & JSIMD_AVX2)
316 if (simd_support & JSIMD_SSE2)
323 jsimd_can_h2v1_downsample(void)
327 /* The code is optimised for these values only */
328 if (BITS_IN_JSAMPLE != 8)
330 if (sizeof(JDIMENSION) != 4)
333 if (simd_support & JSIMD_AVX2)
335 if (simd_support & JSIMD_SSE2)
342 jsimd_h2v2_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
343 JSAMPARRAY input_data, JSAMPARRAY output_data)
345 if (simd_support == ~0U)
348 if (simd_support & JSIMD_AVX2)
349 jsimd_h2v2_downsample_avx2(cinfo->image_width, cinfo->max_v_samp_factor,
350 compptr->v_samp_factor,
351 compptr->width_in_blocks, input_data,
354 jsimd_h2v2_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor,
355 compptr->v_samp_factor,
356 compptr->width_in_blocks, input_data,
361 jsimd_h2v1_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
362 JSAMPARRAY input_data, JSAMPARRAY output_data)
364 if (simd_support == ~0U)
367 if (simd_support & JSIMD_AVX2)
368 jsimd_h2v1_downsample_avx2(cinfo->image_width, cinfo->max_v_samp_factor,
369 compptr->v_samp_factor,
370 compptr->width_in_blocks, input_data,
373 jsimd_h2v1_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor,
374 compptr->v_samp_factor,
375 compptr->width_in_blocks, input_data,
380 jsimd_can_h2v2_upsample(void)
384 /* The code is optimised for these values only */
385 if (BITS_IN_JSAMPLE != 8)
387 if (sizeof(JDIMENSION) != 4)
390 if (simd_support & JSIMD_AVX2)
392 if (simd_support & JSIMD_SSE2)
399 jsimd_can_h2v1_upsample(void)
403 /* The code is optimised for these values only */
404 if (BITS_IN_JSAMPLE != 8)
406 if (sizeof(JDIMENSION) != 4)
409 if (simd_support & JSIMD_AVX2)
411 if (simd_support & JSIMD_SSE2)
418 jsimd_h2v2_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
419 JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
421 if (simd_support == ~0U)
424 if (simd_support & JSIMD_AVX2)
425 jsimd_h2v2_upsample_avx2(cinfo->max_v_samp_factor, cinfo->output_width,
426 input_data, output_data_ptr);
428 jsimd_h2v2_upsample_sse2(cinfo->max_v_samp_factor, cinfo->output_width,
429 input_data, output_data_ptr);
433 jsimd_h2v1_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
434 JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
436 if (simd_support == ~0U)
439 if (simd_support & JSIMD_AVX2)
440 jsimd_h2v1_upsample_avx2(cinfo->max_v_samp_factor, cinfo->output_width,
441 input_data, output_data_ptr);
443 jsimd_h2v1_upsample_sse2(cinfo->max_v_samp_factor, cinfo->output_width,
444 input_data, output_data_ptr);
448 jsimd_can_h2v2_fancy_upsample(void)
452 /* The code is optimised for these values only */
453 if (BITS_IN_JSAMPLE != 8)
455 if (sizeof(JDIMENSION) != 4)
458 if ((simd_support & JSIMD_AVX2) &&
459 IS_ALIGNED_AVX(jconst_fancy_upsample_avx2))
461 if ((simd_support & JSIMD_SSE2) &&
462 IS_ALIGNED_SSE(jconst_fancy_upsample_sse2))
469 jsimd_can_h2v1_fancy_upsample(void)
473 /* The code is optimised for these values only */
474 if (BITS_IN_JSAMPLE != 8)
476 if (sizeof(JDIMENSION) != 4)
479 if ((simd_support & JSIMD_AVX2) &&
480 IS_ALIGNED_AVX(jconst_fancy_upsample_avx2))
482 if ((simd_support & JSIMD_SSE2) &&
483 IS_ALIGNED_SSE(jconst_fancy_upsample_sse2))
490 jsimd_h2v2_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
491 JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
493 if (simd_support == ~0U)
496 if (simd_support & JSIMD_AVX2)
497 jsimd_h2v2_fancy_upsample_avx2(cinfo->max_v_samp_factor,
498 compptr->downsampled_width, input_data,
501 jsimd_h2v2_fancy_upsample_sse2(cinfo->max_v_samp_factor,
502 compptr->downsampled_width, input_data,
507 jsimd_h2v1_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
508 JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
510 if (simd_support == ~0U)
513 if (simd_support & JSIMD_AVX2)
514 jsimd_h2v1_fancy_upsample_avx2(cinfo->max_v_samp_factor,
515 compptr->downsampled_width, input_data,
518 jsimd_h2v1_fancy_upsample_sse2(cinfo->max_v_samp_factor,
519 compptr->downsampled_width, input_data,
524 jsimd_can_h2v2_merged_upsample(void)
528 /* The code is optimised for these values only */
529 if (BITS_IN_JSAMPLE != 8)
531 if (sizeof(JDIMENSION) != 4)
534 if ((simd_support & JSIMD_AVX2) &&
535 IS_ALIGNED_AVX(jconst_merged_upsample_avx2))
537 if ((simd_support & JSIMD_SSE2) &&
538 IS_ALIGNED_SSE(jconst_merged_upsample_sse2))
545 jsimd_can_h2v1_merged_upsample(void)
549 /* The code is optimised for these values only */
550 if (BITS_IN_JSAMPLE != 8)
552 if (sizeof(JDIMENSION) != 4)
555 if ((simd_support & JSIMD_AVX2) &&
556 IS_ALIGNED_AVX(jconst_merged_upsample_avx2))
558 if ((simd_support & JSIMD_SSE2) &&
559 IS_ALIGNED_SSE(jconst_merged_upsample_sse2))
566 jsimd_h2v2_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
567 JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)
569 void (*avx2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
570 void (*sse2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
572 if (simd_support == ~0U)
575 switch (cinfo->out_color_space) {
577 avx2fct = jsimd_h2v2_extrgb_merged_upsample_avx2;
578 sse2fct = jsimd_h2v2_extrgb_merged_upsample_sse2;
582 avx2fct = jsimd_h2v2_extrgbx_merged_upsample_avx2;
583 sse2fct = jsimd_h2v2_extrgbx_merged_upsample_sse2;
586 avx2fct = jsimd_h2v2_extbgr_merged_upsample_avx2;
587 sse2fct = jsimd_h2v2_extbgr_merged_upsample_sse2;
591 avx2fct = jsimd_h2v2_extbgrx_merged_upsample_avx2;
592 sse2fct = jsimd_h2v2_extbgrx_merged_upsample_sse2;
596 avx2fct = jsimd_h2v2_extxbgr_merged_upsample_avx2;
597 sse2fct = jsimd_h2v2_extxbgr_merged_upsample_sse2;
601 avx2fct = jsimd_h2v2_extxrgb_merged_upsample_avx2;
602 sse2fct = jsimd_h2v2_extxrgb_merged_upsample_sse2;
605 avx2fct = jsimd_h2v2_merged_upsample_avx2;
606 sse2fct = jsimd_h2v2_merged_upsample_sse2;
610 if (simd_support & JSIMD_AVX2)
611 avx2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
613 sse2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
617 jsimd_h2v1_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
618 JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)
620 void (*avx2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
621 void (*sse2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
623 if (simd_support == ~0U)
626 switch (cinfo->out_color_space) {
628 avx2fct = jsimd_h2v1_extrgb_merged_upsample_avx2;
629 sse2fct = jsimd_h2v1_extrgb_merged_upsample_sse2;
633 avx2fct = jsimd_h2v1_extrgbx_merged_upsample_avx2;
634 sse2fct = jsimd_h2v1_extrgbx_merged_upsample_sse2;
637 avx2fct = jsimd_h2v1_extbgr_merged_upsample_avx2;
638 sse2fct = jsimd_h2v1_extbgr_merged_upsample_sse2;
642 avx2fct = jsimd_h2v1_extbgrx_merged_upsample_avx2;
643 sse2fct = jsimd_h2v1_extbgrx_merged_upsample_sse2;
647 avx2fct = jsimd_h2v1_extxbgr_merged_upsample_avx2;
648 sse2fct = jsimd_h2v1_extxbgr_merged_upsample_sse2;
652 avx2fct = jsimd_h2v1_extxrgb_merged_upsample_avx2;
653 sse2fct = jsimd_h2v1_extxrgb_merged_upsample_sse2;
656 avx2fct = jsimd_h2v1_merged_upsample_avx2;
657 sse2fct = jsimd_h2v1_merged_upsample_sse2;
661 if (simd_support & JSIMD_AVX2)
662 avx2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
664 sse2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
668 jsimd_can_convsamp(void)
672 /* The code is optimised for these values only */
675 if (BITS_IN_JSAMPLE != 8)
677 if (sizeof(JDIMENSION) != 4)
679 if (sizeof(DCTELEM) != 2)
682 if (simd_support & JSIMD_AVX2)
684 if (simd_support & JSIMD_SSE2)
691 jsimd_can_convsamp_float(void)
695 /* The code is optimised for these values only */
698 if (BITS_IN_JSAMPLE != 8)
700 if (sizeof(JDIMENSION) != 4)
702 if (sizeof(FAST_FLOAT) != 4)
705 if (simd_support & JSIMD_SSE2)
712 jsimd_convsamp(JSAMPARRAY sample_data, JDIMENSION start_col,
715 if (simd_support == ~0U)
718 if (simd_support & JSIMD_AVX2)
719 jsimd_convsamp_avx2(sample_data, start_col, workspace);
721 jsimd_convsamp_sse2(sample_data, start_col, workspace);
725 jsimd_convsamp_float(JSAMPARRAY sample_data, JDIMENSION start_col,
726 FAST_FLOAT *workspace)
728 jsimd_convsamp_float_sse2(sample_data, start_col, workspace);
732 jsimd_can_fdct_islow(void)
736 /* The code is optimised for these values only */
739 if (sizeof(DCTELEM) != 2)
742 if ((simd_support & JSIMD_AVX2) && IS_ALIGNED_AVX(jconst_fdct_islow_avx2))
744 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_islow_sse2))
751 jsimd_can_fdct_ifast(void)
755 /* The code is optimised for these values only */
758 if (sizeof(DCTELEM) != 2)
761 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_ifast_sse2))
768 jsimd_can_fdct_float(void)
772 /* The code is optimised for these values only */
775 if (sizeof(FAST_FLOAT) != 4)
778 if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_fdct_float_sse))
785 jsimd_fdct_islow(DCTELEM *data)
787 if (simd_support == ~0U)
790 if (simd_support & JSIMD_AVX2)
791 jsimd_fdct_islow_avx2(data);
793 jsimd_fdct_islow_sse2(data);
797 jsimd_fdct_ifast(DCTELEM *data)
799 jsimd_fdct_ifast_sse2(data);
803 jsimd_fdct_float(FAST_FLOAT *data)
805 jsimd_fdct_float_sse(data);
809 jsimd_can_quantize(void)
813 /* The code is optimised for these values only */
816 if (sizeof(JCOEF) != 2)
818 if (sizeof(DCTELEM) != 2)
821 if (simd_support & JSIMD_AVX2)
823 if (simd_support & JSIMD_SSE2)
830 jsimd_can_quantize_float(void)
834 /* The code is optimised for these values only */
837 if (sizeof(JCOEF) != 2)
839 if (sizeof(FAST_FLOAT) != 4)
842 if (simd_support & JSIMD_SSE2)
849 jsimd_quantize(JCOEFPTR coef_block, DCTELEM *divisors, DCTELEM *workspace)
851 if (simd_support == ~0U)
854 if (simd_support & JSIMD_AVX2)
855 jsimd_quantize_avx2(coef_block, divisors, workspace);
857 jsimd_quantize_sse2(coef_block, divisors, workspace);
861 jsimd_quantize_float(JCOEFPTR coef_block, FAST_FLOAT *divisors,
862 FAST_FLOAT *workspace)
864 jsimd_quantize_float_sse2(coef_block, divisors, workspace);
868 jsimd_can_idct_2x2(void)
872 /* The code is optimised for these values only */
875 if (sizeof(JCOEF) != 2)
877 if (BITS_IN_JSAMPLE != 8)
879 if (sizeof(JDIMENSION) != 4)
881 if (sizeof(ISLOW_MULT_TYPE) != 2)
884 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
891 jsimd_can_idct_4x4(void)
895 /* The code is optimised for these values only */
898 if (sizeof(JCOEF) != 2)
900 if (BITS_IN_JSAMPLE != 8)
902 if (sizeof(JDIMENSION) != 4)
904 if (sizeof(ISLOW_MULT_TYPE) != 2)
907 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
914 jsimd_idct_2x2(j_decompress_ptr cinfo, jpeg_component_info *compptr,
915 JCOEFPTR coef_block, JSAMPARRAY output_buf,
916 JDIMENSION output_col)
918 jsimd_idct_2x2_sse2(compptr->dct_table, coef_block, output_buf, output_col);
922 jsimd_idct_4x4(j_decompress_ptr cinfo, jpeg_component_info *compptr,
923 JCOEFPTR coef_block, JSAMPARRAY output_buf,
924 JDIMENSION output_col)
926 jsimd_idct_4x4_sse2(compptr->dct_table, coef_block, output_buf, output_col);
930 jsimd_can_idct_islow(void)
934 /* The code is optimised for these values only */
937 if (sizeof(JCOEF) != 2)
939 if (BITS_IN_JSAMPLE != 8)
941 if (sizeof(JDIMENSION) != 4)
943 if (sizeof(ISLOW_MULT_TYPE) != 2)
946 if ((simd_support & JSIMD_AVX2) && IS_ALIGNED_AVX(jconst_idct_islow_avx2))
948 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_islow_sse2))
955 jsimd_can_idct_ifast(void)
959 /* The code is optimised for these values only */
962 if (sizeof(JCOEF) != 2)
964 if (BITS_IN_JSAMPLE != 8)
966 if (sizeof(JDIMENSION) != 4)
968 if (sizeof(IFAST_MULT_TYPE) != 2)
970 if (IFAST_SCALE_BITS != 2)
973 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_ifast_sse2))
980 jsimd_can_idct_float(void)
986 if (sizeof(JCOEF) != 2)
988 if (BITS_IN_JSAMPLE != 8)
990 if (sizeof(JDIMENSION) != 4)
992 if (sizeof(FAST_FLOAT) != 4)
994 if (sizeof(FLOAT_MULT_TYPE) != 4)
997 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_float_sse2))
1004 jsimd_idct_islow(j_decompress_ptr cinfo, jpeg_component_info *compptr,
1005 JCOEFPTR coef_block, JSAMPARRAY output_buf,
1006 JDIMENSION output_col)
1008 if (simd_support == ~0U)
1011 if (simd_support & JSIMD_AVX2)
1012 jsimd_idct_islow_avx2(compptr->dct_table, coef_block, output_buf,
1015 jsimd_idct_islow_sse2(compptr->dct_table, coef_block, output_buf,
1020 jsimd_idct_ifast(j_decompress_ptr cinfo, jpeg_component_info *compptr,
1021 JCOEFPTR coef_block, JSAMPARRAY output_buf,
1022 JDIMENSION output_col)
1024 jsimd_idct_ifast_sse2(compptr->dct_table, coef_block, output_buf,
1029 jsimd_idct_float(j_decompress_ptr cinfo, jpeg_component_info *compptr,
1030 JCOEFPTR coef_block, JSAMPARRAY output_buf,
1031 JDIMENSION output_col)
1033 jsimd_idct_float_sse2(compptr->dct_table, coef_block, output_buf,
1038 jsimd_can_huff_encode_one_block(void)
1044 if (sizeof(JCOEF) != 2)
1047 if ((simd_support & JSIMD_SSE2) && simd_huffman &&
1048 IS_ALIGNED_SSE(jconst_huff_encode_one_block))
1055 jsimd_huff_encode_one_block(void *state, JOCTET *buffer, JCOEFPTR block,
1056 int last_dc_val, c_derived_tbl *dctbl,
1057 c_derived_tbl *actbl)
1059 return jsimd_huff_encode_one_block_sse2(state, buffer, block, last_dc_val,
1064 jsimd_can_encode_mcu_AC_first_prepare(void)
1070 if (sizeof(JCOEF) != 2)
1072 if (simd_support & JSIMD_SSE2)
1079 jsimd_encode_mcu_AC_first_prepare(const JCOEF *block,
1080 const int *jpeg_natural_order_start, int Sl,
1081 int Al, UJCOEF *values, size_t *zerobits)
1083 jsimd_encode_mcu_AC_first_prepare_sse2(block, jpeg_natural_order_start,
1084 Sl, Al, values, zerobits);
1088 jsimd_can_encode_mcu_AC_refine_prepare(void)
1094 if (sizeof(JCOEF) != 2)
1096 if (simd_support & JSIMD_SSE2)
1103 jsimd_encode_mcu_AC_refine_prepare(const JCOEF *block,
1104 const int *jpeg_natural_order_start, int Sl,
1105 int Al, UJCOEF *absvalues, size_t *bits)
1107 return jsimd_encode_mcu_AC_refine_prepare_sse2(block,
1108 jpeg_natural_order_start,
1109 Sl, Al, absvalues, bits);