4 * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
5 * Copyright (C) 2009-2011, 2013-2014, 2016, 2018, 2022, D. R. Commander.
6 * Copyright (C) 2015-2016, 2018, Matthieu Darbois.
8 * Based on the x86 SIMD extension for IJG JPEG library,
9 * Copyright (C) 1999-2006, MIYASAKA Masaru.
10 * For conditions of distribution and use, see copyright notice in jsimdext.inc
12 * This file contains the interface between the "normal" portions
13 * of the library and the SIMD implementations when running on a
14 * 32-bit x86 architecture.
17 #define JPEG_INTERNALS
18 #include "../../jinclude.h"
19 #include "../../jpeglib.h"
20 #include "../../jsimd.h"
21 #include "../../jdct.h"
22 #include "../../jsimddct.h"
24 #include "jconfigint.h"
/*
 * In the PIC cases, we have no guarantee that constants will keep
 * their alignment.  These macros allow us to verify it at runtime.
 *
 * IS_ALIGNED(ptr, order) is non-zero when the address held in ptr is a
 * multiple of 2^order bytes.  Both arguments are parenthesized so that
 * compound expressions (e.g. "base + off" or "a | b") expand as intended, and
 * the address is converted through size_t rather than unsigned int so that no
 * address bits are discarded when a pointer is wider than unsigned int.
 */
#define IS_ALIGNED(ptr, order) \
  ((((size_t)(ptr)) & (((size_t)1 << (order)) - 1)) == 0)

#define IS_ALIGNED_SSE(ptr)  (IS_ALIGNED(ptr, 4)) /* 16 byte alignment */
#define IS_ALIGNED_AVX(ptr)  (IS_ALIGNED(ptr, 5)) /* 32 byte alignment */
/*
 * Bitmask of JSIMD_* capability flags.  The all-ones initial value is the
 * "not yet detected" sentinel that the detection code compares against (~0U).
 */
35 static unsigned int simd_support = (unsigned int)(~0);
/* Must be non-zero for jsimd_can_huff_encode_one_block() to pass its SSE2 test. */
36 static unsigned int simd_huffman = 1;
/*
 * Body of the SIMD capability detection routine (its header line is not part
 * of this excerpt).  It tests simd_support against the ~0U sentinel, loads the
 * mask from jpeg_simd_cpu_support(), and then narrows it according to the
 * JSIMD_FORCE* environment variables.  The statement guarded by the sentinel
 * test is not shown (presumably an early return -- confirm against the full
 * file).
 */
39 * Check what SIMD accelerations are supported.
41 * FIXME: This code is racy under a multi-threaded environment.
50 if (simd_support != ~0U)
53 simd_support = jpeg_simd_cpu_support();
56 /* Force different settings through environment variables */
/*
 * Each override below applies only when GETENV_S() returns 0 and the value
 * read into env compares equal to "1".  The masks are ANDed in, so an override
 * can only remove capability bits, never add them.
 */
57 if (!GETENV_S(env, 2, "JSIMD_FORCEMMX") && !strcmp(env, "1"))
58 simd_support &= JSIMD_MMX;
59 if (!GETENV_S(env, 2, "JSIMD_FORCE3DNOW") && !strcmp(env, "1"))
60 simd_support &= JSIMD_3DNOW | JSIMD_MMX;
61 if (!GETENV_S(env, 2, "JSIMD_FORCESSE") && !strcmp(env, "1"))
62 simd_support &= JSIMD_SSE | JSIMD_MMX;
63 if (!GETENV_S(env, 2, "JSIMD_FORCESSE2") && !strcmp(env, "1"))
64 simd_support &= JSIMD_SSE2;
65 if (!GETENV_S(env, 2, "JSIMD_FORCEAVX2") && !strcmp(env, "1"))
66 simd_support &= JSIMD_AVX2;
/* The statements guarded by the next two tests are not shown in this excerpt. */
67 if (!GETENV_S(env, 2, "JSIMD_FORCENONE") && !strcmp(env, "1"))
69 if (!GETENV_S(env, 2, "JSIMD_NOHUFFENC") && !strcmp(env, "1"))
/*
 * Capability probe for the rgb_ycc colour converters.  The visible tests cover
 * the build configuration (8-bit samples, 4-byte JDIMENSION, RGB_PIXELSIZE of
 * 3 or 4) and then AVX2, SSE2 and MMX in that order; AVX2 and SSE2 also
 * require their constant tables to be 32- and 16-byte aligned.  The statements
 * guarded by these tests are not shown in this excerpt.
 */
75 jsimd_can_rgb_ycc(void)
79 /* The code is optimised for these values only */
80 if (BITS_IN_JSAMPLE != 8)
82 if (sizeof(JDIMENSION) != 4)
84 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
87 if ((simd_support & JSIMD_AVX2) &&
88 IS_ALIGNED_AVX(jconst_rgb_ycc_convert_avx2))
90 if ((simd_support & JSIMD_SSE2) &&
91 IS_ALIGNED_SSE(jconst_rgb_ycc_convert_sse2))
93 if (simd_support & JSIMD_MMX)
/*
 * Capability probe for the rgb_gray colour converters.  Same shape as the
 * rgb_ycc probe: configuration tests (8-bit samples, 4-byte JDIMENSION,
 * RGB_PIXELSIZE of 3 or 4), then AVX2 and SSE2 with aligned constant tables,
 * then MMX.  The guarded statements are not shown in this excerpt.
 */
100 jsimd_can_rgb_gray(void)
104 /* The code is optimised for these values only */
105 if (BITS_IN_JSAMPLE != 8)
107 if (sizeof(JDIMENSION) != 4)
109 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
112 if ((simd_support & JSIMD_AVX2) &&
113 IS_ALIGNED_AVX(jconst_rgb_gray_convert_avx2))
115 if ((simd_support & JSIMD_SSE2) &&
116 IS_ALIGNED_SSE(jconst_rgb_gray_convert_sse2))
118 if (simd_support & JSIMD_MMX)
/*
 * Capability probe for the ycc_rgb colour converters.  Configuration tests
 * (8-bit samples, 4-byte JDIMENSION, RGB_PIXELSIZE of 3 or 4) are followed by
 * AVX2 and SSE2 tests that also require aligned constant tables, then MMX.
 * The guarded statements are not shown in this excerpt.
 */
125 jsimd_can_ycc_rgb(void)
129 /* The code is optimised for these values only */
130 if (BITS_IN_JSAMPLE != 8)
132 if (sizeof(JDIMENSION) != 4)
134 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
137 if ((simd_support & JSIMD_AVX2) &&
138 IS_ALIGNED_AVX(jconst_ycc_rgb_convert_avx2))
140 if ((simd_support & JSIMD_SSE2) &&
141 IS_ALIGNED_SSE(jconst_ycc_rgb_convert_sse2))
143 if (simd_support & JSIMD_MMX)
/*
 * Capability probe for the ycc_rgb565 converter.  Only the signature is
 * visible in this excerpt; no SIMD selection logic is shown for it.
 */
150 jsimd_can_ycc_rgb565(void)
/*
 * Dispatcher for the rgb_ycc conversion.  A switch on cinfo->in_color_space
 * selects one avx2/sse2/mmx routine triple (the case labels are not shown in
 * this excerpt); the routine matching the best instruction set flagged in
 * simd_support is then called with cinfo->image_width and the caller's
 * buffers and row arguments.
 *
 * NOTE(review): unlike jsimd_can_rgb_ycc(), the AVX2/SSE2 branches here do not
 * re-test the alignment of the constant tables.
 */
156 jsimd_rgb_ycc_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
157 JSAMPIMAGE output_buf, JDIMENSION output_row,
160 void (*avx2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
161 void (*sse2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
162 void (*mmxfct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
164 switch (cinfo->in_color_space) {
166 avx2fct = jsimd_extrgb_ycc_convert_avx2;
167 sse2fct = jsimd_extrgb_ycc_convert_sse2;
168 mmxfct = jsimd_extrgb_ycc_convert_mmx;
172 avx2fct = jsimd_extrgbx_ycc_convert_avx2;
173 sse2fct = jsimd_extrgbx_ycc_convert_sse2;
174 mmxfct = jsimd_extrgbx_ycc_convert_mmx;
177 avx2fct = jsimd_extbgr_ycc_convert_avx2;
178 sse2fct = jsimd_extbgr_ycc_convert_sse2;
179 mmxfct = jsimd_extbgr_ycc_convert_mmx;
183 avx2fct = jsimd_extbgrx_ycc_convert_avx2;
184 sse2fct = jsimd_extbgrx_ycc_convert_sse2;
185 mmxfct = jsimd_extbgrx_ycc_convert_mmx;
189 avx2fct = jsimd_extxbgr_ycc_convert_avx2;
190 sse2fct = jsimd_extxbgr_ycc_convert_sse2;
191 mmxfct = jsimd_extxbgr_ycc_convert_mmx;
195 avx2fct = jsimd_extxrgb_ycc_convert_avx2;
196 sse2fct = jsimd_extxrgb_ycc_convert_sse2;
197 mmxfct = jsimd_extxrgb_ycc_convert_mmx;
200 avx2fct = jsimd_rgb_ycc_convert_avx2;
201 sse2fct = jsimd_rgb_ycc_convert_sse2;
202 mmxfct = jsimd_rgb_ycc_convert_mmx;
/* Prefer AVX2, then SSE2; the MMX call is the remaining alternative. */
206 if (simd_support & JSIMD_AVX2)
207 avx2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
208 else if (simd_support & JSIMD_SSE2)
209 sse2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
211 mmxfct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
/*
 * Dispatcher for the rgb_gray conversion.  A switch on cinfo->in_color_space
 * selects one avx2/sse2/mmx routine triple (case labels not shown in this
 * excerpt), and the routine for the best instruction set flagged in
 * simd_support is called with cinfo->image_width and the caller's arguments.
 *
 * NOTE(review): unlike jsimd_can_rgb_gray(), the AVX2/SSE2 branches here do
 * not re-test the alignment of the constant tables.
 */
215 jsimd_rgb_gray_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
216 JSAMPIMAGE output_buf, JDIMENSION output_row,
219 void (*avx2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
220 void (*sse2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
221 void (*mmxfct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
223 switch (cinfo->in_color_space) {
225 avx2fct = jsimd_extrgb_gray_convert_avx2;
226 sse2fct = jsimd_extrgb_gray_convert_sse2;
227 mmxfct = jsimd_extrgb_gray_convert_mmx;
231 avx2fct = jsimd_extrgbx_gray_convert_avx2;
232 sse2fct = jsimd_extrgbx_gray_convert_sse2;
233 mmxfct = jsimd_extrgbx_gray_convert_mmx;
236 avx2fct = jsimd_extbgr_gray_convert_avx2;
237 sse2fct = jsimd_extbgr_gray_convert_sse2;
238 mmxfct = jsimd_extbgr_gray_convert_mmx;
242 avx2fct = jsimd_extbgrx_gray_convert_avx2;
243 sse2fct = jsimd_extbgrx_gray_convert_sse2;
244 mmxfct = jsimd_extbgrx_gray_convert_mmx;
248 avx2fct = jsimd_extxbgr_gray_convert_avx2;
249 sse2fct = jsimd_extxbgr_gray_convert_sse2;
250 mmxfct = jsimd_extxbgr_gray_convert_mmx;
254 avx2fct = jsimd_extxrgb_gray_convert_avx2;
255 sse2fct = jsimd_extxrgb_gray_convert_sse2;
256 mmxfct = jsimd_extxrgb_gray_convert_mmx;
259 avx2fct = jsimd_rgb_gray_convert_avx2;
260 sse2fct = jsimd_rgb_gray_convert_sse2;
261 mmxfct = jsimd_rgb_gray_convert_mmx;
/* Prefer AVX2, then SSE2; the MMX call is the remaining alternative. */
265 if (simd_support & JSIMD_AVX2)
266 avx2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
267 else if (simd_support & JSIMD_SSE2)
268 sse2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
270 mmxfct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
/*
 * Dispatcher for the ycc_rgb conversion (decompression side).  A switch on
 * cinfo->out_color_space selects one avx2/sse2/mmx routine triple (case labels
 * not shown in this excerpt), and the routine for the best instruction set
 * flagged in simd_support is called with cinfo->output_width and the caller's
 * arguments.
 *
 * NOTE(review): unlike jsimd_can_ycc_rgb(), the AVX2/SSE2 branches here do not
 * re-test the alignment of the constant tables.
 */
274 jsimd_ycc_rgb_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
275 JDIMENSION input_row, JSAMPARRAY output_buf,
278 void (*avx2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
279 void (*sse2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
280 void (*mmxfct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
282 switch (cinfo->out_color_space) {
284 avx2fct = jsimd_ycc_extrgb_convert_avx2;
285 sse2fct = jsimd_ycc_extrgb_convert_sse2;
286 mmxfct = jsimd_ycc_extrgb_convert_mmx;
290 avx2fct = jsimd_ycc_extrgbx_convert_avx2;
291 sse2fct = jsimd_ycc_extrgbx_convert_sse2;
292 mmxfct = jsimd_ycc_extrgbx_convert_mmx;
295 avx2fct = jsimd_ycc_extbgr_convert_avx2;
296 sse2fct = jsimd_ycc_extbgr_convert_sse2;
297 mmxfct = jsimd_ycc_extbgr_convert_mmx;
301 avx2fct = jsimd_ycc_extbgrx_convert_avx2;
302 sse2fct = jsimd_ycc_extbgrx_convert_sse2;
303 mmxfct = jsimd_ycc_extbgrx_convert_mmx;
307 avx2fct = jsimd_ycc_extxbgr_convert_avx2;
308 sse2fct = jsimd_ycc_extxbgr_convert_sse2;
309 mmxfct = jsimd_ycc_extxbgr_convert_mmx;
313 avx2fct = jsimd_ycc_extxrgb_convert_avx2;
314 sse2fct = jsimd_ycc_extxrgb_convert_sse2;
315 mmxfct = jsimd_ycc_extxrgb_convert_mmx;
318 avx2fct = jsimd_ycc_rgb_convert_avx2;
319 sse2fct = jsimd_ycc_rgb_convert_sse2;
320 mmxfct = jsimd_ycc_rgb_convert_mmx;
/* Prefer AVX2, then SSE2; the MMX call is the remaining alternative. */
324 if (simd_support & JSIMD_AVX2)
325 avx2fct(cinfo->output_width, input_buf, input_row, output_buf, num_rows);
326 else if (simd_support & JSIMD_SSE2)
327 sse2fct(cinfo->output_width, input_buf, input_row, output_buf, num_rows);
329 mmxfct(cinfo->output_width, input_buf, input_row, output_buf, num_rows);
/*
 * Entry point for the ycc_rgb565 conversion.  Only part of the parameter list
 * is visible in this excerpt and no SIMD call is shown for it.
 */
333 jsimd_ycc_rgb565_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
334 JDIMENSION input_row, JSAMPARRAY output_buf,
/*
 * Capability probe for h2v2 downsampling.  Tests the build configuration
 * (8-bit samples, 4-byte JDIMENSION) and then the AVX2, SSE2 and MMX bits of
 * simd_support in that order; no alignment test is involved.  The guarded
 * statements are not shown in this excerpt.
 */
340 jsimd_can_h2v2_downsample(void)
344 /* The code is optimised for these values only */
345 if (BITS_IN_JSAMPLE != 8)
347 if (sizeof(JDIMENSION) != 4)
350 if (simd_support & JSIMD_AVX2)
352 if (simd_support & JSIMD_SSE2)
354 if (simd_support & JSIMD_MMX)
/*
 * Capability probe for h2v1 downsampling.  Tests the build configuration
 * (8-bit samples, 4-byte JDIMENSION) and then the AVX2, SSE2 and MMX bits of
 * simd_support in that order.  The guarded statements are not shown in this
 * excerpt.
 */
361 jsimd_can_h2v1_downsample(void)
365 /* The code is optimised for these values only */
366 if (BITS_IN_JSAMPLE != 8)
368 if (sizeof(JDIMENSION) != 4)
371 if (simd_support & JSIMD_AVX2)
373 if (simd_support & JSIMD_SSE2)
375 if (simd_support & JSIMD_MMX)
/*
 * Dispatcher for h2v2 downsampling.  Forwards cinfo->image_width,
 * cinfo->max_v_samp_factor, compptr->v_samp_factor, compptr->width_in_blocks
 * and the data pointers to the AVX2, SSE2 or MMX routine, preferring AVX2 and
 * then SSE2.  The last argument line of the AVX2 and SSE2 calls is not shown
 * in this excerpt.
 */
382 jsimd_h2v2_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
383 JSAMPARRAY input_data, JSAMPARRAY output_data)
385 if (simd_support & JSIMD_AVX2)
386 jsimd_h2v2_downsample_avx2(cinfo->image_width, cinfo->max_v_samp_factor,
387 compptr->v_samp_factor,
388 compptr->width_in_blocks, input_data,
390 else if (simd_support & JSIMD_SSE2)
391 jsimd_h2v2_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor,
392 compptr->v_samp_factor,
393 compptr->width_in_blocks, input_data,
396 jsimd_h2v2_downsample_mmx(cinfo->image_width, cinfo->max_v_samp_factor,
397 compptr->v_samp_factor, compptr->width_in_blocks,
398 input_data, output_data);
/*
 * Dispatcher for h2v1 downsampling.  Forwards cinfo->image_width,
 * cinfo->max_v_samp_factor, compptr->v_samp_factor, compptr->width_in_blocks
 * and the data pointers to the AVX2, SSE2 or MMX routine, preferring AVX2 and
 * then SSE2.  The last argument line of the AVX2 and SSE2 calls is not shown
 * in this excerpt.
 */
402 jsimd_h2v1_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
403 JSAMPARRAY input_data, JSAMPARRAY output_data)
405 if (simd_support & JSIMD_AVX2)
406 jsimd_h2v1_downsample_avx2(cinfo->image_width, cinfo->max_v_samp_factor,
407 compptr->v_samp_factor,
408 compptr->width_in_blocks, input_data,
410 else if (simd_support & JSIMD_SSE2)
411 jsimd_h2v1_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor,
412 compptr->v_samp_factor,
413 compptr->width_in_blocks, input_data,
416 jsimd_h2v1_downsample_mmx(cinfo->image_width, cinfo->max_v_samp_factor,
417 compptr->v_samp_factor, compptr->width_in_blocks,
418 input_data, output_data);
/*
 * Capability probe for plain h2v2 upsampling.  Tests the build configuration
 * (8-bit samples, 4-byte JDIMENSION) and then the AVX2, SSE2 and MMX bits of
 * simd_support in that order.  The guarded statements are not shown in this
 * excerpt.
 */
422 jsimd_can_h2v2_upsample(void)
426 /* The code is optimised for these values only */
427 if (BITS_IN_JSAMPLE != 8)
429 if (sizeof(JDIMENSION) != 4)
432 if (simd_support & JSIMD_AVX2)
434 if (simd_support & JSIMD_SSE2)
436 if (simd_support & JSIMD_MMX)
/*
 * Capability probe for plain h2v1 upsampling.  Tests the build configuration
 * (8-bit samples, 4-byte JDIMENSION) and then the AVX2, SSE2 and MMX bits of
 * simd_support in that order.  The guarded statements are not shown in this
 * excerpt.
 */
443 jsimd_can_h2v1_upsample(void)
447 /* The code is optimised for these values only */
448 if (BITS_IN_JSAMPLE != 8)
450 if (sizeof(JDIMENSION) != 4)
453 if (simd_support & JSIMD_AVX2)
455 if (simd_support & JSIMD_SSE2)
457 if (simd_support & JSIMD_MMX)
/*
 * Dispatcher for plain h2v2 upsampling.  Passes cinfo->max_v_samp_factor,
 * cinfo->output_width, input_data and output_data_ptr to the AVX2, SSE2 or MMX
 * routine, preferring AVX2 and then SSE2.  compptr is not used by the visible
 * statements.
 */
464 jsimd_h2v2_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
465 JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
467 if (simd_support & JSIMD_AVX2)
468 jsimd_h2v2_upsample_avx2(cinfo->max_v_samp_factor, cinfo->output_width,
469 input_data, output_data_ptr);
470 else if (simd_support & JSIMD_SSE2)
471 jsimd_h2v2_upsample_sse2(cinfo->max_v_samp_factor, cinfo->output_width,
472 input_data, output_data_ptr);
474 jsimd_h2v2_upsample_mmx(cinfo->max_v_samp_factor, cinfo->output_width,
475 input_data, output_data_ptr);
/*
 * Dispatcher for plain h2v1 upsampling.  Passes cinfo->max_v_samp_factor,
 * cinfo->output_width, input_data and output_data_ptr to the AVX2, SSE2 or MMX
 * routine, preferring AVX2 and then SSE2.  compptr is not used by the visible
 * statements.
 */
479 jsimd_h2v1_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
480 JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
482 if (simd_support & JSIMD_AVX2)
483 jsimd_h2v1_upsample_avx2(cinfo->max_v_samp_factor, cinfo->output_width,
484 input_data, output_data_ptr);
485 else if (simd_support & JSIMD_SSE2)
486 jsimd_h2v1_upsample_sse2(cinfo->max_v_samp_factor, cinfo->output_width,
487 input_data, output_data_ptr);
489 jsimd_h2v1_upsample_mmx(cinfo->max_v_samp_factor, cinfo->output_width,
490 input_data, output_data_ptr);
/*
 * Capability probe for h2v2 fancy upsampling.  After the configuration tests
 * (8-bit samples, 4-byte JDIMENSION) it tests AVX2 and SSE2, each together
 * with the alignment of the matching jconst_fancy_upsample_* table, and then
 * MMX.  The guarded statements are not shown in this excerpt.
 */
494 jsimd_can_h2v2_fancy_upsample(void)
498 /* The code is optimised for these values only */
499 if (BITS_IN_JSAMPLE != 8)
501 if (sizeof(JDIMENSION) != 4)
504 if ((simd_support & JSIMD_AVX2) &&
505 IS_ALIGNED_AVX(jconst_fancy_upsample_avx2))
507 if ((simd_support & JSIMD_SSE2) &&
508 IS_ALIGNED_SSE(jconst_fancy_upsample_sse2))
510 if (simd_support & JSIMD_MMX)
/*
 * Capability probe for h2v1 fancy upsampling.  It tests the same
 * jconst_fancy_upsample_* tables as the h2v2 probe: configuration tests first,
 * then AVX2 and SSE2 with aligned constants, then MMX.  The guarded statements
 * are not shown in this excerpt.
 */
517 jsimd_can_h2v1_fancy_upsample(void)
521 /* The code is optimised for these values only */
522 if (BITS_IN_JSAMPLE != 8)
524 if (sizeof(JDIMENSION) != 4)
527 if ((simd_support & JSIMD_AVX2) &&
528 IS_ALIGNED_AVX(jconst_fancy_upsample_avx2))
530 if ((simd_support & JSIMD_SSE2) &&
531 IS_ALIGNED_SSE(jconst_fancy_upsample_sse2))
533 if (simd_support & JSIMD_MMX)
/*
 * Dispatcher for h2v2 fancy upsampling.  Passes cinfo->max_v_samp_factor,
 * compptr->downsampled_width and input_data to the AVX2, SSE2 or MMX routine;
 * the final argument line of each call is not shown in this excerpt.
 *
 * NOTE(review): the AVX2/SSE2 branches do not re-test the constant-table
 * alignment that jsimd_can_h2v2_fancy_upsample() requires.
 */
540 jsimd_h2v2_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
541 JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
543 if (simd_support & JSIMD_AVX2)
544 jsimd_h2v2_fancy_upsample_avx2(cinfo->max_v_samp_factor,
545 compptr->downsampled_width, input_data,
547 else if (simd_support & JSIMD_SSE2)
548 jsimd_h2v2_fancy_upsample_sse2(cinfo->max_v_samp_factor,
549 compptr->downsampled_width, input_data,
552 jsimd_h2v2_fancy_upsample_mmx(cinfo->max_v_samp_factor,
553 compptr->downsampled_width, input_data,
/*
 * Dispatcher for h2v1 fancy upsampling.  Passes cinfo->max_v_samp_factor,
 * compptr->downsampled_width and input_data to the AVX2, SSE2 or MMX routine;
 * the final argument line of each call is not shown in this excerpt.
 *
 * NOTE(review): the AVX2/SSE2 branches do not re-test the constant-table
 * alignment that jsimd_can_h2v1_fancy_upsample() requires.
 */
558 jsimd_h2v1_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
559 JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
561 if (simd_support & JSIMD_AVX2)
562 jsimd_h2v1_fancy_upsample_avx2(cinfo->max_v_samp_factor,
563 compptr->downsampled_width, input_data,
565 else if (simd_support & JSIMD_SSE2)
566 jsimd_h2v1_fancy_upsample_sse2(cinfo->max_v_samp_factor,
567 compptr->downsampled_width, input_data,
570 jsimd_h2v1_fancy_upsample_mmx(cinfo->max_v_samp_factor,
571 compptr->downsampled_width, input_data,
/*
 * Capability probe for h2v2 merged upsampling.  After the configuration tests
 * (8-bit samples, 4-byte JDIMENSION) it tests AVX2 and SSE2, each together
 * with the alignment of the matching jconst_merged_upsample_* table, and then
 * MMX.  The guarded statements are not shown in this excerpt.
 */
576 jsimd_can_h2v2_merged_upsample(void)
580 /* The code is optimised for these values only */
581 if (BITS_IN_JSAMPLE != 8)
583 if (sizeof(JDIMENSION) != 4)
586 if ((simd_support & JSIMD_AVX2) &&
587 IS_ALIGNED_AVX(jconst_merged_upsample_avx2))
589 if ((simd_support & JSIMD_SSE2) &&
590 IS_ALIGNED_SSE(jconst_merged_upsample_sse2))
592 if (simd_support & JSIMD_MMX)
/*
 * Capability probe for h2v1 merged upsampling.  It tests the same
 * jconst_merged_upsample_* tables as the h2v2 probe: configuration tests
 * first, then AVX2 and SSE2 with aligned constants, then MMX.  The guarded
 * statements are not shown in this excerpt.
 */
599 jsimd_can_h2v1_merged_upsample(void)
603 /* The code is optimised for these values only */
604 if (BITS_IN_JSAMPLE != 8)
606 if (sizeof(JDIMENSION) != 4)
609 if ((simd_support & JSIMD_AVX2) &&
610 IS_ALIGNED_AVX(jconst_merged_upsample_avx2))
612 if ((simd_support & JSIMD_SSE2) &&
613 IS_ALIGNED_SSE(jconst_merged_upsample_sse2))
615 if (simd_support & JSIMD_MMX)
/*
 * Dispatcher for h2v2 merged upsampling.  A switch on cinfo->out_color_space
 * selects one avx2/sse2/mmx routine triple (case labels not shown in this
 * excerpt); the routine for the best instruction set flagged in simd_support
 * is called with cinfo->output_width, input_buf, in_row_group_ctr and
 * output_buf.
 *
 * NOTE(review): the AVX2/SSE2 branches do not re-test the constant-table
 * alignment that jsimd_can_h2v2_merged_upsample() requires.
 */
622 jsimd_h2v2_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
623 JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)
625 void (*avx2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
626 void (*sse2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
627 void (*mmxfct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
629 switch (cinfo->out_color_space) {
631 avx2fct = jsimd_h2v2_extrgb_merged_upsample_avx2;
632 sse2fct = jsimd_h2v2_extrgb_merged_upsample_sse2;
633 mmxfct = jsimd_h2v2_extrgb_merged_upsample_mmx;
637 avx2fct = jsimd_h2v2_extrgbx_merged_upsample_avx2;
638 sse2fct = jsimd_h2v2_extrgbx_merged_upsample_sse2;
639 mmxfct = jsimd_h2v2_extrgbx_merged_upsample_mmx;
642 avx2fct = jsimd_h2v2_extbgr_merged_upsample_avx2;
643 sse2fct = jsimd_h2v2_extbgr_merged_upsample_sse2;
644 mmxfct = jsimd_h2v2_extbgr_merged_upsample_mmx;
648 avx2fct = jsimd_h2v2_extbgrx_merged_upsample_avx2;
649 sse2fct = jsimd_h2v2_extbgrx_merged_upsample_sse2;
650 mmxfct = jsimd_h2v2_extbgrx_merged_upsample_mmx;
654 avx2fct = jsimd_h2v2_extxbgr_merged_upsample_avx2;
655 sse2fct = jsimd_h2v2_extxbgr_merged_upsample_sse2;
656 mmxfct = jsimd_h2v2_extxbgr_merged_upsample_mmx;
660 avx2fct = jsimd_h2v2_extxrgb_merged_upsample_avx2;
661 sse2fct = jsimd_h2v2_extxrgb_merged_upsample_sse2;
662 mmxfct = jsimd_h2v2_extxrgb_merged_upsample_mmx;
665 avx2fct = jsimd_h2v2_merged_upsample_avx2;
666 sse2fct = jsimd_h2v2_merged_upsample_sse2;
667 mmxfct = jsimd_h2v2_merged_upsample_mmx;
/* Prefer AVX2, then SSE2; the MMX call is the remaining alternative. */
671 if (simd_support & JSIMD_AVX2)
672 avx2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
673 else if (simd_support & JSIMD_SSE2)
674 sse2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
676 mmxfct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
/*
 * Dispatcher for h2v1 merged upsampling.  A switch on cinfo->out_color_space
 * selects one avx2/sse2/mmx routine triple (case labels not shown in this
 * excerpt); the routine for the best instruction set flagged in simd_support
 * is called with cinfo->output_width, input_buf, in_row_group_ctr and
 * output_buf.
 *
 * NOTE(review): the AVX2/SSE2 branches do not re-test the constant-table
 * alignment that jsimd_can_h2v1_merged_upsample() requires.
 */
680 jsimd_h2v1_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
681 JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)
683 void (*avx2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
684 void (*sse2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
685 void (*mmxfct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
687 switch (cinfo->out_color_space) {
689 avx2fct = jsimd_h2v1_extrgb_merged_upsample_avx2;
690 sse2fct = jsimd_h2v1_extrgb_merged_upsample_sse2;
691 mmxfct = jsimd_h2v1_extrgb_merged_upsample_mmx;
695 avx2fct = jsimd_h2v1_extrgbx_merged_upsample_avx2;
696 sse2fct = jsimd_h2v1_extrgbx_merged_upsample_sse2;
697 mmxfct = jsimd_h2v1_extrgbx_merged_upsample_mmx;
700 avx2fct = jsimd_h2v1_extbgr_merged_upsample_avx2;
701 sse2fct = jsimd_h2v1_extbgr_merged_upsample_sse2;
702 mmxfct = jsimd_h2v1_extbgr_merged_upsample_mmx;
706 avx2fct = jsimd_h2v1_extbgrx_merged_upsample_avx2;
707 sse2fct = jsimd_h2v1_extbgrx_merged_upsample_sse2;
708 mmxfct = jsimd_h2v1_extbgrx_merged_upsample_mmx;
712 avx2fct = jsimd_h2v1_extxbgr_merged_upsample_avx2;
713 sse2fct = jsimd_h2v1_extxbgr_merged_upsample_sse2;
714 mmxfct = jsimd_h2v1_extxbgr_merged_upsample_mmx;
718 avx2fct = jsimd_h2v1_extxrgb_merged_upsample_avx2;
719 sse2fct = jsimd_h2v1_extxrgb_merged_upsample_sse2;
720 mmxfct = jsimd_h2v1_extxrgb_merged_upsample_mmx;
723 avx2fct = jsimd_h2v1_merged_upsample_avx2;
724 sse2fct = jsimd_h2v1_merged_upsample_sse2;
725 mmxfct = jsimd_h2v1_merged_upsample_mmx;
/* Prefer AVX2, then SSE2; the MMX call is the remaining alternative. */
729 if (simd_support & JSIMD_AVX2)
730 avx2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
731 else if (simd_support & JSIMD_SSE2)
732 sse2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
734 mmxfct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
/*
 * Capability probe for integer sample conversion.  Tests the build
 * configuration (8-bit samples, 4-byte JDIMENSION, 2-byte DCTELEM) and then
 * the AVX2, SSE2 and MMX bits of simd_support in that order.  The guarded
 * statements are not shown in this excerpt.
 */
738 jsimd_can_convsamp(void)
742 /* The code is optimised for these values only */
745 if (BITS_IN_JSAMPLE != 8)
747 if (sizeof(JDIMENSION) != 4)
749 if (sizeof(DCTELEM) != 2)
752 if (simd_support & JSIMD_AVX2)
754 if (simd_support & JSIMD_SSE2)
756 if (simd_support & JSIMD_MMX)
/*
 * Capability probe for floating-point sample conversion.  Tests the build
 * configuration (8-bit samples, 4-byte JDIMENSION, 4-byte FAST_FLOAT) and then
 * the SSE2, SSE and 3DNow! bits of simd_support in that order.  The guarded
 * statements are not shown in this excerpt.
 */
763 jsimd_can_convsamp_float(void)
767 /* The code is optimised for these values only */
770 if (BITS_IN_JSAMPLE != 8)
772 if (sizeof(JDIMENSION) != 4)
774 if (sizeof(FAST_FLOAT) != 4)
777 if (simd_support & JSIMD_SSE2)
779 if (simd_support & JSIMD_SSE)
781 if (simd_support & JSIMD_3DNOW)
/*
 * Dispatcher for integer sample conversion.  Forwards sample_data, start_col
 * and workspace to the AVX2, SSE2 or MMX routine, preferring AVX2 and then
 * SSE2.  The rest of the parameter list is not shown in this excerpt.
 */
788 jsimd_convsamp(JSAMPARRAY sample_data, JDIMENSION start_col,
791 if (simd_support & JSIMD_AVX2)
792 jsimd_convsamp_avx2(sample_data, start_col, workspace);
793 else if (simd_support & JSIMD_SSE2)
794 jsimd_convsamp_sse2(sample_data, start_col, workspace);
796 jsimd_convsamp_mmx(sample_data, start_col, workspace);
/*
 * Dispatcher for floating-point sample conversion.  Forwards sample_data,
 * start_col and workspace to the SSE2, SSE or 3DNow! routine, preferring SSE2
 * and then SSE.
 */
800 jsimd_convsamp_float(JSAMPARRAY sample_data, JDIMENSION start_col,
801 FAST_FLOAT *workspace)
803 if (simd_support & JSIMD_SSE2)
804 jsimd_convsamp_float_sse2(sample_data, start_col, workspace);
805 else if (simd_support & JSIMD_SSE)
806 jsimd_convsamp_float_sse(sample_data, start_col, workspace);
808 jsimd_convsamp_float_3dnow(sample_data, start_col, workspace);
/*
 * Capability probe for the islow forward DCT.  Requires a 2-byte DCTELEM, then
 * tests AVX2 and SSE2 together with the alignment of jconst_fdct_islow_avx2 /
 * jconst_fdct_islow_sse2, and finally MMX.  The guarded statements are not
 * shown in this excerpt.
 */
812 jsimd_can_fdct_islow(void)
816 /* The code is optimised for these values only */
819 if (sizeof(DCTELEM) != 2)
822 if ((simd_support & JSIMD_AVX2) && IS_ALIGNED_AVX(jconst_fdct_islow_avx2))
824 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_islow_sse2))
826 if (simd_support & JSIMD_MMX)
/*
 * Capability probe for the ifast forward DCT.  Requires a 2-byte DCTELEM, then
 * tests SSE2 together with the alignment of jconst_fdct_ifast_sse2, and
 * finally MMX.  The guarded statements are not shown in this excerpt.
 */
833 jsimd_can_fdct_ifast(void)
837 /* The code is optimised for these values only */
840 if (sizeof(DCTELEM) != 2)
843 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_ifast_sse2))
845 if (simd_support & JSIMD_MMX)
/*
 * Capability probe for the floating-point forward DCT.  Requires a 4-byte
 * FAST_FLOAT, then tests SSE together with the alignment of
 * jconst_fdct_float_sse, and finally 3DNow!.  The guarded statements are not
 * shown in this excerpt.
 */
852 jsimd_can_fdct_float(void)
856 /* The code is optimised for these values only */
859 if (sizeof(FAST_FLOAT) != 4)
862 if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_fdct_float_sse))
864 if (simd_support & JSIMD_3DNOW)
871 jsimd_fdct_islow(DCTELEM *data)
873 if (simd_support & JSIMD_AVX2)
874 jsimd_fdct_islow_avx2(data);
875 else if (simd_support & JSIMD_SSE2)
876 jsimd_fdct_islow_sse2(data);
878 jsimd_fdct_islow_mmx(data);
882 jsimd_fdct_ifast(DCTELEM *data)
884 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_islow_sse2))
885 jsimd_fdct_ifast_sse2(data);
887 jsimd_fdct_ifast_mmx(data);
/*
 * Dispatcher for the floating-point forward DCT.  Uses the SSE routine when
 * SSE is flagged and jconst_fdct_float_sse is 16-byte aligned; otherwise uses
 * the 3DNow! routine when that bit is set.  No call is made if neither
 * condition holds.
 */
891 jsimd_fdct_float(FAST_FLOAT *data)
893 if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_fdct_float_sse))
894 jsimd_fdct_float_sse(data);
895 else if (simd_support & JSIMD_3DNOW)
896 jsimd_fdct_float_3dnow(data);
/*
 * Capability probe for integer quantization.  Requires 2-byte JCOEF and
 * DCTELEM, then tests the AVX2, SSE2 and MMX bits of simd_support in that
 * order.  The guarded statements are not shown in this excerpt.
 */
900 jsimd_can_quantize(void)
904 /* The code is optimised for these values only */
907 if (sizeof(JCOEF) != 2)
909 if (sizeof(DCTELEM) != 2)
912 if (simd_support & JSIMD_AVX2)
914 if (simd_support & JSIMD_SSE2)
916 if (simd_support & JSIMD_MMX)
/*
 * Capability probe for floating-point quantization.  Requires a 2-byte JCOEF
 * and a 4-byte FAST_FLOAT, then tests the SSE2, SSE and 3DNow! bits of
 * simd_support in that order.  The guarded statements are not shown in this
 * excerpt.
 */
923 jsimd_can_quantize_float(void)
927 /* The code is optimised for these values only */
930 if (sizeof(JCOEF) != 2)
932 if (sizeof(FAST_FLOAT) != 4)
935 if (simd_support & JSIMD_SSE2)
937 if (simd_support & JSIMD_SSE)
939 if (simd_support & JSIMD_3DNOW)
/*
 * Dispatcher for integer quantization.  Forwards coef_block, divisors and
 * workspace to the AVX2, SSE2 or MMX routine, preferring AVX2 and then SSE2.
 */
946 jsimd_quantize(JCOEFPTR coef_block, DCTELEM *divisors, DCTELEM *workspace)
948 if (simd_support & JSIMD_AVX2)
949 jsimd_quantize_avx2(coef_block, divisors, workspace);
950 else if (simd_support & JSIMD_SSE2)
951 jsimd_quantize_sse2(coef_block, divisors, workspace);
953 jsimd_quantize_mmx(coef_block, divisors, workspace);
/*
 * Dispatcher for floating-point quantization.  Forwards coef_block, divisors
 * and workspace to the SSE2, SSE or 3DNow! routine, preferring SSE2 and then
 * SSE.
 */
957 jsimd_quantize_float(JCOEFPTR coef_block, FAST_FLOAT *divisors,
958 FAST_FLOAT *workspace)
960 if (simd_support & JSIMD_SSE2)
961 jsimd_quantize_float_sse2(coef_block, divisors, workspace);
962 else if (simd_support & JSIMD_SSE)
963 jsimd_quantize_float_sse(coef_block, divisors, workspace);
965 jsimd_quantize_float_3dnow(coef_block, divisors, workspace);
/*
 * Capability probe for the 2x2 inverse DCT.  Tests the build configuration
 * (2-byte JCOEF, 8-bit samples, 4-byte JDIMENSION, 2-byte ISLOW_MULT_TYPE),
 * then SSE2 together with the alignment of jconst_idct_red_sse2, then MMX.
 * The guarded statements are not shown in this excerpt.
 */
969 jsimd_can_idct_2x2(void)
973 /* The code is optimised for these values only */
976 if (sizeof(JCOEF) != 2)
978 if (BITS_IN_JSAMPLE != 8)
980 if (sizeof(JDIMENSION) != 4)
982 if (sizeof(ISLOW_MULT_TYPE) != 2)
985 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
987 if (simd_support & JSIMD_MMX)
/*
 * Capability probe for the 4x4 inverse DCT.  Tests the build configuration
 * (2-byte JCOEF, 8-bit samples, 4-byte JDIMENSION, 2-byte ISLOW_MULT_TYPE),
 * then SSE2 together with the alignment of jconst_idct_red_sse2 (the same
 * table the 2x2 probe tests), then MMX.  The guarded statements are not shown
 * in this excerpt.
 */
994 jsimd_can_idct_4x4(void)
998 /* The code is optimised for these values only */
1001 if (sizeof(JCOEF) != 2)
1003 if (BITS_IN_JSAMPLE != 8)
1005 if (sizeof(JDIMENSION) != 4)
1007 if (sizeof(ISLOW_MULT_TYPE) != 2)
1010 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
1012 if (simd_support & JSIMD_MMX)
/*
 * Dispatcher for the 2x2 inverse DCT.  Uses the SSE2 routine when SSE2 is
 * flagged and jconst_idct_red_sse2 is 16-byte aligned; the MMX call is the
 * other alternative.  Both receive compptr->dct_table, coef_block and
 * output_buf; the last argument line of the SSE2 call is not shown in this
 * excerpt.  cinfo is not used by the visible statements.
 */
1019 jsimd_idct_2x2(j_decompress_ptr cinfo, jpeg_component_info *compptr,
1020 JCOEFPTR coef_block, JSAMPARRAY output_buf,
1021 JDIMENSION output_col)
1023 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
1024 jsimd_idct_2x2_sse2(compptr->dct_table, coef_block, output_buf,
1027 jsimd_idct_2x2_mmx(compptr->dct_table, coef_block, output_buf, output_col);
/*
 * Dispatcher for the 4x4 inverse DCT.  Uses the SSE2 routine when SSE2 is
 * flagged and jconst_idct_red_sse2 is 16-byte aligned; the MMX call is the
 * other alternative.  Both receive compptr->dct_table, coef_block and
 * output_buf; the last argument line of the SSE2 call is not shown in this
 * excerpt.  cinfo is not used by the visible statements.
 */
1031 jsimd_idct_4x4(j_decompress_ptr cinfo, jpeg_component_info *compptr,
1032 JCOEFPTR coef_block, JSAMPARRAY output_buf,
1033 JDIMENSION output_col)
1035 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
1036 jsimd_idct_4x4_sse2(compptr->dct_table, coef_block, output_buf,
1039 jsimd_idct_4x4_mmx(compptr->dct_table, coef_block, output_buf, output_col);
/*
 * Capability probe for the islow inverse DCT.  Tests the build configuration
 * (2-byte JCOEF, 8-bit samples, 4-byte JDIMENSION, 2-byte ISLOW_MULT_TYPE),
 * then AVX2 and SSE2 together with the alignment of their jconst_idct_islow_*
 * tables, then MMX.  The guarded statements are not shown in this excerpt.
 */
1043 jsimd_can_idct_islow(void)
1047 /* The code is optimised for these values only */
1050 if (sizeof(JCOEF) != 2)
1052 if (BITS_IN_JSAMPLE != 8)
1054 if (sizeof(JDIMENSION) != 4)
1056 if (sizeof(ISLOW_MULT_TYPE) != 2)
1059 if ((simd_support & JSIMD_AVX2) && IS_ALIGNED_AVX(jconst_idct_islow_avx2))
1061 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_islow_sse2))
1063 if (simd_support & JSIMD_MMX)
/*
 * Capability probe for the ifast inverse DCT.  Tests the build configuration
 * (2-byte JCOEF, 8-bit samples, 4-byte JDIMENSION, 2-byte IFAST_MULT_TYPE,
 * IFAST_SCALE_BITS equal to 2), then SSE2 together with the alignment of
 * jconst_idct_ifast_sse2, then MMX.  The guarded statements are not shown in
 * this excerpt.
 */
1070 jsimd_can_idct_ifast(void)
1074 /* The code is optimised for these values only */
1077 if (sizeof(JCOEF) != 2)
1079 if (BITS_IN_JSAMPLE != 8)
1081 if (sizeof(JDIMENSION) != 4)
1083 if (sizeof(IFAST_MULT_TYPE) != 2)
1085 if (IFAST_SCALE_BITS != 2)
1088 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_ifast_sse2))
1090 if (simd_support & JSIMD_MMX)
/*
 * Capability probe for the floating-point inverse DCT.  Tests the build
 * configuration (2-byte JCOEF, 8-bit samples, 4-byte JDIMENSION, 4-byte
 * FAST_FLOAT and FLOAT_MULT_TYPE), then SSE2 and SSE together with the
 * alignment of their jconst_idct_float_* tables, then 3DNow!.  The guarded
 * statements are not shown in this excerpt.
 */
1097 jsimd_can_idct_float(void)
1103 if (sizeof(JCOEF) != 2)
1105 if (BITS_IN_JSAMPLE != 8)
1107 if (sizeof(JDIMENSION) != 4)
1109 if (sizeof(FAST_FLOAT) != 4)
1111 if (sizeof(FLOAT_MULT_TYPE) != 4)
1114 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_float_sse2))
1116 if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_idct_float_sse))
1118 if (simd_support & JSIMD_3DNOW)
/*
 * Dispatcher for the islow inverse DCT.  Passes compptr->dct_table,
 * coef_block and output_buf to the AVX2, SSE2 or MMX routine, preferring AVX2
 * and then SSE2; the final argument line of each call is not shown in this
 * excerpt.  cinfo is not used by the visible statements.
 *
 * NOTE(review): the AVX2/SSE2 branches do not re-test the constant-table
 * alignment that jsimd_can_idct_islow() requires.
 */
1125 jsimd_idct_islow(j_decompress_ptr cinfo, jpeg_component_info *compptr,
1126 JCOEFPTR coef_block, JSAMPARRAY output_buf,
1127 JDIMENSION output_col)
1129 if (simd_support & JSIMD_AVX2)
1130 jsimd_idct_islow_avx2(compptr->dct_table, coef_block, output_buf,
1132 else if (simd_support & JSIMD_SSE2)
1133 jsimd_idct_islow_sse2(compptr->dct_table, coef_block, output_buf,
1136 jsimd_idct_islow_mmx(compptr->dct_table, coef_block, output_buf,
/*
 * Dispatcher for the ifast inverse DCT.  Uses the SSE2 routine when SSE2 is
 * flagged and jconst_idct_ifast_sse2 is 16-byte aligned (the same condition
 * jsimd_can_idct_ifast() tests); the MMX call is the other alternative.  The
 * final argument line of each call is not shown in this excerpt.
 */
1141 jsimd_idct_ifast(j_decompress_ptr cinfo, jpeg_component_info *compptr,
1142 JCOEFPTR coef_block, JSAMPARRAY output_buf,
1143 JDIMENSION output_col)
1145 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_ifast_sse2))
1146 jsimd_idct_ifast_sse2(compptr->dct_table, coef_block, output_buf,
1149 jsimd_idct_ifast_mmx(compptr->dct_table, coef_block, output_buf,
/*
 * Dispatcher for the floating-point inverse DCT.  Tries SSE2 and then SSE,
 * each only when its jconst_idct_float_* table is 16-byte aligned; the 3DNow!
 * call is the remaining alternative.  The final argument line of each call is
 * not shown in this excerpt.
 */
1154 jsimd_idct_float(j_decompress_ptr cinfo, jpeg_component_info *compptr,
1155 JCOEFPTR coef_block, JSAMPARRAY output_buf,
1156 JDIMENSION output_col)
1158 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_float_sse2))
1159 jsimd_idct_float_sse2(compptr->dct_table, coef_block, output_buf,
1161 else if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_idct_float_sse))
1162 jsimd_idct_float_sse(compptr->dct_table, coef_block, output_buf,
1165 jsimd_idct_float_3dnow(compptr->dct_table, coef_block, output_buf,
/*
 * Capability probe for SIMD Huffman encoding.  Requires a 2-byte JCOEF; the
 * SSE2 test additionally needs simd_huffman to be non-zero and
 * jconst_huff_encode_one_block to be 16-byte aligned.  The guarded statements
 * are not shown in this excerpt.
 */
1170 jsimd_can_huff_encode_one_block(void)
1176 if (sizeof(JCOEF) != 2)
1179 if ((simd_support & JSIMD_SSE2) && simd_huffman &&
1180 IS_ALIGNED_SSE(jconst_huff_encode_one_block))
/*
 * Thin wrapper that returns the result of jsimd_huff_encode_one_block_sse2().
 * No capability test is made here.  The tail of the argument list is not shown
 * in this excerpt.
 */
1187 jsimd_huff_encode_one_block(void *state, JOCTET *buffer, JCOEFPTR block,
1188 int last_dc_val, c_derived_tbl *dctbl,
1189 c_derived_tbl *actbl)
1191 return jsimd_huff_encode_one_block_sse2(state, buffer, block, last_dc_val,
/*
 * Capability probe for the SIMD encode_mcu_AC_first_prepare routine.
 * Requires a 2-byte JCOEF and SIZEOF_SIZE_T equal to 4, then tests the SSE2
 * bit of simd_support.  The guarded statements are not shown in this excerpt.
 */
1196 jsimd_can_encode_mcu_AC_first_prepare(void)
1202 if (sizeof(JCOEF) != 2)
1204 if (SIZEOF_SIZE_T != 4)
1206 if (simd_support & JSIMD_SSE2)
/*
 * Thin wrapper that forwards every argument unchanged to
 * jsimd_encode_mcu_AC_first_prepare_sse2().  No capability test is made here.
 */
1213 jsimd_encode_mcu_AC_first_prepare(const JCOEF *block,
1214 const int *jpeg_natural_order_start, int Sl,
1215 int Al, JCOEF *values, size_t *zerobits)
1217 jsimd_encode_mcu_AC_first_prepare_sse2(block, jpeg_natural_order_start,
1218 Sl, Al, values, zerobits);
/*
 * Capability probe for the SIMD encode_mcu_AC_refine_prepare routine.
 * Requires a 2-byte JCOEF and SIZEOF_SIZE_T equal to 4, then tests the SSE2
 * bit of simd_support.  The guarded statements are not shown in this excerpt.
 */
1222 jsimd_can_encode_mcu_AC_refine_prepare(void)
1228 if (sizeof(JCOEF) != 2)
1230 if (SIZEOF_SIZE_T != 4)
1232 if (simd_support & JSIMD_SSE2)
/*
 * Thin wrapper that forwards every argument unchanged to
 * jsimd_encode_mcu_AC_refine_prepare_sse2() and returns its result.  No
 * capability test is made here.
 */
1239 jsimd_encode_mcu_AC_refine_prepare(const JCOEF *block,
1240 const int *jpeg_natural_order_start, int Sl,
1241 int Al, JCOEF *absvalues, size_t *bits)
1243 return jsimd_encode_mcu_AC_refine_prepare_sse2(block,
1244 jpeg_natural_order_start,
1245 Sl, Al, absvalues, bits);