4 * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
5 * Copyright (C) 2009-2011, 2013-2014, 2016, 2018, D. R. Commander.
6 * Copyright (C) 2015-2016, 2018, Matthieu Darbois.
8 * Based on the x86 SIMD extension for IJG JPEG library,
9 * Copyright (C) 1999-2006, MIYASAKA Masaru.
10 * For conditions of distribution and use, see copyright notice in jsimdext.inc
12 * This file contains the interface between the "normal" portions
13 * of the library and the SIMD implementations when running on a
14 * 32-bit x86 architecture.
17 #define JPEG_INTERNALS
18 #include "../../jinclude.h"
19 #include "../../jpeglib.h"
20 #include "../../jsimd.h"
21 #include "../../jdct.h"
22 #include "../../jsimddct.h"
24 #include "jconfigint.h"
27 * In the PIC cases, we have no guarantee that constants will keep
28 * their alignment. This macro allows us to verify it at runtime.
/*
 * IS_ALIGNED(ptr, order) is non-zero when the address `ptr` is a multiple
 * of 2^order bytes.  Both parameters are parenthesized so that arbitrary
 * expressions can be passed, and the address is converted through size_t
 * rather than `unsigned` so that no address bits are discarded on targets
 * where a pointer is wider than an int.
 */
#define IS_ALIGNED(ptr, order) \
  ((((size_t)(ptr)) & ((((size_t)1) << (order)) - 1)) == 0)

#define IS_ALIGNED_SSE(ptr)  (IS_ALIGNED((ptr), 4)) /* 16 byte alignment */
#define IS_ALIGNED_AVX(ptr)  (IS_ALIGNED((ptr), 5)) /* 32 byte alignment */
/*
 * Cached bitmask of JSIMD_* capabilities.  The all-ones initial value is
 * the sentinel that the probe code compares against ~0U before it
 * stores the result of jpeg_simd_cpu_support().
 */
35 static unsigned int simd_support = (unsigned int)(~0);
/*
 * Gate for the SIMD Huffman encoder; tested alongside JSIMD_SSE2 in
 * jsimd_can_huff_encode_one_block().
 */
36 static unsigned int simd_huffman = 1;
39 * Check what SIMD accelerations are supported.
41 * FIXME: This code is racy under a multi-threaded environment.
/*
 * Body of the SIMD capability probe.
 * NOTE(review): the definition line of this routine, the declaration of
 * `env` and several guarded statements are not visible in this listing.
 *
 * simd_support is compared with the ~0U sentinel (presumably to skip
 * the work when the probe has already run -- confirm), then filled from
 * jpeg_simd_cpu_support() and narrowed by the JSIMD_FORCE* environment
 * variables.  Each variable is honoured only when it is set to exactly "1".
 */
50 if (simd_support != ~0U)
53 simd_support = jpeg_simd_cpu_support();
56 /* Force different settings through environment variables */
/*
 * The masks below are applied with &=, so they can only remove
 * capabilities reported by the CPU probe, never add new ones.
 */
57 env = getenv("JSIMD_FORCEMMX");
58 if ((env != NULL) && (strcmp(env, "1") == 0))
59 simd_support &= JSIMD_MMX;
60 env = getenv("JSIMD_FORCE3DNOW");
61 if ((env != NULL) && (strcmp(env, "1") == 0))
62 simd_support &= JSIMD_3DNOW | JSIMD_MMX;
63 env = getenv("JSIMD_FORCESSE");
64 if ((env != NULL) && (strcmp(env, "1") == 0))
65 simd_support &= JSIMD_SSE | JSIMD_MMX;
66 env = getenv("JSIMD_FORCESSE2");
67 if ((env != NULL) && (strcmp(env, "1") == 0))
68 simd_support &= JSIMD_SSE2;
69 env = getenv("JSIMD_FORCEAVX2");
70 if ((env != NULL) && (strcmp(env, "1") == 0))
71 simd_support &= JSIMD_AVX2;
/*
 * NOTE(review): the statements guarded by the JSIMD_FORCENONE and
 * JSIMD_NOHUFFENC tests are not visible in this listing.
 */
72 env = getenv("JSIMD_FORCENONE");
73 if ((env != NULL) && (strcmp(env, "1") == 0))
75 env = getenv("JSIMD_NOHUFFENC");
76 if ((env != NULL) && (strcmp(env, "1") == 0))
/*
 * Capability query for the SIMD RGB-to-YCC colour converters.
 * Tests the build-time assumptions (8-bit samples, 4-byte JDIMENSION,
 * RGB_PIXELSIZE of 3 or 4), then probes AVX2, SSE2 and MMX in that order.
 * The AVX2 and SSE2 probes also require their constant tables to be
 * 32- and 16-byte aligned respectively.
 * NOTE(review): the statement executed by each test is not visible in
 * this listing (presumably a 0/1 return -- confirm against the full file).
 */
82 jsimd_can_rgb_ycc(void)
86 /* The code is optimised for these values only */
87 if (BITS_IN_JSAMPLE != 8)
89 if (sizeof(JDIMENSION) != 4)
91 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
94 if ((simd_support & JSIMD_AVX2) &&
95 IS_ALIGNED_AVX(jconst_rgb_ycc_convert_avx2))
97 if ((simd_support & JSIMD_SSE2) &&
98 IS_ALIGNED_SSE(jconst_rgb_ycc_convert_sse2))
100 if (simd_support & JSIMD_MMX)
/*
 * Capability query for the SIMD RGB-to-grayscale converters.
 * Tests 8-bit samples, 4-byte JDIMENSION and RGB_PIXELSIZE of 3 or 4,
 * then probes AVX2 (32-byte aligned constants), SSE2 (16-byte aligned
 * constants) and MMX in that order.
 * NOTE(review): the statement executed by each test is not visible in
 * this listing.
 */
107 jsimd_can_rgb_gray(void)
111 /* The code is optimised for these values only */
112 if (BITS_IN_JSAMPLE != 8)
114 if (sizeof(JDIMENSION) != 4)
116 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
119 if ((simd_support & JSIMD_AVX2) &&
120 IS_ALIGNED_AVX(jconst_rgb_gray_convert_avx2))
122 if ((simd_support & JSIMD_SSE2) &&
123 IS_ALIGNED_SSE(jconst_rgb_gray_convert_sse2))
125 if (simd_support & JSIMD_MMX)
/*
 * Capability query for the SIMD YCC-to-RGB colour converters.
 * Tests 8-bit samples, 4-byte JDIMENSION and RGB_PIXELSIZE of 3 or 4,
 * then probes AVX2 (32-byte aligned constants), SSE2 (16-byte aligned
 * constants) and MMX in that order.
 * NOTE(review): the statement executed by each test is not visible in
 * this listing.
 */
132 jsimd_can_ycc_rgb(void)
136 /* The code is optimised for these values only */
137 if (BITS_IN_JSAMPLE != 8)
139 if (sizeof(JDIMENSION) != 4)
141 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
144 if ((simd_support & JSIMD_AVX2) &&
145 IS_ALIGNED_AVX(jconst_ycc_rgb_convert_avx2))
147 if ((simd_support & JSIMD_SSE2) &&
148 IS_ALIGNED_SSE(jconst_ycc_rgb_convert_sse2))
150 if (simd_support & JSIMD_MMX)
/*
 * Capability query for SIMD YCC-to-RGB565 conversion.
 * NOTE(review): the body is not visible in this listing, and no RGB565
 * SIMD routine is referenced anywhere in the visible code.
 */
157 jsimd_can_ycc_rgb565(void)
/*
 * Convert RGB-family input rows to YCC with a SIMD routine.
 * A triple of function pointers (AVX2, SSE2, MMX) is selected from
 * cinfo->in_color_space; one of them is then called with
 * cinfo->image_width and the caller's buffers.
 * NOTE(review): the case labels, the last parameter line (num_rows is
 * used below but its declaration is not shown) and the line before the
 * MMX call are not visible in this listing.
 * NOTE(review): unlike jsimd_can_rgb_ycc(), the dispatch does not
 * re-test constant-table alignment before taking the AVX2 or SSE2
 * path -- confirm that this is intended.
 */
163 jsimd_rgb_ycc_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
164 JSAMPIMAGE output_buf, JDIMENSION output_row,
167 void (*avx2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
168 void (*sse2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
169 void (*mmxfct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
/* Pick the implementations that match the input pixel layout. */
171 switch (cinfo->in_color_space) {
173 avx2fct = jsimd_extrgb_ycc_convert_avx2;
174 sse2fct = jsimd_extrgb_ycc_convert_sse2;
175 mmxfct = jsimd_extrgb_ycc_convert_mmx;
179 avx2fct = jsimd_extrgbx_ycc_convert_avx2;
180 sse2fct = jsimd_extrgbx_ycc_convert_sse2;
181 mmxfct = jsimd_extrgbx_ycc_convert_mmx;
184 avx2fct = jsimd_extbgr_ycc_convert_avx2;
185 sse2fct = jsimd_extbgr_ycc_convert_sse2;
186 mmxfct = jsimd_extbgr_ycc_convert_mmx;
190 avx2fct = jsimd_extbgrx_ycc_convert_avx2;
191 sse2fct = jsimd_extbgrx_ycc_convert_sse2;
192 mmxfct = jsimd_extbgrx_ycc_convert_mmx;
196 avx2fct = jsimd_extxbgr_ycc_convert_avx2;
197 sse2fct = jsimd_extxbgr_ycc_convert_sse2;
198 mmxfct = jsimd_extxbgr_ycc_convert_mmx;
202 avx2fct = jsimd_extxrgb_ycc_convert_avx2;
203 sse2fct = jsimd_extxrgb_ycc_convert_sse2;
204 mmxfct = jsimd_extxrgb_ycc_convert_mmx;
207 avx2fct = jsimd_rgb_ycc_convert_avx2;
208 sse2fct = jsimd_rgb_ycc_convert_sse2;
209 mmxfct = jsimd_rgb_ycc_convert_mmx;
/* Dispatch: AVX2 is tried first, then SSE2; the MMX call is listed last. */
213 if (simd_support & JSIMD_AVX2)
214 avx2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
215 else if (simd_support & JSIMD_SSE2)
216 sse2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
218 mmxfct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
/*
 * Convert RGB-family input rows to grayscale with a SIMD routine.
 * A triple of function pointers (AVX2, SSE2, MMX) is selected from
 * cinfo->in_color_space; one of them is then called with
 * cinfo->image_width and the caller's buffers.
 * NOTE(review): the case labels, the last parameter line (num_rows is
 * used below but its declaration is not shown) and the line before the
 * MMX call are not visible in this listing.
 * NOTE(review): unlike jsimd_can_rgb_gray(), the dispatch does not
 * re-test constant-table alignment -- confirm that this is intended.
 */
222 jsimd_rgb_gray_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
223 JSAMPIMAGE output_buf, JDIMENSION output_row,
226 void (*avx2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
227 void (*sse2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
228 void (*mmxfct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
/* Pick the implementations that match the input pixel layout. */
230 switch (cinfo->in_color_space) {
232 avx2fct = jsimd_extrgb_gray_convert_avx2;
233 sse2fct = jsimd_extrgb_gray_convert_sse2;
234 mmxfct = jsimd_extrgb_gray_convert_mmx;
238 avx2fct = jsimd_extrgbx_gray_convert_avx2;
239 sse2fct = jsimd_extrgbx_gray_convert_sse2;
240 mmxfct = jsimd_extrgbx_gray_convert_mmx;
243 avx2fct = jsimd_extbgr_gray_convert_avx2;
244 sse2fct = jsimd_extbgr_gray_convert_sse2;
245 mmxfct = jsimd_extbgr_gray_convert_mmx;
249 avx2fct = jsimd_extbgrx_gray_convert_avx2;
250 sse2fct = jsimd_extbgrx_gray_convert_sse2;
251 mmxfct = jsimd_extbgrx_gray_convert_mmx;
255 avx2fct = jsimd_extxbgr_gray_convert_avx2;
256 sse2fct = jsimd_extxbgr_gray_convert_sse2;
257 mmxfct = jsimd_extxbgr_gray_convert_mmx;
261 avx2fct = jsimd_extxrgb_gray_convert_avx2;
262 sse2fct = jsimd_extxrgb_gray_convert_sse2;
263 mmxfct = jsimd_extxrgb_gray_convert_mmx;
266 avx2fct = jsimd_rgb_gray_convert_avx2;
267 sse2fct = jsimd_rgb_gray_convert_sse2;
268 mmxfct = jsimd_rgb_gray_convert_mmx;
/* Dispatch: AVX2 is tried first, then SSE2; the MMX call is listed last. */
272 if (simd_support & JSIMD_AVX2)
273 avx2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
274 else if (simd_support & JSIMD_SSE2)
275 sse2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
277 mmxfct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
/*
 * Convert YCC rows to an RGB-family output layout with a SIMD routine.
 * A triple of function pointers (AVX2, SSE2, MMX) is selected from
 * cinfo->out_color_space; one of them is then called with
 * cinfo->output_width and the caller's buffers.
 * NOTE(review): the case labels, the last parameter line (num_rows is
 * used below but its declaration is not shown) and the line before the
 * MMX call are not visible in this listing.
 * NOTE(review): unlike jsimd_can_ycc_rgb(), the dispatch does not
 * re-test constant-table alignment -- confirm that this is intended.
 */
281 jsimd_ycc_rgb_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
282 JDIMENSION input_row, JSAMPARRAY output_buf,
285 void (*avx2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
286 void (*sse2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
287 void (*mmxfct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
/* Pick the implementations that match the output pixel layout. */
289 switch (cinfo->out_color_space) {
291 avx2fct = jsimd_ycc_extrgb_convert_avx2;
292 sse2fct = jsimd_ycc_extrgb_convert_sse2;
293 mmxfct = jsimd_ycc_extrgb_convert_mmx;
297 avx2fct = jsimd_ycc_extrgbx_convert_avx2;
298 sse2fct = jsimd_ycc_extrgbx_convert_sse2;
299 mmxfct = jsimd_ycc_extrgbx_convert_mmx;
302 avx2fct = jsimd_ycc_extbgr_convert_avx2;
303 sse2fct = jsimd_ycc_extbgr_convert_sse2;
304 mmxfct = jsimd_ycc_extbgr_convert_mmx;
308 avx2fct = jsimd_ycc_extbgrx_convert_avx2;
309 sse2fct = jsimd_ycc_extbgrx_convert_sse2;
310 mmxfct = jsimd_ycc_extbgrx_convert_mmx;
314 avx2fct = jsimd_ycc_extxbgr_convert_avx2;
315 sse2fct = jsimd_ycc_extxbgr_convert_sse2;
316 mmxfct = jsimd_ycc_extxbgr_convert_mmx;
320 avx2fct = jsimd_ycc_extxrgb_convert_avx2;
321 sse2fct = jsimd_ycc_extxrgb_convert_sse2;
322 mmxfct = jsimd_ycc_extxrgb_convert_mmx;
325 avx2fct = jsimd_ycc_rgb_convert_avx2;
326 sse2fct = jsimd_ycc_rgb_convert_sse2;
327 mmxfct = jsimd_ycc_rgb_convert_mmx;
/* Dispatch: AVX2 is tried first, then SSE2; the MMX call is listed last. */
331 if (simd_support & JSIMD_AVX2)
332 avx2fct(cinfo->output_width, input_buf, input_row, output_buf, num_rows);
333 else if (simd_support & JSIMD_SSE2)
334 sse2fct(cinfo->output_width, input_buf, input_row, output_buf, num_rows);
336 mmxfct(cinfo->output_width, input_buf, input_row, output_buf, num_rows);
/*
 * Entry point for SIMD YCC-to-RGB565 conversion.
 * NOTE(review): only part of the parameter list is visible in this
 * listing; the remainder of the signature and the body are not shown.
 */
340 jsimd_ycc_rgb565_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
341 JDIMENSION input_row, JSAMPARRAY output_buf,
/*
 * Capability query for the SIMD h2v2 downsamplers.
 * Tests 8-bit samples and a 4-byte JDIMENSION, then probes AVX2, SSE2
 * and MMX in that order; no constant-table alignment is involved.
 * NOTE(review): the statement executed by each test is not visible in
 * this listing.
 */
347 jsimd_can_h2v2_downsample(void)
351 /* The code is optimised for these values only */
352 if (BITS_IN_JSAMPLE != 8)
354 if (sizeof(JDIMENSION) != 4)
357 if (simd_support & JSIMD_AVX2)
359 if (simd_support & JSIMD_SSE2)
361 if (simd_support & JSIMD_MMX)
/*
 * Capability query for the SIMD h2v1 downsamplers.
 * Tests 8-bit samples and a 4-byte JDIMENSION, then probes AVX2, SSE2
 * and MMX in that order; no constant-table alignment is involved.
 * NOTE(review): the statement executed by each test is not visible in
 * this listing.
 */
368 jsimd_can_h2v1_downsample(void)
372 /* The code is optimised for these values only */
373 if (BITS_IN_JSAMPLE != 8)
375 if (sizeof(JDIMENSION) != 4)
378 if (simd_support & JSIMD_AVX2)
380 if (simd_support & JSIMD_SSE2)
382 if (simd_support & JSIMD_MMX)
/*
 * Run the h2v2 downsampler with a SIMD routine: AVX2 if enabled in
 * simd_support, otherwise SSE2; the MMX call is listed last.
 * Each routine receives cinfo->image_width, cinfo->max_v_samp_factor,
 * compptr->v_samp_factor, compptr->width_in_blocks and input_data.
 * NOTE(review): the final argument line of the AVX2 and SSE2 calls and
 * the line before the MMX call are not visible in this listing; the
 * MMX call ends with output_data.
 */
389 jsimd_h2v2_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
390 JSAMPARRAY input_data, JSAMPARRAY output_data)
392 if (simd_support & JSIMD_AVX2)
393 jsimd_h2v2_downsample_avx2(cinfo->image_width, cinfo->max_v_samp_factor,
394 compptr->v_samp_factor,
395 compptr->width_in_blocks, input_data,
397 else if (simd_support & JSIMD_SSE2)
398 jsimd_h2v2_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor,
399 compptr->v_samp_factor,
400 compptr->width_in_blocks, input_data,
403 jsimd_h2v2_downsample_mmx(cinfo->image_width, cinfo->max_v_samp_factor,
404 compptr->v_samp_factor, compptr->width_in_blocks,
405 input_data, output_data);
/*
 * Run the h2v1 downsampler with a SIMD routine: AVX2 if enabled in
 * simd_support, otherwise SSE2; the MMX call is listed last.
 * Each routine receives cinfo->image_width, cinfo->max_v_samp_factor,
 * compptr->v_samp_factor, compptr->width_in_blocks and input_data.
 * NOTE(review): the final argument line of the AVX2 and SSE2 calls and
 * the line before the MMX call are not visible in this listing; the
 * MMX call ends with output_data.
 */
409 jsimd_h2v1_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
410 JSAMPARRAY input_data, JSAMPARRAY output_data)
412 if (simd_support & JSIMD_AVX2)
413 jsimd_h2v1_downsample_avx2(cinfo->image_width, cinfo->max_v_samp_factor,
414 compptr->v_samp_factor,
415 compptr->width_in_blocks, input_data,
417 else if (simd_support & JSIMD_SSE2)
418 jsimd_h2v1_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor,
419 compptr->v_samp_factor,
420 compptr->width_in_blocks, input_data,
423 jsimd_h2v1_downsample_mmx(cinfo->image_width, cinfo->max_v_samp_factor,
424 compptr->v_samp_factor, compptr->width_in_blocks,
425 input_data, output_data);
/*
 * Capability query for the SIMD h2v2 (plain) upsamplers.
 * Tests 8-bit samples and a 4-byte JDIMENSION, then probes AVX2, SSE2
 * and MMX in that order.
 * NOTE(review): the statement executed by each test is not visible in
 * this listing.
 */
429 jsimd_can_h2v2_upsample(void)
433 /* The code is optimised for these values only */
434 if (BITS_IN_JSAMPLE != 8)
436 if (sizeof(JDIMENSION) != 4)
439 if (simd_support & JSIMD_AVX2)
441 if (simd_support & JSIMD_SSE2)
443 if (simd_support & JSIMD_MMX)
/*
 * Capability query for the SIMD h2v1 (plain) upsamplers.
 * Tests 8-bit samples and a 4-byte JDIMENSION, then probes AVX2, SSE2
 * and MMX in that order.
 * NOTE(review): the statement executed by each test is not visible in
 * this listing.
 */
450 jsimd_can_h2v1_upsample(void)
454 /* The code is optimised for these values only */
455 if (BITS_IN_JSAMPLE != 8)
457 if (sizeof(JDIMENSION) != 4)
460 if (simd_support & JSIMD_AVX2)
462 if (simd_support & JSIMD_SSE2)
464 if (simd_support & JSIMD_MMX)
/*
 * Run the h2v2 upsampler with a SIMD routine: AVX2 if enabled in
 * simd_support, otherwise SSE2; the MMX call is listed last (the line
 * before it is not visible in this listing).
 * Every routine receives cinfo->max_v_samp_factor, cinfo->output_width,
 * input_data and output_data_ptr; compptr is not referenced in the
 * visible lines.
 */
471 jsimd_h2v2_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
472 JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
474 if (simd_support & JSIMD_AVX2)
475 jsimd_h2v2_upsample_avx2(cinfo->max_v_samp_factor, cinfo->output_width,
476 input_data, output_data_ptr);
477 else if (simd_support & JSIMD_SSE2)
478 jsimd_h2v2_upsample_sse2(cinfo->max_v_samp_factor, cinfo->output_width,
479 input_data, output_data_ptr);
481 jsimd_h2v2_upsample_mmx(cinfo->max_v_samp_factor, cinfo->output_width,
482 input_data, output_data_ptr);
/*
 * Run the h2v1 upsampler with a SIMD routine: AVX2 if enabled in
 * simd_support, otherwise SSE2; the MMX call is listed last (the line
 * before it is not visible in this listing).
 * Every routine receives cinfo->max_v_samp_factor, cinfo->output_width,
 * input_data and output_data_ptr; compptr is not referenced in the
 * visible lines.
 */
486 jsimd_h2v1_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
487 JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
489 if (simd_support & JSIMD_AVX2)
490 jsimd_h2v1_upsample_avx2(cinfo->max_v_samp_factor, cinfo->output_width,
491 input_data, output_data_ptr);
492 else if (simd_support & JSIMD_SSE2)
493 jsimd_h2v1_upsample_sse2(cinfo->max_v_samp_factor, cinfo->output_width,
494 input_data, output_data_ptr);
496 jsimd_h2v1_upsample_mmx(cinfo->max_v_samp_factor, cinfo->output_width,
497 input_data, output_data_ptr);
/*
 * Capability query for the SIMD h2v2 fancy upsamplers.
 * Tests 8-bit samples and a 4-byte JDIMENSION, then probes AVX2
 * (jconst_fancy_upsample_avx2 must be 32-byte aligned), SSE2
 * (jconst_fancy_upsample_sse2 must be 16-byte aligned) and MMX.
 * NOTE(review): the statement executed by each test is not visible in
 * this listing.
 */
501 jsimd_can_h2v2_fancy_upsample(void)
505 /* The code is optimised for these values only */
506 if (BITS_IN_JSAMPLE != 8)
508 if (sizeof(JDIMENSION) != 4)
511 if ((simd_support & JSIMD_AVX2) &&
512 IS_ALIGNED_AVX(jconst_fancy_upsample_avx2))
514 if ((simd_support & JSIMD_SSE2) &&
515 IS_ALIGNED_SSE(jconst_fancy_upsample_sse2))
517 if (simd_support & JSIMD_MMX)
/*
 * Capability query for the SIMD h2v1 fancy upsamplers.
 * Tests 8-bit samples and a 4-byte JDIMENSION, then probes AVX2
 * (jconst_fancy_upsample_avx2 must be 32-byte aligned), SSE2
 * (jconst_fancy_upsample_sse2 must be 16-byte aligned) and MMX.
 * These are the same constant tables that the h2v2 query tests.
 * NOTE(review): the statement executed by each test is not visible in
 * this listing.
 */
524 jsimd_can_h2v1_fancy_upsample(void)
528 /* The code is optimised for these values only */
529 if (BITS_IN_JSAMPLE != 8)
531 if (sizeof(JDIMENSION) != 4)
534 if ((simd_support & JSIMD_AVX2) &&
535 IS_ALIGNED_AVX(jconst_fancy_upsample_avx2))
537 if ((simd_support & JSIMD_SSE2) &&
538 IS_ALIGNED_SSE(jconst_fancy_upsample_sse2))
540 if (simd_support & JSIMD_MMX)
/*
 * Run the h2v2 fancy upsampler with a SIMD routine: AVX2 if enabled in
 * simd_support, otherwise SSE2; the MMX call is listed last.
 * Each routine receives cinfo->max_v_samp_factor,
 * compptr->downsampled_width and input_data.
 * NOTE(review): the final argument line of every call and the line
 * before the MMX call are not visible in this listing.
 * NOTE(review): unlike jsimd_can_h2v2_fancy_upsample(), the dispatch
 * does not re-test constant-table alignment -- confirm that is intended.
 */
547 jsimd_h2v2_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
548 JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
550 if (simd_support & JSIMD_AVX2)
551 jsimd_h2v2_fancy_upsample_avx2(cinfo->max_v_samp_factor,
552 compptr->downsampled_width, input_data,
554 else if (simd_support & JSIMD_SSE2)
555 jsimd_h2v2_fancy_upsample_sse2(cinfo->max_v_samp_factor,
556 compptr->downsampled_width, input_data,
559 jsimd_h2v2_fancy_upsample_mmx(cinfo->max_v_samp_factor,
560 compptr->downsampled_width, input_data,
/*
 * Run the h2v1 fancy upsampler with a SIMD routine: AVX2 if enabled in
 * simd_support, otherwise SSE2; the MMX call is listed last.
 * Each routine receives cinfo->max_v_samp_factor,
 * compptr->downsampled_width and input_data.
 * NOTE(review): the final argument line of every call and the line
 * before the MMX call are not visible in this listing.
 * NOTE(review): unlike jsimd_can_h2v1_fancy_upsample(), the dispatch
 * does not re-test constant-table alignment -- confirm that is intended.
 */
565 jsimd_h2v1_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
566 JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
568 if (simd_support & JSIMD_AVX2)
569 jsimd_h2v1_fancy_upsample_avx2(cinfo->max_v_samp_factor,
570 compptr->downsampled_width, input_data,
572 else if (simd_support & JSIMD_SSE2)
573 jsimd_h2v1_fancy_upsample_sse2(cinfo->max_v_samp_factor,
574 compptr->downsampled_width, input_data,
577 jsimd_h2v1_fancy_upsample_mmx(cinfo->max_v_samp_factor,
578 compptr->downsampled_width, input_data,
/*
 * Capability query for the SIMD h2v2 merged upsamplers.
 * Tests 8-bit samples and a 4-byte JDIMENSION, then probes AVX2
 * (jconst_merged_upsample_avx2 must be 32-byte aligned), SSE2
 * (jconst_merged_upsample_sse2 must be 16-byte aligned) and MMX.
 * NOTE(review): the statement executed by each test is not visible in
 * this listing.
 */
583 jsimd_can_h2v2_merged_upsample(void)
587 /* The code is optimised for these values only */
588 if (BITS_IN_JSAMPLE != 8)
590 if (sizeof(JDIMENSION) != 4)
593 if ((simd_support & JSIMD_AVX2) &&
594 IS_ALIGNED_AVX(jconst_merged_upsample_avx2))
596 if ((simd_support & JSIMD_SSE2) &&
597 IS_ALIGNED_SSE(jconst_merged_upsample_sse2))
599 if (simd_support & JSIMD_MMX)
/*
 * Capability query for the SIMD h2v1 merged upsamplers.
 * Tests 8-bit samples and a 4-byte JDIMENSION, then probes AVX2
 * (jconst_merged_upsample_avx2 must be 32-byte aligned), SSE2
 * (jconst_merged_upsample_sse2 must be 16-byte aligned) and MMX.
 * NOTE(review): the statement executed by each test is not visible in
 * this listing.
 */
606 jsimd_can_h2v1_merged_upsample(void)
610 /* The code is optimised for these values only */
611 if (BITS_IN_JSAMPLE != 8)
613 if (sizeof(JDIMENSION) != 4)
616 if ((simd_support & JSIMD_AVX2) &&
617 IS_ALIGNED_AVX(jconst_merged_upsample_avx2))
619 if ((simd_support & JSIMD_SSE2) &&
620 IS_ALIGNED_SSE(jconst_merged_upsample_sse2))
622 if (simd_support & JSIMD_MMX)
/*
 * Run the h2v2 merged upsample/colour-conversion step with a SIMD routine.
 * A triple of function pointers (AVX2, SSE2, MMX) is selected from
 * cinfo->out_color_space; one of them is then called with
 * cinfo->output_width, input_buf, in_row_group_ctr and output_buf.
 * NOTE(review): the case labels and the line before the MMX call are
 * not visible in this listing.
 * NOTE(review): unlike jsimd_can_h2v2_merged_upsample(), the dispatch
 * does not re-test constant-table alignment -- confirm that is intended.
 */
629 jsimd_h2v2_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
630 JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)
632 void (*avx2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
633 void (*sse2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
634 void (*mmxfct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
/* Pick the implementations that match the output pixel layout. */
636 switch (cinfo->out_color_space) {
638 avx2fct = jsimd_h2v2_extrgb_merged_upsample_avx2;
639 sse2fct = jsimd_h2v2_extrgb_merged_upsample_sse2;
640 mmxfct = jsimd_h2v2_extrgb_merged_upsample_mmx;
644 avx2fct = jsimd_h2v2_extrgbx_merged_upsample_avx2;
645 sse2fct = jsimd_h2v2_extrgbx_merged_upsample_sse2;
646 mmxfct = jsimd_h2v2_extrgbx_merged_upsample_mmx;
649 avx2fct = jsimd_h2v2_extbgr_merged_upsample_avx2;
650 sse2fct = jsimd_h2v2_extbgr_merged_upsample_sse2;
651 mmxfct = jsimd_h2v2_extbgr_merged_upsample_mmx;
655 avx2fct = jsimd_h2v2_extbgrx_merged_upsample_avx2;
656 sse2fct = jsimd_h2v2_extbgrx_merged_upsample_sse2;
657 mmxfct = jsimd_h2v2_extbgrx_merged_upsample_mmx;
661 avx2fct = jsimd_h2v2_extxbgr_merged_upsample_avx2;
662 sse2fct = jsimd_h2v2_extxbgr_merged_upsample_sse2;
663 mmxfct = jsimd_h2v2_extxbgr_merged_upsample_mmx;
667 avx2fct = jsimd_h2v2_extxrgb_merged_upsample_avx2;
668 sse2fct = jsimd_h2v2_extxrgb_merged_upsample_sse2;
669 mmxfct = jsimd_h2v2_extxrgb_merged_upsample_mmx;
672 avx2fct = jsimd_h2v2_merged_upsample_avx2;
673 sse2fct = jsimd_h2v2_merged_upsample_sse2;
674 mmxfct = jsimd_h2v2_merged_upsample_mmx;
/* Dispatch: AVX2 is tried first, then SSE2; the MMX call is listed last. */
678 if (simd_support & JSIMD_AVX2)
679 avx2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
680 else if (simd_support & JSIMD_SSE2)
681 sse2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
683 mmxfct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
/*
 * Run the h2v1 merged upsample/colour-conversion step with a SIMD routine.
 * A triple of function pointers (AVX2, SSE2, MMX) is selected from
 * cinfo->out_color_space; one of them is then called with
 * cinfo->output_width, input_buf, in_row_group_ctr and output_buf.
 * NOTE(review): the case labels and the line before the MMX call are
 * not visible in this listing.
 * NOTE(review): unlike jsimd_can_h2v1_merged_upsample(), the dispatch
 * does not re-test constant-table alignment -- confirm that is intended.
 */
687 jsimd_h2v1_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
688 JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)
690 void (*avx2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
691 void (*sse2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
692 void (*mmxfct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
/* Pick the implementations that match the output pixel layout. */
694 switch (cinfo->out_color_space) {
696 avx2fct = jsimd_h2v1_extrgb_merged_upsample_avx2;
697 sse2fct = jsimd_h2v1_extrgb_merged_upsample_sse2;
698 mmxfct = jsimd_h2v1_extrgb_merged_upsample_mmx;
702 avx2fct = jsimd_h2v1_extrgbx_merged_upsample_avx2;
703 sse2fct = jsimd_h2v1_extrgbx_merged_upsample_sse2;
704 mmxfct = jsimd_h2v1_extrgbx_merged_upsample_mmx;
707 avx2fct = jsimd_h2v1_extbgr_merged_upsample_avx2;
708 sse2fct = jsimd_h2v1_extbgr_merged_upsample_sse2;
709 mmxfct = jsimd_h2v1_extbgr_merged_upsample_mmx;
713 avx2fct = jsimd_h2v1_extbgrx_merged_upsample_avx2;
714 sse2fct = jsimd_h2v1_extbgrx_merged_upsample_sse2;
715 mmxfct = jsimd_h2v1_extbgrx_merged_upsample_mmx;
719 avx2fct = jsimd_h2v1_extxbgr_merged_upsample_avx2;
720 sse2fct = jsimd_h2v1_extxbgr_merged_upsample_sse2;
721 mmxfct = jsimd_h2v1_extxbgr_merged_upsample_mmx;
725 avx2fct = jsimd_h2v1_extxrgb_merged_upsample_avx2;
726 sse2fct = jsimd_h2v1_extxrgb_merged_upsample_sse2;
727 mmxfct = jsimd_h2v1_extxrgb_merged_upsample_mmx;
730 avx2fct = jsimd_h2v1_merged_upsample_avx2;
731 sse2fct = jsimd_h2v1_merged_upsample_sse2;
732 mmxfct = jsimd_h2v1_merged_upsample_mmx;
/* Dispatch: AVX2 is tried first, then SSE2; the MMX call is listed last. */
736 if (simd_support & JSIMD_AVX2)
737 avx2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
738 else if (simd_support & JSIMD_SSE2)
739 sse2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
741 mmxfct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
/*
 * Capability query for the SIMD integer sample-conversion routines.
 * Tests 8-bit samples, a 4-byte JDIMENSION and a 2-byte DCTELEM, then
 * probes AVX2, SSE2 and MMX in that order.
 * NOTE(review): the statement executed by each test is not visible in
 * this listing, and the listing may omit further checks.
 */
745 jsimd_can_convsamp(void)
749 /* The code is optimised for these values only */
752 if (BITS_IN_JSAMPLE != 8)
754 if (sizeof(JDIMENSION) != 4)
756 if (sizeof(DCTELEM) != 2)
759 if (simd_support & JSIMD_AVX2)
761 if (simd_support & JSIMD_SSE2)
763 if (simd_support & JSIMD_MMX)
/*
 * Capability query for the SIMD floating-point sample-conversion
 * routines.  Tests 8-bit samples, a 4-byte JDIMENSION and a 4-byte
 * FAST_FLOAT, then probes SSE2, SSE and 3DNow! in that order.
 * NOTE(review): the statement executed by each test is not visible in
 * this listing, and the listing may omit further checks.
 */
770 jsimd_can_convsamp_float(void)
774 /* The code is optimised for these values only */
777 if (BITS_IN_JSAMPLE != 8)
779 if (sizeof(JDIMENSION) != 4)
781 if (sizeof(FAST_FLOAT) != 4)
784 if (simd_support & JSIMD_SSE2)
786 if (simd_support & JSIMD_SSE)
788 if (simd_support & JSIMD_3DNOW)
/*
 * Forward sample_data, start_col and workspace to a SIMD
 * sample-conversion routine: AVX2 if enabled in simd_support,
 * otherwise SSE2; the MMX call is listed last.
 * NOTE(review): the parameter line that declares `workspace` and the
 * line before the MMX call are not visible in this listing.
 */
795 jsimd_convsamp(JSAMPARRAY sample_data, JDIMENSION start_col,
798 if (simd_support & JSIMD_AVX2)
799 jsimd_convsamp_avx2(sample_data, start_col, workspace);
800 else if (simd_support & JSIMD_SSE2)
801 jsimd_convsamp_sse2(sample_data, start_col, workspace);
803 jsimd_convsamp_mmx(sample_data, start_col, workspace);
/*
 * Forward sample_data, start_col and the FAST_FLOAT workspace to a SIMD
 * sample-conversion routine: SSE2 if enabled in simd_support,
 * otherwise SSE; the 3DNow! call is listed last (the line before it is
 * not visible in this listing).
 */
807 jsimd_convsamp_float(JSAMPARRAY sample_data, JDIMENSION start_col,
808 FAST_FLOAT *workspace)
810 if (simd_support & JSIMD_SSE2)
811 jsimd_convsamp_float_sse2(sample_data, start_col, workspace);
812 else if (simd_support & JSIMD_SSE)
813 jsimd_convsamp_float_sse(sample_data, start_col, workspace);
815 jsimd_convsamp_float_3dnow(sample_data, start_col, workspace);
/*
 * Capability query for the SIMD forward DCT, "islow" variant.
 * Tests a 2-byte DCTELEM, then probes AVX2 (jconst_fdct_islow_avx2 must
 * be 32-byte aligned), SSE2 (jconst_fdct_islow_sse2 must be 16-byte
 * aligned) and MMX in that order.
 * NOTE(review): the statement executed by each test is not visible in
 * this listing, and the listing may omit further checks.
 */
819 jsimd_can_fdct_islow(void)
823 /* The code is optimised for these values only */
826 if (sizeof(DCTELEM) != 2)
829 if ((simd_support & JSIMD_AVX2) && IS_ALIGNED_AVX(jconst_fdct_islow_avx2))
831 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_islow_sse2))
833 if (simd_support & JSIMD_MMX)
/*
 * Capability query for the SIMD forward DCT, "ifast" variant.
 * Tests a 2-byte DCTELEM, then probes SSE2 (jconst_fdct_ifast_sse2 must
 * be 16-byte aligned) and MMX; no AVX2 variant is referenced.
 * NOTE(review): the statement executed by each test is not visible in
 * this listing, and the listing may omit further checks.
 */
840 jsimd_can_fdct_ifast(void)
844 /* The code is optimised for these values only */
847 if (sizeof(DCTELEM) != 2)
850 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_ifast_sse2))
852 if (simd_support & JSIMD_MMX)
/*
 * Capability query for the SIMD forward DCT, floating-point variant.
 * Tests a 4-byte FAST_FLOAT, then probes SSE (jconst_fdct_float_sse
 * must be 16-byte aligned) and 3DNow!.
 * NOTE(review): the statement executed by each test is not visible in
 * this listing, and the listing may omit further checks.
 */
859 jsimd_can_fdct_float(void)
863 /* The code is optimised for these values only */
866 if (sizeof(FAST_FLOAT) != 4)
869 if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_fdct_float_sse))
871 if (simd_support & JSIMD_3DNOW)
/*
 * Run the "islow" forward DCT on `data` with a SIMD routine: AVX2 if
 * enabled in simd_support, otherwise SSE2; the MMX call is listed last
 * (the line before it is not visible in this listing).
 * NOTE(review): unlike jsimd_can_fdct_islow(), the dispatch does not
 * re-test constant-table alignment -- confirm that this is intended.
 */
878 jsimd_fdct_islow(DCTELEM *data)
880 if (simd_support & JSIMD_AVX2)
881 jsimd_fdct_islow_avx2(data);
882 else if (simd_support & JSIMD_SSE2)
883 jsimd_fdct_islow_sse2(data);
885 jsimd_fdct_islow_mmx(data);
/*
 * Run the "ifast" forward DCT on `data` with a SIMD routine: SSE2 when
 * it is enabled in simd_support and its constant table is 16-byte
 * aligned; the MMX call is listed last.
 * NOTE(review): the return-type line, the braces and the line before
 * the MMX call are not part of this listing and are left untouched.
 */
889 jsimd_fdct_ifast(DCTELEM *data)
/*
 * The SSE2 path must be gated on the ifast constant table, the same one
 * that jsimd_can_fdct_ifast() verifies.  This test previously named the
 * islow table, so a misaligned ifast table went undetected here.
 */
891 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_ifast_sse2))
892 jsimd_fdct_ifast_sse2(data);
894 jsimd_fdct_ifast_mmx(data);
/*
 * Run the floating-point forward DCT on `data` with a SIMD routine:
 * SSE when it is enabled in simd_support and jconst_fdct_float_sse is
 * 16-byte aligned, otherwise 3DNow! when that is enabled.  Nothing is
 * called in the visible code when neither test matches.
 */
898 jsimd_fdct_float(FAST_FLOAT *data)
900 if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_fdct_float_sse))
901 jsimd_fdct_float_sse(data);
902 else if (simd_support & JSIMD_3DNOW)
903 jsimd_fdct_float_3dnow(data);
/*
 * Capability query for the SIMD integer quantizers.
 * Tests a 2-byte JCOEF and a 2-byte DCTELEM, then probes AVX2, SSE2 and
 * MMX in that order.
 * NOTE(review): the statement executed by each test is not visible in
 * this listing, and the listing may omit further checks.
 */
907 jsimd_can_quantize(void)
911 /* The code is optimised for these values only */
914 if (sizeof(JCOEF) != 2)
916 if (sizeof(DCTELEM) != 2)
919 if (simd_support & JSIMD_AVX2)
921 if (simd_support & JSIMD_SSE2)
923 if (simd_support & JSIMD_MMX)
/*
 * Capability query for the SIMD floating-point quantizers.
 * Tests a 2-byte JCOEF and a 4-byte FAST_FLOAT, then probes SSE2, SSE
 * and 3DNow! in that order.
 * NOTE(review): the statement executed by each test is not visible in
 * this listing, and the listing may omit further checks.
 */
930 jsimd_can_quantize_float(void)
934 /* The code is optimised for these values only */
937 if (sizeof(JCOEF) != 2)
939 if (sizeof(FAST_FLOAT) != 4)
942 if (simd_support & JSIMD_SSE2)
944 if (simd_support & JSIMD_SSE)
946 if (simd_support & JSIMD_3DNOW)
/*
 * Forward coef_block, divisors and workspace to a SIMD quantizer: AVX2
 * if enabled in simd_support, otherwise SSE2; the MMX call is listed
 * last (the line before it is not visible in this listing).
 */
953 jsimd_quantize(JCOEFPTR coef_block, DCTELEM *divisors, DCTELEM *workspace)
955 if (simd_support & JSIMD_AVX2)
956 jsimd_quantize_avx2(coef_block, divisors, workspace);
957 else if (simd_support & JSIMD_SSE2)
958 jsimd_quantize_sse2(coef_block, divisors, workspace);
960 jsimd_quantize_mmx(coef_block, divisors, workspace);
/*
 * Forward coef_block, divisors and workspace to a SIMD floating-point
 * quantizer: SSE2 if enabled in simd_support, otherwise SSE; the
 * 3DNow! call is listed last (the line before it is not visible in
 * this listing).
 */
964 jsimd_quantize_float(JCOEFPTR coef_block, FAST_FLOAT *divisors,
965 FAST_FLOAT *workspace)
967 if (simd_support & JSIMD_SSE2)
968 jsimd_quantize_float_sse2(coef_block, divisors, workspace);
969 else if (simd_support & JSIMD_SSE)
970 jsimd_quantize_float_sse(coef_block, divisors, workspace);
972 jsimd_quantize_float_3dnow(coef_block, divisors, workspace);
/*
 * Capability query for the SIMD reduced-size 2x2 inverse DCT.
 * Tests a 2-byte JCOEF, 8-bit samples, a 4-byte JDIMENSION and a 2-byte
 * ISLOW_MULT_TYPE, then probes SSE2 (jconst_idct_red_sse2 must be
 * 16-byte aligned) and MMX.
 * NOTE(review): the statement executed by each test is not visible in
 * this listing, and the listing may omit further checks.
 */
976 jsimd_can_idct_2x2(void)
980 /* The code is optimised for these values only */
983 if (sizeof(JCOEF) != 2)
985 if (BITS_IN_JSAMPLE != 8)
987 if (sizeof(JDIMENSION) != 4)
989 if (sizeof(ISLOW_MULT_TYPE) != 2)
992 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
994 if (simd_support & JSIMD_MMX)
/*
 * Capability query for the SIMD reduced-size 4x4 inverse DCT.
 * Tests a 2-byte JCOEF, 8-bit samples, a 4-byte JDIMENSION and a 2-byte
 * ISLOW_MULT_TYPE, then probes SSE2 (jconst_idct_red_sse2 must be
 * 16-byte aligned) and MMX.
 * NOTE(review): the statement executed by each test is not visible in
 * this listing, and the listing may omit further checks.
 */
1001 jsimd_can_idct_4x4(void)
1005 /* The code is optimised for these values only */
1008 if (sizeof(JCOEF) != 2)
1010 if (BITS_IN_JSAMPLE != 8)
1012 if (sizeof(JDIMENSION) != 4)
1014 if (sizeof(ISLOW_MULT_TYPE) != 2)
1017 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
1019 if (simd_support & JSIMD_MMX)
/*
 * Run the 2x2 reduced-size inverse DCT with a SIMD routine: SSE2 when
 * it is enabled in simd_support and jconst_idct_red_sse2 is 16-byte
 * aligned; the MMX call is listed last.
 * Both routines receive compptr->dct_table, coef_block and output_buf;
 * the MMX call also shows output_col.  cinfo is not referenced in the
 * visible lines.
 * NOTE(review): the final argument line of the SSE2 call and the line
 * before the MMX call are not visible in this listing.
 */
1026 jsimd_idct_2x2(j_decompress_ptr cinfo, jpeg_component_info *compptr,
1027 JCOEFPTR coef_block, JSAMPARRAY output_buf,
1028 JDIMENSION output_col)
1030 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
1031 jsimd_idct_2x2_sse2(compptr->dct_table, coef_block, output_buf,
1034 jsimd_idct_2x2_mmx(compptr->dct_table, coef_block, output_buf, output_col);
/*
 * Run the 4x4 reduced-size inverse DCT with a SIMD routine: SSE2 when
 * it is enabled in simd_support and jconst_idct_red_sse2 is 16-byte
 * aligned; the MMX call is listed last.
 * Both routines receive compptr->dct_table, coef_block and output_buf;
 * the MMX call also shows output_col.  cinfo is not referenced in the
 * visible lines.
 * NOTE(review): the final argument line of the SSE2 call and the line
 * before the MMX call are not visible in this listing.
 */
1038 jsimd_idct_4x4(j_decompress_ptr cinfo, jpeg_component_info *compptr,
1039 JCOEFPTR coef_block, JSAMPARRAY output_buf,
1040 JDIMENSION output_col)
1042 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
1043 jsimd_idct_4x4_sse2(compptr->dct_table, coef_block, output_buf,
1046 jsimd_idct_4x4_mmx(compptr->dct_table, coef_block, output_buf, output_col);
/*
 * Capability query for the SIMD inverse DCT, "islow" variant.
 * Tests a 2-byte JCOEF, 8-bit samples, a 4-byte JDIMENSION and a 2-byte
 * ISLOW_MULT_TYPE, then probes AVX2 (jconst_idct_islow_avx2 must be
 * 32-byte aligned), SSE2 (jconst_idct_islow_sse2 must be 16-byte
 * aligned) and MMX in that order.
 * NOTE(review): the statement executed by each test is not visible in
 * this listing, and the listing may omit further checks.
 */
1050 jsimd_can_idct_islow(void)
1054 /* The code is optimised for these values only */
1057 if (sizeof(JCOEF) != 2)
1059 if (BITS_IN_JSAMPLE != 8)
1061 if (sizeof(JDIMENSION) != 4)
1063 if (sizeof(ISLOW_MULT_TYPE) != 2)
1066 if ((simd_support & JSIMD_AVX2) && IS_ALIGNED_AVX(jconst_idct_islow_avx2))
1068 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_islow_sse2))
1070 if (simd_support & JSIMD_MMX)
/*
 * Capability query for the SIMD inverse DCT, "ifast" variant.
 * Tests a 2-byte JCOEF, 8-bit samples, a 4-byte JDIMENSION, a 2-byte
 * IFAST_MULT_TYPE and IFAST_SCALE_BITS equal to 2, then probes SSE2
 * (jconst_idct_ifast_sse2 must be 16-byte aligned) and MMX.
 * NOTE(review): the statement executed by each test is not visible in
 * this listing, and the listing may omit further checks.
 */
1077 jsimd_can_idct_ifast(void)
1081 /* The code is optimised for these values only */
1084 if (sizeof(JCOEF) != 2)
1086 if (BITS_IN_JSAMPLE != 8)
1088 if (sizeof(JDIMENSION) != 4)
1090 if (sizeof(IFAST_MULT_TYPE) != 2)
1092 if (IFAST_SCALE_BITS != 2)
1095 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_ifast_sse2))
1097 if (simd_support & JSIMD_MMX)
/*
 * Capability query for the SIMD inverse DCT, floating-point variant.
 * Tests a 2-byte JCOEF, 8-bit samples, a 4-byte JDIMENSION, a 4-byte
 * FAST_FLOAT and a 4-byte FLOAT_MULT_TYPE, then probes SSE2
 * (jconst_idct_float_sse2 16-byte aligned), SSE (jconst_idct_float_sse
 * 16-byte aligned) and 3DNow! in that order.
 * NOTE(review): the statement executed by each test is not visible in
 * this listing, and the listing may omit further checks.
 */
1104 jsimd_can_idct_float(void)
1110 if (sizeof(JCOEF) != 2)
1112 if (BITS_IN_JSAMPLE != 8)
1114 if (sizeof(JDIMENSION) != 4)
1116 if (sizeof(FAST_FLOAT) != 4)
1118 if (sizeof(FLOAT_MULT_TYPE) != 4)
1121 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_float_sse2))
1123 if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_idct_float_sse))
1125 if (simd_support & JSIMD_3DNOW)
/*
 * Run the "islow" inverse DCT with a SIMD routine: AVX2 if enabled in
 * simd_support, otherwise SSE2; the MMX call is listed last.
 * Each routine receives compptr->dct_table, coef_block and output_buf;
 * cinfo is not referenced in the visible lines.
 * NOTE(review): the final argument line of every call and the line
 * before the MMX call are not visible in this listing.
 * NOTE(review): unlike jsimd_can_idct_islow(), the dispatch does not
 * re-test constant-table alignment -- confirm that this is intended.
 */
1132 jsimd_idct_islow(j_decompress_ptr cinfo, jpeg_component_info *compptr,
1133 JCOEFPTR coef_block, JSAMPARRAY output_buf,
1134 JDIMENSION output_col)
1136 if (simd_support & JSIMD_AVX2)
1137 jsimd_idct_islow_avx2(compptr->dct_table, coef_block, output_buf,
1139 else if (simd_support & JSIMD_SSE2)
1140 jsimd_idct_islow_sse2(compptr->dct_table, coef_block, output_buf,
1143 jsimd_idct_islow_mmx(compptr->dct_table, coef_block, output_buf,
/*
 * Run the "ifast" inverse DCT with a SIMD routine: SSE2 when it is
 * enabled in simd_support and jconst_idct_ifast_sse2 is 16-byte
 * aligned; the MMX call is listed last.
 * Each routine receives compptr->dct_table, coef_block and output_buf;
 * cinfo is not referenced in the visible lines.
 * NOTE(review): the final argument line of both calls and the line
 * before the MMX call are not visible in this listing.
 */
1148 jsimd_idct_ifast(j_decompress_ptr cinfo, jpeg_component_info *compptr,
1149 JCOEFPTR coef_block, JSAMPARRAY output_buf,
1150 JDIMENSION output_col)
1152 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_ifast_sse2))
1153 jsimd_idct_ifast_sse2(compptr->dct_table, coef_block, output_buf,
1156 jsimd_idct_ifast_mmx(compptr->dct_table, coef_block, output_buf,
/*
 * Run the floating-point inverse DCT with a SIMD routine: SSE2 when it
 * is enabled and jconst_idct_float_sse2 is 16-byte aligned, otherwise
 * SSE when it is enabled and jconst_idct_float_sse is 16-byte aligned;
 * the 3DNow! call is listed last.
 * Each routine receives compptr->dct_table, coef_block and output_buf;
 * cinfo is not referenced in the visible lines.
 * NOTE(review): the final argument line of every call and the line
 * before the 3DNow! call are not visible in this listing.
 */
1161 jsimd_idct_float(j_decompress_ptr cinfo, jpeg_component_info *compptr,
1162 JCOEFPTR coef_block, JSAMPARRAY output_buf,
1163 JDIMENSION output_col)
1165 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_float_sse2))
1166 jsimd_idct_float_sse2(compptr->dct_table, coef_block, output_buf,
1168 else if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_idct_float_sse))
1169 jsimd_idct_float_sse(compptr->dct_table, coef_block, output_buf,
1172 jsimd_idct_float_3dnow(compptr->dct_table, coef_block, output_buf,
/*
 * Capability query for the SIMD Huffman block encoder.
 * Tests a 2-byte JCOEF, then requires SSE2 in simd_support, a non-zero
 * simd_huffman flag and a 16-byte aligned jconst_huff_encode_one_block
 * table.
 * NOTE(review): the statement executed by each test is not visible in
 * this listing, and the listing may omit further checks.
 */
1177 jsimd_can_huff_encode_one_block(void)
1183 if (sizeof(JCOEF) != 2)
1186 if ((simd_support & JSIMD_SSE2) && simd_huffman &&
1187 IS_ALIGNED_SSE(jconst_huff_encode_one_block))
/*
 * Huffman-encode one block by delegating to the SSE2 routine and
 * returning its result unchanged.  No capability test is made in the
 * visible lines.
 * NOTE(review): the final argument line of the call is not visible in
 * this listing.
 */
1194 jsimd_huff_encode_one_block(void *state, JOCTET *buffer, JCOEFPTR block,
1195 int last_dc_val, c_derived_tbl *dctbl,
1196 c_derived_tbl *actbl)
1198 return jsimd_huff_encode_one_block_sse2(state, buffer, block, last_dc_val,
/*
 * Capability query for the SIMD "AC first" preparation routine.
 * Tests a 2-byte JCOEF and SIZEOF_SIZE_T equal to 4, then probes SSE2.
 * NOTE(review): the statement executed by each test is not visible in
 * this listing, and the listing may omit further checks.
 */
1203 jsimd_can_encode_mcu_AC_first_prepare(void)
1209 if (sizeof(JCOEF) != 2)
1211 if (SIZEOF_SIZE_T != 4)
1213 if (simd_support & JSIMD_SSE2)
/*
 * Forward all six arguments unchanged to the SSE2 "AC first"
 * preparation routine.  No capability test is made in the visible
 * lines.
 */
1220 jsimd_encode_mcu_AC_first_prepare(const JCOEF *block,
1221 const int *jpeg_natural_order_start, int Sl,
1222 int Al, JCOEF *values, size_t *zerobits)
1224 jsimd_encode_mcu_AC_first_prepare_sse2(block, jpeg_natural_order_start,
1225 Sl, Al, values, zerobits);
/*
 * Capability query for the SIMD "AC refine" preparation routine.
 * Tests a 2-byte JCOEF and SIZEOF_SIZE_T equal to 4, then probes SSE2.
 * NOTE(review): the statement executed by each test is not visible in
 * this listing, and the listing may omit further checks.
 */
1229 jsimd_can_encode_mcu_AC_refine_prepare(void)
1235 if (sizeof(JCOEF) != 2)
1237 if (SIZEOF_SIZE_T != 4)
1239 if (simd_support & JSIMD_SSE2)
/*
 * Forward all six arguments unchanged to the SSE2 "AC refine"
 * preparation routine and return its result.  No capability test is
 * made in the visible lines.
 */
1246 jsimd_encode_mcu_AC_refine_prepare(const JCOEF *block,
1247 const int *jpeg_natural_order_start, int Sl,
1248 int Al, JCOEF *absvalues, size_t *bits)
1250 return jsimd_encode_mcu_AC_refine_prepare_sse2(block,
1251 jpeg_natural_order_start,
1252 Sl, Al, absvalues, bits);