4 * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
5 * Copyright 2009-2011 D. R. Commander
7 * Based on the x86 SIMD extension for IJG JPEG library,
8 * Copyright (C) 1999-2006, MIYASAKA Masaru.
9 * For conditions of distribution and use, see copyright notice in jsimdext.inc
11 * This file contains the interface between the "normal" portions
12 * of the library and the SIMD implementations when running on a
13 * 32-bit x86 architecture.
16 #define JPEG_INTERNALS
17 #include "../jinclude.h"
18 #include "../jpeglib.h"
21 #include "../jsimddct.h"
/*
 * In the PIC cases, we have no guarantee that constants will keep
 * their alignment.  These macros allow us to verify it at runtime.
 *
 * IS_ALIGNED(ptr, order) is non-zero when the address `ptr` is a multiple
 * of 2^order bytes.  Both arguments are parenthesised so that an argument
 * written as an expression (e.g. `table + n`) is converted and masked as a
 * whole, and the address is converted through size_t so that the cast is
 * not narrower than a pointer.
 */
#define IS_ALIGNED(ptr, order) \
  ((((size_t)(ptr)) & (((size_t)1 << (order)) - 1)) == 0)

#define IS_ALIGNED_SSE(ptr) (IS_ALIGNED((ptr), 4)) /* 16 byte alignment */
/*
 * Bitmask of JSIMD_* capability flags consulted by the dispatch routines in
 * this file.  It starts out with every bit set; the initialisation code
 * compares against ~0U before storing the result of jpeg_simd_cpu_support(),
 * so the all-ones value appears to serve as the "not yet probed" sentinel.
 */
static unsigned int simd_support = ~0U;
35 * Check what SIMD accelerations are supported.
37 * FIXME: This code is racy under a multi-threaded environment.
44 if (simd_support != ~0U)
47 simd_support = jpeg_simd_cpu_support();
49 /* Force different settings through environment variables */
50 env = getenv("JSIMD_FORCEMMX");
51 if ((env != NULL) && (strcmp(env, "1") == 0))
52 simd_support &= JSIMD_MMX;
53 env = getenv("JSIMD_FORCE3DNOW");
54 if ((env != NULL) && (strcmp(env, "1") == 0))
55 simd_support &= JSIMD_3DNOW|JSIMD_MMX;
56 env = getenv("JSIMD_FORCESSE");
57 if ((env != NULL) && (strcmp(env, "1") == 0))
58 simd_support &= JSIMD_SSE|JSIMD_MMX;
59 env = getenv("JSIMD_FORCESSE2");
60 if ((env != NULL) && (strcmp(env, "1") == 0))
61 simd_support &= JSIMD_SSE2;
65 jsimd_can_rgb_ycc (void)
69 /* The code is optimised for these values only */
70 if (BITS_IN_JSAMPLE != 8)
72 if (sizeof(JDIMENSION) != 4)
74 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
77 if ((simd_support & JSIMD_SSE2) &&
78 IS_ALIGNED_SSE(jconst_rgb_ycc_convert_sse2))
80 if (simd_support & JSIMD_MMX)
87 jsimd_can_rgb_gray (void)
91 /* The code is optimised for these values only */
92 if (BITS_IN_JSAMPLE != 8)
94 if (sizeof(JDIMENSION) != 4)
96 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
99 if ((simd_support & JSIMD_SSE2) &&
100 IS_ALIGNED_SSE(jconst_rgb_gray_convert_sse2))
102 if (simd_support & JSIMD_MMX)
109 jsimd_can_ycc_rgb (void)
113 /* The code is optimised for these values only */
114 if (BITS_IN_JSAMPLE != 8)
116 if (sizeof(JDIMENSION) != 4)
118 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
121 if ((simd_support & JSIMD_SSE2) &&
122 IS_ALIGNED_SSE(jconst_ycc_rgb_convert_sse2))
124 if (simd_support & JSIMD_MMX)
131 jsimd_rgb_ycc_convert (j_compress_ptr cinfo,
132 JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
133 JDIMENSION output_row, int num_rows)
135 void (*sse2fct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
136 void (*mmxfct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
138 switch(cinfo->in_color_space)
141 sse2fct=jsimd_extrgb_ycc_convert_sse2;
142 mmxfct=jsimd_extrgb_ycc_convert_mmx;
146 sse2fct=jsimd_extrgbx_ycc_convert_sse2;
147 mmxfct=jsimd_extrgbx_ycc_convert_mmx;
150 sse2fct=jsimd_extbgr_ycc_convert_sse2;
151 mmxfct=jsimd_extbgr_ycc_convert_mmx;
155 sse2fct=jsimd_extbgrx_ycc_convert_sse2;
156 mmxfct=jsimd_extbgrx_ycc_convert_mmx;
160 sse2fct=jsimd_extxbgr_ycc_convert_sse2;
161 mmxfct=jsimd_extxbgr_ycc_convert_mmx;
165 sse2fct=jsimd_extxrgb_ycc_convert_sse2;
166 mmxfct=jsimd_extxrgb_ycc_convert_mmx;
169 sse2fct=jsimd_rgb_ycc_convert_sse2;
170 mmxfct=jsimd_rgb_ycc_convert_mmx;
174 if ((simd_support & JSIMD_SSE2) &&
175 IS_ALIGNED_SSE(jconst_rgb_ycc_convert_sse2))
176 sse2fct(cinfo->image_width, input_buf,
177 output_buf, output_row, num_rows);
178 else if (simd_support & JSIMD_MMX)
179 mmxfct(cinfo->image_width, input_buf,
180 output_buf, output_row, num_rows);
184 jsimd_rgb_gray_convert (j_compress_ptr cinfo,
185 JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
186 JDIMENSION output_row, int num_rows)
188 void (*sse2fct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
189 void (*mmxfct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
191 switch(cinfo->in_color_space)
194 sse2fct=jsimd_extrgb_gray_convert_sse2;
195 mmxfct=jsimd_extrgb_gray_convert_mmx;
199 sse2fct=jsimd_extrgbx_gray_convert_sse2;
200 mmxfct=jsimd_extrgbx_gray_convert_mmx;
203 sse2fct=jsimd_extbgr_gray_convert_sse2;
204 mmxfct=jsimd_extbgr_gray_convert_mmx;
208 sse2fct=jsimd_extbgrx_gray_convert_sse2;
209 mmxfct=jsimd_extbgrx_gray_convert_mmx;
213 sse2fct=jsimd_extxbgr_gray_convert_sse2;
214 mmxfct=jsimd_extxbgr_gray_convert_mmx;
218 sse2fct=jsimd_extxrgb_gray_convert_sse2;
219 mmxfct=jsimd_extxrgb_gray_convert_mmx;
222 sse2fct=jsimd_rgb_gray_convert_sse2;
223 mmxfct=jsimd_rgb_gray_convert_mmx;
227 if ((simd_support & JSIMD_SSE2) &&
228 IS_ALIGNED_SSE(jconst_rgb_gray_convert_sse2))
229 sse2fct(cinfo->image_width, input_buf,
230 output_buf, output_row, num_rows);
231 else if (simd_support & JSIMD_MMX)
232 mmxfct(cinfo->image_width, input_buf,
233 output_buf, output_row, num_rows);
237 jsimd_ycc_rgb_convert (j_decompress_ptr cinfo,
238 JSAMPIMAGE input_buf, JDIMENSION input_row,
239 JSAMPARRAY output_buf, int num_rows)
241 void (*sse2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
242 void (*mmxfct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
244 switch(cinfo->out_color_space)
247 sse2fct=jsimd_ycc_extrgb_convert_sse2;
248 mmxfct=jsimd_ycc_extrgb_convert_mmx;
252 sse2fct=jsimd_ycc_extrgbx_convert_sse2;
253 mmxfct=jsimd_ycc_extrgbx_convert_mmx;
256 sse2fct=jsimd_ycc_extbgr_convert_sse2;
257 mmxfct=jsimd_ycc_extbgr_convert_mmx;
261 sse2fct=jsimd_ycc_extbgrx_convert_sse2;
262 mmxfct=jsimd_ycc_extbgrx_convert_mmx;
266 sse2fct=jsimd_ycc_extxbgr_convert_sse2;
267 mmxfct=jsimd_ycc_extxbgr_convert_mmx;
271 sse2fct=jsimd_ycc_extxrgb_convert_sse2;
272 mmxfct=jsimd_ycc_extxrgb_convert_mmx;
275 sse2fct=jsimd_ycc_rgb_convert_sse2;
276 mmxfct=jsimd_ycc_rgb_convert_mmx;
280 if ((simd_support & JSIMD_SSE2) &&
281 IS_ALIGNED_SSE(jconst_ycc_rgb_convert_sse2))
282 sse2fct(cinfo->output_width, input_buf,
283 input_row, output_buf, num_rows);
284 else if (simd_support & JSIMD_MMX)
285 mmxfct(cinfo->output_width, input_buf,
286 input_row, output_buf, num_rows);
290 jsimd_can_h2v2_downsample (void)
294 /* The code is optimised for these values only */
295 if (BITS_IN_JSAMPLE != 8)
297 if (sizeof(JDIMENSION) != 4)
300 if (simd_support & JSIMD_SSE2)
302 if (simd_support & JSIMD_MMX)
309 jsimd_can_h2v1_downsample (void)
313 /* The code is optimised for these values only */
314 if (BITS_IN_JSAMPLE != 8)
316 if (sizeof(JDIMENSION) != 4)
319 if (simd_support & JSIMD_SSE2)
321 if (simd_support & JSIMD_MMX)
328 jsimd_h2v2_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
329 JSAMPARRAY input_data, JSAMPARRAY output_data)
331 if (simd_support & JSIMD_SSE2)
332 jsimd_h2v2_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor,
333 compptr->v_samp_factor, compptr->width_in_blocks,
334 input_data, output_data);
335 else if (simd_support & JSIMD_MMX)
336 jsimd_h2v2_downsample_mmx(cinfo->image_width, cinfo->max_v_samp_factor,
337 compptr->v_samp_factor, compptr->width_in_blocks,
338 input_data, output_data);
342 jsimd_h2v1_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
343 JSAMPARRAY input_data, JSAMPARRAY output_data)
345 if (simd_support & JSIMD_SSE2)
346 jsimd_h2v1_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor,
347 compptr->v_samp_factor, compptr->width_in_blocks,
348 input_data, output_data);
349 else if (simd_support & JSIMD_MMX)
350 jsimd_h2v1_downsample_mmx(cinfo->image_width, cinfo->max_v_samp_factor,
351 compptr->v_samp_factor, compptr->width_in_blocks,
352 input_data, output_data);
356 jsimd_can_h2v2_upsample (void)
360 /* The code is optimised for these values only */
361 if (BITS_IN_JSAMPLE != 8)
363 if (sizeof(JDIMENSION) != 4)
366 if (simd_support & JSIMD_SSE2)
368 if (simd_support & JSIMD_MMX)
375 jsimd_can_h2v1_upsample (void)
379 /* The code is optimised for these values only */
380 if (BITS_IN_JSAMPLE != 8)
382 if (sizeof(JDIMENSION) != 4)
385 if (simd_support & JSIMD_SSE2)
387 if (simd_support & JSIMD_MMX)
394 jsimd_h2v2_upsample (j_decompress_ptr cinfo,
395 jpeg_component_info * compptr,
396 JSAMPARRAY input_data,
397 JSAMPARRAY * output_data_ptr)
399 if (simd_support & JSIMD_SSE2)
400 jsimd_h2v2_upsample_sse2(cinfo->max_v_samp_factor,
401 cinfo->output_width, input_data, output_data_ptr);
402 else if (simd_support & JSIMD_MMX)
403 jsimd_h2v2_upsample_mmx(cinfo->max_v_samp_factor,
404 cinfo->output_width, input_data, output_data_ptr);
408 jsimd_h2v1_upsample (j_decompress_ptr cinfo,
409 jpeg_component_info * compptr,
410 JSAMPARRAY input_data,
411 JSAMPARRAY * output_data_ptr)
413 if (simd_support & JSIMD_SSE2)
414 jsimd_h2v1_upsample_sse2(cinfo->max_v_samp_factor,
415 cinfo->output_width, input_data, output_data_ptr);
416 else if (simd_support & JSIMD_MMX)
417 jsimd_h2v1_upsample_mmx(cinfo->max_v_samp_factor,
418 cinfo->output_width, input_data, output_data_ptr);
422 jsimd_can_h2v2_fancy_upsample (void)
426 /* The code is optimised for these values only */
427 if (BITS_IN_JSAMPLE != 8)
429 if (sizeof(JDIMENSION) != 4)
432 if ((simd_support & JSIMD_SSE2) &&
433 IS_ALIGNED_SSE(jconst_fancy_upsample_sse2))
435 if (simd_support & JSIMD_MMX)
442 jsimd_can_h2v1_fancy_upsample (void)
446 /* The code is optimised for these values only */
447 if (BITS_IN_JSAMPLE != 8)
449 if (sizeof(JDIMENSION) != 4)
452 if ((simd_support & JSIMD_SSE2) &&
453 IS_ALIGNED_SSE(jconst_fancy_upsample_sse2))
455 if (simd_support & JSIMD_MMX)
462 jsimd_h2v2_fancy_upsample (j_decompress_ptr cinfo,
463 jpeg_component_info * compptr,
464 JSAMPARRAY input_data,
465 JSAMPARRAY * output_data_ptr)
467 if ((simd_support & JSIMD_SSE2) &&
468 IS_ALIGNED_SSE(jconst_fancy_upsample_sse2))
469 jsimd_h2v2_fancy_upsample_sse2(cinfo->max_v_samp_factor,
470 compptr->downsampled_width, input_data, output_data_ptr);
471 else if (simd_support & JSIMD_MMX)
472 jsimd_h2v2_fancy_upsample_mmx(cinfo->max_v_samp_factor,
473 compptr->downsampled_width, input_data, output_data_ptr);
477 jsimd_h2v1_fancy_upsample (j_decompress_ptr cinfo,
478 jpeg_component_info * compptr,
479 JSAMPARRAY input_data,
480 JSAMPARRAY * output_data_ptr)
482 if ((simd_support & JSIMD_SSE2) &&
483 IS_ALIGNED_SSE(jconst_fancy_upsample_sse2))
484 jsimd_h2v1_fancy_upsample_sse2(cinfo->max_v_samp_factor,
485 compptr->downsampled_width, input_data, output_data_ptr);
486 else if (simd_support & JSIMD_MMX)
487 jsimd_h2v1_fancy_upsample_mmx(cinfo->max_v_samp_factor,
488 compptr->downsampled_width, input_data, output_data_ptr);
492 jsimd_can_h2v2_merged_upsample (void)
496 /* The code is optimised for these values only */
497 if (BITS_IN_JSAMPLE != 8)
499 if (sizeof(JDIMENSION) != 4)
502 if ((simd_support & JSIMD_SSE2) &&
503 IS_ALIGNED_SSE(jconst_merged_upsample_sse2))
505 if (simd_support & JSIMD_MMX)
512 jsimd_can_h2v1_merged_upsample (void)
516 /* The code is optimised for these values only */
517 if (BITS_IN_JSAMPLE != 8)
519 if (sizeof(JDIMENSION) != 4)
522 if ((simd_support & JSIMD_SSE2) &&
523 IS_ALIGNED_SSE(jconst_merged_upsample_sse2))
525 if (simd_support & JSIMD_MMX)
532 jsimd_h2v2_merged_upsample (j_decompress_ptr cinfo,
533 JSAMPIMAGE input_buf,
534 JDIMENSION in_row_group_ctr,
535 JSAMPARRAY output_buf)
537 void (*sse2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
538 void (*mmxfct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
540 switch(cinfo->out_color_space)
543 sse2fct=jsimd_h2v2_extrgb_merged_upsample_sse2;
544 mmxfct=jsimd_h2v2_extrgb_merged_upsample_mmx;
548 sse2fct=jsimd_h2v2_extrgbx_merged_upsample_sse2;
549 mmxfct=jsimd_h2v2_extrgbx_merged_upsample_mmx;
552 sse2fct=jsimd_h2v2_extbgr_merged_upsample_sse2;
553 mmxfct=jsimd_h2v2_extbgr_merged_upsample_mmx;
557 sse2fct=jsimd_h2v2_extbgrx_merged_upsample_sse2;
558 mmxfct=jsimd_h2v2_extbgrx_merged_upsample_mmx;
562 sse2fct=jsimd_h2v2_extxbgr_merged_upsample_sse2;
563 mmxfct=jsimd_h2v2_extxbgr_merged_upsample_mmx;
567 sse2fct=jsimd_h2v2_extxrgb_merged_upsample_sse2;
568 mmxfct=jsimd_h2v2_extxrgb_merged_upsample_mmx;
571 sse2fct=jsimd_h2v2_merged_upsample_sse2;
572 mmxfct=jsimd_h2v2_merged_upsample_mmx;
576 if ((simd_support & JSIMD_SSE2) &&
577 IS_ALIGNED_SSE(jconst_merged_upsample_sse2))
578 sse2fct(cinfo->output_width, input_buf,
579 in_row_group_ctr, output_buf);
580 else if (simd_support & JSIMD_MMX)
581 mmxfct(cinfo->output_width, input_buf,
582 in_row_group_ctr, output_buf);
586 jsimd_h2v1_merged_upsample (j_decompress_ptr cinfo,
587 JSAMPIMAGE input_buf,
588 JDIMENSION in_row_group_ctr,
589 JSAMPARRAY output_buf)
591 void (*sse2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
592 void (*mmxfct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
594 switch(cinfo->out_color_space)
597 sse2fct=jsimd_h2v1_extrgb_merged_upsample_sse2;
598 mmxfct=jsimd_h2v1_extrgb_merged_upsample_mmx;
602 sse2fct=jsimd_h2v1_extrgbx_merged_upsample_sse2;
603 mmxfct=jsimd_h2v1_extrgbx_merged_upsample_mmx;
606 sse2fct=jsimd_h2v1_extbgr_merged_upsample_sse2;
607 mmxfct=jsimd_h2v1_extbgr_merged_upsample_mmx;
611 sse2fct=jsimd_h2v1_extbgrx_merged_upsample_sse2;
612 mmxfct=jsimd_h2v1_extbgrx_merged_upsample_mmx;
616 sse2fct=jsimd_h2v1_extxbgr_merged_upsample_sse2;
617 mmxfct=jsimd_h2v1_extxbgr_merged_upsample_mmx;
621 sse2fct=jsimd_h2v1_extxrgb_merged_upsample_sse2;
622 mmxfct=jsimd_h2v1_extxrgb_merged_upsample_mmx;
625 sse2fct=jsimd_h2v1_merged_upsample_sse2;
626 mmxfct=jsimd_h2v1_merged_upsample_mmx;
630 if ((simd_support & JSIMD_SSE2) &&
631 IS_ALIGNED_SSE(jconst_merged_upsample_sse2))
632 sse2fct(cinfo->output_width, input_buf,
633 in_row_group_ctr, output_buf);
634 else if (simd_support & JSIMD_MMX)
635 mmxfct(cinfo->output_width, input_buf,
636 in_row_group_ctr, output_buf);
640 jsimd_can_convsamp (void)
644 /* The code is optimised for these values only */
647 if (BITS_IN_JSAMPLE != 8)
649 if (sizeof(JDIMENSION) != 4)
651 if (sizeof(DCTELEM) != 2)
654 if (simd_support & JSIMD_SSE2)
656 if (simd_support & JSIMD_MMX)
663 jsimd_can_convsamp_float (void)
667 /* The code is optimised for these values only */
670 if (BITS_IN_JSAMPLE != 8)
672 if (sizeof(JDIMENSION) != 4)
674 if (sizeof(FAST_FLOAT) != 4)
677 if (simd_support & JSIMD_SSE2)
679 if (simd_support & JSIMD_SSE)
681 if (simd_support & JSIMD_3DNOW)
688 jsimd_convsamp (JSAMPARRAY sample_data, JDIMENSION start_col,
691 if (simd_support & JSIMD_SSE2)
692 jsimd_convsamp_sse2(sample_data, start_col, workspace);
693 else if (simd_support & JSIMD_MMX)
694 jsimd_convsamp_mmx(sample_data, start_col, workspace);
698 jsimd_convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col,
699 FAST_FLOAT * workspace)
701 if (simd_support & JSIMD_SSE2)
702 jsimd_convsamp_float_sse2(sample_data, start_col, workspace);
703 else if (simd_support & JSIMD_SSE)
704 jsimd_convsamp_float_sse(sample_data, start_col, workspace);
705 else if (simd_support & JSIMD_3DNOW)
706 jsimd_convsamp_float_3dnow(sample_data, start_col, workspace);
710 jsimd_can_fdct_islow (void)
714 /* The code is optimised for these values only */
717 if (sizeof(DCTELEM) != 2)
720 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_islow_sse2))
722 if (simd_support & JSIMD_MMX)
729 jsimd_can_fdct_ifast (void)
733 /* The code is optimised for these values only */
736 if (sizeof(DCTELEM) != 2)
739 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_ifast_sse2))
741 if (simd_support & JSIMD_MMX)
748 jsimd_can_fdct_float (void)
752 /* The code is optimised for these values only */
755 if (sizeof(FAST_FLOAT) != 4)
758 if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_fdct_float_sse))
760 if (simd_support & JSIMD_3DNOW)
767 jsimd_fdct_islow (DCTELEM * data)
769 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_islow_sse2))
770 jsimd_fdct_islow_sse2(data);
771 else if (simd_support & JSIMD_MMX)
772 jsimd_fdct_islow_mmx(data);
776 jsimd_fdct_ifast (DCTELEM * data)
778 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_islow_sse2))
779 jsimd_fdct_ifast_sse2(data);
780 else if (simd_support & JSIMD_MMX)
781 jsimd_fdct_ifast_mmx(data);
785 jsimd_fdct_float (FAST_FLOAT * data)
787 if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_fdct_float_sse))
788 jsimd_fdct_float_sse(data);
789 else if (simd_support & JSIMD_3DNOW)
790 jsimd_fdct_float_3dnow(data);
794 jsimd_can_quantize (void)
798 /* The code is optimised for these values only */
801 if (sizeof(JCOEF) != 2)
803 if (sizeof(DCTELEM) != 2)
806 if (simd_support & JSIMD_SSE2)
808 if (simd_support & JSIMD_MMX)
815 jsimd_can_quantize_float (void)
819 /* The code is optimised for these values only */
822 if (sizeof(JCOEF) != 2)
824 if (sizeof(FAST_FLOAT) != 4)
827 if (simd_support & JSIMD_SSE2)
829 if (simd_support & JSIMD_SSE)
831 if (simd_support & JSIMD_3DNOW)
838 jsimd_quantize (JCOEFPTR coef_block, DCTELEM * divisors,
841 if (simd_support & JSIMD_SSE2)
842 jsimd_quantize_sse2(coef_block, divisors, workspace);
843 else if (simd_support & JSIMD_MMX)
844 jsimd_quantize_mmx(coef_block, divisors, workspace);
848 jsimd_quantize_float (JCOEFPTR coef_block, FAST_FLOAT * divisors,
849 FAST_FLOAT * workspace)
851 if (simd_support & JSIMD_SSE2)
852 jsimd_quantize_float_sse2(coef_block, divisors, workspace);
853 else if (simd_support & JSIMD_SSE)
854 jsimd_quantize_float_sse(coef_block, divisors, workspace);
855 else if (simd_support & JSIMD_3DNOW)
856 jsimd_quantize_float_3dnow(coef_block, divisors, workspace);
860 jsimd_can_idct_2x2 (void)
864 /* The code is optimised for these values only */
867 if (sizeof(JCOEF) != 2)
869 if (BITS_IN_JSAMPLE != 8)
871 if (sizeof(JDIMENSION) != 4)
873 if (sizeof(ISLOW_MULT_TYPE) != 2)
876 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
878 if (simd_support & JSIMD_MMX)
885 jsimd_can_idct_4x4 (void)
889 /* The code is optimised for these values only */
892 if (sizeof(JCOEF) != 2)
894 if (BITS_IN_JSAMPLE != 8)
896 if (sizeof(JDIMENSION) != 4)
898 if (sizeof(ISLOW_MULT_TYPE) != 2)
901 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
903 if (simd_support & JSIMD_MMX)
910 jsimd_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
911 JCOEFPTR coef_block, JSAMPARRAY output_buf,
912 JDIMENSION output_col)
914 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
915 jsimd_idct_2x2_sse2(compptr->dct_table, coef_block, output_buf, output_col);
916 else if (simd_support & JSIMD_MMX)
917 jsimd_idct_2x2_mmx(compptr->dct_table, coef_block, output_buf, output_col);
921 jsimd_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
922 JCOEFPTR coef_block, JSAMPARRAY output_buf,
923 JDIMENSION output_col)
925 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
926 jsimd_idct_4x4_sse2(compptr->dct_table, coef_block, output_buf, output_col);
927 else if (simd_support & JSIMD_MMX)
928 jsimd_idct_4x4_mmx(compptr->dct_table, coef_block, output_buf, output_col);
932 jsimd_can_idct_islow (void)
936 /* The code is optimised for these values only */
939 if (sizeof(JCOEF) != 2)
941 if (BITS_IN_JSAMPLE != 8)
943 if (sizeof(JDIMENSION) != 4)
945 if (sizeof(ISLOW_MULT_TYPE) != 2)
948 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_islow_sse2))
950 if (simd_support & JSIMD_MMX)
957 jsimd_can_idct_ifast (void)
961 /* The code is optimised for these values only */
964 if (sizeof(JCOEF) != 2)
966 if (BITS_IN_JSAMPLE != 8)
968 if (sizeof(JDIMENSION) != 4)
970 if (sizeof(IFAST_MULT_TYPE) != 2)
972 if (IFAST_SCALE_BITS != 2)
975 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_ifast_sse2))
977 if (simd_support & JSIMD_MMX)
984 jsimd_can_idct_float (void)
990 if (sizeof(JCOEF) != 2)
992 if (BITS_IN_JSAMPLE != 8)
994 if (sizeof(JDIMENSION) != 4)
996 if (sizeof(FAST_FLOAT) != 4)
998 if (sizeof(FLOAT_MULT_TYPE) != 4)
1001 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_float_sse2))
1003 if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_idct_float_sse))
1005 if (simd_support & JSIMD_3DNOW)
1012 jsimd_idct_islow (j_decompress_ptr cinfo, jpeg_component_info * compptr,
1013 JCOEFPTR coef_block, JSAMPARRAY output_buf,
1014 JDIMENSION output_col)
1016 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_islow_sse2))
1017 jsimd_idct_islow_sse2(compptr->dct_table, coef_block, output_buf, output_col);
1018 else if (simd_support & JSIMD_MMX)
1019 jsimd_idct_islow_mmx(compptr->dct_table, coef_block, output_buf, output_col);
1023 jsimd_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info * compptr,
1024 JCOEFPTR coef_block, JSAMPARRAY output_buf,
1025 JDIMENSION output_col)
1027 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_ifast_sse2))
1028 jsimd_idct_ifast_sse2(compptr->dct_table, coef_block, output_buf, output_col);
1029 else if (simd_support & JSIMD_MMX)
1030 jsimd_idct_ifast_mmx(compptr->dct_table, coef_block, output_buf, output_col);
1034 jsimd_idct_float (j_decompress_ptr cinfo, jpeg_component_info * compptr,
1035 JCOEFPTR coef_block, JSAMPARRAY output_buf,
1036 JDIMENSION output_col)
1038 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_float_sse2))
1039 jsimd_idct_float_sse2(compptr->dct_table, coef_block,
1040 output_buf, output_col);
1041 else if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_idct_float_sse))
1042 jsimd_idct_float_sse(compptr->dct_table, coef_block,
1043 output_buf, output_col);
1044 else if (simd_support & JSIMD_3DNOW)
1045 jsimd_idct_float_3dnow(compptr->dct_table, coef_block,
1046 output_buf, output_col);