4 * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
5 * Copyright (C) 2009-2011, 2014, 2016, 2018, D. R. Commander.
6 * Copyright (C) 2013-2014, MIPS Technologies, Inc., California.
7 * Copyright (C) 2015, 2018, Matthieu Darbois.
8 * Copyright (C) 2016-2018, Loongson Technology Corporation Limited, BeiJing.
10 * Based on the x86 SIMD extension for IJG JPEG library,
11 * Copyright (C) 1999-2006, MIYASAKA Masaru.
12 * For conditions of distribution and use, see copyright notice in jsimdext.inc
14 * This file contains the interface between the "normal" portions
15 * of the library and the SIMD implementations when running on a
16 * 64-bit MIPS architecture.
19 #define JPEG_INTERNALS
20 #include "../../jinclude.h"
21 #include "../../jpeglib.h"
22 #include "../../jsimd.h"
23 #include "../../jdct.h"
24 #include "../../jsimddct.h"
/*
 * Bit mask of the SIMD features in use.  It starts out as all ones; the
 * detection code later compares it against ~0U to tell whether detection has
 * already run.
 */
29 static unsigned int simd_support = ~0;
31 #if defined(__linux__)
/* Upper bound (1 MiB) checked against the /proc/cpuinfo line-buffer size. */
33 #define SOMEWHAT_SANE_PROC_CPUINFO_SIZE_LIMIT (1024 * 1024)
/*
 * Report whether 'feature' is listed as a separate word on an
 * "ASEs implemented" line of /proc/cpuinfo.
 *
 * buffer   one NUL-terminated line of text
 * feature  the feature name to look for
 *
 * Returns 1 when the line starts with "ASEs implemented" and contains
 * 'feature' delimited by whitespace (or by the start/end of the list),
 * 0 otherwise, including for a NULL or empty argument.
 */
static int
check_feature(char *buffer, char *feature)
{
  const char *words, *scan, *hit;
  size_t flen;

  if (buffer == NULL || feature == NULL || *feature == '\0') {
    return 0;
  }
  if (strncmp(buffer, "ASEs implemented", 16) != 0) {
    return 0;
  }

  /* Step over the 16-character key and any whitespace that follows it.  The
   * <ctype.h> functions require a value representable as unsigned char, so
   * every character is cast before being classified. */
  words = buffer + 16;
  while (isspace((unsigned char)*words)) {
    words++;
  }

  /* Check if 'feature' is present in the buffer as a separate word.  The
   * left-boundary test stays anchored to 'words', so an occurrence that is
   * merely the tail of a longer word is never accepted. */
  flen = strlen(feature);
  for (scan = words; (hit = strstr(scan, feature)) != NULL; scan = hit + 1) {
    int starts_word = (hit == words) || isspace((unsigned char)hit[-1]);
    int ends_word = (hit[flen] == '\0') || isspace((unsigned char)hit[flen]);

    if (starts_word && ends_word) {
      return 1;
    }
  }
  return 0;
}
/*
 * Reads /proc/cpuinfo line by line into a heap buffer of 'bufsize' bytes and
 * sets the JSIMD_MMI bit of simd_support when check_feature() matches
 * "loongson-mmi" on a line.
 * NOTE(review): this excerpt omits several statements of the function (the
 * return values, the cleanup and any checks on the malloc and fopen results);
 * confirm them against the full source.
 */
65 parse_proc_cpuinfo(int bufsize)
67 char *buffer = (char *)malloc(bufsize);
75 fd = fopen("/proc/cpuinfo", "r");
77 while (fgets(buffer, bufsize, fd)) {
/* A line with no newline that is not at end-of-file did not fit in buffer. */
78 if (!strchr(buffer, '\n') && !feof(fd)) {
79 /* "impossible" happened - insufficient size of the buffer! */
84 if (check_feature(buffer, "loongson-mmi"))
85 simd_support |= JSIMD_MMI;
/*
 * SIMD detection.  Visible logic: simd_support is compared against ~0U (the
 * guarded statement is not shown; presumably an early return once detection
 * has run).  On Linux, parse_proc_cpuinfo() is called in a loop, starting with
 * a 1024-byte line buffer, until it returns non-zero; bufsize is also checked
 * against SOMEWHAT_SANE_PROC_CPUINFO_SIZE_LIMIT (how bufsize grows and what
 * that check triggers are not shown).  Elsewhere, JSIMD_MMI is set when
 * __mips_loongson_vector_rev is defined.  Finally the JSIMD_FORCEMMI and
 * JSIMD_FORCENONE environment variables are consulted; the value "1" for
 * JSIMD_FORCEMMI replaces simd_support with JSIMD_MMI.
 * NOTE(review): the function's name line and several statements are missing
 * from this excerpt; confirm against the full source.
 */
96 * Check what SIMD accelerations are supported.
98 * FIXME: This code is racy under a multi-threaded environment.
106 #if defined(__linux__)
107 int bufsize = 1024; /* an initial guess for the line buffer size limit */
110 if (simd_support != ~0U)
115 #if defined(__linux__)
116 while (!parse_proc_cpuinfo(bufsize)) {
118 if (bufsize > SOMEWHAT_SANE_PROC_CPUINFO_SIZE_LIMIT)
121 #elif defined(__mips_loongson_vector_rev)
122 /* Only enable MMI by default on non-Linux platforms when the compiler flags
124 simd_support |= JSIMD_MMI;
128 /* Force different settings through environment variables */
129 env = getenv("JSIMD_FORCEMMI");
130 if ((env != NULL) && (strcmp(env, "1") == 0))
131 simd_support = JSIMD_MMI;
132 env = getenv("JSIMD_FORCENONE");
/* The statement guarded by the next test is not shown in this excerpt. */
133 if ((env != NULL) && (strcmp(env, "1") == 0))
/*
 * Capability query.  The visible conditions test BITS_IN_JSAMPLE,
 * sizeof(JDIMENSION), RGB_PIXELSIZE (3 or 4) and the JSIMD_MMI bit of
 * simd_support; the statements they guard are not shown in this excerpt.
 */
139 jsimd_can_rgb_ycc(void)
143 /* The code is optimised for these values only */
144 if (BITS_IN_JSAMPLE != 8)
146 if (sizeof(JDIMENSION) != 4)
148 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
151 if (simd_support & JSIMD_MMI)
/*
 * Capability query.  The visible conditions test BITS_IN_JSAMPLE,
 * sizeof(JDIMENSION), RGB_PIXELSIZE (3 or 4) and the JSIMD_MMI bit of
 * simd_support; the statements they guard are not shown in this excerpt.
 */
158 jsimd_can_rgb_gray(void)
162 /* The code is optimised for these values only */
163 if (BITS_IN_JSAMPLE != 8)
165 if (sizeof(JDIMENSION) != 4)
167 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
170 if (simd_support & JSIMD_MMI)
/*
 * Capability query.  The visible conditions test BITS_IN_JSAMPLE,
 * sizeof(JDIMENSION), RGB_PIXELSIZE (3 or 4) and the JSIMD_MMI bit of
 * simd_support; the statements they guard are not shown in this excerpt.
 */
177 jsimd_can_ycc_rgb(void)
181 /* The code is optimised for these values only */
182 if (BITS_IN_JSAMPLE != 8)
184 if (sizeof(JDIMENSION) != 4)
186 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
189 if (simd_support & JSIMD_MMI)
/* Only the signatures of the next two capability queries are visible in this
 * excerpt; their bodies are not shown. */
196 jsimd_can_ycc_rgb565(void)
202 jsimd_c_can_null_convert(void)
/*
 * Picks one of seven MMI conversion routines according to
 * cinfo->in_color_space and calls it with the image width, the input and
 * output buffers, the output row and num_rows.  The case labels are not shown
 * in this excerpt, so which color space maps to which routine must be taken
 * from the full source.
 */
208 jsimd_rgb_ycc_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
209 JSAMPIMAGE output_buf, JDIMENSION output_row,
/* Pointer to the routine selected by the switch below. */
212 void (*mmifct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
214 switch (cinfo->in_color_space) {
216 mmifct = jsimd_extrgb_ycc_convert_mmi;
220 mmifct = jsimd_extrgbx_ycc_convert_mmi;
223 mmifct = jsimd_extbgr_ycc_convert_mmi;
227 mmifct = jsimd_extbgrx_ycc_convert_mmi;
231 mmifct = jsimd_extxbgr_ycc_convert_mmi;
235 mmifct = jsimd_extxrgb_ycc_convert_mmi;
238 mmifct = jsimd_rgb_ycc_convert_mmi;
242 mmifct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
/*
 * Picks one of seven MMI conversion routines according to
 * cinfo->in_color_space and calls it with the image width, the input and
 * output buffers, the output row and num_rows.  The case labels are not shown
 * in this excerpt.
 */
246 jsimd_rgb_gray_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
247 JSAMPIMAGE output_buf, JDIMENSION output_row,
/* Pointer to the routine selected by the switch below. */
250 void (*mmifct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
252 switch (cinfo->in_color_space) {
254 mmifct = jsimd_extrgb_gray_convert_mmi;
258 mmifct = jsimd_extrgbx_gray_convert_mmi;
261 mmifct = jsimd_extbgr_gray_convert_mmi;
265 mmifct = jsimd_extbgrx_gray_convert_mmi;
269 mmifct = jsimd_extxbgr_gray_convert_mmi;
273 mmifct = jsimd_extxrgb_gray_convert_mmi;
276 mmifct = jsimd_rgb_gray_convert_mmi;
280 mmifct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
/*
 * Picks one of seven MMI conversion routines according to
 * cinfo->out_color_space and calls it with the output width, the input
 * buffer and row, the output buffer and num_rows.  The case labels are not
 * shown in this excerpt.
 */
284 jsimd_ycc_rgb_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
285 JDIMENSION input_row, JSAMPARRAY output_buf,
/* Pointer to the routine selected by the switch below. */
288 void (*mmifct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
290 switch (cinfo->out_color_space) {
292 mmifct = jsimd_ycc_extrgb_convert_mmi;
296 mmifct = jsimd_ycc_extrgbx_convert_mmi;
299 mmifct = jsimd_ycc_extbgr_convert_mmi;
303 mmifct = jsimd_ycc_extbgrx_convert_mmi;
307 mmifct = jsimd_ycc_extxbgr_convert_mmi;
311 mmifct = jsimd_ycc_extxrgb_convert_mmi;
314 mmifct = jsimd_ycc_rgb_convert_mmi;
318 mmifct(cinfo->output_width, input_buf, input_row, output_buf, num_rows);
/* Only partial signatures of the next two converters are visible in this
 * excerpt; their bodies are not shown. */
322 jsimd_ycc_rgb565_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
323 JDIMENSION input_row, JSAMPARRAY output_buf,
329 jsimd_c_null_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
330 JSAMPIMAGE output_buf, JDIMENSION output_row,
/*
 * Capability query.  The visible conditions test BITS_IN_JSAMPLE,
 * sizeof(JDIMENSION) and the JSIMD_MMI bit of simd_support; the statements
 * they guard are not shown in this excerpt.
 */
336 jsimd_can_h2v2_downsample(void)
340 /* The code is optimised for these values only */
341 if (BITS_IN_JSAMPLE != 8)
343 if (sizeof(JDIMENSION) != 4)
346 if (simd_support & JSIMD_MMI)
/* Only the signatures of the next two capability queries are visible in this
 * excerpt; their bodies are not shown. */
353 jsimd_can_h2v2_smooth_downsample(void)
359 jsimd_can_h2v1_downsample(void)
/*
 * Forwards to jsimd_h2v2_downsample_mmi() with the image width, the maximum
 * vertical sampling factor, the component's vertical sampling factor and
 * width in blocks, and the input and output sample arrays.
 */
365 jsimd_h2v2_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
366 JSAMPARRAY input_data, JSAMPARRAY output_data)
368 jsimd_h2v2_downsample_mmi(cinfo->image_width, cinfo->max_v_samp_factor,
369 compptr->v_samp_factor, compptr->width_in_blocks,
370 input_data, output_data);
/* Only the signatures of the next two downsamplers are visible in this
 * excerpt; their bodies are not shown. */
374 jsimd_h2v2_smooth_downsample(j_compress_ptr cinfo,
375 jpeg_component_info *compptr,
376 JSAMPARRAY input_data, JSAMPARRAY output_data)
381 jsimd_h2v1_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
382 JSAMPARRAY input_data, JSAMPARRAY output_data)
/* Plain upsampling: three capability queries followed by the three matching
 * entry points.  Only their signatures are visible in this excerpt; the
 * bodies are not shown. */
387 jsimd_can_h2v2_upsample(void)
393 jsimd_can_h2v1_upsample(void)
399 jsimd_can_int_upsample(void)
405 jsimd_h2v2_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
406 JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
411 jsimd_h2v1_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
412 JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
417 jsimd_int_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
418 JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
/*
 * Capability query.  The visible conditions test BITS_IN_JSAMPLE,
 * sizeof(JDIMENSION) and the JSIMD_MMI bit of simd_support; the statements
 * they guard are not shown in this excerpt.
 */
423 jsimd_can_h2v2_fancy_upsample(void)
427 /* The code is optimised for these values only */
428 if (BITS_IN_JSAMPLE != 8)
430 if (sizeof(JDIMENSION) != 4)
433 if (simd_support & JSIMD_MMI)
/*
 * Capability query.  The visible conditions test BITS_IN_JSAMPLE,
 * sizeof(JDIMENSION) and the JSIMD_MMI bit of simd_support; the statements
 * they guard are not shown in this excerpt.
 */
440 jsimd_can_h2v1_fancy_upsample(void)
444 /* The code is optimised for these values only */
445 if (BITS_IN_JSAMPLE != 8)
447 if (sizeof(JDIMENSION) != 4)
450 if (simd_support & JSIMD_MMI)
/*
 * Forwards to jsimd_h2v2_fancy_upsample_mmi() with the maximum vertical
 * sampling factor, the component's downsampled width and the input data; the
 * end of the argument list is not shown in this excerpt.
 */
457 jsimd_h2v2_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
458 JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
460 jsimd_h2v2_fancy_upsample_mmi(cinfo->max_v_samp_factor,
461 compptr->downsampled_width, input_data,
/*
 * Forwards to jsimd_h2v1_fancy_upsample_mmi() with the maximum vertical
 * sampling factor, the component's downsampled width and the input data; the
 * end of the argument list is not shown in this excerpt.
 */
466 jsimd_h2v1_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
467 JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
469 jsimd_h2v1_fancy_upsample_mmi(cinfo->max_v_samp_factor,
470 compptr->downsampled_width, input_data,
/*
 * Capability query.  The visible conditions test BITS_IN_JSAMPLE,
 * sizeof(JDIMENSION) and the JSIMD_MMI bit of simd_support; the statements
 * they guard are not shown in this excerpt.
 */
475 jsimd_can_h2v2_merged_upsample(void)
479 /* The code is optimised for these values only */
480 if (BITS_IN_JSAMPLE != 8)
482 if (sizeof(JDIMENSION) != 4)
485 if (simd_support & JSIMD_MMI)
/*
 * Capability query.  The visible conditions test BITS_IN_JSAMPLE,
 * sizeof(JDIMENSION) and the JSIMD_MMI bit of simd_support; the statements
 * they guard are not shown in this excerpt.
 */
492 jsimd_can_h2v1_merged_upsample(void)
496 /* The code is optimised for these values only */
497 if (BITS_IN_JSAMPLE != 8)
499 if (sizeof(JDIMENSION) != 4)
502 if (simd_support & JSIMD_MMI)
/*
 * Picks one of seven MMI merged-upsample routines according to
 * cinfo->out_color_space and calls it with the output width, the input
 * buffer, the row-group counter and the output buffer.  The case labels are
 * not shown in this excerpt.
 */
509 jsimd_h2v2_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
510 JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)
/* Pointer to the routine selected by the switch below. */
512 void (*mmifct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
514 switch (cinfo->out_color_space) {
516 mmifct = jsimd_h2v2_extrgb_merged_upsample_mmi;
520 mmifct = jsimd_h2v2_extrgbx_merged_upsample_mmi;
523 mmifct = jsimd_h2v2_extbgr_merged_upsample_mmi;
527 mmifct = jsimd_h2v2_extbgrx_merged_upsample_mmi;
531 mmifct = jsimd_h2v2_extxbgr_merged_upsample_mmi;
535 mmifct = jsimd_h2v2_extxrgb_merged_upsample_mmi;
538 mmifct = jsimd_h2v2_merged_upsample_mmi;
542 mmifct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
/*
 * Picks one of seven MMI merged-upsample routines according to
 * cinfo->out_color_space and calls it with the output width, the input
 * buffer, the row-group counter and the output buffer.  The case labels are
 * not shown in this excerpt.
 */
546 jsimd_h2v1_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
547 JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)
/* Pointer to the routine selected by the switch below. */
549 void (*mmifct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
551 switch (cinfo->out_color_space) {
553 mmifct = jsimd_h2v1_extrgb_merged_upsample_mmi;
557 mmifct = jsimd_h2v1_extrgbx_merged_upsample_mmi;
560 mmifct = jsimd_h2v1_extbgr_merged_upsample_mmi;
564 mmifct = jsimd_h2v1_extbgrx_merged_upsample_mmi;
568 mmifct = jsimd_h2v1_extxbgr_merged_upsample_mmi;
572 mmifct = jsimd_h2v1_extxrgb_merged_upsample_mmi;
575 mmifct = jsimd_h2v1_merged_upsample_mmi;
579 mmifct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
/* Sample conversion: two capability queries and the two matching entry
 * points.  Only (partial) signatures are visible in this excerpt; the bodies
 * are not shown. */
583 jsimd_can_convsamp(void)
589 jsimd_can_convsamp_float(void)
595 jsimd_convsamp(JSAMPARRAY sample_data, JDIMENSION start_col,
601 jsimd_convsamp_float(JSAMPARRAY sample_data, JDIMENSION start_col,
602 FAST_FLOAT *workspace)
/*
 * Capability query.  The visible conditions test sizeof(DCTELEM) and the
 * JSIMD_MMI bit of simd_support; other lines of the function, including the
 * guarded statements, are not shown in this excerpt.
 */
607 jsimd_can_fdct_islow(void)
611 /* The code is optimised for these values only */
614 if (sizeof(DCTELEM) != 2)
617 if (simd_support & JSIMD_MMI)
/*
 * Capability query.  The visible conditions test sizeof(DCTELEM) and the
 * JSIMD_MMI bit of simd_support; other lines of the function, including the
 * guarded statements, are not shown in this excerpt.
 */
624 jsimd_can_fdct_ifast(void)
628 /* The code is optimised for these values only */
631 if (sizeof(DCTELEM) != 2)
634 if (simd_support & JSIMD_MMI)
/* Only the signature is visible in this excerpt; the body is not shown. */
641 jsimd_can_fdct_float(void)
/* Forwards the coefficient block 'data' to jsimd_fdct_islow_mmi(). */
647 jsimd_fdct_islow(DCTELEM *data)
649 jsimd_fdct_islow_mmi(data);
/* Forwards the coefficient block 'data' to jsimd_fdct_ifast_mmi(). */
653 jsimd_fdct_ifast(DCTELEM *data)
655 jsimd_fdct_ifast_mmi(data);
/* Only the signature is visible in this excerpt; the body is not shown. */
659 jsimd_fdct_float(FAST_FLOAT *data)
/*
 * Capability query.  The visible conditions test sizeof(JCOEF),
 * sizeof(DCTELEM) and the JSIMD_MMI bit of simd_support; other lines of the
 * function, including the guarded statements, are not shown in this excerpt.
 */
664 jsimd_can_quantize(void)
668 /* The code is optimised for these values only */
671 if (sizeof(JCOEF) != 2)
673 if (sizeof(DCTELEM) != 2)
676 if (simd_support & JSIMD_MMI)
/* Only the signature is visible in this excerpt; the body is not shown. */
683 jsimd_can_quantize_float(void)
/* Forwards coef_block, divisors and workspace, in that order, to
 * jsimd_quantize_mmi(). */
689 jsimd_quantize(JCOEFPTR coef_block, DCTELEM *divisors, DCTELEM *workspace)
691 jsimd_quantize_mmi(coef_block, divisors, workspace);
/* Only the signature is visible in this excerpt; the body is not shown. */
695 jsimd_quantize_float(JCOEFPTR coef_block, FAST_FLOAT *divisors,
696 FAST_FLOAT *workspace)
/* Reduced-size inverse DCTs (2x2, 4x4, 6x6, 12x12): four capability queries
 * followed by the four matching entry points.  Only their signatures are
 * visible in this excerpt; the bodies are not shown. */
701 jsimd_can_idct_2x2(void)
707 jsimd_can_idct_4x4(void)
713 jsimd_can_idct_6x6(void)
719 jsimd_can_idct_12x12(void)
725 jsimd_idct_2x2(j_decompress_ptr cinfo, jpeg_component_info *compptr,
726 JCOEFPTR coef_block, JSAMPARRAY output_buf,
727 JDIMENSION output_col)
732 jsimd_idct_4x4(j_decompress_ptr cinfo, jpeg_component_info *compptr,
733 JCOEFPTR coef_block, JSAMPARRAY output_buf,
734 JDIMENSION output_col)
739 jsimd_idct_6x6(j_decompress_ptr cinfo, jpeg_component_info *compptr,
740 JCOEFPTR coef_block, JSAMPARRAY output_buf,
741 JDIMENSION output_col)
746 jsimd_idct_12x12(j_decompress_ptr cinfo, jpeg_component_info *compptr,
747 JCOEFPTR coef_block, JSAMPARRAY output_buf,
748 JDIMENSION output_col)
/*
 * Capability query.  The visible conditions test sizeof(JCOEF),
 * BITS_IN_JSAMPLE, sizeof(JDIMENSION), sizeof(ISLOW_MULT_TYPE) and the
 * JSIMD_MMI bit of simd_support; other lines of the function, including the
 * guarded statements, are not shown in this excerpt.
 */
753 jsimd_can_idct_islow(void)
757 /* The code is optimised for these values only */
760 if (sizeof(JCOEF) != 2)
762 if (BITS_IN_JSAMPLE != 8)
764 if (sizeof(JDIMENSION) != 4)
766 if (sizeof(ISLOW_MULT_TYPE) != 2)
769 if (simd_support & JSIMD_MMI)
/*
 * Capability query.  The visible conditions test sizeof(JCOEF),
 * BITS_IN_JSAMPLE, sizeof(JDIMENSION), sizeof(IFAST_MULT_TYPE),
 * IFAST_SCALE_BITS and the JSIMD_MMI bit of simd_support; other lines of the
 * function, including the guarded statements, are not shown in this excerpt.
 */
776 jsimd_can_idct_ifast(void)
780 /* The code is optimised for these values only */
783 if (sizeof(JCOEF) != 2)
785 if (BITS_IN_JSAMPLE != 8)
787 if (sizeof(JDIMENSION) != 4)
789 if (sizeof(IFAST_MULT_TYPE) != 2)
791 if (IFAST_SCALE_BITS != 2)
794 if (simd_support & JSIMD_MMI)
/* Only the signature is visible in this excerpt; the body is not shown. */
801 jsimd_can_idct_float(void)
/*
 * Forwards to jsimd_idct_islow_mmi() with the component's dct_table, the
 * coefficient block, the output buffer and the output column.  cinfo is not
 * referenced by the visible call.
 */
807 jsimd_idct_islow(j_decompress_ptr cinfo, jpeg_component_info *compptr,
808 JCOEFPTR coef_block, JSAMPARRAY output_buf,
809 JDIMENSION output_col)
811 jsimd_idct_islow_mmi(compptr->dct_table, coef_block, output_buf, output_col);
/*
 * Forwards to jsimd_idct_ifast_mmi() with the component's dct_table, the
 * coefficient block, the output buffer and the output column.  cinfo is not
 * referenced by the visible call.
 */
815 jsimd_idct_ifast(j_decompress_ptr cinfo, jpeg_component_info *compptr,
816 JCOEFPTR coef_block, JSAMPARRAY output_buf,
817 JDIMENSION output_col)
819 jsimd_idct_ifast_mmi(compptr->dct_table, coef_block, output_buf, output_col);
/* Only the signature is visible in this excerpt; the body is not shown. */
823 jsimd_idct_float(j_decompress_ptr cinfo, jpeg_component_info *compptr,
824 JCOEFPTR coef_block, JSAMPARRAY output_buf,
825 JDIMENSION output_col)
/* Entropy-coding hooks: capability queries and entry points for Huffman block
 * encoding and for the AC first/refine preparation steps.  Only their
 * signatures are visible in this excerpt; the bodies are not shown. */
830 jsimd_can_huff_encode_one_block(void)
836 jsimd_huff_encode_one_block(void *state, JOCTET *buffer, JCOEFPTR block,
837 int last_dc_val, c_derived_tbl *dctbl,
838 c_derived_tbl *actbl)
844 jsimd_can_encode_mcu_AC_first_prepare(void)
850 jsimd_encode_mcu_AC_first_prepare(const JCOEF *block,
851 const int *jpeg_natural_order_start, int Sl,
852 int Al, JCOEF *values, size_t *zerobits)
857 jsimd_can_encode_mcu_AC_refine_prepare(void)
863 jsimd_encode_mcu_AC_refine_prepare(const JCOEF *block,
864 const int *jpeg_natural_order_start, int Sl,
865 int Al, JCOEF *absvalues, size_t *bits)