4 * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
5 * Copyright (C) 2009-2011, 2014, 2016, 2018, 2022, D. R. Commander.
6 * Copyright (C) 2013-2014, MIPS Technologies, Inc., California.
7 * Copyright (C) 2015, 2018, 2022, Matthieu Darbois.
8 * Copyright (C) 2016-2018, Loongson Technology Corporation Limited, BeiJing.
10 * Based on the x86 SIMD extension for IJG JPEG library,
11 * Copyright (C) 1999-2006, MIYASAKA Masaru.
12 * For conditions of distribution and use, see copyright notice in jsimdext.inc
14 * This file contains the interface between the "normal" portions
15 * of the library and the SIMD implementations when running on a
16 * 64-bit MIPS architecture.
19 #define JPEG_INTERNALS
20 #include "../../jinclude.h"
21 #include "../../jpeglib.h"
22 #include "../../jsimd.h"
23 #include "../../jdct.h"
24 #include "../../jsimddct.h"
/* Bitmask of SIMD capabilities (JSIMD_MMI is the only bit this file sets).
 * It starts as all ones; the detection code compares it against ~0U to tell
 * "not probed yet" apart from a real capability mask. */
29 static THREAD_LOCAL unsigned int simd_support = ~0;
31 #if defined(__linux__)
/* Upper bound (1 MiB) against which the /proc/cpuinfo line-buffer size is
 * checked while the detection loop keeps retrying the parse. */
33 #define SOMEWHAT_SANE_PROC_CPUINFO_SIZE_LIMIT (1024 * 1024)
/*
 * Look for a CPU feature name in one line of /proc/cpuinfo text.
 *
 * buffer:  NUL-terminated line; only lines that begin with
 *          "ASEs implemented" are examined.
 * feature: feature name to search for (the caller passes "loongson-mmi").
 *
 * A strstr() hit is accepted only as a separate word: the character before
 * it (if any) and the character tested after it must be whitespace, or the
 * string must end there.
 * NOTE(review): presumably yields nonzero on a match and 0 otherwise --
 * confirm against the return statements.
 */
36 check_feature(char *buffer, char *feature)
/* 16 == strlen("ASEs implemented") */
42 if (strncmp(buffer, "ASEs implemented", 16) != 0)
45 while (isspace(*buffer))
48 /* Check if 'feature' is present in the buffer as a separate word */
49 while ((p = strstr(buffer, feature))) {
/* A hit that is not at the very start must be preceded by whitespace. */
50 if (p > buffer && !isspace(*(p - 1))) {
/* The tested character must be the terminator or whitespace. */
55 if (*p != 0 && !isspace(*p)) {
/*
 * Read /proc/cpuinfo line by line into a malloc()ed buffer of bufsize bytes
 * and set JSIMD_MMI in simd_support when a line lists "loongson-mmi".
 *
 * bufsize: size in bytes of the line buffer to allocate.
 *
 * A line that comes back from fgets() without a newline while the stream is
 * not at EOF did not fit in the buffer.
 * NOTE(review): the caller loops while this returns 0, so 0 presumably means
 * "failed, retry" -- confirm the return convention.
 */
65 parse_proc_cpuinfo(int bufsize)
67 char *buffer = (char *)malloc(bufsize);
75 fd = fopen("/proc/cpuinfo", "r");
77 while (fgets(buffer, bufsize, fd)) {
78 if (!strchr(buffer, '\n') && !feof(fd)) {
79 /* "impossible" happened - insufficient size of the buffer! */
84 if (check_feature(buffer, "loongson-mmi"))
85 simd_support |= JSIMD_MMI;
96 * Check what SIMD accelerations are supported.
/* SIMD capability detection; the outcome is stored in simd_support. */
104 #if defined(__linux__)
105 int bufsize = 1024; /* an initial guess for the line buffer size limit */
/* A value other than the all-ones sentinel means simd_support was already
 * set. NOTE(review): presumably an early exit follows -- confirm. */
108 if (simd_support != ~0U)
113 #if defined(__linux__)
/* On Linux, call parse_proc_cpuinfo() until it reports success; bufsize is
 * checked against the sanity limit inside the loop. */
114 while (!parse_proc_cpuinfo(bufsize)) {
116 if (bufsize > SOMEWHAT_SANE_PROC_CPUINFO_SIZE_LIMIT)
119 #elif defined(__mips_loongson_vector_rev)
120 /* Only enable MMI by default on non-Linux platforms when the compiler flags
122 simd_support |= JSIMD_MMI;
126 /* Force different settings through environment variables */
/* JSIMD_FORCEMMI=1 replaces the detected mask with JSIMD_MMI alone. */
127 env = getenv("JSIMD_FORCEMMI");
128 if ((env != NULL) && (strcmp(env, "1") == 0))
129 simd_support = JSIMD_MMI;
/* JSIMD_FORCENONE=1 is evaluated after JSIMD_FORCEMMI.
 * NOTE(review): presumably it clears simd_support -- confirm. */
130 env = getenv("JSIMD_FORCENONE");
131 if ((env != NULL) && (strcmp(env, "1") == 0))
/*
 * Capability check for the MMI rgb_ycc conversion path.
 * Tests made here: BITS_IN_JSAMPLE is 8, JDIMENSION is 4 bytes wide,
 * RGB_PIXELSIZE is 3 or 4, and simd_support has the JSIMD_MMI bit set.
 */
137 jsimd_can_rgb_ycc(void)
141 /* The code is optimised for these values only */
142 if (BITS_IN_JSAMPLE != 8)
144 if (sizeof(JDIMENSION) != 4)
146 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
149 if (simd_support & JSIMD_MMI)
/*
 * Capability check for the MMI rgb_gray conversion path.
 * Tests made here: BITS_IN_JSAMPLE is 8, JDIMENSION is 4 bytes wide,
 * RGB_PIXELSIZE is 3 or 4, and simd_support has the JSIMD_MMI bit set.
 */
156 jsimd_can_rgb_gray(void)
160 /* The code is optimised for these values only */
161 if (BITS_IN_JSAMPLE != 8)
163 if (sizeof(JDIMENSION) != 4)
165 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
168 if (simd_support & JSIMD_MMI)
/*
 * Capability check for the MMI ycc_rgb conversion path.
 * Tests made here: BITS_IN_JSAMPLE is 8, JDIMENSION is 4 bytes wide,
 * RGB_PIXELSIZE is 3 or 4, and simd_support has the JSIMD_MMI bit set.
 */
175 jsimd_can_ycc_rgb(void)
179 /* The code is optimised for these values only */
180 if (BITS_IN_JSAMPLE != 8)
182 if (sizeof(JDIMENSION) != 4)
184 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
187 if (simd_support & JSIMD_MMI)
194 jsimd_can_ycc_rgb565(void)
200 jsimd_c_can_null_convert(void)
/*
 * Dispatch an rgb_ycc conversion to the matching MMI routine.
 *
 * cinfo->in_color_space selects the routine: one variant per extended pixel
 * layout (extrgb, extrgbx, extbgr, extbgrx, extxbgr, extxrgb) plus the plain
 * rgb variant. The chosen routine is then called with cinfo->image_width,
 * input_buf, output_buf, output_row and num_rows.
 */
206 jsimd_rgb_ycc_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
207 JSAMPIMAGE output_buf, JDIMENSION output_row,
/* Pointer to the routine chosen by the switch below. */
210 void (*mmifct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
212 switch (cinfo->in_color_space) {
214 mmifct = jsimd_extrgb_ycc_convert_mmi;
218 mmifct = jsimd_extrgbx_ycc_convert_mmi;
221 mmifct = jsimd_extbgr_ycc_convert_mmi;
225 mmifct = jsimd_extbgrx_ycc_convert_mmi;
229 mmifct = jsimd_extxbgr_ycc_convert_mmi;
233 mmifct = jsimd_extxrgb_ycc_convert_mmi;
236 mmifct = jsimd_rgb_ycc_convert_mmi;
240 mmifct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
/*
 * Dispatch an rgb_gray conversion to the matching MMI routine.
 *
 * cinfo->in_color_space selects the routine: one variant per extended pixel
 * layout (extrgb, extrgbx, extbgr, extbgrx, extxbgr, extxrgb) plus the plain
 * rgb variant. The chosen routine is then called with cinfo->image_width,
 * input_buf, output_buf, output_row and num_rows.
 */
244 jsimd_rgb_gray_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
245 JSAMPIMAGE output_buf, JDIMENSION output_row,
/* Pointer to the routine chosen by the switch below. */
248 void (*mmifct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
250 switch (cinfo->in_color_space) {
252 mmifct = jsimd_extrgb_gray_convert_mmi;
256 mmifct = jsimd_extrgbx_gray_convert_mmi;
259 mmifct = jsimd_extbgr_gray_convert_mmi;
263 mmifct = jsimd_extbgrx_gray_convert_mmi;
267 mmifct = jsimd_extxbgr_gray_convert_mmi;
271 mmifct = jsimd_extxrgb_gray_convert_mmi;
274 mmifct = jsimd_rgb_gray_convert_mmi;
278 mmifct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
/*
 * Dispatch a ycc_rgb conversion to the matching MMI routine.
 *
 * cinfo->out_color_space selects the routine: one variant per extended pixel
 * layout (extrgb, extrgbx, extbgr, extbgrx, extxbgr, extxrgb) plus the plain
 * rgb variant. The chosen routine is then called with cinfo->output_width,
 * input_buf, input_row, output_buf and num_rows.
 */
282 jsimd_ycc_rgb_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
283 JDIMENSION input_row, JSAMPARRAY output_buf,
/* Pointer to the routine chosen by the switch below. */
286 void (*mmifct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
288 switch (cinfo->out_color_space) {
290 mmifct = jsimd_ycc_extrgb_convert_mmi;
294 mmifct = jsimd_ycc_extrgbx_convert_mmi;
297 mmifct = jsimd_ycc_extbgr_convert_mmi;
301 mmifct = jsimd_ycc_extbgrx_convert_mmi;
305 mmifct = jsimd_ycc_extxbgr_convert_mmi;
309 mmifct = jsimd_ycc_extxrgb_convert_mmi;
312 mmifct = jsimd_ycc_rgb_convert_mmi;
316 mmifct(cinfo->output_width, input_buf, input_row, output_buf, num_rows);
320 jsimd_ycc_rgb565_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
321 JDIMENSION input_row, JSAMPARRAY output_buf,
327 jsimd_c_null_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
328 JSAMPIMAGE output_buf, JDIMENSION output_row,
/*
 * Capability check for the MMI h2v2 downsampling path.
 * Tests made here: BITS_IN_JSAMPLE is 8, JDIMENSION is 4 bytes wide, and
 * simd_support has the JSIMD_MMI bit set.
 */
334 jsimd_can_h2v2_downsample(void)
338 /* The code is optimised for these values only */
339 if (BITS_IN_JSAMPLE != 8)
341 if (sizeof(JDIMENSION) != 4)
344 if (simd_support & JSIMD_MMI)
351 jsimd_can_h2v2_smooth_downsample(void)
357 jsimd_can_h2v1_downsample(void)
/*
 * Forward an h2v2 downsample request to jsimd_h2v2_downsample_mmi().
 * The image width and maximum vertical sampling factor come from cinfo; the
 * component's vertical sampling factor and width in blocks come from
 * compptr; input_data and output_data are passed through unchanged.
 */
363 jsimd_h2v2_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
364 JSAMPARRAY input_data, JSAMPARRAY output_data)
366 jsimd_h2v2_downsample_mmi(cinfo->image_width, cinfo->max_v_samp_factor,
367 compptr->v_samp_factor, compptr->width_in_blocks,
368 input_data, output_data);
372 jsimd_h2v2_smooth_downsample(j_compress_ptr cinfo,
373 jpeg_component_info *compptr,
374 JSAMPARRAY input_data, JSAMPARRAY output_data)
379 jsimd_h2v1_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
380 JSAMPARRAY input_data, JSAMPARRAY output_data)
385 jsimd_can_h2v2_upsample(void)
391 jsimd_can_h2v1_upsample(void)
397 jsimd_can_int_upsample(void)
403 jsimd_h2v2_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
404 JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
409 jsimd_h2v1_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
410 JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
415 jsimd_int_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
416 JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
/*
 * Capability check for the MMI h2v2 fancy upsampling path.
 * Tests made here: BITS_IN_JSAMPLE is 8, JDIMENSION is 4 bytes wide, and
 * simd_support has the JSIMD_MMI bit set.
 */
421 jsimd_can_h2v2_fancy_upsample(void)
425 /* The code is optimised for these values only */
426 if (BITS_IN_JSAMPLE != 8)
428 if (sizeof(JDIMENSION) != 4)
431 if (simd_support & JSIMD_MMI)
/*
 * Capability check for the MMI h2v1 fancy upsampling path.
 * Tests made here: BITS_IN_JSAMPLE is 8, JDIMENSION is 4 bytes wide, and
 * simd_support has the JSIMD_MMI bit set.
 */
438 jsimd_can_h2v1_fancy_upsample(void)
442 /* The code is optimised for these values only */
443 if (BITS_IN_JSAMPLE != 8)
445 if (sizeof(JDIMENSION) != 4)
448 if (simd_support & JSIMD_MMI)
/*
 * Forward an h2v2 fancy upsample request to jsimd_h2v2_fancy_upsample_mmi(),
 * passing cinfo->max_v_samp_factor, compptr->downsampled_width and
 * input_data as the leading arguments.
 */
455 jsimd_h2v2_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
456 JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
458 jsimd_h2v2_fancy_upsample_mmi(cinfo->max_v_samp_factor,
459 compptr->downsampled_width, input_data,
/*
 * Forward an h2v1 fancy upsample request to jsimd_h2v1_fancy_upsample_mmi(),
 * passing cinfo->max_v_samp_factor, compptr->downsampled_width and
 * input_data as the leading arguments.
 */
464 jsimd_h2v1_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
465 JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
467 jsimd_h2v1_fancy_upsample_mmi(cinfo->max_v_samp_factor,
468 compptr->downsampled_width, input_data,
/*
 * Capability check for the MMI h2v2 merged upsampling path.
 * Tests made here: BITS_IN_JSAMPLE is 8, JDIMENSION is 4 bytes wide, and
 * simd_support has the JSIMD_MMI bit set.
 */
473 jsimd_can_h2v2_merged_upsample(void)
477 /* The code is optimised for these values only */
478 if (BITS_IN_JSAMPLE != 8)
480 if (sizeof(JDIMENSION) != 4)
483 if (simd_support & JSIMD_MMI)
/*
 * Capability check for the MMI h2v1 merged upsampling path.
 * Tests made here: BITS_IN_JSAMPLE is 8, JDIMENSION is 4 bytes wide, and
 * simd_support has the JSIMD_MMI bit set.
 */
490 jsimd_can_h2v1_merged_upsample(void)
494 /* The code is optimised for these values only */
495 if (BITS_IN_JSAMPLE != 8)
497 if (sizeof(JDIMENSION) != 4)
500 if (simd_support & JSIMD_MMI)
/*
 * Dispatch an h2v2 merged upsample to the matching MMI routine.
 *
 * cinfo->out_color_space selects the routine: one variant per extended pixel
 * layout (extrgb, extrgbx, extbgr, extbgrx, extxbgr, extxrgb) plus the plain
 * variant. The chosen routine is then called with cinfo->output_width,
 * input_buf, in_row_group_ctr and output_buf.
 */
507 jsimd_h2v2_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
508 JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)
/* Pointer to the routine chosen by the switch below. */
510 void (*mmifct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
512 switch (cinfo->out_color_space) {
514 mmifct = jsimd_h2v2_extrgb_merged_upsample_mmi;
518 mmifct = jsimd_h2v2_extrgbx_merged_upsample_mmi;
521 mmifct = jsimd_h2v2_extbgr_merged_upsample_mmi;
525 mmifct = jsimd_h2v2_extbgrx_merged_upsample_mmi;
529 mmifct = jsimd_h2v2_extxbgr_merged_upsample_mmi;
533 mmifct = jsimd_h2v2_extxrgb_merged_upsample_mmi;
536 mmifct = jsimd_h2v2_merged_upsample_mmi;
540 mmifct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
/*
 * Dispatch an h2v1 merged upsample to the matching MMI routine.
 *
 * cinfo->out_color_space selects the routine: one variant per extended pixel
 * layout (extrgb, extrgbx, extbgr, extbgrx, extxbgr, extxrgb) plus the plain
 * variant. The chosen routine is then called with cinfo->output_width,
 * input_buf, in_row_group_ctr and output_buf.
 */
544 jsimd_h2v1_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
545 JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)
/* Pointer to the routine chosen by the switch below. */
547 void (*mmifct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
549 switch (cinfo->out_color_space) {
551 mmifct = jsimd_h2v1_extrgb_merged_upsample_mmi;
555 mmifct = jsimd_h2v1_extrgbx_merged_upsample_mmi;
558 mmifct = jsimd_h2v1_extbgr_merged_upsample_mmi;
562 mmifct = jsimd_h2v1_extbgrx_merged_upsample_mmi;
566 mmifct = jsimd_h2v1_extxbgr_merged_upsample_mmi;
570 mmifct = jsimd_h2v1_extxrgb_merged_upsample_mmi;
573 mmifct = jsimd_h2v1_merged_upsample_mmi;
577 mmifct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
581 jsimd_can_convsamp(void)
587 jsimd_can_convsamp_float(void)
593 jsimd_convsamp(JSAMPARRAY sample_data, JDIMENSION start_col,
599 jsimd_convsamp_float(JSAMPARRAY sample_data, JDIMENSION start_col,
600 FAST_FLOAT *workspace)
/*
 * Capability check for the MMI fdct_islow path.
 * Tests made here include: DCTELEM is 2 bytes wide and simd_support has the
 * JSIMD_MMI bit set.
 */
605 jsimd_can_fdct_islow(void)
609 /* The code is optimised for these values only */
612 if (sizeof(DCTELEM) != 2)
615 if (simd_support & JSIMD_MMI)
/*
 * Capability check for the MMI fdct_ifast path.
 * Tests made here include: DCTELEM is 2 bytes wide and simd_support has the
 * JSIMD_MMI bit set.
 */
622 jsimd_can_fdct_ifast(void)
626 /* The code is optimised for these values only */
629 if (sizeof(DCTELEM) != 2)
632 if (simd_support & JSIMD_MMI)
639 jsimd_can_fdct_float(void)
/* Thin wrapper: hands the DCTELEM block pointed to by data straight to
 * jsimd_fdct_islow_mmi(). */
645 jsimd_fdct_islow(DCTELEM *data)
647 jsimd_fdct_islow_mmi(data);
/* Thin wrapper: hands the DCTELEM block pointed to by data straight to
 * jsimd_fdct_ifast_mmi(). */
651 jsimd_fdct_ifast(DCTELEM *data)
653 jsimd_fdct_ifast_mmi(data);
657 jsimd_fdct_float(FAST_FLOAT *data)
/*
 * Capability check for the MMI quantization path.
 * Tests made here include: JCOEF and DCTELEM are both 2 bytes wide and
 * simd_support has the JSIMD_MMI bit set.
 */
662 jsimd_can_quantize(void)
666 /* The code is optimised for these values only */
669 if (sizeof(JCOEF) != 2)
671 if (sizeof(DCTELEM) != 2)
674 if (simd_support & JSIMD_MMI)
681 jsimd_can_quantize_float(void)
/* Thin wrapper: passes coef_block, divisors and workspace unchanged to
 * jsimd_quantize_mmi(). */
687 jsimd_quantize(JCOEFPTR coef_block, DCTELEM *divisors, DCTELEM *workspace)
689 jsimd_quantize_mmi(coef_block, divisors, workspace);
693 jsimd_quantize_float(JCOEFPTR coef_block, FAST_FLOAT *divisors,
694 FAST_FLOAT *workspace)
699 jsimd_can_idct_2x2(void)
705 jsimd_can_idct_4x4(void)
711 jsimd_can_idct_6x6(void)
717 jsimd_can_idct_12x12(void)
723 jsimd_idct_2x2(j_decompress_ptr cinfo, jpeg_component_info *compptr,
724 JCOEFPTR coef_block, JSAMPARRAY output_buf,
725 JDIMENSION output_col)
730 jsimd_idct_4x4(j_decompress_ptr cinfo, jpeg_component_info *compptr,
731 JCOEFPTR coef_block, JSAMPARRAY output_buf,
732 JDIMENSION output_col)
737 jsimd_idct_6x6(j_decompress_ptr cinfo, jpeg_component_info *compptr,
738 JCOEFPTR coef_block, JSAMPARRAY output_buf,
739 JDIMENSION output_col)
744 jsimd_idct_12x12(j_decompress_ptr cinfo, jpeg_component_info *compptr,
745 JCOEFPTR coef_block, JSAMPARRAY output_buf,
746 JDIMENSION output_col)
/*
 * Capability check for the MMI idct_islow path.
 * Tests made here include: JCOEF is 2 bytes wide, BITS_IN_JSAMPLE is 8,
 * JDIMENSION is 4 bytes wide, ISLOW_MULT_TYPE is 2 bytes wide, and
 * simd_support has the JSIMD_MMI bit set.
 */
751 jsimd_can_idct_islow(void)
755 /* The code is optimised for these values only */
758 if (sizeof(JCOEF) != 2)
760 if (BITS_IN_JSAMPLE != 8)
762 if (sizeof(JDIMENSION) != 4)
764 if (sizeof(ISLOW_MULT_TYPE) != 2)
767 if (simd_support & JSIMD_MMI)
/*
 * Capability check for the MMI idct_ifast path.
 * Tests made here include: JCOEF is 2 bytes wide, BITS_IN_JSAMPLE is 8,
 * JDIMENSION is 4 bytes wide, IFAST_MULT_TYPE is 2 bytes wide,
 * IFAST_SCALE_BITS is 2, and simd_support has the JSIMD_MMI bit set.
 */
774 jsimd_can_idct_ifast(void)
778 /* The code is optimised for these values only */
781 if (sizeof(JCOEF) != 2)
783 if (BITS_IN_JSAMPLE != 8)
785 if (sizeof(JDIMENSION) != 4)
787 if (sizeof(IFAST_MULT_TYPE) != 2)
789 if (IFAST_SCALE_BITS != 2)
792 if (simd_support & JSIMD_MMI)
799 jsimd_can_idct_float(void)
/*
 * Forward an idct_islow request to jsimd_idct_islow_mmi(), passing
 * compptr->dct_table together with coef_block, output_buf and output_col.
 * cinfo is not used by the visible call.
 */
805 jsimd_idct_islow(j_decompress_ptr cinfo, jpeg_component_info *compptr,
806 JCOEFPTR coef_block, JSAMPARRAY output_buf,
807 JDIMENSION output_col)
809 jsimd_idct_islow_mmi(compptr->dct_table, coef_block, output_buf, output_col);
/*
 * Forward an idct_ifast request to jsimd_idct_ifast_mmi(), passing
 * compptr->dct_table together with coef_block, output_buf and output_col.
 * cinfo is not used by the visible call.
 */
813 jsimd_idct_ifast(j_decompress_ptr cinfo, jpeg_component_info *compptr,
814 JCOEFPTR coef_block, JSAMPARRAY output_buf,
815 JDIMENSION output_col)
817 jsimd_idct_ifast_mmi(compptr->dct_table, coef_block, output_buf, output_col);
821 jsimd_idct_float(j_decompress_ptr cinfo, jpeg_component_info *compptr,
822 JCOEFPTR coef_block, JSAMPARRAY output_buf,
823 JDIMENSION output_col)
828 jsimd_can_huff_encode_one_block(void)
834 jsimd_huff_encode_one_block(void *state, JOCTET *buffer, JCOEFPTR block,
835 int last_dc_val, c_derived_tbl *dctbl,
836 c_derived_tbl *actbl)
842 jsimd_can_encode_mcu_AC_first_prepare(void)
848 jsimd_encode_mcu_AC_first_prepare(const JCOEF *block,
849 const int *jpeg_natural_order_start, int Sl,
850 int Al, UJCOEF *values, size_t *zerobits)
855 jsimd_can_encode_mcu_AC_refine_prepare(void)
861 jsimd_encode_mcu_AC_refine_prepare(const JCOEF *block,
862 const int *jpeg_natural_order_start, int Sl,
863 int Al, UJCOEF *absvalues, size_t *bits)