4 * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
5 * Copyright (C) 2011, Nokia Corporation and/or its subsidiary(-ies).
6 * Copyright (C) 2009-2011, 2013-2014, 2016, 2018, 2022, D. R. Commander.
7 * Copyright (C) 2015-2016, 2018, 2022, Matthieu Darbois.
8 * Copyright (C) 2019, Google LLC.
9 * Copyright (C) 2020, Arm Limited.
11 * Based on the x86 SIMD extension for IJG JPEG library,
12 * Copyright (C) 1999-2006, MIYASAKA Masaru.
13 * For conditions of distribution and use, see copyright notice in jsimdext.inc
15 * This file contains the interface between the "normal" portions
16 * of the library and the SIMD implementations when running on a
17 * 32-bit Arm architecture.
20 #define JPEG_INTERNALS
21 #include "../../../jinclude.h"
22 #include "../../../jpeglib.h"
23 #include "../../../jsimd.h"
24 #include "../../../jdct.h"
25 #include "../../../jsimddct.h"
26 #include "../../jsimd.h"
/* SIMD state, declared with the THREAD_LOCAL macro. simd_support starts as ~0,
 * the sentinel that the detection code compares against (simd_support != ~0U)
 * before any feature bit is set. simd_huffman starts as 1 and is tested
 * together with JSIMD_NEON in jsimd_can_huff_encode_one_block(). */
30 static THREAD_LOCAL unsigned int simd_support = ~0;
31 static THREAD_LOCAL unsigned int simd_huffman = 1;
/* Runtime detection code is only needed when Neon is not already enabled at
 * build time (__ARM_NEON__ undefined) and the target is Linux or Android.
 * NOTE(review): the matching #endif is not visible in this excerpt. */
33 #if !defined(__ARM_NEON__) && (defined(__linux__) || defined(ANDROID) || defined(__ANDROID__))
/* 1 MiB ceiling that the /proc/cpuinfo line-buffer size is compared against. */
35 #define SOMEWHAT_SANE_PROC_CPUINFO_SIZE_LIMIT (1024 * 1024)
/* check_feature(): looks for the word `feature` in one text line held in
 * `buffer`. Visible logic: the line is compared against the 8-character
 * prefix Features, leading whitespace is skipped, and every strstr() hit is
 * screened by two tests -- the character before the hit (when there is one)
 * must be whitespace, and *p must then be NUL or whitespace (p is presumably
 * advanced past the match first; that statement is not visible here).
 * NOTE(review): the return type, the return statements and the pointer
 * advances are not visible in this excerpt.
 * NOTE(review): isspace() receives a plain char; the C library expects a value
 * representable as unsigned char (or EOF) -- consider a cast. */
38 check_feature(char *buffer, char *feature)
44 if (strncmp(buffer, "Features", 8) != 0)
47 while (isspace(*buffer))
50 /* Check if 'feature' is present in the buffer as a separate word */
51 while ((p = strstr(buffer, feature))) {
52 if (p > buffer && !isspace(*(p - 1))) {
57 if (*p != 0 && !isspace(*p)) {
/* parse_proc_cpuinfo(): allocates a line buffer of bufsize bytes with malloc(),
 * opens /proc/cpuinfo for reading and walks it line by line with fgets().
 * A line without a newline while the stream is not at end-of-file means the
 * buffer was too small for that line. When check_feature() reports neon for a
 * line, JSIMD_NEON is OR-ed into simd_support.
 * NOTE(review): failure checks for malloc()/fopen(), the cleanup (free/fclose)
 * and the return values are not visible in this excerpt; the caller keeps
 * retrying while this function returns zero. */
67 parse_proc_cpuinfo(int bufsize)
69 char *buffer = (char *)malloc(bufsize);
77 fd = fopen("/proc/cpuinfo", "r");
79 while (fgets(buffer, bufsize, fd)) {
80 if (!strchr(buffer, '\n') && !feof(fd)) {
81 /* "impossible" happened - insufficient size of the buffer! */
86 if (check_feature(buffer, "neon"))
87 simd_support |= JSIMD_NEON;
98 * Check what SIMD accelerations are supported.
/* SIMD capability detection (the enclosing function header is not visible in
 * this excerpt). Visible steps:
 *  - simd_support is compared with ~0U, its initial sentinel value (the action
 *    taken when it differs is not visible here);
 *  - when __ARM_NEON__ is defined at build time, JSIMD_NEON is set directly;
 *  - otherwise, on Linux/Android, parse_proc_cpuinfo() is retried while it
 *    returns zero, starting from a 1024-byte line buffer; the size is compared
 *    with SOMEWHAT_SANE_PROC_CPUINFO_SIZE_LIMIT (the growth step and the
 *    reaction to exceeding the limit are not visible here);
 *  - the environment variables JSIMD_FORCENEON, JSIMD_FORCENONE and
 *    JSIMD_NOHUFFENC are honoured only when their value is exactly 1.
 *    JSIMD_FORCENEON replaces simd_support with JSIMD_NEON; the effects of the
 *    other two are not visible in this excerpt.
 * NOTE(review): GETENV_S presumably returns zero on success -- confirm. */
106 #if !defined(__ARM_NEON__) && (defined(__linux__) || defined(ANDROID) || defined(__ANDROID__))
107 int bufsize = 1024; /* an initial guess for the line buffer size limit */
110 if (simd_support != ~0U)
115 #if defined(__ARM_NEON__)
116 simd_support |= JSIMD_NEON;
117 #elif defined(__linux__) || defined(ANDROID) || defined(__ANDROID__)
118 /* We still have a chance to use Neon regardless of globally used
119 * -mcpu/-mfpu options passed to gcc by performing runtime detection via
120 * /proc/cpuinfo parsing on linux/android */
121 while (!parse_proc_cpuinfo(bufsize)) {
123 if (bufsize > SOMEWHAT_SANE_PROC_CPUINFO_SIZE_LIMIT)
129 /* Force different settings through environment variables */
130 if (!GETENV_S(env, 2, "JSIMD_FORCENEON") && !strcmp(env, "1"))
131 simd_support = JSIMD_NEON;
132 if (!GETENV_S(env, 2, "JSIMD_FORCENONE") && !strcmp(env, "1"))
134 if (!GETENV_S(env, 2, "JSIMD_NOHUFFENC") && !strcmp(env, "1"))
/* jsimd_can_rgb_ycc(): query that tests compile-time constants and then the
 * JSIMD_NEON bit of simd_support. Visible guards: BITS_IN_JSAMPLE other than
 * 8, a JDIMENSION that is not 4 bytes, and an RGB_PIXELSIZE that is neither 3
 * nor 4.
 * NOTE(review): the guarded statements and return values are not visible in
 * this excerpt; presumably a failed guard reports that SIMD cannot be used. */
140 jsimd_can_rgb_ycc(void)
144 /* The code is optimised for these values only */
145 if (BITS_IN_JSAMPLE != 8)
147 if (sizeof(JDIMENSION) != 4)
149 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
152 if (simd_support & JSIMD_NEON)
/* jsimd_can_rgb_gray(): query that tests compile-time constants and then the
 * JSIMD_NEON bit of simd_support. Visible guards: BITS_IN_JSAMPLE other than
 * 8, a JDIMENSION that is not 4 bytes, and an RGB_PIXELSIZE that is neither 3
 * nor 4.
 * NOTE(review): the guarded statements and return values are not visible in
 * this excerpt; presumably a failed guard reports that SIMD cannot be used. */
159 jsimd_can_rgb_gray(void)
163 /* The code is optimised for these values only */
164 if (BITS_IN_JSAMPLE != 8)
166 if (sizeof(JDIMENSION) != 4)
168 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
171 if (simd_support & JSIMD_NEON)
/* jsimd_can_ycc_rgb(): query that tests compile-time constants and then the
 * JSIMD_NEON bit of simd_support. Visible guards: BITS_IN_JSAMPLE other than
 * 8, a JDIMENSION that is not 4 bytes, and an RGB_PIXELSIZE that is neither 3
 * nor 4.
 * NOTE(review): the guarded statements and return values are not visible in
 * this excerpt; presumably a failed guard reports that SIMD cannot be used. */
178 jsimd_can_ycc_rgb(void)
182 /* The code is optimised for these values only */
183 if (BITS_IN_JSAMPLE != 8)
185 if (sizeof(JDIMENSION) != 4)
187 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
190 if (simd_support & JSIMD_NEON)
/* jsimd_can_ycc_rgb565(): query that tests compile-time constants and then the
 * JSIMD_NEON bit of simd_support. Visible guards: BITS_IN_JSAMPLE other than 8
 * and a JDIMENSION that is not 4 bytes; no RGB_PIXELSIZE test is visible here.
 * NOTE(review): the guarded statements and return values are not visible in
 * this excerpt. */
197 jsimd_can_ycc_rgb565(void)
201 /* The code is optimised for these values only */
202 if (BITS_IN_JSAMPLE != 8)
204 if (sizeof(JDIMENSION) != 4)
207 if (simd_support & JSIMD_NEON)
/* jsimd_rgb_ycc_convert(): picks a Neon routine through the neonfct function
 * pointer according to cinfo->in_color_space, then calls it with
 * cinfo->image_width, input_buf, output_buf, output_row and num_rows.
 * NOTE(review): the case labels of the switch are not visible in this excerpt;
 * jsimd_extrgb_ycc_convert_neon is assigned twice, presumably once for an
 * explicit case and once as the default -- confirm. */
214 jsimd_rgb_ycc_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
215 JSAMPIMAGE output_buf, JDIMENSION output_row,
218 void (*neonfct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
220 switch (cinfo->in_color_space) {
222 neonfct = jsimd_extrgb_ycc_convert_neon;
226 neonfct = jsimd_extrgbx_ycc_convert_neon;
229 neonfct = jsimd_extbgr_ycc_convert_neon;
233 neonfct = jsimd_extbgrx_ycc_convert_neon;
237 neonfct = jsimd_extxbgr_ycc_convert_neon;
241 neonfct = jsimd_extxrgb_ycc_convert_neon;
244 neonfct = jsimd_extrgb_ycc_convert_neon;
248 neonfct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
/* jsimd_rgb_gray_convert(): picks a Neon routine through the neonfct function
 * pointer according to cinfo->in_color_space, then calls it with
 * cinfo->image_width, input_buf, output_buf, output_row and num_rows.
 * NOTE(review): the case labels of the switch are not visible in this excerpt;
 * jsimd_extrgb_gray_convert_neon is assigned twice, presumably once for an
 * explicit case and once as the default -- confirm. */
252 jsimd_rgb_gray_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
253 JSAMPIMAGE output_buf, JDIMENSION output_row,
256 void (*neonfct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
258 switch (cinfo->in_color_space) {
260 neonfct = jsimd_extrgb_gray_convert_neon;
264 neonfct = jsimd_extrgbx_gray_convert_neon;
267 neonfct = jsimd_extbgr_gray_convert_neon;
271 neonfct = jsimd_extbgrx_gray_convert_neon;
275 neonfct = jsimd_extxbgr_gray_convert_neon;
279 neonfct = jsimd_extxrgb_gray_convert_neon;
282 neonfct = jsimd_extrgb_gray_convert_neon;
286 neonfct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
/* jsimd_ycc_rgb_convert(): picks a Neon routine through the neonfct function
 * pointer according to cinfo->out_color_space, then calls it with
 * cinfo->output_width, input_buf, input_row, output_buf and num_rows.
 * NOTE(review): the case labels of the switch are not visible in this excerpt;
 * jsimd_ycc_extrgb_convert_neon is assigned twice, presumably once for an
 * explicit case and once as the default -- confirm. */
290 jsimd_ycc_rgb_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
291 JDIMENSION input_row, JSAMPARRAY output_buf,
294 void (*neonfct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
296 switch (cinfo->out_color_space) {
298 neonfct = jsimd_ycc_extrgb_convert_neon;
302 neonfct = jsimd_ycc_extrgbx_convert_neon;
305 neonfct = jsimd_ycc_extbgr_convert_neon;
309 neonfct = jsimd_ycc_extbgrx_convert_neon;
313 neonfct = jsimd_ycc_extxbgr_convert_neon;
317 neonfct = jsimd_ycc_extxrgb_convert_neon;
320 neonfct = jsimd_ycc_extrgb_convert_neon;
324 neonfct(cinfo->output_width, input_buf, input_row, output_buf, num_rows);
/* jsimd_ycc_rgb565_convert(): forwards to jsimd_ycc_rgb565_convert_neon() with
 * cinfo->output_width, input_buf, input_row, output_buf and num_rows. No
 * colour-space switch is visible for this variant. */
328 jsimd_ycc_rgb565_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
329 JDIMENSION input_row, JSAMPARRAY output_buf,
332 jsimd_ycc_rgb565_convert_neon(cinfo->output_width, input_buf, input_row,
333 output_buf, num_rows);
/* jsimd_can_h2v2_downsample(): query that tests compile-time constants and
 * then the JSIMD_NEON bit of simd_support. Visible guards: BITS_IN_JSAMPLE
 * other than 8 and a JDIMENSION that is not 4 bytes.
 * NOTE(review): the guarded statements and return values are not visible in
 * this excerpt. */
337 jsimd_can_h2v2_downsample(void)
341 /* The code is optimised for these values only */
342 if (BITS_IN_JSAMPLE != 8)
346 if (sizeof(JDIMENSION) != 4)
349 if (simd_support & JSIMD_NEON)
/* jsimd_can_h2v1_downsample(): query that tests compile-time constants and
 * then the JSIMD_NEON bit of simd_support. Visible guards: BITS_IN_JSAMPLE
 * other than 8 and a JDIMENSION that is not 4 bytes.
 * NOTE(review): the guarded statements and return values are not visible in
 * this excerpt. */
356 jsimd_can_h2v1_downsample(void)
360 /* The code is optimised for these values only */
361 if (BITS_IN_JSAMPLE != 8)
365 if (sizeof(JDIMENSION) != 4)
368 if (simd_support & JSIMD_NEON)
/* jsimd_h2v2_downsample(): forwards to jsimd_h2v2_downsample_neon() with
 * cinfo->image_width, cinfo->max_v_samp_factor, compptr->v_samp_factor,
 * compptr->width_in_blocks, input_data and output_data. */
375 jsimd_h2v2_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
376 JSAMPARRAY input_data, JSAMPARRAY output_data)
378 jsimd_h2v2_downsample_neon(cinfo->image_width, cinfo->max_v_samp_factor,
379 compptr->v_samp_factor, compptr->width_in_blocks,
380 input_data, output_data);
/* jsimd_h2v1_downsample(): forwards to jsimd_h2v1_downsample_neon() with
 * cinfo->image_width, cinfo->max_v_samp_factor, compptr->v_samp_factor,
 * compptr->width_in_blocks, input_data and output_data. */
384 jsimd_h2v1_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
385 JSAMPARRAY input_data, JSAMPARRAY output_data)
387 jsimd_h2v1_downsample_neon(cinfo->image_width, cinfo->max_v_samp_factor,
388 compptr->v_samp_factor, compptr->width_in_blocks,
389 input_data, output_data);
/* jsimd_can_h2v2_upsample(): query that tests compile-time constants and then
 * the JSIMD_NEON bit of simd_support. Visible guards: BITS_IN_JSAMPLE other
 * than 8 and a JDIMENSION that is not 4 bytes.
 * NOTE(review): the guarded statements and return values are not visible in
 * this excerpt. */
393 jsimd_can_h2v2_upsample(void)
397 /* The code is optimised for these values only */
398 if (BITS_IN_JSAMPLE != 8)
400 if (sizeof(JDIMENSION) != 4)
403 if (simd_support & JSIMD_NEON)
/* jsimd_can_h2v1_upsample(): query that tests compile-time constants and then
 * the JSIMD_NEON bit of simd_support. Visible guards: BITS_IN_JSAMPLE other
 * than 8 and a JDIMENSION that is not 4 bytes.
 * NOTE(review): the guarded statements and return values are not visible in
 * this excerpt. */
410 jsimd_can_h2v1_upsample(void)
414 /* The code is optimised for these values only */
415 if (BITS_IN_JSAMPLE != 8)
417 if (sizeof(JDIMENSION) != 4)
419 if (simd_support & JSIMD_NEON)
/* jsimd_h2v2_upsample(): forwards to jsimd_h2v2_upsample_neon() with
 * cinfo->max_v_samp_factor, cinfo->output_width, input_data and
 * output_data_ptr. compptr is not used by the visible call. */
426 jsimd_h2v2_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
427 JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
429 jsimd_h2v2_upsample_neon(cinfo->max_v_samp_factor, cinfo->output_width,
430 input_data, output_data_ptr);
/* jsimd_h2v1_upsample(): forwards to jsimd_h2v1_upsample_neon() with
 * cinfo->max_v_samp_factor, cinfo->output_width, input_data and
 * output_data_ptr. compptr is not used by the visible call. */
434 jsimd_h2v1_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
435 JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
437 jsimd_h2v1_upsample_neon(cinfo->max_v_samp_factor, cinfo->output_width,
438 input_data, output_data_ptr);
/* jsimd_can_h2v2_fancy_upsample(): query that tests compile-time constants and
 * then the JSIMD_NEON bit of simd_support. Visible guards: BITS_IN_JSAMPLE
 * other than 8 and a JDIMENSION that is not 4 bytes.
 * NOTE(review): the guarded statements and return values are not visible in
 * this excerpt. */
442 jsimd_can_h2v2_fancy_upsample(void)
446 /* The code is optimised for these values only */
447 if (BITS_IN_JSAMPLE != 8)
449 if (sizeof(JDIMENSION) != 4)
452 if (simd_support & JSIMD_NEON)
/* jsimd_can_h2v1_fancy_upsample(): query that tests compile-time constants and
 * then the JSIMD_NEON bit of simd_support. Visible guards: BITS_IN_JSAMPLE
 * other than 8 and a JDIMENSION that is not 4 bytes.
 * NOTE(review): the guarded statements and return values are not visible in
 * this excerpt. */
459 jsimd_can_h2v1_fancy_upsample(void)
463 /* The code is optimised for these values only */
464 if (BITS_IN_JSAMPLE != 8)
466 if (sizeof(JDIMENSION) != 4)
469 if (simd_support & JSIMD_NEON)
/* jsimd_can_h1v2_fancy_upsample(): query that tests compile-time constants and
 * then the JSIMD_NEON bit of simd_support. Visible guards: BITS_IN_JSAMPLE
 * other than 8 and a JDIMENSION that is not 4 bytes.
 * NOTE(review): the guarded statements and return values are not visible in
 * this excerpt. */
476 jsimd_can_h1v2_fancy_upsample(void)
480 /* The code is optimised for these values only */
481 if (BITS_IN_JSAMPLE != 8)
483 if (sizeof(JDIMENSION) != 4)
486 if (simd_support & JSIMD_NEON)
/* jsimd_h2v2_fancy_upsample(): forwards to jsimd_h2v2_fancy_upsample_neon(),
 * passing cinfo->max_v_samp_factor, compptr->downsampled_width and input_data.
 * NOTE(review): the remaining argument(s) of the call are not visible in this
 * excerpt. */
493 jsimd_h2v2_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
494 JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
496 jsimd_h2v2_fancy_upsample_neon(cinfo->max_v_samp_factor,
497 compptr->downsampled_width, input_data,
/* jsimd_h2v1_fancy_upsample(): forwards to jsimd_h2v1_fancy_upsample_neon(),
 * passing cinfo->max_v_samp_factor, compptr->downsampled_width and input_data.
 * NOTE(review): the remaining argument(s) of the call are not visible in this
 * excerpt. */
502 jsimd_h2v1_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
503 JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
505 jsimd_h2v1_fancy_upsample_neon(cinfo->max_v_samp_factor,
506 compptr->downsampled_width, input_data,
/* jsimd_h1v2_fancy_upsample(): forwards to jsimd_h1v2_fancy_upsample_neon(),
 * passing cinfo->max_v_samp_factor, compptr->downsampled_width and input_data.
 * NOTE(review): the remaining argument(s) of the call are not visible in this
 * excerpt. */
511 jsimd_h1v2_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
512 JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
514 jsimd_h1v2_fancy_upsample_neon(cinfo->max_v_samp_factor,
515 compptr->downsampled_width, input_data,
/* jsimd_can_h2v2_merged_upsample(): query that tests compile-time constants
 * and then the JSIMD_NEON bit of simd_support. Visible guards: BITS_IN_JSAMPLE
 * other than 8 and a JDIMENSION that is not 4 bytes.
 * NOTE(review): the guarded statements and return values are not visible in
 * this excerpt. */
520 jsimd_can_h2v2_merged_upsample(void)
524 /* The code is optimised for these values only */
525 if (BITS_IN_JSAMPLE != 8)
527 if (sizeof(JDIMENSION) != 4)
530 if (simd_support & JSIMD_NEON)
/* jsimd_can_h2v1_merged_upsample(): query that tests compile-time constants
 * and then the JSIMD_NEON bit of simd_support. Visible guards: BITS_IN_JSAMPLE
 * other than 8 and a JDIMENSION that is not 4 bytes.
 * NOTE(review): the guarded statements and return values are not visible in
 * this excerpt. */
537 jsimd_can_h2v1_merged_upsample(void)
541 /* The code is optimised for these values only */
542 if (BITS_IN_JSAMPLE != 8)
544 if (sizeof(JDIMENSION) != 4)
547 if (simd_support & JSIMD_NEON)
/* jsimd_h2v2_merged_upsample(): picks a Neon routine through the neonfct
 * function pointer according to cinfo->out_color_space, then calls it with
 * cinfo->output_width, input_buf, in_row_group_ctr and output_buf.
 * NOTE(review): the case labels of the switch are not visible in this excerpt;
 * jsimd_h2v2_extrgb_merged_upsample_neon is assigned twice, presumably once
 * for an explicit case and once as the default -- confirm. */
554 jsimd_h2v2_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
555 JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)
557 void (*neonfct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
559 switch (cinfo->out_color_space) {
561 neonfct = jsimd_h2v2_extrgb_merged_upsample_neon;
565 neonfct = jsimd_h2v2_extrgbx_merged_upsample_neon;
568 neonfct = jsimd_h2v2_extbgr_merged_upsample_neon;
572 neonfct = jsimd_h2v2_extbgrx_merged_upsample_neon;
576 neonfct = jsimd_h2v2_extxbgr_merged_upsample_neon;
580 neonfct = jsimd_h2v2_extxrgb_merged_upsample_neon;
583 neonfct = jsimd_h2v2_extrgb_merged_upsample_neon;
587 neonfct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
/* jsimd_h2v1_merged_upsample(): picks a Neon routine through the neonfct
 * function pointer according to cinfo->out_color_space, then calls it with
 * cinfo->output_width, input_buf, in_row_group_ctr and output_buf.
 * NOTE(review): the case labels of the switch are not visible in this excerpt;
 * jsimd_h2v1_extrgb_merged_upsample_neon is assigned twice, presumably once
 * for an explicit case and once as the default -- confirm. */
591 jsimd_h2v1_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
592 JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)
594 void (*neonfct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
596 switch (cinfo->out_color_space) {
598 neonfct = jsimd_h2v1_extrgb_merged_upsample_neon;
602 neonfct = jsimd_h2v1_extrgbx_merged_upsample_neon;
605 neonfct = jsimd_h2v1_extbgr_merged_upsample_neon;
609 neonfct = jsimd_h2v1_extbgrx_merged_upsample_neon;
613 neonfct = jsimd_h2v1_extxbgr_merged_upsample_neon;
617 neonfct = jsimd_h2v1_extxrgb_merged_upsample_neon;
620 neonfct = jsimd_h2v1_extrgb_merged_upsample_neon;
624 neonfct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
/* jsimd_can_convsamp(): query that tests compile-time constants and then the
 * JSIMD_NEON bit of simd_support. Visible guards: BITS_IN_JSAMPLE other than
 * 8, a JDIMENSION that is not 4 bytes and a DCTELEM that is not 2 bytes.
 * NOTE(review): the guarded statements and return values are not visible in
 * this excerpt, and further guards may exist on lines that are not shown. */
628 jsimd_can_convsamp(void)
632 /* The code is optimised for these values only */
635 if (BITS_IN_JSAMPLE != 8)
637 if (sizeof(JDIMENSION) != 4)
639 if (sizeof(DCTELEM) != 2)
642 if (simd_support & JSIMD_NEON)
/* jsimd_can_convsamp_float(): only the signature is visible in this excerpt;
 * the visible lines reference no Neon routine for the float variant. */
649 jsimd_can_convsamp_float(void)
/* jsimd_convsamp(): forwards sample_data, start_col and workspace to
 * jsimd_convsamp_neon().
 * NOTE(review): the end of the parameter list is not visible in this
 * excerpt. */
655 jsimd_convsamp(JSAMPARRAY sample_data, JDIMENSION start_col,
658 jsimd_convsamp_neon(sample_data, start_col, workspace);
/* jsimd_convsamp_float(): only the parameter list is visible in this excerpt;
 * no call to a Neon routine is shown for the float variant. */
662 jsimd_convsamp_float(JSAMPARRAY sample_data, JDIMENSION start_col,
663 FAST_FLOAT *workspace)
/* jsimd_can_fdct_islow(): query that tests a compile-time constant and then
 * the JSIMD_NEON bit of simd_support. Visible guard: a DCTELEM that is not 2
 * bytes.
 * NOTE(review): the guarded statements and return values are not visible in
 * this excerpt, and further guards may exist on lines that are not shown. */
668 jsimd_can_fdct_islow(void)
672 /* The code is optimised for these values only */
675 if (sizeof(DCTELEM) != 2)
678 if (simd_support & JSIMD_NEON)
/* jsimd_can_fdct_ifast(): query that tests a compile-time constant and then
 * the JSIMD_NEON bit of simd_support. Visible guard: a DCTELEM that is not 2
 * bytes.
 * NOTE(review): the guarded statements and return values are not visible in
 * this excerpt, and further guards may exist on lines that are not shown. */
685 jsimd_can_fdct_ifast(void)
689 /* The code is optimised for these values only */
692 if (sizeof(DCTELEM) != 2)
695 if (simd_support & JSIMD_NEON)
/* jsimd_can_fdct_float(): only the signature is visible in this excerpt; the
 * visible lines reference no Neon routine for the float variant. */
702 jsimd_can_fdct_float(void)
/* jsimd_fdct_islow(): forwards data to jsimd_fdct_islow_neon(). */
708 jsimd_fdct_islow(DCTELEM *data)
710 jsimd_fdct_islow_neon(data);
/* jsimd_fdct_ifast(): forwards data to jsimd_fdct_ifast_neon(). */
714 jsimd_fdct_ifast(DCTELEM *data)
716 jsimd_fdct_ifast_neon(data);
/* jsimd_fdct_float(): only the signature is visible in this excerpt; no call
 * to a Neon routine is shown for the float variant. */
720 jsimd_fdct_float(FAST_FLOAT *data)
/* jsimd_can_quantize(): query that tests compile-time constants and then the
 * JSIMD_NEON bit of simd_support. Visible guards: a JCOEF that is not 2 bytes
 * and a DCTELEM that is not 2 bytes.
 * NOTE(review): the guarded statements and return values are not visible in
 * this excerpt, and further guards may exist on lines that are not shown. */
725 jsimd_can_quantize(void)
729 /* The code is optimised for these values only */
732 if (sizeof(JCOEF) != 2)
734 if (sizeof(DCTELEM) != 2)
737 if (simd_support & JSIMD_NEON)
/* jsimd_can_quantize_float(): only the signature is visible in this excerpt;
 * the visible lines reference no Neon routine for the float variant. */
744 jsimd_can_quantize_float(void)
/* jsimd_quantize(): forwards coef_block, divisors and workspace to
 * jsimd_quantize_neon(). */
750 jsimd_quantize(JCOEFPTR coef_block, DCTELEM *divisors, DCTELEM *workspace)
752 jsimd_quantize_neon(coef_block, divisors, workspace);
/* jsimd_quantize_float(): only the parameter list is visible in this excerpt;
 * no call to a Neon routine is shown for the float variant. */
756 jsimd_quantize_float(JCOEFPTR coef_block, FAST_FLOAT *divisors,
757 FAST_FLOAT *workspace)
/* jsimd_can_idct_2x2(): query that tests compile-time constants and then the
 * JSIMD_NEON bit of simd_support. Visible guards: a JCOEF that is not 2 bytes,
 * BITS_IN_JSAMPLE other than 8, a JDIMENSION that is not 4 bytes and an
 * ISLOW_MULT_TYPE that is not 2 bytes.
 * NOTE(review): the guarded statements and return values are not visible in
 * this excerpt, and further guards may exist on lines that are not shown. */
762 jsimd_can_idct_2x2(void)
766 /* The code is optimised for these values only */
769 if (sizeof(JCOEF) != 2)
771 if (BITS_IN_JSAMPLE != 8)
773 if (sizeof(JDIMENSION) != 4)
775 if (sizeof(ISLOW_MULT_TYPE) != 2)
778 if (simd_support & JSIMD_NEON)
/* jsimd_can_idct_4x4(): query that tests compile-time constants and then the
 * JSIMD_NEON bit of simd_support. Visible guards: a JCOEF that is not 2 bytes,
 * BITS_IN_JSAMPLE other than 8, a JDIMENSION that is not 4 bytes and an
 * ISLOW_MULT_TYPE that is not 2 bytes.
 * NOTE(review): the guarded statements and return values are not visible in
 * this excerpt, and further guards may exist on lines that are not shown. */
785 jsimd_can_idct_4x4(void)
789 /* The code is optimised for these values only */
792 if (sizeof(JCOEF) != 2)
794 if (BITS_IN_JSAMPLE != 8)
796 if (sizeof(JDIMENSION) != 4)
798 if (sizeof(ISLOW_MULT_TYPE) != 2)
801 if (simd_support & JSIMD_NEON)
/* jsimd_idct_2x2(): forwards compptr->dct_table, coef_block, output_buf and
 * output_col to jsimd_idct_2x2_neon(). cinfo is not used by the visible
 * call. */
808 jsimd_idct_2x2(j_decompress_ptr cinfo, jpeg_component_info *compptr,
809 JCOEFPTR coef_block, JSAMPARRAY output_buf,
810 JDIMENSION output_col)
812 jsimd_idct_2x2_neon(compptr->dct_table, coef_block, output_buf, output_col);
/* jsimd_idct_4x4(): forwards compptr->dct_table, coef_block, output_buf and
 * output_col to jsimd_idct_4x4_neon(). cinfo is not used by the visible
 * call. */
816 jsimd_idct_4x4(j_decompress_ptr cinfo, jpeg_component_info *compptr,
817 JCOEFPTR coef_block, JSAMPARRAY output_buf,
818 JDIMENSION output_col)
820 jsimd_idct_4x4_neon(compptr->dct_table, coef_block, output_buf, output_col);
/* jsimd_can_idct_islow(): query that tests compile-time constants and then the
 * JSIMD_NEON bit of simd_support. Visible guards: a JCOEF that is not 2 bytes,
 * BITS_IN_JSAMPLE other than 8, a JDIMENSION that is not 4 bytes and an
 * ISLOW_MULT_TYPE that is not 2 bytes.
 * NOTE(review): the guarded statements and return values are not visible in
 * this excerpt, and further guards may exist on lines that are not shown. */
824 jsimd_can_idct_islow(void)
828 /* The code is optimised for these values only */
831 if (sizeof(JCOEF) != 2)
833 if (BITS_IN_JSAMPLE != 8)
835 if (sizeof(JDIMENSION) != 4)
837 if (sizeof(ISLOW_MULT_TYPE) != 2)
840 if (simd_support & JSIMD_NEON)
/* jsimd_can_idct_ifast(): query that tests compile-time constants and then the
 * JSIMD_NEON bit of simd_support. Visible guards: a JCOEF that is not 2 bytes,
 * BITS_IN_JSAMPLE other than 8, a JDIMENSION that is not 4 bytes, an
 * IFAST_MULT_TYPE that is not 2 bytes and IFAST_SCALE_BITS other than 2.
 * NOTE(review): the guarded statements and return values are not visible in
 * this excerpt, and further guards may exist on lines that are not shown. */
847 jsimd_can_idct_ifast(void)
851 /* The code is optimised for these values only */
854 if (sizeof(JCOEF) != 2)
856 if (BITS_IN_JSAMPLE != 8)
858 if (sizeof(JDIMENSION) != 4)
860 if (sizeof(IFAST_MULT_TYPE) != 2)
862 if (IFAST_SCALE_BITS != 2)
865 if (simd_support & JSIMD_NEON)
/* jsimd_can_idct_float(): only the signature is visible in this excerpt; the
 * visible lines reference no Neon routine for the float variant. */
872 jsimd_can_idct_float(void)
/* jsimd_idct_islow(): forwards to jsimd_idct_islow_neon(), passing
 * compptr->dct_table, coef_block and output_buf.
 * NOTE(review): the remaining argument(s) of the call are not visible in this
 * excerpt. */
878 jsimd_idct_islow(j_decompress_ptr cinfo, jpeg_component_info *compptr,
879 JCOEFPTR coef_block, JSAMPARRAY output_buf,
880 JDIMENSION output_col)
882 jsimd_idct_islow_neon(compptr->dct_table, coef_block, output_buf,
/* jsimd_idct_ifast(): forwards to jsimd_idct_ifast_neon(), passing
 * compptr->dct_table, coef_block and output_buf.
 * NOTE(review): the remaining argument(s) of the call are not visible in this
 * excerpt. */
887 jsimd_idct_ifast(j_decompress_ptr cinfo, jpeg_component_info *compptr,
888 JCOEFPTR coef_block, JSAMPARRAY output_buf,
889 JDIMENSION output_col)
891 jsimd_idct_ifast_neon(compptr->dct_table, coef_block, output_buf,
/* jsimd_idct_float(): only the parameter list is visible in this excerpt; no
 * call to a Neon routine is shown for the float variant. */
896 jsimd_idct_float(j_decompress_ptr cinfo, jpeg_component_info *compptr,
897 JCOEFPTR coef_block, JSAMPARRAY output_buf,
898 JDIMENSION output_col)
/* jsimd_can_huff_encode_one_block(): query with one visible guard (a JCOEF
 * that is not 2 bytes) followed by a test that requires both the JSIMD_NEON
 * bit of simd_support and a non-zero simd_huffman.
 * NOTE(review): the guarded statements and return values are not visible in
 * this excerpt, and further guards may exist on lines that are not shown. */
903 jsimd_can_huff_encode_one_block(void)
909 if (sizeof(JCOEF) != 2)
912 if (simd_support & JSIMD_NEON && simd_huffman)
/* jsimd_huff_encode_one_block(): returns the result of
 * jsimd_huff_encode_one_block_neon(), passing state, buffer, block and
 * last_dc_val.
 * NOTE(review): the remaining argument(s) of the call and the return type are
 * not visible in this excerpt. */
919 jsimd_huff_encode_one_block(void *state, JOCTET *buffer, JCOEFPTR block,
920 int last_dc_val, c_derived_tbl *dctbl,
921 c_derived_tbl *actbl)
923 return jsimd_huff_encode_one_block_neon(state, buffer, block, last_dc_val,
/* jsimd_can_encode_mcu_AC_first_prepare(): query with one visible guard (a
 * JCOEF that is not 2 bytes) followed by a test of the JSIMD_NEON bit of
 * simd_support.
 * NOTE(review): the guarded statements and return values are not visible in
 * this excerpt, and further guards may exist on lines that are not shown. */
928 jsimd_can_encode_mcu_AC_first_prepare(void)
934 if (sizeof(JCOEF) != 2)
937 if (simd_support & JSIMD_NEON)
/* jsimd_encode_mcu_AC_first_prepare(): forwards block,
 * jpeg_natural_order_start, Sl, Al, values and zerobits unchanged to
 * jsimd_encode_mcu_AC_first_prepare_neon(). No return statement is visible
 * for this variant. */
944 jsimd_encode_mcu_AC_first_prepare(const JCOEF *block,
945 const int *jpeg_natural_order_start, int Sl,
946 int Al, UJCOEF *values, size_t *zerobits)
948 jsimd_encode_mcu_AC_first_prepare_neon(block, jpeg_natural_order_start,
949 Sl, Al, values, zerobits);
/* jsimd_can_encode_mcu_AC_refine_prepare(): query with one visible guard (a
 * JCOEF that is not 2 bytes) followed by a test of the JSIMD_NEON bit of
 * simd_support.
 * NOTE(review): the guarded statements and return values are not visible in
 * this excerpt, and further guards may exist on lines that are not shown. */
953 jsimd_can_encode_mcu_AC_refine_prepare(void)
959 if (sizeof(JCOEF) != 2)
962 if (simd_support & JSIMD_NEON)
/* jsimd_encode_mcu_AC_refine_prepare(): returns the result of
 * jsimd_encode_mcu_AC_refine_prepare_neon(), called with block,
 * jpeg_natural_order_start, Sl, Al, absvalues and bits unchanged.
 * NOTE(review): the return type is not visible in this excerpt. */
969 jsimd_encode_mcu_AC_refine_prepare(const JCOEF *block,
970 const int *jpeg_natural_order_start, int Sl,
971 int Al, UJCOEF *absvalues, size_t *bits)
973 return jsimd_encode_mcu_AC_refine_prepare_neon(block,
974 jpeg_natural_order_start, Sl,
975 Al, absvalues, bits);