4 * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
5 * Copyright (C) 2011, Nokia Corporation and/or its subsidiary(-ies).
6 * Copyright (C) 2009-2011, 2013-2014, 2016, 2018, 2022, D. R. Commander.
7 * Copyright (C) 2015-2016, 2018, Matthieu Darbois.
8 * Copyright (C) 2019, Google LLC.
9 * Copyright (C) 2020, Arm Limited.
11 * Based on the x86 SIMD extension for IJG JPEG library,
12 * Copyright (C) 1999-2006, MIYASAKA Masaru.
13 * For conditions of distribution and use, see copyright notice in jsimdext.inc
15 * This file contains the interface between the "normal" portions
16 * of the library and the SIMD implementations when running on a
17 * 32-bit Arm architecture.
20 #define JPEG_INTERNALS
21 #include "../../../jinclude.h"
22 #include "../../../jpeglib.h"
23 #include "../../../jsimd.h"
24 #include "../../../jdct.h"
25 #include "../../../jsimddct.h"
26 #include "../../jsimd.h"
/* Bitmask of detected SIMD capabilities.  It starts out as ~0 (all bits
 * set); the detection code further down compares it against ~0U before
 * probing, so that value appears to act as the "not yet probed" marker. */
30 static unsigned int simd_support = ~0;
/* Gate for the Neon Huffman encoder: jsimd_can_huff_encode_one_block()
 * tests this flag together with the JSIMD_NEON bit. */
31 static unsigned int simd_huffman = 1;
/* The cpuinfo helpers that follow are compiled only when Neon is not
 * already implied by the compiler flags and the target is Linux/Android. */
33 #if !defined(__ARM_NEON__) && (defined(__linux__) || defined(ANDROID) || defined(__ANDROID__))
/* Ceiling (1 MiB) that the detection loop compares the line-buffer size
 * against while retrying the cpuinfo parse. */
35 #define SOMEWHAT_SANE_PROC_CPUINFO_SIZE_LIMIT (1024 * 1024)
/*
 * check_feature(): examines one text line ('buffer') for the word
 * 'feature'.  The visible statements require the line to begin with
 * "Features", step over whitespace, and then walk every strstr() match,
 * testing the characters around the match with isspace().
 *
 * NOTE(review): this listing omits the return type, braces, and the
 * statements attached to each condition, so the exact return values are
 * not visible; the caller only uses the result as a truth value.
 */
38 check_feature(char *buffer, char *feature)
/* Lines that do not start with the "Features" key are tested for here. */
44 if (strncmp(buffer, "Features", 8) != 0)
/* NOTE(review): isspace() is handed a plain 'char' here and in the two
 * checks below; a negative value other than EOF is undefined behaviour,
 * so a cast to unsigned char would be the defensive form. */
47 while (isspace(*buffer))
50 /* Check if 'feature' is present in the buffer as a separate word */
51 while ((p = strstr(buffer, feature))) {
/* Match is preceded by a non-space character. */
52 if (p > buffer && !isspace(*(p - 1))) {
/* 'p' is neither at the string end nor at whitespace (presumably 'p' was
 * advanced past the match by an elided statement -- TODO confirm). */
57 if (*p != 0 && !isspace(*p)) {
/*
 * parse_proc_cpuinfo(): reads /proc/cpuinfo line by line into a heap
 * buffer of 'bufsize' bytes and sets the JSIMD_NEON bit in simd_support
 * when check_feature() reports "neon" on a line.
 *
 * NOTE(review): the return statements, the NULL checks and the
 * fclose()/free() calls are not part of this listing.  The caller retries
 * while this function yields zero, so zero presumably means "buffer too
 * small" -- TODO confirm against the full source.
 */
67 parse_proc_cpuinfo(int bufsize)
69 char *buffer = (char *)malloc(bufsize);
77 fd = fopen("/proc/cpuinfo", "r");
/* fgets() stores at most bufsize - 1 characters per call. */
79 while (fgets(buffer, bufsize, fd)) {
/* No newline in the buffer and not at end of file: the line did not fit. */
80 if (!strchr(buffer, '\n') && !feof(fd)) {
81 /* "impossible" happened - insufficient size of the buffer! */
86 if (check_feature(buffer, "neon"))
87 simd_support |= JSIMD_NEON;
98 * Check what SIMD accelerations are supported.
100 * FIXME: This code is racy under a multi-threaded environment.
/*
 * Body of the SIMD detection routine (its signature line is not part of
 * this listing).  Visible steps, in order:
 *   1. test whether simd_support still holds the ~0U start value;
 *   2. if the compiler defines __ARM_NEON__, set the JSIMD_NEON bit;
 *   3. otherwise, on Linux/Android, call parse_proc_cpuinfo() repeatedly,
 *      comparing the buffer size against the 1 MiB limit;
 *   4. apply overrides taken from environment variables.
 * Statements attached to several of these conditions are elided here.
 */
108 #if !defined(__ARM_NEON__) && (defined(__linux__) || defined(ANDROID) || defined(__ANDROID__))
109 int bufsize = 1024; /* an initial guess for the line buffer size limit */
/* Any value other than the ~0U start value is treated specially here
 * (presumably an early return because detection already ran). */
112 if (simd_support != ~0U)
117 #if defined(__ARM_NEON__)
118 simd_support |= JSIMD_NEON;
119 #elif defined(__linux__) || defined(ANDROID) || defined(__ANDROID__)
120 /* We still have a chance to use Neon regardless of globally used
121 * -mcpu/-mfpu options passed to gcc by performing runtime detection via
122 * /proc/cpuinfo parsing on linux/android */
/* Retry while the parser yields zero; the statement that changes bufsize
 * between attempts is not visible in this listing. */
123 while (!parse_proc_cpuinfo(bufsize)) {
125 if (bufsize > SOMEWHAT_SANE_PROC_CPUINFO_SIZE_LIMIT)
131 /* Force different settings through environment variables */
/* Each override fires only when GETENV_S() yields zero and the value read
 * into 'env' is exactly "1".  This one replaces (does not OR into) the
 * capability mask. */
132 if (!GETENV_S(env, 2, "JSIMD_FORCENEON") && !strcmp(env, "1"))
133 simd_support = JSIMD_NEON;
/* The assignments controlled by the next two checks are elided
 * (presumably clearing simd_support and simd_huffman respectively). */
134 if (!GETENV_S(env, 2, "JSIMD_FORCENONE") && !strcmp(env, "1"))
136 if (!GETENV_S(env, 2, "JSIMD_NOHUFFENC") && !strcmp(env, "1"))
/*
 * jsimd_can_rgb_ycc(): capability check.  The visible guards test for
 * BITS_IN_JSAMPLE != 8, sizeof(JDIMENSION) != 4, RGB_PIXELSIZE being
 * neither 3 nor 4, and finally for the JSIMD_NEON bit in simd_support.
 * The statement attached to each guard is elided from this listing
 * (presumably "return 0" for the rejections and "return 1" for Neon).
 */
142 jsimd_can_rgb_ycc(void)
146 /* The code is optimised for these values only */
147 if (BITS_IN_JSAMPLE != 8)
149 if (sizeof(JDIMENSION) != 4)
151 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
154 if (simd_support & JSIMD_NEON)
/*
 * jsimd_can_rgb_gray(): capability check with the same visible guards as
 * the other colour-conversion checks: sample depth other than 8 bits,
 * JDIMENSION not 4 bytes wide, RGB_PIXELSIZE outside {3, 4}, then the
 * JSIMD_NEON bit.  Outcomes of the guards are not shown in this listing.
 */
161 jsimd_can_rgb_gray(void)
165 /* The code is optimised for these values only */
166 if (BITS_IN_JSAMPLE != 8)
168 if (sizeof(JDIMENSION) != 4)
170 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
173 if (simd_support & JSIMD_NEON)
/*
 * jsimd_can_ycc_rgb(): capability check.  Guards visible here: sample
 * depth (8 bits), JDIMENSION width (4 bytes), RGB_PIXELSIZE (3 or 4) and
 * the JSIMD_NEON capability bit.  What is returned in each case is elided.
 */
180 jsimd_can_ycc_rgb(void)
184 /* The code is optimised for these values only */
185 if (BITS_IN_JSAMPLE != 8)
187 if (sizeof(JDIMENSION) != 4)
189 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
192 if (simd_support & JSIMD_NEON)
/*
 * jsimd_can_ycc_rgb565(): capability check.  Unlike the other colour
 * checks, no RGB_PIXELSIZE guard is visible; only the sample depth, the
 * JDIMENSION width and the JSIMD_NEON bit are tested.  Return statements
 * are elided from this listing.
 */
199 jsimd_can_ycc_rgb565(void)
203 /* The code is optimised for these values only */
204 if (BITS_IN_JSAMPLE != 8)
206 if (sizeof(JDIMENSION) != 4)
209 if (simd_support & JSIMD_NEON)
/*
 * jsimd_rgb_ycc_convert(): picks a Neon conversion routine according to
 * cinfo->in_color_space and invokes it with the image width, both buffers,
 * the output row and the row count.
 *
 * NOTE(review): the 'case' labels and 'break' statements are elided, so
 * the mapping from colour-space constant to routine cannot be read off
 * this listing.  jsimd_extrgb_ycc_convert_neon is assigned twice; the
 * second assignment is presumably the default branch -- TODO confirm.
 */
216 jsimd_rgb_ycc_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
217 JSAMPIMAGE output_buf, JDIMENSION output_row,
/* Pointer to the selected routine; all candidates share this signature. */
220 void (*neonfct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
222 switch (cinfo->in_color_space) {
224 neonfct = jsimd_extrgb_ycc_convert_neon;
228 neonfct = jsimd_extrgbx_ycc_convert_neon;
231 neonfct = jsimd_extbgr_ycc_convert_neon;
235 neonfct = jsimd_extbgrx_ycc_convert_neon;
239 neonfct = jsimd_extxbgr_ycc_convert_neon;
243 neonfct = jsimd_extxrgb_ycc_convert_neon;
246 neonfct = jsimd_extrgb_ycc_convert_neon;
250 neonfct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
/*
 * jsimd_rgb_gray_convert(): selects one of the "_gray_convert_neon"
 * routines based on cinfo->in_color_space, then calls it with
 * cinfo->image_width, the input and output buffers, the output row and
 * num_rows.  Case labels are elided from this listing; the repeated
 * jsimd_extrgb_gray_convert_neon assignment is presumably the default.
 */
254 jsimd_rgb_gray_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
255 JSAMPIMAGE output_buf, JDIMENSION output_row,
258 void (*neonfct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
260 switch (cinfo->in_color_space) {
262 neonfct = jsimd_extrgb_gray_convert_neon;
266 neonfct = jsimd_extrgbx_gray_convert_neon;
269 neonfct = jsimd_extbgr_gray_convert_neon;
273 neonfct = jsimd_extbgrx_gray_convert_neon;
277 neonfct = jsimd_extxbgr_gray_convert_neon;
281 neonfct = jsimd_extxrgb_gray_convert_neon;
284 neonfct = jsimd_extrgb_gray_convert_neon;
288 neonfct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
/*
 * jsimd_ycc_rgb_convert(): decompression-side dispatcher.  It switches on
 * cinfo->out_color_space to choose a "jsimd_ycc_..._convert_neon" routine
 * and calls it with cinfo->output_width, the input planes, the input row,
 * the output buffer and num_rows.  Case labels are not visible here; the
 * second jsimd_ycc_extrgb_convert_neon assignment is presumably the
 * default branch.
 */
292 jsimd_ycc_rgb_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
293 JDIMENSION input_row, JSAMPARRAY output_buf,
/* Note the argument order differs from the compression-side pointer type:
 * image planes first, sample array second. */
296 void (*neonfct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
298 switch (cinfo->out_color_space) {
300 neonfct = jsimd_ycc_extrgb_convert_neon;
304 neonfct = jsimd_ycc_extrgbx_convert_neon;
307 neonfct = jsimd_ycc_extbgr_convert_neon;
311 neonfct = jsimd_ycc_extbgrx_convert_neon;
315 neonfct = jsimd_ycc_extxbgr_convert_neon;
319 neonfct = jsimd_ycc_extxrgb_convert_neon;
322 neonfct = jsimd_ycc_extrgb_convert_neon;
326 neonfct(cinfo->output_width, input_buf, input_row, output_buf, num_rows);
/*
 * jsimd_ycc_rgb565_convert(): thin wrapper.  There is no dispatch here;
 * the visible call passes cinfo->output_width and the remaining arguments
 * straight to jsimd_ycc_rgb565_convert_neon().
 */
330 jsimd_ycc_rgb565_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
331 JDIMENSION input_row, JSAMPARRAY output_buf,
334 jsimd_ycc_rgb565_convert_neon(cinfo->output_width, input_buf, input_row,
335 output_buf, num_rows);
/*
 * jsimd_can_h2v2_downsample(): capability check.  Visible guards: sample
 * depth other than 8 bits, JDIMENSION not 4 bytes, then the JSIMD_NEON
 * bit of simd_support.  The listing skips lines between the guards, so
 * further checks and the return statements may exist but are not shown.
 */
339 jsimd_can_h2v2_downsample(void)
343 /* The code is optimised for these values only */
344 if (BITS_IN_JSAMPLE != 8)
348 if (sizeof(JDIMENSION) != 4)
351 if (simd_support & JSIMD_NEON)
/*
 * jsimd_can_h2v1_downsample(): capability check mirroring the h2v2
 * variant: it tests BITS_IN_JSAMPLE, sizeof(JDIMENSION) and the
 * JSIMD_NEON bit.  Return statements are elided from this listing.
 */
358 jsimd_can_h2v1_downsample(void)
362 /* The code is optimised for these values only */
363 if (BITS_IN_JSAMPLE != 8)
367 if (sizeof(JDIMENSION) != 4)
370 if (simd_support & JSIMD_NEON)
/*
 * jsimd_h2v2_downsample(): wrapper that unpacks the fields the Neon
 * routine needs -- image width and maximum vertical sampling factor from
 * 'cinfo', vertical sampling factor and width in blocks from 'compptr' --
 * and forwards them with both sample arrays.
 */
377 jsimd_h2v2_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
378 JSAMPARRAY input_data, JSAMPARRAY output_data)
380 jsimd_h2v2_downsample_neon(cinfo->image_width, cinfo->max_v_samp_factor,
381 compptr->v_samp_factor, compptr->width_in_blocks,
382 input_data, output_data);
/*
 * jsimd_h2v1_downsample(): same forwarding pattern as the h2v2 wrapper,
 * targeting jsimd_h2v1_downsample_neon().  Arguments passed on:
 * cinfo->image_width, cinfo->max_v_samp_factor, compptr->v_samp_factor,
 * compptr->width_in_blocks, input_data and output_data.
 */
386 jsimd_h2v1_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
387 JSAMPARRAY input_data, JSAMPARRAY output_data)
389 jsimd_h2v1_downsample_neon(cinfo->image_width, cinfo->max_v_samp_factor,
390 compptr->v_samp_factor, compptr->width_in_blocks,
391 input_data, output_data);
/*
 * jsimd_can_h2v2_upsample(): capability check.  It tests the sample depth
 * (8 bits), the JDIMENSION width (4 bytes) and the JSIMD_NEON bit; the
 * value returned for each outcome is not visible in this listing.
 */
395 jsimd_can_h2v2_upsample(void)
399 /* The code is optimised for these values only */
400 if (BITS_IN_JSAMPLE != 8)
402 if (sizeof(JDIMENSION) != 4)
405 if (simd_support & JSIMD_NEON)
/*
 * jsimd_can_h2v1_upsample(): capability check with the same three visible
 * guards as its h2v2 sibling (BITS_IN_JSAMPLE, sizeof(JDIMENSION),
 * JSIMD_NEON).  Return statements are elided.
 */
412 jsimd_can_h2v1_upsample(void)
416 /* The code is optimised for these values only */
417 if (BITS_IN_JSAMPLE != 8)
419 if (sizeof(JDIMENSION) != 4)
421 if (simd_support & JSIMD_NEON)
/*
 * jsimd_h2v2_upsample(): forwards cinfo->max_v_samp_factor,
 * cinfo->output_width and the two data pointers to
 * jsimd_h2v2_upsample_neon().  'compptr' is not referenced by the visible
 * call.
 */
428 jsimd_h2v2_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
429 JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
431 jsimd_h2v2_upsample_neon(cinfo->max_v_samp_factor, cinfo->output_width,
432 input_data, output_data_ptr);
/*
 * jsimd_h2v1_upsample(): wrapper around jsimd_h2v1_upsample_neon().  Only
 * fields of 'cinfo' (max_v_samp_factor, output_width) plus the two data
 * pointers are handed over; 'compptr' does not appear in the call.
 */
436 jsimd_h2v1_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
437 JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
439 jsimd_h2v1_upsample_neon(cinfo->max_v_samp_factor, cinfo->output_width,
440 input_data, output_data_ptr);
/*
 * jsimd_can_h2v2_fancy_upsample(): capability check.  Guards on sample
 * depth and JDIMENSION width precede the JSIMD_NEON test; the statements
 * they control (presumably 0/1 returns) are elided from this listing.
 */
444 jsimd_can_h2v2_fancy_upsample(void)
448 /* The code is optimised for these values only */
449 if (BITS_IN_JSAMPLE != 8)
451 if (sizeof(JDIMENSION) != 4)
454 if (simd_support & JSIMD_NEON)
/*
 * jsimd_can_h2v1_fancy_upsample(): capability check; tests
 * BITS_IN_JSAMPLE != 8, sizeof(JDIMENSION) != 4 and the JSIMD_NEON bit.
 * Outcomes are not shown in this listing.
 */
461 jsimd_can_h2v1_fancy_upsample(void)
465 /* The code is optimised for these values only */
466 if (BITS_IN_JSAMPLE != 8)
468 if (sizeof(JDIMENSION) != 4)
471 if (simd_support & JSIMD_NEON)
/*
 * jsimd_can_h1v2_fancy_upsample(): capability check for the h1v2 variant,
 * with the same three visible tests as the h2v1/h2v2 fancy checks.
 * Return statements are elided.
 */
478 jsimd_can_h1v2_fancy_upsample(void)
482 /* The code is optimised for these values only */
483 if (BITS_IN_JSAMPLE != 8)
485 if (sizeof(JDIMENSION) != 4)
488 if (simd_support & JSIMD_NEON)
/*
 * jsimd_h2v2_fancy_upsample(): wrapper around
 * jsimd_h2v2_fancy_upsample_neon().  The visible arguments are
 * cinfo->max_v_samp_factor, compptr->downsampled_width and input_data;
 * the final argument line of the call is elided from this listing
 * (presumably output_data_ptr).
 */
495 jsimd_h2v2_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
496 JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
498 jsimd_h2v2_fancy_upsample_neon(cinfo->max_v_samp_factor,
499 compptr->downsampled_width, input_data,
/*
 * jsimd_h2v1_fancy_upsample(): forwards to
 * jsimd_h2v1_fancy_upsample_neon().  Unlike the plain upsample wrappers,
 * the width passed is compptr->downsampled_width.  The tail of the call
 * is not part of this listing.
 */
504 jsimd_h2v1_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
505 JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
507 jsimd_h2v1_fancy_upsample_neon(cinfo->max_v_samp_factor,
508 compptr->downsampled_width, input_data,
/*
 * jsimd_h1v2_fancy_upsample(): forwards to
 * jsimd_h1v2_fancy_upsample_neon() with cinfo->max_v_samp_factor,
 * compptr->downsampled_width and input_data; the remaining argument(s)
 * are on a line this listing omits.
 */
513 jsimd_h1v2_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
514 JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
516 jsimd_h1v2_fancy_upsample_neon(cinfo->max_v_samp_factor,
517 compptr->downsampled_width, input_data,
/*
 * jsimd_can_h2v2_merged_upsample(): capability check.  Visible tests:
 * 8-bit samples, 4-byte JDIMENSION, JSIMD_NEON bit.  The statements run
 * for each outcome are elided from this listing.
 */
522 jsimd_can_h2v2_merged_upsample(void)
526 /* The code is optimised for these values only */
527 if (BITS_IN_JSAMPLE != 8)
529 if (sizeof(JDIMENSION) != 4)
532 if (simd_support & JSIMD_NEON)
/*
 * jsimd_can_h2v1_merged_upsample(): capability check; same visible guards
 * as the h2v2 merged check (sample depth, JDIMENSION width, Neon bit).
 * Return statements are elided.
 */
539 jsimd_can_h2v1_merged_upsample(void)
543 /* The code is optimised for these values only */
544 if (BITS_IN_JSAMPLE != 8)
546 if (sizeof(JDIMENSION) != 4)
549 if (simd_support & JSIMD_NEON)
/*
 * jsimd_h2v2_merged_upsample(): chooses a
 * "jsimd_h2v2_..._merged_upsample_neon" routine from
 * cinfo->out_color_space and calls it with cinfo->output_width, the input
 * planes, the row-group counter and the output buffer.
 *
 * NOTE(review): case labels are elided; the second assignment of
 * jsimd_h2v2_extrgb_merged_upsample_neon is presumably the default
 * branch -- TODO confirm.
 */
556 jsimd_h2v2_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
557 JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)
/* Four-argument routine type; there is no row-count parameter here. */
559 void (*neonfct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
561 switch (cinfo->out_color_space) {
563 neonfct = jsimd_h2v2_extrgb_merged_upsample_neon;
567 neonfct = jsimd_h2v2_extrgbx_merged_upsample_neon;
570 neonfct = jsimd_h2v2_extbgr_merged_upsample_neon;
574 neonfct = jsimd_h2v2_extbgrx_merged_upsample_neon;
578 neonfct = jsimd_h2v2_extxbgr_merged_upsample_neon;
582 neonfct = jsimd_h2v2_extxrgb_merged_upsample_neon;
585 neonfct = jsimd_h2v2_extrgb_merged_upsample_neon;
589 neonfct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
/*
 * jsimd_h2v1_merged_upsample(): h2v1 counterpart of the merged-upsample
 * dispatcher.  The routine is chosen by cinfo->out_color_space and then
 * invoked with cinfo->output_width, input_buf, in_row_group_ctr and
 * output_buf.  Case labels are not visible in this listing; the repeated
 * extrgb assignment is presumably the default branch.
 */
593 jsimd_h2v1_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
594 JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)
596 void (*neonfct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
598 switch (cinfo->out_color_space) {
600 neonfct = jsimd_h2v1_extrgb_merged_upsample_neon;
604 neonfct = jsimd_h2v1_extrgbx_merged_upsample_neon;
607 neonfct = jsimd_h2v1_extbgr_merged_upsample_neon;
611 neonfct = jsimd_h2v1_extbgrx_merged_upsample_neon;
615 neonfct = jsimd_h2v1_extxbgr_merged_upsample_neon;
619 neonfct = jsimd_h2v1_extxrgb_merged_upsample_neon;
622 neonfct = jsimd_h2v1_extrgb_merged_upsample_neon;
626 neonfct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
/*
 * jsimd_can_convsamp(): capability check.  Visible guards: sample depth
 * other than 8 bits, JDIMENSION not 4 bytes, DCTELEM not 2 bytes, then
 * the JSIMD_NEON bit.  The listing skips lines inside this function, so
 * additional guards and the return statements are not shown.
 */
630 jsimd_can_convsamp(void)
634 /* The code is optimised for these values only */
637 if (BITS_IN_JSAMPLE != 8)
639 if (sizeof(JDIMENSION) != 4)
641 if (sizeof(DCTELEM) != 2)
644 if (simd_support & JSIMD_NEON)
/* jsimd_can_convsamp_float(): takes no arguments.  None of its body lines
 * are part of this listing, so its result cannot be documented from here. */
651 jsimd_can_convsamp_float(void)
/*
 * jsimd_convsamp(): passes sample_data, start_col and workspace unchanged
 * to jsimd_convsamp_neon().  The declaration of the 'workspace' parameter
 * sits on a line this listing omits.
 */
657 jsimd_convsamp(JSAMPARRAY sample_data, JDIMENSION start_col,
660 jsimd_convsamp_neon(sample_data, start_col, workspace);
/* jsimd_convsamp_float(): floating-point workspace variant of the sample
 * conversion entry point.  No body statements are visible in this
 * listing. */
664 jsimd_convsamp_float(JSAMPARRAY sample_data, JDIMENSION start_col,
665 FAST_FLOAT *workspace)
/*
 * jsimd_can_fdct_islow(): capability check.  The visible tests are
 * sizeof(DCTELEM) != 2 and the JSIMD_NEON bit; other lines of the
 * function (including its returns) are elided from this listing.
 */
670 jsimd_can_fdct_islow(void)
674 /* The code is optimised for these values only */
677 if (sizeof(DCTELEM) != 2)
680 if (simd_support & JSIMD_NEON)
/*
 * jsimd_can_fdct_ifast(): capability check with the same visible tests as
 * the islow variant: DCTELEM must not differ from 2 bytes, and the
 * JSIMD_NEON bit is consulted.  Return statements are elided.
 */
687 jsimd_can_fdct_ifast(void)
691 /* The code is optimised for these values only */
694 if (sizeof(DCTELEM) != 2)
697 if (simd_support & JSIMD_NEON)
/* jsimd_can_fdct_float(): takes no arguments; its body is not part of
 * this listing, so the reported capability cannot be stated here. */
704 jsimd_can_fdct_float(void)
/* jsimd_fdct_islow(): hands the DCTELEM block pointer 'data' directly to
 * jsimd_fdct_islow_neon(). */
710 jsimd_fdct_islow(DCTELEM *data)
712 jsimd_fdct_islow_neon(data);
/* jsimd_fdct_ifast(): hands the DCTELEM block pointer 'data' directly to
 * jsimd_fdct_ifast_neon(). */
716 jsimd_fdct_ifast(DCTELEM *data)
718 jsimd_fdct_ifast_neon(data);
/* jsimd_fdct_float(): receives a FAST_FLOAT block pointer.  No body
 * statements are visible in this listing. */
722 jsimd_fdct_float(FAST_FLOAT *data)
/*
 * jsimd_can_quantize(): capability check.  Visible guards compare
 * sizeof(JCOEF) and sizeof(DCTELEM) against 2 before the JSIMD_NEON bit
 * is tested.  Intermediate lines and returns are elided from this
 * listing.
 */
727 jsimd_can_quantize(void)
731 /* The code is optimised for these values only */
734 if (sizeof(JCOEF) != 2)
736 if (sizeof(DCTELEM) != 2)
739 if (simd_support & JSIMD_NEON)
/* jsimd_can_quantize_float(): takes no arguments; body lines are absent
 * from this listing, so its result is not documented here. */
746 jsimd_can_quantize_float(void)
/* jsimd_quantize(): forwards coef_block, divisors and workspace, in that
 * order, to jsimd_quantize_neon(). */
752 jsimd_quantize(JCOEFPTR coef_block, DCTELEM *divisors, DCTELEM *workspace)
754 jsimd_quantize_neon(coef_block, divisors, workspace);
/* jsimd_quantize_float(): variant taking FAST_FLOAT divisors and
 * workspace.  No body statements are visible in this listing. */
758 jsimd_quantize_float(JCOEFPTR coef_block, FAST_FLOAT *divisors,
759 FAST_FLOAT *workspace)
/*
 * jsimd_can_idct_2x2(): capability check.  Visible guards: JCOEF not 2
 * bytes, sample depth not 8 bits, JDIMENSION not 4 bytes,
 * ISLOW_MULT_TYPE not 2 bytes, and finally the JSIMD_NEON bit.  Lines
 * between the guards (and all returns) are elided from this listing.
 */
764 jsimd_can_idct_2x2(void)
768 /* The code is optimised for these values only */
771 if (sizeof(JCOEF) != 2)
773 if (BITS_IN_JSAMPLE != 8)
775 if (sizeof(JDIMENSION) != 4)
777 if (sizeof(ISLOW_MULT_TYPE) != 2)
780 if (simd_support & JSIMD_NEON)
/*
 * jsimd_can_idct_4x4(): capability check with the same visible guards as
 * the 2x2 variant (JCOEF, BITS_IN_JSAMPLE, JDIMENSION, ISLOW_MULT_TYPE,
 * then JSIMD_NEON).  Return statements are elided.
 */
787 jsimd_can_idct_4x4(void)
791 /* The code is optimised for these values only */
794 if (sizeof(JCOEF) != 2)
796 if (BITS_IN_JSAMPLE != 8)
798 if (sizeof(JDIMENSION) != 4)
800 if (sizeof(ISLOW_MULT_TYPE) != 2)
803 if (simd_support & JSIMD_NEON)
/*
 * jsimd_idct_2x2(): wrapper around jsimd_idct_2x2_neon().  It supplies
 * compptr->dct_table as the first argument and passes coef_block,
 * output_buf and output_col through; 'cinfo' is not used by the visible
 * call.
 */
810 jsimd_idct_2x2(j_decompress_ptr cinfo, jpeg_component_info *compptr,
811 JCOEFPTR coef_block, JSAMPARRAY output_buf,
812 JDIMENSION output_col)
814 jsimd_idct_2x2_neon(compptr->dct_table, coef_block, output_buf, output_col);
/*
 * jsimd_idct_4x4(): wrapper around jsimd_idct_4x4_neon(), called with the
 * component's dct_table followed by coef_block, output_buf and
 * output_col.  'cinfo' does not appear in the visible call.
 */
818 jsimd_idct_4x4(j_decompress_ptr cinfo, jpeg_component_info *compptr,
819 JCOEFPTR coef_block, JSAMPARRAY output_buf,
820 JDIMENSION output_col)
822 jsimd_idct_4x4_neon(compptr->dct_table, coef_block, output_buf, output_col);
/*
 * jsimd_can_idct_islow(): capability check.  It tests the sizes of JCOEF
 * (2), JDIMENSION (4) and ISLOW_MULT_TYPE (2), the sample depth (8) and
 * the JSIMD_NEON bit.  What happens on each outcome is elided from this
 * listing.
 */
826 jsimd_can_idct_islow(void)
830 /* The code is optimised for these values only */
833 if (sizeof(JCOEF) != 2)
835 if (BITS_IN_JSAMPLE != 8)
837 if (sizeof(JDIMENSION) != 4)
839 if (sizeof(ISLOW_MULT_TYPE) != 2)
842 if (simd_support & JSIMD_NEON)
/*
 * jsimd_can_idct_ifast(): capability check.  Besides the JCOEF, sample
 * depth and JDIMENSION guards it tests IFAST_MULT_TYPE (2 bytes) and
 * additionally IFAST_SCALE_BITS (must not differ from 2) before looking
 * at the JSIMD_NEON bit.  Return statements are elided.
 */
849 jsimd_can_idct_ifast(void)
853 /* The code is optimised for these values only */
856 if (sizeof(JCOEF) != 2)
858 if (BITS_IN_JSAMPLE != 8)
860 if (sizeof(JDIMENSION) != 4)
862 if (sizeof(IFAST_MULT_TYPE) != 2)
864 if (IFAST_SCALE_BITS != 2)
867 if (simd_support & JSIMD_NEON)
/* jsimd_can_idct_float(): takes no arguments; its body is absent from
 * this listing, so the reported capability cannot be stated here. */
874 jsimd_can_idct_float(void)
/*
 * jsimd_idct_islow(): wrapper around jsimd_idct_islow_neon().  The visible
 * part of the call passes compptr->dct_table, coef_block and output_buf;
 * the continuation line is elided (presumably output_col).
 */
880 jsimd_idct_islow(j_decompress_ptr cinfo, jpeg_component_info *compptr,
881 JCOEFPTR coef_block, JSAMPARRAY output_buf,
882 JDIMENSION output_col)
884 jsimd_idct_islow_neon(compptr->dct_table, coef_block, output_buf,
/*
 * jsimd_idct_ifast(): wrapper around jsimd_idct_ifast_neon(), starting
 * its argument list with the component's dct_table, then coef_block and
 * output_buf.  The rest of the call is on a line this listing omits.
 */
889 jsimd_idct_ifast(j_decompress_ptr cinfo, jpeg_component_info *compptr,
890 JCOEFPTR coef_block, JSAMPARRAY output_buf,
891 JDIMENSION output_col)
893 jsimd_idct_ifast_neon(compptr->dct_table, coef_block, output_buf,
/* jsimd_idct_float(): same parameter list as the other IDCT entry points.
 * No body statements are visible in this listing. */
898 jsimd_idct_float(j_decompress_ptr cinfo, jpeg_component_info *compptr,
899 JCOEFPTR coef_block, JSAMPARRAY output_buf,
900 JDIMENSION output_col)
/*
 * jsimd_can_huff_encode_one_block(): capability check.  After a guard on
 * sizeof(JCOEF) != 2, it requires both the JSIMD_NEON bit in simd_support
 * and a non-zero simd_huffman flag.  Return statements are elided from
 * this listing.
 */
905 jsimd_can_huff_encode_one_block(void)
911 if (sizeof(JCOEF) != 2)
/* '&' binds tighter than '&&', so this reads
 * (simd_support & JSIMD_NEON) && simd_huffman. */
914 if (simd_support & JSIMD_NEON && simd_huffman)
/*
 * jsimd_huff_encode_one_block(): returns whatever
 * jsimd_huff_encode_one_block_neon() returns.  The visible arguments are
 * state, buffer, block and last_dc_val; the continuation of the call is
 * elided (presumably dctbl and actbl).
 */
921 jsimd_huff_encode_one_block(void *state, JOCTET *buffer, JCOEFPTR block,
922 int last_dc_val, c_derived_tbl *dctbl,
923 c_derived_tbl *actbl)
925 return jsimd_huff_encode_one_block_neon(state, buffer, block, last_dc_val,
/*
 * jsimd_can_encode_mcu_AC_first_prepare(): capability check.  Visible
 * tests: sizeof(JCOEF) != 2 and the JSIMD_NEON bit.  Other lines,
 * including the returns, are elided from this listing.
 */
930 jsimd_can_encode_mcu_AC_first_prepare(void)
936 if (sizeof(JCOEF) != 2)
939 if (simd_support & JSIMD_NEON)
/*
 * jsimd_encode_mcu_AC_first_prepare(): forwards all six arguments, in
 * declaration order, to jsimd_encode_mcu_AC_first_prepare_neon().  The
 * call's result is not used (there is no 'return' on the call line).
 */
946 jsimd_encode_mcu_AC_first_prepare(const JCOEF *block,
947 const int *jpeg_natural_order_start, int Sl,
948 int Al, JCOEF *values, size_t *zerobits)
950 jsimd_encode_mcu_AC_first_prepare_neon(block, jpeg_natural_order_start,
951 Sl, Al, values, zerobits);
/*
 * jsimd_can_encode_mcu_AC_refine_prepare(): capability check; tests
 * sizeof(JCOEF) != 2 and then the JSIMD_NEON bit.  Return statements are
 * elided from this listing.
 */
955 jsimd_can_encode_mcu_AC_refine_prepare(void)
961 if (sizeof(JCOEF) != 2)
964 if (simd_support & JSIMD_NEON)
/*
 * jsimd_encode_mcu_AC_refine_prepare(): forwards all six arguments to
 * jsimd_encode_mcu_AC_refine_prepare_neon() and, unlike the "first"
 * variant, returns that routine's result to the caller.
 */
971 jsimd_encode_mcu_AC_refine_prepare(const JCOEF *block,
972 const int *jpeg_natural_order_start, int Sl,
973 int Al, JCOEF *absvalues, size_t *bits)
975 return jsimd_encode_mcu_AC_refine_prepare_neon(block,
976 jpeg_natural_order_start, Sl,
977 Al, absvalues, bits);