4 * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
5 * Copyright (C) 2011, Nokia Corporation and/or its subsidiary(-ies).
6 * Copyright (C) 2009-2011, 2013-2014, 2016, 2018, 2022, D. R. Commander.
7 * Copyright (C) 2015-2016, 2018, Matthieu Darbois.
8 * Copyright (C) 2019, Google LLC.
9 * Copyright (C) 2020, Arm Limited.
11 * Based on the x86 SIMD extension for IJG JPEG library,
12 * Copyright (C) 1999-2006, MIYASAKA Masaru.
13 * For conditions of distribution and use, see copyright notice in jsimdext.inc
15 * This file contains the interface between the "normal" portions
16 * of the library and the SIMD implementations when running on a
17 * 32-bit Arm architecture.
20 #define JPEG_INTERNALS
21 #include "../../../jinclude.h"
22 #include "../../../jpeglib.h"
23 #include "../../../jsimd.h"
24 #include "../../../jdct.h"
25 #include "../../../jsimddct.h"
26 #include "../../jsimd.h"
31 //Changes for JPEG GAMMA enhancement in thumbnail
/* Bitmask of detected SIMD capabilities.  ~0 is the "not yet probed" value:
 * the probing code in this file compares simd_support against ~0U before
 * doing any work, and the jsimd_can_*() queries test the JSIMD_NEON bit. */
35 static unsigned int simd_support = ~0;
/* Gate for the Neon Huffman encoder; consulted together with JSIMD_NEON in
 * jsimd_can_huff_encode_one_block(). */
36 static unsigned int simd_huffman = 1;
/* The /proc/cpuinfo helpers are compiled only when Neon is not already
 * enabled at build time (__ARM_NEON__ undefined) on Linux or Android. */
38 #if !defined(__ARM_NEON__) && (defined(__linux__) || defined(ANDROID) || defined(__ANDROID__))
/* Upper bound (1 MiB) against which the probing code compares the line
 * buffer size it passes to parse_proc_cpuinfo(). */
40 #define SOMEWHAT_SANE_PROC_CPUINFO_SIZE_LIMIT (1024 * 1024)
/*
 * Look for a CPU feature name in one line of /proc/cpuinfo text.
 *
 * buffer:  NUL-terminated line; it is compared against the "Features"
 *          prefix first.
 * feature: name to search for (parse_proc_cpuinfo() passes "neon").
 *
 * Each strstr() hit is checked so that the feature is only accepted as a
 * separate word: the character before the hit and a character after it are
 * tested with isspace().  The statements controlled by these tests and the
 * return statements are not part of this listing.
 *
 * NOTE(review): isspace() is handed a plain char.  <ctype.h> functions
 * expect a value representable as unsigned char (or EOF), so casting to
 * unsigned char would be safer -- confirm against the full source.
 */
43 check_feature(char *buffer, char *feature)
49 if (strncmp(buffer, "Features", 8) != 0)
52 while (isspace(*buffer))
55 /* Check if 'feature' is present in the buffer as a separate word */
56 while ((p = strstr(buffer, feature))) {
57 if (p > buffer && !isspace(*(p - 1))) {
62 if (*p != 0 && !isspace(*p)) {
/*
 * Read /proc/cpuinfo line by line through a heap buffer of bufsize bytes
 * and set JSIMD_NEON in simd_support when check_feature() finds "neon".
 *
 * A line that fills the buffer without a newline while the stream is not at
 * end-of-file is treated as "buffer too small".  The caller loops while this
 * function returns zero.  The handling of that case, any NULL checks on
 * malloc()/fopen(), and the cleanup are not part of this listing.
 */
72 parse_proc_cpuinfo(int bufsize)
74 char *buffer = (char *)malloc(bufsize);
82 fd = fopen("/proc/cpuinfo", "r");
84 while (fgets(buffer, bufsize, fd)) {
85 if (!strchr(buffer, '\n') && !feof(fd)) {
86 /* "impossible" happened - insufficient size of the buffer! */
91 if (check_feature(buffer, "neon"))
92 simd_support |= JSIMD_NEON;
103 * Check what SIMD accelerations are supported.
105 * FIXME: This code is racy under a multi-threaded environment.
/* SIMD capability probe (the routine's name line is not part of this
 * listing).  It first compares simd_support with the ~0 sentinel; the
 * statement controlled by that test is not visible, presumably an early
 * return once probing has already happened. */
113 #if !defined(__ARM_NEON__) && (defined(__linux__) || defined(ANDROID) || defined(__ANDROID__))
114 int bufsize = 1024; /* an initial guess for the line buffer size limit */
117 if (simd_support != ~0U)
/* Build-time Neon: the capability bit is set without any runtime check. */
122 #if defined(__ARM_NEON__)
123 simd_support |= JSIMD_NEON;
124 #elif defined(__linux__) || defined(ANDROID) || defined(__ANDROID__)
125 /* We still have a chance to use Neon regardless of globally used
126 * -mcpu/-mfpu options passed to gcc by performing runtime detection via
127 * /proc/cpuinfo parsing on linux/android */
/* Retry while the parser returns zero; bufsize is compared against the
 * 1 MiB limit inside the loop (the growth step and the action taken when
 * the limit is exceeded are not visible here). */
128 while (!parse_proc_cpuinfo(bufsize)) {
130 if (bufsize > SOMEWHAT_SANE_PROC_CPUINFO_SIZE_LIMIT)
136 /* Force different settings through environment variables */
/* An override applies only when GETENV_S() returns zero and the value is
 * exactly "1".  JSIMD_FORCENEON replaces the whole mask with JSIMD_NEON.
 * The actions for JSIMD_FORCENONE and JSIMD_NOHUFFENC are not visible;
 * presumably they clear simd_support and simd_huffman -- TODO confirm. */
137 if (!GETENV_S(env, 2, "JSIMD_FORCENEON") && !strcmp(env, "1"))
138 simd_support = JSIMD_NEON;
139 if (!GETENV_S(env, 2, "JSIMD_FORCENONE") && !strcmp(env, "1"))
141 if (!GETENV_S(env, 2, "JSIMD_NOHUFFENC") && !strcmp(env, "1"))
/* Capability query paired with jsimd_rgb_ycc_convert().  Tests, in order:
 * BITS_IN_JSAMPLE == 8, sizeof(JDIMENSION) == 4, RGB_PIXELSIZE of 3 or 4,
 * then the JSIMD_NEON bit of simd_support.  The return statements are not
 * part of this listing. */
147 jsimd_can_rgb_ycc(void)
151 /* The code is optimised for these values only */
152 if (BITS_IN_JSAMPLE != 8)
154 if (sizeof(JDIMENSION) != 4)
156 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
159 if (simd_support & JSIMD_NEON)
/* Capability query paired with jsimd_rgb_gray_convert().  Same sequence of
 * tests as the other colour-conversion queries: 8-bit samples, 4-byte
 * JDIMENSION, RGB_PIXELSIZE of 3 or 4, then JSIMD_NEON in simd_support.
 * The return statements are not part of this listing. */
166 jsimd_can_rgb_gray(void)
170 /* The code is optimised for these values only */
171 if (BITS_IN_JSAMPLE != 8)
173 if (sizeof(JDIMENSION) != 4)
175 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
178 if (simd_support & JSIMD_NEON)
/* Capability query paired with jsimd_ycc_rgb_convert().  Tests 8-bit
 * samples, 4-byte JDIMENSION, RGB_PIXELSIZE of 3 or 4, then JSIMD_NEON in
 * simd_support.  The return statements are not part of this listing. */
185 jsimd_can_ycc_rgb(void)
189 /* The code is optimised for these values only */
190 if (BITS_IN_JSAMPLE != 8)
192 if (sizeof(JDIMENSION) != 4)
194 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
197 if (simd_support & JSIMD_NEON)
/* Capability query paired with jsimd_ycc_rgb565_convert().  Unlike the
 * other colour-conversion queries, no RGB_PIXELSIZE test is visible: only
 * 8-bit samples, 4-byte JDIMENSION and the JSIMD_NEON bit are checked.
 * The return statements are not part of this listing. */
204 jsimd_can_ycc_rgb565(void)
208 /* The code is optimised for these values only */
209 if (BITS_IN_JSAMPLE != 8)
211 if (sizeof(JDIMENSION) != 4)
214 if (simd_support & JSIMD_NEON)
/* Compression-side colour conversion dispatcher.  Picks one of the
 * jsimd_ext*_ycc_convert_neon routines from cinfo->in_color_space and calls
 * it with cinfo->image_width, the input/output buffers, output_row and
 * num_rows.  The case labels are not part of this listing;
 * jsimd_extrgb_ycc_convert_neon is assigned twice, presumably once for an
 * explicit case and once as the default. */
221 jsimd_rgb_ycc_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
222 JSAMPIMAGE output_buf, JDIMENSION output_row,
/* Pointer to the selected Neon routine. */
225 void (*neonfct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
227 switch (cinfo->in_color_space) {
229 neonfct = jsimd_extrgb_ycc_convert_neon;
233 neonfct = jsimd_extrgbx_ycc_convert_neon;
236 neonfct = jsimd_extbgr_ycc_convert_neon;
240 neonfct = jsimd_extbgrx_ycc_convert_neon;
244 neonfct = jsimd_extxbgr_ycc_convert_neon;
248 neonfct = jsimd_extxrgb_ycc_convert_neon;
251 neonfct = jsimd_extrgb_ycc_convert_neon;
255 neonfct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
/* Compression-side grayscale conversion dispatcher.  Picks one of the
 * jsimd_ext*_gray_convert_neon routines from cinfo->in_color_space and
 * calls it with cinfo->image_width, the input/output buffers, output_row
 * and num_rows.  The case labels are not part of this listing;
 * jsimd_extrgb_gray_convert_neon is assigned twice, presumably once for an
 * explicit case and once as the default. */
259 jsimd_rgb_gray_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
260 JSAMPIMAGE output_buf, JDIMENSION output_row,
/* Pointer to the selected Neon routine. */
263 void (*neonfct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
265 switch (cinfo->in_color_space) {
267 neonfct = jsimd_extrgb_gray_convert_neon;
271 neonfct = jsimd_extrgbx_gray_convert_neon;
274 neonfct = jsimd_extbgr_gray_convert_neon;
278 neonfct = jsimd_extbgrx_gray_convert_neon;
282 neonfct = jsimd_extxbgr_gray_convert_neon;
286 neonfct = jsimd_extxrgb_gray_convert_neon;
289 neonfct = jsimd_extrgb_gray_convert_neon;
293 neonfct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
/*
 * Decompression-side colour conversion dispatcher with an optional
 * pick-color hook.
 *
 * Picks one of the jsimd_ycc_ext*_convert_neon routines from
 * cinfo->out_color_space (case labels are not part of this listing) and
 * calls it with cinfo->output_width, input_buf, input_row, output_buf and
 * num_rows.  When cinfo->pick_color_data is set and enabled, the converted
 * pixels are then handed to jsimd_pick_color().
 *
 * NOTE(review), to confirm against the full source:
 *  - neonfct is called both inside the JSIMD_NEON test and again further
 *    down (presumably an else branch), so the Neon routine runs either way.
 *  - output_buf is NULL-checked only after it has been passed to neonfct.
 *  - Only *output_buf (the first row pointer) reaches jsimd_pick_color(),
 *    although num_rows rows are converted.
 *  - The x1 * 3 offset assumes 3 bytes per pixel; the extrgbx/extbgrx/
 *    extxbgr/extxrgb variants presumably emit 4 bytes per pixel.
 *  - No bounds check of x1/x2 against cinfo->output_width is visible.
 */
297 jsimd_ycc_rgb_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
298 JDIMENSION input_row, JSAMPARRAY output_buf,
/* Pointer to the selected Neon routine. */
301 void (*neonfct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
303 switch (cinfo->out_color_space) {
305 neonfct = jsimd_ycc_extrgb_convert_neon;
309 neonfct = jsimd_ycc_extrgbx_convert_neon;
312 neonfct = jsimd_ycc_extbgr_convert_neon;
316 neonfct = jsimd_ycc_extbgrx_convert_neon;
320 neonfct = jsimd_ycc_extxbgr_convert_neon;
324 neonfct = jsimd_ycc_extxrgb_convert_neon;
327 neonfct = jsimd_ycc_extrgb_convert_neon;
332 if (simd_support & JSIMD_NEON) {
333 neonfct(cinfo->output_width, input_buf, input_row, output_buf, num_rows);
334 PickColor* pickColor = cinfo->pick_color_data;
335 if(pickColor && pickColor->enablePickColor && output_buf) {
/* Default: hand the whole first output row to the hook. */
336 int w = cinfo->output_width;
337 unsigned char *ptr = *output_buf;
/* perc <= 0: restrict the hook to the pixel span x1..x2 (inclusive). */
338 if(pickColor->perc <= 0) {
339 w = pickColor->x2 - pickColor->x1 + 1;
340 ptr = (*output_buf) + (pickColor->x1 * 3);
342 jsimd_pick_color(ptr, pickColor, w);
346 neonfct(cinfo->output_width, input_buf, input_row, output_buf, num_rows);
/* Thin wrapper: forwards cinfo->output_width, input_buf, input_row,
 * output_buf and num_rows to jsimd_ycc_rgb565_convert_neon().  No
 * colour-space dispatch is involved. */
351 jsimd_ycc_rgb565_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
352 JDIMENSION input_row, JSAMPARRAY output_buf,
355 jsimd_ycc_rgb565_convert_neon(cinfo->output_width, input_buf, input_row,
356 output_buf, num_rows);
/* Capability query paired with jsimd_h2v2_downsample().  Tests 8-bit
 * samples, 4-byte JDIMENSION, then JSIMD_NEON in simd_support.  The return
 * statements are not part of this listing. */
360 jsimd_can_h2v2_downsample(void)
364 /* The code is optimised for these values only */
365 if (BITS_IN_JSAMPLE != 8)
369 if (sizeof(JDIMENSION) != 4)
372 if (simd_support & JSIMD_NEON)
/* Capability query paired with jsimd_h2v1_downsample().  Tests 8-bit
 * samples, 4-byte JDIMENSION, then JSIMD_NEON in simd_support.  The return
 * statements are not part of this listing. */
379 jsimd_can_h2v1_downsample(void)
383 /* The code is optimised for these values only */
384 if (BITS_IN_JSAMPLE != 8)
388 if (sizeof(JDIMENSION) != 4)
391 if (simd_support & JSIMD_NEON)
/* Thin wrapper around jsimd_h2v2_downsample_neon().  Passes the image
 * width and maximum vertical sampling factor from cinfo, the component's
 * v_samp_factor and width_in_blocks from compptr, and the two sample
 * arrays unchanged. */
398 jsimd_h2v2_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
399 JSAMPARRAY input_data, JSAMPARRAY output_data)
401 jsimd_h2v2_downsample_neon(cinfo->image_width, cinfo->max_v_samp_factor,
402 compptr->v_samp_factor, compptr->width_in_blocks,
403 input_data, output_data);
/* Thin wrapper around jsimd_h2v1_downsample_neon().  Passes the image
 * width and maximum vertical sampling factor from cinfo, the component's
 * v_samp_factor and width_in_blocks from compptr, and the two sample
 * arrays unchanged. */
407 jsimd_h2v1_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
408 JSAMPARRAY input_data, JSAMPARRAY output_data)
410 jsimd_h2v1_downsample_neon(cinfo->image_width, cinfo->max_v_samp_factor,
411 compptr->v_samp_factor, compptr->width_in_blocks,
412 input_data, output_data);
/* Capability query paired with jsimd_h2v2_upsample().  Tests 8-bit
 * samples, 4-byte JDIMENSION, then JSIMD_NEON in simd_support.  The return
 * statements are not part of this listing. */
416 jsimd_can_h2v2_upsample(void)
420 /* The code is optimised for these values only */
421 if (BITS_IN_JSAMPLE != 8)
423 if (sizeof(JDIMENSION) != 4)
426 if (simd_support & JSIMD_NEON)
/* Capability query paired with jsimd_h2v1_upsample().  Tests 8-bit
 * samples, 4-byte JDIMENSION, then JSIMD_NEON in simd_support.  The return
 * statements are not part of this listing. */
433 jsimd_can_h2v1_upsample(void)
437 /* The code is optimised for these values only */
438 if (BITS_IN_JSAMPLE != 8)
440 if (sizeof(JDIMENSION) != 4)
442 if (simd_support & JSIMD_NEON)
/* Thin wrapper around jsimd_h2v2_upsample_neon().  Passes
 * cinfo->max_v_samp_factor, cinfo->output_width and the two data pointers.
 * compptr is not referenced in the visible lines. */
449 jsimd_h2v2_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
450 JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
452 jsimd_h2v2_upsample_neon(cinfo->max_v_samp_factor, cinfo->output_width,
453 input_data, output_data_ptr);
/* Thin wrapper around jsimd_h2v1_upsample_neon().  Passes
 * cinfo->max_v_samp_factor, cinfo->output_width and the two data pointers.
 * compptr is not referenced in the visible lines. */
457 jsimd_h2v1_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
458 JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
460 jsimd_h2v1_upsample_neon(cinfo->max_v_samp_factor, cinfo->output_width,
461 input_data, output_data_ptr);
/* Capability query paired with jsimd_h2v2_fancy_upsample().  Tests 8-bit
 * samples, 4-byte JDIMENSION, then JSIMD_NEON in simd_support.  The return
 * statements are not part of this listing. */
465 jsimd_can_h2v2_fancy_upsample(void)
469 /* The code is optimised for these values only */
470 if (BITS_IN_JSAMPLE != 8)
472 if (sizeof(JDIMENSION) != 4)
475 if (simd_support & JSIMD_NEON)
/* Capability query paired with jsimd_h2v1_fancy_upsample().  Tests 8-bit
 * samples, 4-byte JDIMENSION, then JSIMD_NEON in simd_support.  The return
 * statements are not part of this listing. */
482 jsimd_can_h2v1_fancy_upsample(void)
486 /* The code is optimised for these values only */
487 if (BITS_IN_JSAMPLE != 8)
489 if (sizeof(JDIMENSION) != 4)
492 if (simd_support & JSIMD_NEON)
/* Capability query paired with jsimd_h1v2_fancy_upsample().  Tests 8-bit
 * samples, 4-byte JDIMENSION, then JSIMD_NEON in simd_support.  The return
 * statements are not part of this listing. */
499 jsimd_can_h1v2_fancy_upsample(void)
503 /* The code is optimised for these values only */
504 if (BITS_IN_JSAMPLE != 8)
506 if (sizeof(JDIMENSION) != 4)
509 if (simd_support & JSIMD_NEON)
/* Thin wrapper around jsimd_h2v2_fancy_upsample_neon().  Passes
 * cinfo->max_v_samp_factor, compptr->downsampled_width and input_data; the
 * rest of the argument list is not part of this listing. */
516 jsimd_h2v2_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
517 JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
519 jsimd_h2v2_fancy_upsample_neon(cinfo->max_v_samp_factor,
520 compptr->downsampled_width, input_data,
/* Thin wrapper around jsimd_h2v1_fancy_upsample_neon().  Passes
 * cinfo->max_v_samp_factor, compptr->downsampled_width and input_data; the
 * rest of the argument list is not part of this listing. */
525 jsimd_h2v1_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
526 JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
528 jsimd_h2v1_fancy_upsample_neon(cinfo->max_v_samp_factor,
529 compptr->downsampled_width, input_data,
/* Thin wrapper around jsimd_h1v2_fancy_upsample_neon().  Passes
 * cinfo->max_v_samp_factor, compptr->downsampled_width and input_data; the
 * rest of the argument list is not part of this listing. */
534 jsimd_h1v2_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
535 JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
537 jsimd_h1v2_fancy_upsample_neon(cinfo->max_v_samp_factor,
538 compptr->downsampled_width, input_data,
/* Capability query paired with jsimd_h2v2_merged_upsample().  Tests 8-bit
 * samples, 4-byte JDIMENSION, then JSIMD_NEON in simd_support.  The return
 * statements are not part of this listing. */
543 jsimd_can_h2v2_merged_upsample(void)
547 /* The code is optimised for these values only */
548 if (BITS_IN_JSAMPLE != 8)
550 if (sizeof(JDIMENSION) != 4)
553 if (simd_support & JSIMD_NEON)
/* Capability query paired with jsimd_h2v1_merged_upsample().  Tests 8-bit
 * samples, 4-byte JDIMENSION, then JSIMD_NEON in simd_support.  The return
 * statements are not part of this listing. */
560 jsimd_can_h2v1_merged_upsample(void)
564 /* The code is optimised for these values only */
565 if (BITS_IN_JSAMPLE != 8)
567 if (sizeof(JDIMENSION) != 4)
570 if (simd_support & JSIMD_NEON)
/* Merged upsample dispatcher for the h2v2 case.  Picks one of the
 * jsimd_h2v2_ext*_merged_upsample_neon routines from
 * cinfo->out_color_space and calls it with cinfo->output_width, input_buf,
 * in_row_group_ctr and output_buf.  The case labels are not part of this
 * listing; the extrgb variant is assigned twice, presumably once for an
 * explicit case and once as the default. */
577 jsimd_h2v2_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
578 JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)
/* Pointer to the selected Neon routine. */
580 void (*neonfct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
582 switch (cinfo->out_color_space) {
584 neonfct = jsimd_h2v2_extrgb_merged_upsample_neon;
588 neonfct = jsimd_h2v2_extrgbx_merged_upsample_neon;
591 neonfct = jsimd_h2v2_extbgr_merged_upsample_neon;
595 neonfct = jsimd_h2v2_extbgrx_merged_upsample_neon;
599 neonfct = jsimd_h2v2_extxbgr_merged_upsample_neon;
603 neonfct = jsimd_h2v2_extxrgb_merged_upsample_neon;
606 neonfct = jsimd_h2v2_extrgb_merged_upsample_neon;
610 neonfct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
/* Merged upsample dispatcher for the h2v1 case.  Picks one of the
 * jsimd_h2v1_ext*_merged_upsample_neon routines from
 * cinfo->out_color_space and calls it with cinfo->output_width, input_buf,
 * in_row_group_ctr and output_buf.  The case labels are not part of this
 * listing; the extrgb variant is assigned twice, presumably once for an
 * explicit case and once as the default. */
614 jsimd_h2v1_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
615 JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)
/* Pointer to the selected Neon routine. */
617 void (*neonfct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
619 switch (cinfo->out_color_space) {
621 neonfct = jsimd_h2v1_extrgb_merged_upsample_neon;
625 neonfct = jsimd_h2v1_extrgbx_merged_upsample_neon;
628 neonfct = jsimd_h2v1_extbgr_merged_upsample_neon;
632 neonfct = jsimd_h2v1_extbgrx_merged_upsample_neon;
636 neonfct = jsimd_h2v1_extxbgr_merged_upsample_neon;
640 neonfct = jsimd_h2v1_extxrgb_merged_upsample_neon;
643 neonfct = jsimd_h2v1_extrgb_merged_upsample_neon;
647 neonfct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
/* Capability query paired with jsimd_convsamp().  Tests 8-bit samples,
 * 4-byte JDIMENSION, 2-byte DCTELEM, then JSIMD_NEON in simd_support.  A
 * further check and the return statements are not part of this listing. */
651 jsimd_can_convsamp(void)
655 /* The code is optimised for these values only */
658 if (BITS_IN_JSAMPLE != 8)
660 if (sizeof(JDIMENSION) != 4)
662 if (sizeof(DCTELEM) != 2)
665 if (simd_support & JSIMD_NEON)
/* Capability query for the floating-point convsamp path.  The body is not
 * part of this listing.  NOTE(review): jsimd_convsamp_float() shows no Neon
 * call here, so this presumably reports "unsupported" -- confirm. */
672 jsimd_can_convsamp_float(void)
/* Thin wrapper: forwards sample_data, start_col and workspace to
 * jsimd_convsamp_neon().  The end of the parameter list is not part of
 * this listing. */
678 jsimd_convsamp(JSAMPARRAY sample_data, JDIMENSION start_col,
681 jsimd_convsamp_neon(sample_data, start_col, workspace);
/* Floating-point convsamp entry point.  No statement of its body appears in
 * this listing; presumably an empty stub, since no Neon call is shown --
 * TODO confirm. */
685 jsimd_convsamp_float(JSAMPARRAY sample_data, JDIMENSION start_col,
686 FAST_FLOAT *workspace)
/* Capability query paired with jsimd_fdct_islow().  Visible tests: 2-byte
 * DCTELEM, then JSIMD_NEON in simd_support.  A further check and the return
 * statements are not part of this listing. */
691 jsimd_can_fdct_islow(void)
695 /* The code is optimised for these values only */
698 if (sizeof(DCTELEM) != 2)
701 if (simd_support & JSIMD_NEON)
/* Capability query paired with jsimd_fdct_ifast().  Visible tests: 2-byte
 * DCTELEM, then JSIMD_NEON in simd_support.  A further check and the return
 * statements are not part of this listing. */
708 jsimd_can_fdct_ifast(void)
712 /* The code is optimised for these values only */
715 if (sizeof(DCTELEM) != 2)
718 if (simd_support & JSIMD_NEON)
/* Capability query for the floating-point forward DCT.  The body is not
 * part of this listing.  NOTE(review): jsimd_fdct_float() shows no Neon
 * call here, so this presumably reports "unsupported" -- confirm. */
725 jsimd_can_fdct_float(void)
/* Thin wrapper: passes the DCTELEM block pointer straight to
 * jsimd_fdct_islow_neon(). */
731 jsimd_fdct_islow(DCTELEM *data)
733 jsimd_fdct_islow_neon(data);
/* Thin wrapper: passes the DCTELEM block pointer straight to
 * jsimd_fdct_ifast_neon(). */
737 jsimd_fdct_ifast(DCTELEM *data)
739 jsimd_fdct_ifast_neon(data);
/* Floating-point forward DCT entry point.  No statement of its body appears
 * in this listing; presumably an empty stub -- TODO confirm. */
743 jsimd_fdct_float(FAST_FLOAT *data)
/* Capability query paired with jsimd_quantize().  Visible tests: 2-byte
 * JCOEF, 2-byte DCTELEM, then JSIMD_NEON in simd_support.  A further check
 * and the return statements are not part of this listing. */
748 jsimd_can_quantize(void)
752 /* The code is optimised for these values only */
755 if (sizeof(JCOEF) != 2)
757 if (sizeof(DCTELEM) != 2)
760 if (simd_support & JSIMD_NEON)
/* Capability query for floating-point quantization.  The body is not part
 * of this listing.  NOTE(review): jsimd_quantize_float() shows no Neon call
 * here, so this presumably reports "unsupported" -- confirm. */
767 jsimd_can_quantize_float(void)
/* Thin wrapper: forwards coef_block, divisors and workspace to
 * jsimd_quantize_neon() in the same order. */
773 jsimd_quantize(JCOEFPTR coef_block, DCTELEM *divisors, DCTELEM *workspace)
775 jsimd_quantize_neon(coef_block, divisors, workspace);
/* Floating-point quantization entry point.  No statement of its body
 * appears in this listing; presumably an empty stub -- TODO confirm. */
779 jsimd_quantize_float(JCOEFPTR coef_block, FAST_FLOAT *divisors,
780 FAST_FLOAT *workspace)
/* Capability query paired with jsimd_idct_2x2().  Visible tests: 2-byte
 * JCOEF, 8-bit samples, 4-byte JDIMENSION, 2-byte ISLOW_MULT_TYPE, then
 * JSIMD_NEON in simd_support.  A further check and the return statements
 * are not part of this listing. */
785 jsimd_can_idct_2x2(void)
789 /* The code is optimised for these values only */
792 if (sizeof(JCOEF) != 2)
794 if (BITS_IN_JSAMPLE != 8)
796 if (sizeof(JDIMENSION) != 4)
798 if (sizeof(ISLOW_MULT_TYPE) != 2)
801 if (simd_support & JSIMD_NEON)
/* Capability query paired with jsimd_idct_4x4().  Visible tests: 2-byte
 * JCOEF, 8-bit samples, 4-byte JDIMENSION, 2-byte ISLOW_MULT_TYPE, then
 * JSIMD_NEON in simd_support.  A further check and the return statements
 * are not part of this listing. */
808 jsimd_can_idct_4x4(void)
812 /* The code is optimised for these values only */
815 if (sizeof(JCOEF) != 2)
817 if (BITS_IN_JSAMPLE != 8)
819 if (sizeof(JDIMENSION) != 4)
821 if (sizeof(ISLOW_MULT_TYPE) != 2)
824 if (simd_support & JSIMD_NEON)
/* Thin wrapper around jsimd_idct_2x2_neon().  Passes compptr->dct_table,
 * coef_block, output_buf and output_col.  cinfo is not referenced in the
 * visible lines. */
831 jsimd_idct_2x2(j_decompress_ptr cinfo, jpeg_component_info *compptr,
832 JCOEFPTR coef_block, JSAMPARRAY output_buf,
833 JDIMENSION output_col)
835 jsimd_idct_2x2_neon(compptr->dct_table, coef_block, output_buf, output_col);
/* Thin wrapper around jsimd_idct_4x4_neon().  Passes compptr->dct_table,
 * coef_block, output_buf and output_col.  cinfo is not referenced in the
 * visible lines. */
839 jsimd_idct_4x4(j_decompress_ptr cinfo, jpeg_component_info *compptr,
840 JCOEFPTR coef_block, JSAMPARRAY output_buf,
841 JDIMENSION output_col)
843 jsimd_idct_4x4_neon(compptr->dct_table, coef_block, output_buf, output_col);
/* Capability query paired with jsimd_idct_islow().  Visible tests: 2-byte
 * JCOEF, 8-bit samples, 4-byte JDIMENSION, 2-byte ISLOW_MULT_TYPE, then
 * JSIMD_NEON in simd_support.  A further check and the return statements
 * are not part of this listing. */
847 jsimd_can_idct_islow(void)
851 /* The code is optimised for these values only */
854 if (sizeof(JCOEF) != 2)
856 if (BITS_IN_JSAMPLE != 8)
858 if (sizeof(JDIMENSION) != 4)
860 if (sizeof(ISLOW_MULT_TYPE) != 2)
863 if (simd_support & JSIMD_NEON)
/* Capability query paired with jsimd_idct_ifast().  Visible tests: 2-byte
 * JCOEF, 8-bit samples, 4-byte JDIMENSION, 2-byte IFAST_MULT_TYPE,
 * IFAST_SCALE_BITS == 2, then JSIMD_NEON in simd_support.  A further check
 * and the return statements are not part of this listing. */
870 jsimd_can_idct_ifast(void)
874 /* The code is optimised for these values only */
877 if (sizeof(JCOEF) != 2)
879 if (BITS_IN_JSAMPLE != 8)
881 if (sizeof(JDIMENSION) != 4)
883 if (sizeof(IFAST_MULT_TYPE) != 2)
885 if (IFAST_SCALE_BITS != 2)
888 if (simd_support & JSIMD_NEON)
/* Capability query for the floating-point inverse DCT.  The body is not
 * part of this listing.  NOTE(review): jsimd_idct_float() shows no Neon
 * call here, so this presumably reports "unsupported" -- confirm. */
895 jsimd_can_idct_float(void)
/* Thin wrapper around jsimd_idct_islow_neon().  Passes compptr->dct_table,
 * coef_block and output_buf; the rest of the argument list is not part of
 * this listing. */
901 jsimd_idct_islow(j_decompress_ptr cinfo, jpeg_component_info *compptr,
902 JCOEFPTR coef_block, JSAMPARRAY output_buf,
903 JDIMENSION output_col)
905 jsimd_idct_islow_neon(compptr->dct_table, coef_block, output_buf,
/* Thin wrapper around jsimd_idct_ifast_neon().  Passes compptr->dct_table,
 * coef_block and output_buf; the rest of the argument list is not part of
 * this listing. */
910 jsimd_idct_ifast(j_decompress_ptr cinfo, jpeg_component_info *compptr,
911 JCOEFPTR coef_block, JSAMPARRAY output_buf,
912 JDIMENSION output_col)
914 jsimd_idct_ifast_neon(compptr->dct_table, coef_block, output_buf,
/* Floating-point inverse DCT entry point.  No statement of its body appears
 * in this listing; presumably an empty stub -- TODO confirm. */
919 jsimd_idct_float(j_decompress_ptr cinfo, jpeg_component_info *compptr,
920 JCOEFPTR coef_block, JSAMPARRAY output_buf,
921 JDIMENSION output_col)
/* Capability query paired with jsimd_huff_encode_one_block().  Requires a
 * 2-byte JCOEF and then both the JSIMD_NEON bit and a non-zero
 * simd_huffman.  `&` binds tighter than `&&`, so the last test reads as
 * (simd_support & JSIMD_NEON) && simd_huffman.  Other checks and the return
 * statements are not part of this listing. */
926 jsimd_can_huff_encode_one_block(void)
932 if (sizeof(JCOEF) != 2)
935 if (simd_support & JSIMD_NEON && simd_huffman)
/* Thin wrapper: returns whatever jsimd_huff_encode_one_block_neon()
 * returns.  state, buffer, block and last_dc_val are forwarded; the rest of
 * the argument list is not part of this listing. */
942 jsimd_huff_encode_one_block(void *state, JOCTET *buffer, JCOEFPTR block,
943 int last_dc_val, c_derived_tbl *dctbl,
944 c_derived_tbl *actbl)
946 return jsimd_huff_encode_one_block_neon(state, buffer, block, last_dc_val,
/* Capability query paired with jsimd_encode_mcu_AC_first_prepare().
 * Visible tests: 2-byte JCOEF, then JSIMD_NEON in simd_support.  Other
 * checks and the return statements are not part of this listing. */
951 jsimd_can_encode_mcu_AC_first_prepare(void)
957 if (sizeof(JCOEF) != 2)
960 if (simd_support & JSIMD_NEON)
/* Thin wrapper: forwards all six arguments, in order, to
 * jsimd_encode_mcu_AC_first_prepare_neon().  No value is returned from the
 * call in the visible lines. */
967 jsimd_encode_mcu_AC_first_prepare(const JCOEF *block,
968 const int *jpeg_natural_order_start, int Sl,
969 int Al, JCOEF *values, size_t *zerobits)
971 jsimd_encode_mcu_AC_first_prepare_neon(block, jpeg_natural_order_start,
972 Sl, Al, values, zerobits);
/* Capability query paired with jsimd_encode_mcu_AC_refine_prepare().
 * Visible tests: 2-byte JCOEF, then JSIMD_NEON in simd_support.  Other
 * checks and the return statements are not part of this listing. */
976 jsimd_can_encode_mcu_AC_refine_prepare(void)
982 if (sizeof(JCOEF) != 2)
985 if (simd_support & JSIMD_NEON)
/* Thin wrapper: forwards all six arguments, in order, to
 * jsimd_encode_mcu_AC_refine_prepare_neon() and returns its result. */
992 jsimd_encode_mcu_AC_refine_prepare(const JCOEF *block,
993 const int *jpeg_natural_order_start, int Sl,
994 int Al, JCOEF *absvalues, size_t *bits)
996 return jsimd_encode_mcu_AC_refine_prepare_neon(block,
997 jpeg_natural_order_start, Sl,
998 Al, absvalues, bits);