4 * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
5 * Copyright (C) 2011, Nokia Corporation and/or its subsidiary(-ies).
6 * Copyright (C) 2009-2011, 2013-2014, 2016, 2018, D. R. Commander.
7 * Copyright (C) 2015-2016, 2018, Matthieu Darbois.
8 * Copyright (C) 2019, Google LLC.
10 * Based on the x86 SIMD extension for IJG JPEG library,
11 * Copyright (C) 1999-2006, MIYASAKA Masaru.
12 * For conditions of distribution and use, see copyright notice in jsimdext.inc
14 * This file contains the interface between the "normal" portions
15 * of the library and the SIMD implementations when running on a
16 * 32-bit Arm architecture.
19 #define JPEG_INTERNALS
20 #include "../../jinclude.h"
21 #include "../../jpeglib.h"
22 #include "../../jsimd.h"
23 #include "../../jdct.h"
24 #include "../../jsimddct.h"
32 //Changes for JPEG GAMMA enhancement in thumbnail
/* Bitmask of SIMD capabilities.  It starts out as all ones (~0); the
 * detection code further down compares it against ~0U before doing any
 * work, so the all-ones value appears to act as the "not probed yet"
 * marker.  NOTE(review): confirm against the complete detection routine,
 * parts of which are missing from this listing. */
36 static unsigned int simd_support = ~0;
/* Enabled (1) by default.  It is tested together with the JSIMD_NEON bit
 * when deciding whether the SIMD Huffman encoder may be used. */
37 static unsigned int simd_huffman = 1;
/* Opens a region that is built only when Neon is not already enabled at
 * compile time (__ARM_NEON__ undefined) and the target is Linux or Android.
 * The matching #endif is not part of this listing. */
39 #if !defined(__ARM_NEON__) && (defined(__linux__) || defined(ANDROID) || defined(__ANDROID__))
/* Ceiling of 1024 * 1024 bytes.  The detection code compares its line-buffer
 * size against this value inside the retry loop around
 * parse_proc_cpuinfo(). */
41 #define SOMEWHAT_SANE_PROC_CPUINFO_SIZE_LIMIT (1024 * 1024)
/*
 * check_feature() -- look for a CPU feature name in one line of text.
 *
 * buffer:  the line to inspect; its first 8 characters are compared with
 *          "Features".
 * feature: the word to search for with strstr().
 *
 * Several original lines (declaration of p, pointer advances, braces and
 * every return statement) are missing from this listing, so the return
 * values cannot be read off here.  NOTE(review): the caller uses the result
 * as a truth value, so presumably non-zero means "feature found" -- confirm.
 */
44 check_feature(char *buffer, char *feature)
/* Is this the "Features" line?  Only the 8-character prefix is compared. */
50 if (strncmp(buffer, "Features", 8) != 0)
/* Skip whitespace at the current position.
 * NOTE(review): isspace() is handed a plain char here and below; <ctype.h>
 * functions expect a value representable as unsigned char (or EOF), so a
 * cast to unsigned char would be safer on signed-char targets. */
53 while (isspace(*buffer))
56 /* Check if 'feature' is present in the buffer as a separate word */
57 while ((p = strstr(buffer, feature))) {
/* True when the hit is not at the very start of buffer and the character
 * just before it is not whitespace, i.e. the hit is the tail of a longer
 * word.  The handling of that case is on lines omitted here. */
58 if (p > buffer && !isspace(*(p - 1))) {
/* True when the character at p is neither NUL nor whitespace.
 * NOTE(review): p has presumably been advanced past the match by an
 * omitted line, making this the "word continues" test -- confirm. */
63 if (*p != 0 && !isspace(*p)) {
/*
 * parse_proc_cpuinfo() -- read /proc/cpuinfo line by line and record Neon
 * support in simd_support.
 *
 * bufsize: size in bytes of the temporary line buffer to allocate.
 *
 * The caller loops while this function returns zero, so zero evidently
 * signals "try again"; the return statements themselves are missing from
 * this listing.
 * NOTE(review): no check of the malloc()/fopen() results and no
 * fclose()/free() calls are visible here -- they may sit on omitted lines;
 * confirm against the full file.
 */
73 parse_proc_cpuinfo(int bufsize)
/* Heap-allocated buffer that receives one line at a time. */
75 char *buffer = (char *)malloc(bufsize);
83 fd = fopen("/proc/cpuinfo", "r");
/* fgets() stores at most bufsize - 1 characters per call. */
85 while (fgets(buffer, bufsize, fd)) {
/* No newline in the buffer although end-of-file has not been reached:
 * the line did not fit. */
86 if (!strchr(buffer, '\n') && !feof(fd)) {
87 /* "impossible" happened - insufficient size of the buffer! */
/* A line that advertises "neon" sets the JSIMD_NEON bit. */
92 if (check_feature(buffer, "neon"))
93 simd_support |= JSIMD_NEON;
104 * Check what SIMD accelerations are supported.
106 * FIXME: This code is racy under a multi-threaded environment.
/* Body of the SIMD-detection routine described by the comment above.  Its
 * signature line, several statements and the closing #endif lines are
 * missing from this listing; the routine is presumably init_simd() --
 * confirm against the full file. */
114 #if !defined(__ARM_NEON__) && (defined(__linux__) || defined(ANDROID) || defined(__ANDROID__))
115 int bufsize = 1024; /* an initial guess for the line buffer size limit */
/* simd_support no longer holds its initial all-ones value.
 * NOTE(review): presumably detection already ran and the routine returns
 * early here; the guarded statement is omitted. */
118 if (simd_support != ~0U)
/* Neon enabled at build time: set the bit unconditionally. */
123 #if defined(__ARM_NEON__)
124 simd_support |= JSIMD_NEON;
125 #elif defined(__linux__) || defined(ANDROID) || defined(__ANDROID__)
126 /* We still have a chance to use Neon regardless of globally used
127 * -mcpu/-mfpu options passed to gcc by performing runtime detection via
128 * /proc/cpuinfo parsing on linux/android */
/* Retry until parse_proc_cpuinfo() returns non-zero.
 * NOTE(review): the statement that changes bufsize between attempts is
 * omitted from this listing. */
129 while (!parse_proc_cpuinfo(bufsize)) {
/* Compares the buffer size against the 1 MiB cap; the guarded statement is
 * omitted (presumably it leaves the loop -- confirm). */
131 if (bufsize > SOMEWHAT_SANE_PROC_CPUINFO_SIZE_LIMIT)
137 /* Force different settings through environment variables */
/* JSIMD_FORCENEON set to exactly "1" replaces the whole mask with
 * JSIMD_NEON (plain assignment, not OR). */
138 env = getenv("JSIMD_FORCENEON");
139 if ((env != NULL) && (strcmp(env, "1") == 0))
140 simd_support = JSIMD_NEON;
/* JSIMD_FORCENONE set to exactly "1"; the guarded statement is omitted. */
141 env = getenv("JSIMD_FORCENONE");
142 if ((env != NULL) && (strcmp(env, "1") == 0))
/* JSIMD_NOHUFFENC set to exactly "1"; the guarded statement is omitted.
 * NOTE(review): presumably it clears simd_huffman -- confirm. */
144 env = getenv("JSIMD_NOHUFFENC");
145 if ((env != NULL) && (strcmp(env, "1") == 0))
/*
 * jsimd_can_rgb_ycc() -- gate for the Neon RGB-to-YCC conversion path.
 *
 * Visible conditions: 8-bit samples, 4-byte JDIMENSION, RGB_PIXELSIZE of 3
 * or 4, and the JSIMD_NEON bit in simd_support.  The statements guarded by
 * each condition are missing from this listing.
 * NOTE(review): presumably each mismatch yields 0 and the final test
 * yields 1 -- confirm against the full file.
 */
151 jsimd_can_rgb_ycc(void)
155 /* The code is optimised for these values only */
156 if (BITS_IN_JSAMPLE != 8)
158 if (sizeof(JDIMENSION) != 4)
160 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
/* Run-time capability test. */
163 if (simd_support & JSIMD_NEON)
170 jsimd_can_rgb_gray(void)
/*
 * jsimd_can_ycc_rgb() -- gate for the Neon YCC-to-RGB conversion path.
 *
 * Tests the same build-time constants as its RGB-to-YCC counterpart
 * (BITS_IN_JSAMPLE, sizeof(JDIMENSION), RGB_PIXELSIZE) and then the
 * JSIMD_NEON bit.  The guarded statements are not part of this listing;
 * presumably they are the 0/1 results -- confirm.
 */
176 jsimd_can_ycc_rgb(void)
180 /* The code is optimised for these values only */
181 if (BITS_IN_JSAMPLE != 8)
183 if (sizeof(JDIMENSION) != 4)
185 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
/* Run-time capability test. */
188 if (simd_support & JSIMD_NEON)
/*
 * jsimd_can_ycc_rgb565() -- gate for the Neon YCC-to-RGB565 path.
 *
 * Only the sample depth and sizeof(JDIMENSION) are checked before the
 * JSIMD_NEON test; unlike the plain RGB gates there is no RGB_PIXELSIZE
 * condition among the visible lines.  Guarded statements are omitted from
 * this listing.
 */
195 jsimd_can_ycc_rgb565(void)
199 /* The code is optimised for these values only */
200 if (BITS_IN_JSAMPLE != 8)
202 if (sizeof(JDIMENSION) != 4)
/* Run-time capability test. */
205 if (simd_support & JSIMD_NEON)
/*
 * jsimd_rgb_ycc_convert() -- dispatch an RGB-to-YCC conversion to the Neon
 * routine that matches the input pixel layout.
 *
 * cinfo:      compression object; in_color_space selects the routine and
 *             image_width is forwarded as the first argument.
 * input_buf:  forwarded unchanged.
 * output_buf: forwarded unchanged.
 * output_row: forwarded unchanged.
 * (The last parameter line of the signature is missing from this listing;
 * the call below shows a num_rows argument.)
 */
212 jsimd_rgb_ycc_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
213 JSAMPIMAGE output_buf, JDIMENSION output_row,
/* Pointer to whichever Neon converter gets selected below. */
216 void (*neonfct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
/* The case labels and break statements of this switch are omitted from the
 * listing; only the assignments remain. */
218 switch (cinfo->in_color_space) {
220 neonfct = jsimd_extrgb_ycc_convert_neon;
224 neonfct = jsimd_extrgbx_ycc_convert_neon;
227 neonfct = jsimd_extbgr_ycc_convert_neon;
231 neonfct = jsimd_extbgrx_ycc_convert_neon;
235 neonfct = jsimd_extxbgr_ycc_convert_neon;
239 neonfct = jsimd_extxrgb_ycc_convert_neon;
/* Same routine as the first assignment; NOTE(review): presumably the
 * default branch -- confirm. */
242 neonfct = jsimd_extrgb_ycc_convert_neon;
246 neonfct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
250 jsimd_rgb_gray_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
251 JSAMPIMAGE output_buf, JDIMENSION output_row,
/*
 * jsimd_ycc_rgb_convert() -- dispatch a YCC-to-RGB conversion to the Neon
 * routine for the requested output layout, then optionally run the
 * pick-color pass over the converted output.
 *
 * cinfo:      decompression object; out_color_space selects the routine,
 *             output_width is forwarded, pick_color_data drives the extra
 *             pass.
 * input_buf:  forwarded unchanged.
 * input_row:  forwarded unchanged.
 * output_buf: destination rows; also read by the pick-color pass.
 * (The last parameter line of the signature is missing from this listing;
 * the calls below show a num_rows argument.)
 */
257 jsimd_ycc_rgb_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
258 JDIMENSION input_row, JSAMPARRAY output_buf,
/* Pointer to whichever Neon converter gets selected below. */
261 void (*neonfct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
/* Case labels and break statements are omitted from the listing. */
263 switch (cinfo->out_color_space) {
265 neonfct = jsimd_ycc_extrgb_convert_neon;
269 neonfct = jsimd_ycc_extrgbx_convert_neon;
272 neonfct = jsimd_ycc_extbgr_convert_neon;
276 neonfct = jsimd_ycc_extbgrx_convert_neon;
280 neonfct = jsimd_ycc_extxbgr_convert_neon;
284 neonfct = jsimd_ycc_extxrgb_convert_neon;
287 neonfct = jsimd_ycc_extrgb_convert_neon;
292 if (simd_support & JSIMD_NEON) {
293 neonfct(cinfo->output_width, input_buf, input_row, output_buf, num_rows);
/* Optional pick-color pass on the freshly converted data. */
294 PickColor* pickColor = cinfo->pick_color_data;
/* NOTE(review): output_buf is tested for NULL only here, after it has
 * already been handed to neonfct above. */
295 if(pickColor && pickColor->enablePickColor && output_buf) {
/* Default: the full output width, starting at the first row pointer. */
296 int w = cinfo->output_width;
297 unsigned char *ptr = *output_buf;
/* perc <= 0: restrict the pass to the columns x1..x2 inclusive. */
298 if(pickColor->perc <= 0) {
299 w = pickColor->x2 - pickColor->x1 + 1;
/* NOTE(review): the byte offset assumes 3 bytes per pixel, yet the switch
 * above can also select the extrgbx/extbgrx/extxbgr/extxrgb converters --
 * confirm the pass is only enabled for 3-byte layouts. */
300 ptr = (*output_buf) + (pickColor->x1 * 3);
/* NOTE(review): only the first row (*output_buf) is passed on, whatever
 * num_rows is -- confirm this is intended. */
302 jsimd_pick_color(ptr, pickColor, w);
/* Second conversion call with identical arguments.  The construct that
 * encloses it (presumably an else branch) is omitted from this listing.
 * NOTE(review): if it is the else of the JSIMD_NEON test, a Neon routine
 * would run without the capability bit set -- confirm. */
306 neonfct(cinfo->output_width, input_buf, input_row, output_buf, num_rows);
/*
 * jsimd_ycc_rgb565_convert() -- forward a YCC-to-RGB565 conversion to the
 * Neon routine.  cinfo contributes only output_width; the remaining
 * arguments are passed through unchanged.  (The last parameter line of the
 * signature is missing from this listing.)
 */
311 jsimd_ycc_rgb565_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
312 JDIMENSION input_row, JSAMPARRAY output_buf,
315 jsimd_ycc_rgb565_convert_neon(cinfo->output_width, input_buf, input_row,
316 output_buf, num_rows);
320 jsimd_can_h2v2_downsample(void)
326 jsimd_can_h2v1_downsample(void)
332 jsimd_h2v2_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
333 JSAMPARRAY input_data, JSAMPARRAY output_data)
338 jsimd_h2v1_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
339 JSAMPARRAY input_data, JSAMPARRAY output_data)
344 jsimd_can_h2v2_upsample(void)
350 jsimd_can_h2v1_upsample(void)
356 jsimd_h2v2_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
357 JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
362 jsimd_h2v1_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
363 JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
368 jsimd_can_h2v2_fancy_upsample(void)
/*
 * jsimd_can_h2v1_fancy_upsample() -- gate for the Neon h2v1 fancy
 * upsampler.  Visible conditions: 8-bit samples, 4-byte JDIMENSION and the
 * JSIMD_NEON bit.  The guarded statements are omitted from this listing.
 */
374 jsimd_can_h2v1_fancy_upsample(void)
378 /* The code is optimised for these values only */
379 if (BITS_IN_JSAMPLE != 8)
381 if (sizeof(JDIMENSION) != 4)
/* Run-time capability test. */
384 if (simd_support & JSIMD_NEON)
391 jsimd_h2v2_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
392 JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
/*
 * jsimd_h2v1_fancy_upsample() -- forward to the Neon h2v1 fancy upsampler.
 *
 * cinfo:           supplies max_v_samp_factor.
 * compptr:         supplies downsampled_width.
 * input_data:      passed through.
 * output_data_ptr: not among the visible arguments; the tail of the call
 *                  is on a line omitted from this listing.
 */
397 jsimd_h2v1_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
398 JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
400 jsimd_h2v1_fancy_upsample_neon(cinfo->max_v_samp_factor,
401 compptr->downsampled_width, input_data,
406 jsimd_can_h2v2_merged_upsample(void)
412 jsimd_can_h2v1_merged_upsample(void)
418 jsimd_h2v2_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
419 JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)
424 jsimd_h2v1_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
425 JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)
/*
 * jsimd_can_convsamp() -- gate for the Neon sample-conversion routine.
 * Visible conditions: 8-bit samples, 4-byte JDIMENSION, 2-byte DCTELEM and
 * the JSIMD_NEON bit.  At least one further line after the "optimised"
 * comment, and all guarded statements, are omitted from this listing.
 */
430 jsimd_can_convsamp(void)
434 /* The code is optimised for these values only */
437 if (BITS_IN_JSAMPLE != 8)
439 if (sizeof(JDIMENSION) != 4)
441 if (sizeof(DCTELEM) != 2)
/* Run-time capability test. */
444 if (simd_support & JSIMD_NEON)
451 jsimd_can_convsamp_float(void)
/*
 * jsimd_convsamp() -- forward to jsimd_convsamp_neon() with sample_data,
 * start_col and workspace.  The signature line that declares workspace is
 * omitted from this listing.
 */
457 jsimd_convsamp(JSAMPARRAY sample_data, JDIMENSION start_col,
460 jsimd_convsamp_neon(sample_data, start_col, workspace);
464 jsimd_convsamp_float(JSAMPARRAY sample_data, JDIMENSION start_col,
465 FAST_FLOAT *workspace)
470 jsimd_can_fdct_islow(void)
/*
 * jsimd_can_fdct_ifast() -- gate for the Neon fast-integer forward DCT.
 * Visible conditions: 2-byte DCTELEM and the JSIMD_NEON bit.  Other lines
 * between the comment and the sizeof test, and all guarded statements, are
 * omitted from this listing.
 */
476 jsimd_can_fdct_ifast(void)
480 /* The code is optimised for these values only */
483 if (sizeof(DCTELEM) != 2)
/* Run-time capability test. */
486 if (simd_support & JSIMD_NEON)
493 jsimd_can_fdct_float(void)
499 jsimd_fdct_islow(DCTELEM *data)
/*
 * jsimd_fdct_ifast() -- hand the coefficient block `data` straight to
 * jsimd_fdct_ifast_neon().
 */
504 jsimd_fdct_ifast(DCTELEM *data)
506 jsimd_fdct_ifast_neon(data);
510 jsimd_fdct_float(FAST_FLOAT *data)
/*
 * jsimd_can_quantize() -- gate for the Neon quantizer.  Visible
 * conditions: 2-byte JCOEF, 2-byte DCTELEM and the JSIMD_NEON bit.  Other
 * lines after the comment, and all guarded statements, are omitted from
 * this listing.
 */
515 jsimd_can_quantize(void)
519 /* The code is optimised for these values only */
522 if (sizeof(JCOEF) != 2)
524 if (sizeof(DCTELEM) != 2)
/* Run-time capability test. */
527 if (simd_support & JSIMD_NEON)
534 jsimd_can_quantize_float(void)
/*
 * jsimd_quantize() -- forward to jsimd_quantize_neon().
 *
 * coef_block: passed through as the first argument.
 * divisors:   passed through as the second argument.
 * workspace:  passed through as the third argument.
 */
540 jsimd_quantize(JCOEFPTR coef_block, DCTELEM *divisors, DCTELEM *workspace)
542 jsimd_quantize_neon(coef_block, divisors, workspace);
546 jsimd_quantize_float(JCOEFPTR coef_block, FAST_FLOAT *divisors,
547 FAST_FLOAT *workspace)
/*
 * jsimd_can_idct_2x2() -- gate for the Neon 2x2 reduced-size inverse DCT.
 * Visible conditions: 2-byte JCOEF, 8-bit samples, 4-byte JDIMENSION,
 * 2-byte ISLOW_MULT_TYPE and the JSIMD_NEON bit.  Guarded statements (and
 * at least one line after the comment) are omitted from this listing.
 */
552 jsimd_can_idct_2x2(void)
556 /* The code is optimised for these values only */
559 if (sizeof(JCOEF) != 2)
561 if (BITS_IN_JSAMPLE != 8)
563 if (sizeof(JDIMENSION) != 4)
565 if (sizeof(ISLOW_MULT_TYPE) != 2)
/* Run-time capability test. */
568 if (simd_support & JSIMD_NEON)
/*
 * jsimd_can_idct_4x4() -- gate for the Neon 4x4 reduced-size inverse DCT.
 * The visible tests are the same set used for the 2x2 variant: JCOEF size,
 * sample depth, JDIMENSION size, ISLOW_MULT_TYPE size, then the JSIMD_NEON
 * bit.  Guarded statements are omitted from this listing.
 */
575 jsimd_can_idct_4x4(void)
579 /* The code is optimised for these values only */
582 if (sizeof(JCOEF) != 2)
584 if (BITS_IN_JSAMPLE != 8)
586 if (sizeof(JDIMENSION) != 4)
588 if (sizeof(ISLOW_MULT_TYPE) != 2)
/* Run-time capability test. */
591 if (simd_support & JSIMD_NEON)
/*
 * jsimd_idct_2x2() -- forward to jsimd_idct_2x2_neon().
 *
 * cinfo:      not referenced by the visible call.
 * compptr:    supplies dct_table.
 * coef_block: passed through.
 * output_buf: passed through.
 * output_col: passed through.
 */
598 jsimd_idct_2x2(j_decompress_ptr cinfo, jpeg_component_info *compptr,
599 JCOEFPTR coef_block, JSAMPARRAY output_buf,
600 JDIMENSION output_col)
602 jsimd_idct_2x2_neon(compptr->dct_table, coef_block, output_buf, output_col);
/*
 * jsimd_idct_4x4() -- forward to jsimd_idct_4x4_neon().
 *
 * cinfo:      not referenced by the visible call.
 * compptr:    supplies dct_table.
 * coef_block: passed through.
 * output_buf: passed through.
 * output_col: passed through.
 */
606 jsimd_idct_4x4(j_decompress_ptr cinfo, jpeg_component_info *compptr,
607 JCOEFPTR coef_block, JSAMPARRAY output_buf,
608 JDIMENSION output_col)
610 jsimd_idct_4x4_neon(compptr->dct_table, coef_block, output_buf, output_col);
/*
 * jsimd_can_idct_islow() -- gate for the Neon accurate-integer inverse
 * DCT.  Visible conditions: 2-byte JCOEF, 8-bit samples, 4-byte
 * JDIMENSION, 2-byte ISLOW_MULT_TYPE and the JSIMD_NEON bit.  Guarded
 * statements are omitted from this listing.
 */
614 jsimd_can_idct_islow(void)
618 /* The code is optimised for these values only */
621 if (sizeof(JCOEF) != 2)
623 if (BITS_IN_JSAMPLE != 8)
625 if (sizeof(JDIMENSION) != 4)
627 if (sizeof(ISLOW_MULT_TYPE) != 2)
/* Run-time capability test. */
630 if (simd_support & JSIMD_NEON)
/*
 * jsimd_can_idct_ifast() -- gate for the Neon fast-integer inverse DCT.
 * Besides the JCOEF, sample-depth and JDIMENSION tests it requires a
 * 2-byte IFAST_MULT_TYPE and IFAST_SCALE_BITS equal to 2, followed by the
 * JSIMD_NEON bit.  Guarded statements are omitted from this listing.
 */
637 jsimd_can_idct_ifast(void)
641 /* The code is optimised for these values only */
644 if (sizeof(JCOEF) != 2)
646 if (BITS_IN_JSAMPLE != 8)
648 if (sizeof(JDIMENSION) != 4)
650 if (sizeof(IFAST_MULT_TYPE) != 2)
652 if (IFAST_SCALE_BITS != 2)
/* Run-time capability test. */
655 if (simd_support & JSIMD_NEON)
662 jsimd_can_idct_float(void)
/*
 * jsimd_idct_islow() -- forward to jsimd_idct_islow_neon() with
 * compptr->dct_table, coef_block and output_buf.  The tail of the call is
 * on a line omitted from this listing (presumably output_col -- confirm).
 * cinfo is not referenced by the visible part of the call.
 */
668 jsimd_idct_islow(j_decompress_ptr cinfo, jpeg_component_info *compptr,
669 JCOEFPTR coef_block, JSAMPARRAY output_buf,
670 JDIMENSION output_col)
672 jsimd_idct_islow_neon(compptr->dct_table, coef_block, output_buf,
/*
 * jsimd_idct_ifast() -- forward to jsimd_idct_ifast_neon() with
 * compptr->dct_table, coef_block and output_buf.  The tail of the call is
 * on a line omitted from this listing (presumably output_col -- confirm).
 * cinfo is not referenced by the visible part of the call.
 */
677 jsimd_idct_ifast(j_decompress_ptr cinfo, jpeg_component_info *compptr,
678 JCOEFPTR coef_block, JSAMPARRAY output_buf,
679 JDIMENSION output_col)
681 jsimd_idct_ifast_neon(compptr->dct_table, coef_block, output_buf,
686 jsimd_idct_float(j_decompress_ptr cinfo, jpeg_component_info *compptr,
687 JCOEFPTR coef_block, JSAMPARRAY output_buf,
688 JDIMENSION output_col)
/*
 * jsimd_can_huff_encode_one_block() -- gate for the Neon Huffman block
 * encoder.  Requires a 2-byte JCOEF, the JSIMD_NEON bit and a non-zero
 * simd_huffman flag.  Guarded statements are omitted from this listing.
 */
693 jsimd_can_huff_encode_one_block(void)
699 if (sizeof(JCOEF) != 2)
/* `&` binds tighter than `&&`, so this reads
 * (simd_support & JSIMD_NEON) && simd_huffman. */
702 if (simd_support & JSIMD_NEON && simd_huffman)
/*
 * jsimd_huff_encode_one_block() -- return whatever
 * jsimd_huff_encode_one_block_neon() returns.
 *
 * state, buffer, block, last_dc_val: passed through in that order.
 * dctbl, actbl: not among the visible arguments; the tail of the call is
 *               on a line omitted from this listing.
 */
709 jsimd_huff_encode_one_block(void *state, JOCTET *buffer, JCOEFPTR block,
710 int last_dc_val, c_derived_tbl *dctbl,
711 c_derived_tbl *actbl)
713 return jsimd_huff_encode_one_block_neon(state, buffer, block, last_dc_val,
718 jsimd_can_encode_mcu_AC_first_prepare(void)
724 jsimd_encode_mcu_AC_first_prepare(const JCOEF *block,
725 const int *jpeg_natural_order_start, int Sl,
726 int Al, JCOEF *values, size_t *zerobits)
731 jsimd_can_encode_mcu_AC_refine_prepare(void)
737 jsimd_encode_mcu_AC_refine_prepare(const JCOEF *block,
738 const int *jpeg_natural_order_start, int Sl,
739 int Al, JCOEF *absvalues, size_t *bits)