4 * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
5 * Copyright (C) 2011, Nokia Corporation and/or its subsidiary(-ies).
6 * Copyright (C) 2009-2011, 2013-2014, 2016, 2018, D. R. Commander.
7 * Copyright (C) 2015-2016, 2018, Matthieu Darbois.
9 * Based on the x86 SIMD extension for IJG JPEG library,
10 * Copyright (C) 1999-2006, MIYASAKA Masaru.
11 * For conditions of distribution and use, see copyright notice in jsimdext.inc
13 * This file contains the interface between the "normal" portions
14 * of the library and the SIMD implementations when running on a
15 * 32-bit ARM architecture.
18 #define JPEG_INTERNALS
19 #include "../../jinclude.h"
20 #include "../../jpeglib.h"
21 #include "../../jsimd.h"
22 #include "../../jdct.h"
23 #include "../../jsimddct.h"
31 //Changes for JPEG GAMMA enhancement in thumbnail
/* Bitmask of SIMD capabilities.  The initial ~0 acts as a "not yet probed"
 * marker: the detection code compares this variable against ~0U before it
 * does any work. */
35 static unsigned int simd_support = ~0;
/* Tested together with the JSIMD_NEON bit in
 * jsimd_can_huff_encode_one_block(). */
36 static unsigned int simd_huffman = 1;
38 #if defined(__linux__) || defined(ANDROID) || defined(__ANDROID__)
/* 1 MiB ceiling that the /proc/cpuinfo probe compares its line-buffer size
 * against while retrying. */
40 #define SOMEWHAT_SANE_PROC_CPUINFO_SIZE_LIMIT (1024 * 1024)
/*
 * Looks for `feature` as a stand-alone word inside `buffer`.
 * The first visible test compares the start of `buffer` with "Features".
 * The statements guarded by each test, and the values returned, are not part
 * of this excerpt.
 */
43 check_feature(char *buffer, char *feature)
49 if (strncmp(buffer, "Features", 8) != 0)
/* NOTE(review): isspace() receives a plain char here and in the two tests
 * below.  The C standard requires an argument representable as unsigned char
 * (or EOF), so a cast to (unsigned char) would be the portable form. */
52 while (isspace(*buffer))
55 /* Check if 'feature' is present in the buffer as a separate word */
56 while ((p = strstr(buffer, feature))) {
/* A match that is preceded by a non-space character does not start a word. */
57 if (p > buffer && !isspace(*(p - 1))) {
/* *p must be NUL or whitespace to pass this test.  Presumably p has been
 * moved past the match by a statement that is not visible here. */
62 if (*p != 0 && !isspace(*p)) {
/*
 * Reads /proc/cpuinfo one line at a time into a heap buffer of `bufsize`
 * bytes and sets the JSIMD_NEON bit in simd_support when check_feature()
 * returns non-zero for "neon".
 * The return value, the failure handling for malloc()/fopen() and the
 * release of the buffer and stream are not visible in this excerpt.
 */
72 parse_proc_cpuinfo(int bufsize)
74 char *buffer = (char *)malloc(bufsize);
82 fd = fopen("/proc/cpuinfo", "r");
84 while (fgets(buffer, bufsize, fd)) {
/* No '\n' in the buffer and not yet at end of file: the line did not fit. */
85 if (!strchr(buffer, '\n') && !feof(fd)) {
86 /* "impossible" happened - insufficient size of the buffer! */
91 if (check_feature(buffer, "neon"))
92 simd_support |= JSIMD_NEON;
103 * Check what SIMD accelerations are supported.
105 * FIXME: This code is racy under a multi-threaded environment.
/* bufsize is used only by the /proc/cpuinfo probe in the #elif branch below,
 * which is compiled when __ARM_NEON__ is undefined and one of the
 * Linux/Android macros is defined.  The OS alternatives are parenthesised so
 * that this guard selects exactly those configurations.  Unparenthesised, &&
 * bound tighter than ||, so an Android build with __ARM_NEON__ defined
 * declared the variable although the visible code never reads it there. */
113 #if !defined(__ARM_NEON__) && (defined(__linux__) || defined(ANDROID) || defined(__ANDROID__))
114 int bufsize = 1024; /* an initial guess for the line buffer size limit */
/* simd_support starts out as ~0; any other value means it was already set. */
117 if (simd_support != ~0U)
/* NEON selected at compile time: record it without a runtime probe. */
122 #if defined(__ARM_NEON__)
123 simd_support |= JSIMD_NEON;
124 #elif defined(__linux__) || defined(ANDROID) || defined(__ANDROID__)
125 /* We still have a chance to use NEON regardless of globally used
126 * -mcpu/-mfpu options passed to gcc by performing runtime detection via
127 * /proc/cpuinfo parsing on linux/android */
/* Repeat the probe for as long as parse_proc_cpuinfo() returns zero.  The
 * loop body compares bufsize with the size limit; how bufsize is changed
 * between attempts is not visible in this excerpt. */
128 while (!parse_proc_cpuinfo(bufsize)) {
130 if (bufsize > SOMEWHAT_SANE_PROC_CPUINFO_SIZE_LIMIT)
136 /* Force different settings through environment variables */
137 env = getenv("JSIMD_FORCENEON");
138 if ((env != NULL) && (strcmp(env, "1") == 0))
/* Plain assignment: replaces whatever detection stored with NEON only. */
139 simd_support = JSIMD_NEON;
/* The statements guarded by the next two tests are not visible here. */
140 env = getenv("JSIMD_FORCENONE");
141 if ((env != NULL) && (strcmp(env, "1") == 0))
143 env = getenv("JSIMD_NOHUFFENC");
144 if ((env != NULL) && (strcmp(env, "1") == 0))
/*
 * Visible tests: 8-bit samples, a 4-byte JDIMENSION, an RGB pixel size of 3
 * or 4, and the JSIMD_NEON bit in simd_support.  The value returned on each
 * path is not visible in this excerpt.
 */
150 jsimd_can_rgb_ycc(void)
154 /* The code is optimised for these values only */
155 if (BITS_IN_JSAMPLE != 8)
157 if (sizeof(JDIMENSION) != 4)
159 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
162 if (simd_support & JSIMD_NEON)
169 jsimd_can_rgb_gray(void)
/*
 * Visible tests: 8-bit samples, a 4-byte JDIMENSION, an RGB pixel size of 3
 * or 4, and the JSIMD_NEON bit in simd_support.  The value returned on each
 * path is not visible in this excerpt.
 */
175 jsimd_can_ycc_rgb(void)
179 /* The code is optimised for these values only */
180 if (BITS_IN_JSAMPLE != 8)
182 if (sizeof(JDIMENSION) != 4)
184 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
187 if (simd_support & JSIMD_NEON)
/*
 * Visible tests: 8-bit samples, a 4-byte JDIMENSION, and the JSIMD_NEON bit
 * in simd_support.  Unlike the neighbouring RGB checks, no RGB_PIXELSIZE test
 * appears here.  Return values are not visible in this excerpt.
 */
194 jsimd_can_ycc_rgb565(void)
198 /* The code is optimised for these values only */
199 if (BITS_IN_JSAMPLE != 8)
201 if (sizeof(JDIMENSION) != 4)
204 if (simd_support & JSIMD_NEON)
/*
 * Chooses one of the jsimd_ext*_ycc_convert_neon routines according to
 * cinfo->in_color_space and calls it with cinfo->image_width and the
 * caller's buffers.  The case labels that pair each colour space with a
 * routine are not visible in this excerpt.
 */
211 jsimd_rgb_ycc_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
212 JSAMPIMAGE output_buf, JDIMENSION output_row,
/* Every candidate routine shares this signature. */
215 void (*neonfct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
217 switch (cinfo->in_color_space) {
219 neonfct = jsimd_extrgb_ycc_convert_neon;
223 neonfct = jsimd_extrgbx_ycc_convert_neon;
226 neonfct = jsimd_extbgr_ycc_convert_neon;
230 neonfct = jsimd_extbgrx_ycc_convert_neon;
234 neonfct = jsimd_extxbgr_ycc_convert_neon;
238 neonfct = jsimd_extxrgb_ycc_convert_neon;
/* Same routine as the first assignment above; presumably the default case. */
241 neonfct = jsimd_extrgb_ycc_convert_neon;
245 neonfct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
249 jsimd_rgb_gray_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
250 JSAMPIMAGE output_buf, JDIMENSION output_row,
/*
 * Chooses one of the jsimd_ycc_ext*_convert_neon routines according to
 * cinfo->out_color_space and calls it with cinfo->output_width and the
 * caller's buffers.  In the visible JSIMD_NEON branch the converted output is
 * then handed to jsimd_pick_color() when cinfo->pick_color_data is non-NULL
 * and its enablePickColor field is set.  The case labels are not visible in
 * this excerpt.
 */
256 jsimd_ycc_rgb_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
257 JDIMENSION input_row, JSAMPARRAY output_buf,
/* Every candidate routine shares this signature. */
260 void (*neonfct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
262 switch (cinfo->out_color_space) {
264 neonfct = jsimd_ycc_extrgb_convert_neon;
268 neonfct = jsimd_ycc_extrgbx_convert_neon;
271 neonfct = jsimd_ycc_extbgr_convert_neon;
275 neonfct = jsimd_ycc_extbgrx_convert_neon;
279 neonfct = jsimd_ycc_extxbgr_convert_neon;
283 neonfct = jsimd_ycc_extxrgb_convert_neon;
/* Same routine as the first assignment above; presumably the default case. */
286 neonfct = jsimd_ycc_extrgb_convert_neon;
291 if (simd_support & JSIMD_NEON) {
292 neonfct(cinfo->output_width, input_buf, input_row, output_buf, num_rows);
293 PickColor* pickColor = cinfo->pick_color_data;
/* NOTE(review): output_buf is tested for NULL only here, after it has already
 * been passed to neonfct on the previous statement. */
294 if(pickColor && pickColor->enablePickColor && output_buf) {
295 int w = cinfo->output_width;
/* NOTE(review): only the first row pointer of output_buf is used, although
 * num_rows rows were requested from neonfct.  Confirm this is intended. */
296 unsigned char *ptr = *output_buf;
/* When perc <= 0 the span is narrowed to columns x1..x2 inclusive. */
297 if(pickColor->perc <= 0) {
298 w = pickColor->x2 - pickColor->x1 + 1;
/* NOTE(review): the offset assumes 3 bytes per pixel, yet the switch above
 * can select the extrgbx/extbgrx/extxbgr/extxrgb routines, which presumably
 * write 4-byte pixels.  Verify against the output colour space. */
299 ptr = (*output_buf) + (pickColor->x1 * 3);
301 jsimd_pick_color(ptr, pickColor, w);
/* Second call site with the same arguments; the construct that separates it
 * from the branch above is not visible in this excerpt. */
305 neonfct(cinfo->output_width, input_buf, input_row, output_buf, num_rows);
/*
 * Forwards to jsimd_ycc_rgb565_convert_neon() with cinfo->output_width and
 * the caller's buffers and row arguments.
 */
310 jsimd_ycc_rgb565_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
311 JDIMENSION input_row, JSAMPARRAY output_buf,
314 jsimd_ycc_rgb565_convert_neon(cinfo->output_width, input_buf, input_row,
315 output_buf, num_rows);
319 jsimd_can_h2v2_downsample(void)
325 jsimd_can_h2v1_downsample(void)
331 jsimd_h2v2_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
332 JSAMPARRAY input_data, JSAMPARRAY output_data)
337 jsimd_h2v1_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
338 JSAMPARRAY input_data, JSAMPARRAY output_data)
343 jsimd_can_h2v2_upsample(void)
349 jsimd_can_h2v1_upsample(void)
355 jsimd_h2v2_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
356 JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
361 jsimd_h2v1_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
362 JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
367 jsimd_can_h2v2_fancy_upsample(void)
/*
 * Visible tests: 8-bit samples, a 4-byte JDIMENSION, and the JSIMD_NEON bit
 * in simd_support.  Return values are not visible in this excerpt.
 */
373 jsimd_can_h2v1_fancy_upsample(void)
377 /* The code is optimised for these values only */
378 if (BITS_IN_JSAMPLE != 8)
380 if (sizeof(JDIMENSION) != 4)
383 if (simd_support & JSIMD_NEON)
390 jsimd_h2v2_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
391 JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
/*
 * Forwards to jsimd_h2v1_fancy_upsample_neon(), passing
 * cinfo->max_v_samp_factor, compptr->downsampled_width and input_data.  The
 * remaining argument(s) of the call are not visible in this excerpt.
 */
396 jsimd_h2v1_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
397 JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
399 jsimd_h2v1_fancy_upsample_neon(cinfo->max_v_samp_factor,
400 compptr->downsampled_width, input_data,
405 jsimd_can_h2v2_merged_upsample(void)
411 jsimd_can_h2v1_merged_upsample(void)
417 jsimd_h2v2_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
418 JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)
423 jsimd_h2v1_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
424 JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)
/*
 * Visible tests: 8-bit samples, a 4-byte JDIMENSION, a 2-byte DCTELEM, and
 * the JSIMD_NEON bit in simd_support.  Return values are not visible in this
 * excerpt.
 */
429 jsimd_can_convsamp(void)
433 /* The code is optimised for these values only */
436 if (BITS_IN_JSAMPLE != 8)
438 if (sizeof(JDIMENSION) != 4)
440 if (sizeof(DCTELEM) != 2)
443 if (simd_support & JSIMD_NEON)
450 jsimd_can_convsamp_float(void)
/*
 * Forwards sample_data, start_col and workspace to jsimd_convsamp_neon().
 * The end of the parameter list is not visible in this excerpt.
 */
456 jsimd_convsamp(JSAMPARRAY sample_data, JDIMENSION start_col,
459 jsimd_convsamp_neon(sample_data, start_col, workspace);
463 jsimd_convsamp_float(JSAMPARRAY sample_data, JDIMENSION start_col,
464 FAST_FLOAT *workspace)
469 jsimd_can_fdct_islow(void)
/*
 * Visible tests: a 2-byte DCTELEM and the JSIMD_NEON bit in simd_support.
 * Return values are not visible in this excerpt.
 */
475 jsimd_can_fdct_ifast(void)
479 /* The code is optimised for these values only */
482 if (sizeof(DCTELEM) != 2)
485 if (simd_support & JSIMD_NEON)
492 jsimd_can_fdct_float(void)
498 jsimd_fdct_islow(DCTELEM *data)
/* Forwards `data` unchanged to jsimd_fdct_ifast_neon(). */
503 jsimd_fdct_ifast(DCTELEM *data)
505 jsimd_fdct_ifast_neon(data);
509 jsimd_fdct_float(FAST_FLOAT *data)
/*
 * Visible tests: a 2-byte JCOEF, a 2-byte DCTELEM, and the JSIMD_NEON bit in
 * simd_support.  Return values are not visible in this excerpt.
 */
514 jsimd_can_quantize(void)
518 /* The code is optimised for these values only */
521 if (sizeof(JCOEF) != 2)
523 if (sizeof(DCTELEM) != 2)
526 if (simd_support & JSIMD_NEON)
533 jsimd_can_quantize_float(void)
/* Forwards coef_block, divisors and workspace to jsimd_quantize_neon(). */
539 jsimd_quantize(JCOEFPTR coef_block, DCTELEM *divisors, DCTELEM *workspace)
541 jsimd_quantize_neon(coef_block, divisors, workspace);
545 jsimd_quantize_float(JCOEFPTR coef_block, FAST_FLOAT *divisors,
546 FAST_FLOAT *workspace)
/*
 * Visible tests: a 2-byte JCOEF, 8-bit samples, a 4-byte JDIMENSION, a 2-byte
 * ISLOW_MULT_TYPE, and the JSIMD_NEON bit in simd_support.  Return values are
 * not visible in this excerpt.
 */
551 jsimd_can_idct_2x2(void)
555 /* The code is optimised for these values only */
558 if (sizeof(JCOEF) != 2)
560 if (BITS_IN_JSAMPLE != 8)
562 if (sizeof(JDIMENSION) != 4)
564 if (sizeof(ISLOW_MULT_TYPE) != 2)
567 if (simd_support & JSIMD_NEON)
/*
 * Visible tests: a 2-byte JCOEF, 8-bit samples, a 4-byte JDIMENSION, a 2-byte
 * ISLOW_MULT_TYPE, and the JSIMD_NEON bit in simd_support.  Return values are
 * not visible in this excerpt.
 */
574 jsimd_can_idct_4x4(void)
578 /* The code is optimised for these values only */
581 if (sizeof(JCOEF) != 2)
583 if (BITS_IN_JSAMPLE != 8)
585 if (sizeof(JDIMENSION) != 4)
587 if (sizeof(ISLOW_MULT_TYPE) != 2)
590 if (simd_support & JSIMD_NEON)
/*
 * Forwards to jsimd_idct_2x2_neon() with compptr->dct_table, coef_block,
 * output_buf and output_col.  cinfo is not referenced in the visible lines.
 */
597 jsimd_idct_2x2(j_decompress_ptr cinfo, jpeg_component_info *compptr,
598 JCOEFPTR coef_block, JSAMPARRAY output_buf,
599 JDIMENSION output_col)
601 jsimd_idct_2x2_neon(compptr->dct_table, coef_block, output_buf, output_col);
/*
 * Forwards to jsimd_idct_4x4_neon() with compptr->dct_table, coef_block,
 * output_buf and output_col.  cinfo is not referenced in the visible lines.
 */
605 jsimd_idct_4x4(j_decompress_ptr cinfo, jpeg_component_info *compptr,
606 JCOEFPTR coef_block, JSAMPARRAY output_buf,
607 JDIMENSION output_col)
609 jsimd_idct_4x4_neon(compptr->dct_table, coef_block, output_buf, output_col);
/*
 * Visible tests: a 2-byte JCOEF, 8-bit samples, a 4-byte JDIMENSION, a 2-byte
 * ISLOW_MULT_TYPE, and the JSIMD_NEON bit in simd_support.  Return values are
 * not visible in this excerpt.
 */
613 jsimd_can_idct_islow(void)
617 /* The code is optimised for these values only */
620 if (sizeof(JCOEF) != 2)
622 if (BITS_IN_JSAMPLE != 8)
624 if (sizeof(JDIMENSION) != 4)
626 if (sizeof(ISLOW_MULT_TYPE) != 2)
629 if (simd_support & JSIMD_NEON)
/*
 * Visible tests: a 2-byte JCOEF, 8-bit samples, a 4-byte JDIMENSION, a 2-byte
 * IFAST_MULT_TYPE, IFAST_SCALE_BITS equal to 2, and the JSIMD_NEON bit in
 * simd_support.  Return values are not visible in this excerpt.
 */
636 jsimd_can_idct_ifast(void)
640 /* The code is optimised for these values only */
643 if (sizeof(JCOEF) != 2)
645 if (BITS_IN_JSAMPLE != 8)
647 if (sizeof(JDIMENSION) != 4)
649 if (sizeof(IFAST_MULT_TYPE) != 2)
651 if (IFAST_SCALE_BITS != 2)
654 if (simd_support & JSIMD_NEON)
661 jsimd_can_idct_float(void)
/*
 * Forwards to jsimd_idct_islow_neon() with compptr->dct_table, coef_block and
 * output_buf.  The rest of the argument list is not visible in this excerpt.
 */
667 jsimd_idct_islow(j_decompress_ptr cinfo, jpeg_component_info *compptr,
668 JCOEFPTR coef_block, JSAMPARRAY output_buf,
669 JDIMENSION output_col)
671 jsimd_idct_islow_neon(compptr->dct_table, coef_block, output_buf,
/*
 * Forwards to jsimd_idct_ifast_neon() with compptr->dct_table, coef_block and
 * output_buf.  The rest of the argument list is not visible in this excerpt.
 */
676 jsimd_idct_ifast(j_decompress_ptr cinfo, jpeg_component_info *compptr,
677 JCOEFPTR coef_block, JSAMPARRAY output_buf,
678 JDIMENSION output_col)
680 jsimd_idct_ifast_neon(compptr->dct_table, coef_block, output_buf,
685 jsimd_idct_float(j_decompress_ptr cinfo, jpeg_component_info *compptr,
686 JCOEFPTR coef_block, JSAMPARRAY output_buf,
687 JDIMENSION output_col)
/*
 * Visible tests: a 2-byte JCOEF, then the JSIMD_NEON bit in simd_support
 * combined with a non-zero simd_huffman.  Return values are not visible in
 * this excerpt.
 */
692 jsimd_can_huff_encode_one_block(void)
698 if (sizeof(JCOEF) != 2)
/* & binds tighter than &&, so this reads
 * (simd_support & JSIMD_NEON) && simd_huffman. */
701 if (simd_support & JSIMD_NEON && simd_huffman)
/*
 * Returns the result of jsimd_huff_encode_one_block_neon(), called with
 * state, buffer, block and last_dc_val.  The rest of the argument list is not
 * visible in this excerpt.
 */
708 jsimd_huff_encode_one_block(void *state, JOCTET *buffer, JCOEFPTR block,
709 int last_dc_val, c_derived_tbl *dctbl,
710 c_derived_tbl *actbl)
712 return jsimd_huff_encode_one_block_neon(state, buffer, block, last_dc_val,
717 jsimd_can_encode_mcu_AC_first_prepare(void)
723 jsimd_encode_mcu_AC_first_prepare(const JCOEF *block,
724 const int *jpeg_natural_order_start, int Sl,
725 int Al, JCOEF *values, size_t *zerobits)
730 jsimd_can_encode_mcu_AC_refine_prepare(void)
736 jsimd_encode_mcu_AC_refine_prepare(const JCOEF *block,
737 const int *jpeg_natural_order_start, int Sl,
738 int Al, JCOEF *absvalues, size_t *bits)