4 * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
5 * Copyright (C) 2011, Nokia Corporation and/or its subsidiary(-ies).
6 * Copyright (C) 2009-2011, 2013-2014, 2016, 2018, D. R. Commander.
7 * Copyright (C) 2015-2016, 2018, Matthieu Darbois.
9 * Based on the x86 SIMD extension for IJG JPEG library,
10 * Copyright (C) 1999-2006, MIYASAKA Masaru.
11 * For conditions of distribution and use, see copyright notice in jsimdext.inc
13 * This file contains the interface between the "normal" portions
14 * of the library and the SIMD implementations when running on a
15 * 32-bit ARM architecture.
18 #define JPEG_INTERNALS
19 #include "../../jinclude.h"
20 #include "../../jpeglib.h"
21 #include "../../jsimd.h"
22 #include "../../jdct.h"
23 #include "../../jsimddct.h"
/* Bit mask of usable SIMD features.  It starts out as all-ones, the value the
 * detection code compares against (~0U) before it probes anything. */
30 static unsigned int simd_support = ~0;
/* Tested together with the JSIMD_NEON bit by
 * jsimd_can_huff_encode_one_block().
 * NOTE(review): the JSIMD_NOHUFFENC environment variable presumably clears
 * this flag; the assignment is not among the lines shown -- confirm. */
31 static unsigned int simd_huffman = 1;
33 #if defined(__linux__) || defined(ANDROID) || defined(__ANDROID__)
35 #define SOMEWHAT_SANE_PROC_CPUINFO_SIZE_LIMIT (1024 * 1024)
/*
 * Report whether a /proc/cpuinfo line advertises a given CPU feature.
 *
 * buffer:  one NUL-terminated line of text.  Only lines that begin with
 *          "Features" are considered; the text is not modified.
 * feature: the feature name to look for, e.g. "neon".
 *
 * Returns 1 when 'feature' occurs after the "Features" tag as a separate,
 * whitespace-delimited word, and 0 otherwise (including when 'feature' is the
 * empty string).
 */
static int
check_feature(char *buffer, char *feature)
{
  const char *list, *cursor, *hit;
  size_t feature_len = strlen(feature);

  if (feature_len == 0)
    return 0;
  if (strncmp(buffer, "Features", 8) != 0)
    return 0;

  /* <ctype.h> classifiers are only defined for values representable as
   * unsigned char (or EOF), so every char is converted before the call. */
  cursor = buffer + 8;
  while (isspace((unsigned char)*cursor))
    cursor++;
  list = cursor;

  /* Check if 'feature' is present in the buffer as a separate word */
  while ((hit = strstr(cursor, feature)) != NULL) {
    const char *end = hit + feature_len;
    int starts_word = (hit == list) || isspace((unsigned char)hit[-1]);
    int ends_word = (*end == '\0') || isspace((unsigned char)*end);

    if (starts_word && ends_word)
      return 1;

    /* Resume right after this occurrence.  The word-start test stays anchored
     * to 'list', so moving the search position forward can never make a
     * match in the middle of a longer word look like the start of one. */
    cursor = hit + 1;
  }

  return 0;
}
/* Reads /proc/cpuinfo line by line through a heap buffer of 'bufsize' bytes
 * and ORs JSIMD_NEON into simd_support for every line on which
 * check_feature() reports "neon".
 * NOTE(review): the return statements and the fclose()/free() cleanup are not
 * among the lines shown here.  The retry loop in the detection code suggests
 * a zero return when a line does not fit in the buffer -- confirm against the
 * full file. */
67 parse_proc_cpuinfo(int bufsize)
69 char *buffer = (char *)malloc(bufsize);
77 fd = fopen("/proc/cpuinfo", "r");
/* fgets() stores at most bufsize - 1 characters per call. */
79 while (fgets(buffer, bufsize, fd)) {
/* No newline in the buffer and not at end of file: the line was longer than
 * the buffer could hold. */
80 if (!strchr(buffer, '\n') && !feof(fd)) {
81 /* "impossible" happened - insufficient size of the buffer! */
86 if (check_feature(buffer, "neon"))
87 simd_support |= JSIMD_NEON;
98 * Check what SIMD accelerations are supported.
100 * FIXME: This code is racy under a multi-threaded environment.
/* Declare the /proc/cpuinfo line-buffer size only for builds that take the
 * runtime-detection branch further down, i.e. a Linux/Android build without
 * compile-time NEON.  The OS tests are parenthesized because '&&' binds
 * tighter than '||': without the parentheses the declaration was also emitted
 * on every Android build, including ones that define __ARM_NEON__ and never
 * use it. */
108 #if !defined(__ARM_NEON__) && (defined(__linux__) || defined(ANDROID) || defined(__ANDROID__))
109 int bufsize = 1024; /* an initial guess for the line buffer size limit */
/* simd_support is initialised to all-ones at file scope, so any other value
 * means it has been written since.
 * NOTE(review): presumably an early return follows; it is not shown here. */
112 if (simd_support != ~0U)
/* NEON guaranteed by the compiler flags: no runtime probing is needed. */
117 #if defined(__ARM_NEON__)
118 simd_support |= JSIMD_NEON;
119 #elif defined(__linux__) || defined(ANDROID) || defined(__ANDROID__)
120 /* We still have a chance to use NEON regardless of globally used
121 * -mcpu/-mfpu options passed to gcc by performing runtime detection via
122 * /proc/cpuinfo parsing on linux/android */
/* Retry while parsing reports failure, giving up once the buffer size passes
 * SOMEWHAT_SANE_PROC_CPUINFO_SIZE_LIMIT.
 * NOTE(review): the statement that grows bufsize is not shown here. */
123 while (!parse_proc_cpuinfo(bufsize)) {
125 if (bufsize > SOMEWHAT_SANE_PROC_CPUINFO_SIZE_LIMIT)
131 /* Force different settings through environment variables */
/* Each override applies only when the variable is set to exactly "1". */
132 env = getenv("JSIMD_FORCENEON");
133 if ((env != NULL) && (strcmp(env, "1") == 0))
134 simd_support = JSIMD_NEON;
135 env = getenv("JSIMD_FORCENONE");
136 if ((env != NULL) && (strcmp(env, "1") == 0))
138 env = getenv("JSIMD_NOHUFFENC");
139 if ((env != NULL) && (strcmp(env, "1") == 0))
/* Capability check.  The visible tests cover three compile-time properties
 * (BITS_IN_JSAMPLE must be 8, sizeof(JDIMENSION) must be 4, RGB_PIXELSIZE
 * must be 3 or 4) followed by the JSIMD_NEON bit of simd_support.
 * NOTE(review): the statements controlled by these tests (the returned
 * values) are not part of this excerpt. */
145 jsimd_can_rgb_ycc(void)
149 /* The code is optimised for these values only */
150 if (BITS_IN_JSAMPLE != 8)
152 if (sizeof(JDIMENSION) != 4)
154 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
157 if (simd_support & JSIMD_NEON)
164 jsimd_can_rgb_gray(void)
/* Capability check.  The visible tests are the same as in
 * jsimd_can_rgb_ycc(): 8-bit samples, 4-byte JDIMENSION, RGB_PIXELSIZE of 3
 * or 4, then the JSIMD_NEON bit of simd_support.
 * NOTE(review): the statements controlled by these tests are not part of this
 * excerpt. */
170 jsimd_can_ycc_rgb(void)
174 /* The code is optimised for these values only */
175 if (BITS_IN_JSAMPLE != 8)
177 if (sizeof(JDIMENSION) != 4)
179 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
182 if (simd_support & JSIMD_NEON)
/* Capability check.  Visible tests: 8-bit samples, 4-byte JDIMENSION, then
 * the JSIMD_NEON bit of simd_support.  Unlike the other colour-conversion
 * checks there is no RGB_PIXELSIZE test among the visible lines.
 * NOTE(review): the statements controlled by these tests are not part of this
 * excerpt. */
189 jsimd_can_ycc_rgb565(void)
193 /* The code is optimised for these values only */
194 if (BITS_IN_JSAMPLE != 8)
196 if (sizeof(JDIMENSION) != 4)
199 if (simd_support & JSIMD_NEON)
/* Chooses one of the jsimd_ext*_ycc_convert_neon routines according to
 * cinfo->in_color_space and calls it with the image width, the input and
 * output buffers, the output row and the row count.
 * NOTE(review): the case labels are not part of this excerpt, so the mapping
 * from colour space to routine cannot be confirmed here.
 * jsimd_extrgb_ycc_convert_neon is assigned twice; the last assignment is
 * presumably the default branch. */
206 jsimd_rgb_ycc_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
207 JSAMPIMAGE output_buf, JDIMENSION output_row,
210 void (*neonfct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
212 switch (cinfo->in_color_space) {
214 neonfct = jsimd_extrgb_ycc_convert_neon;
218 neonfct = jsimd_extrgbx_ycc_convert_neon;
221 neonfct = jsimd_extbgr_ycc_convert_neon;
225 neonfct = jsimd_extbgrx_ycc_convert_neon;
229 neonfct = jsimd_extxbgr_ycc_convert_neon;
233 neonfct = jsimd_extxrgb_ycc_convert_neon;
236 neonfct = jsimd_extrgb_ycc_convert_neon;
240 neonfct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
244 jsimd_rgb_gray_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
245 JSAMPIMAGE output_buf, JDIMENSION output_row,
/* Chooses one of the jsimd_ycc_ext*_convert_neon routines according to
 * cinfo->out_color_space and calls it with the output width, the input
 * buffer and row, the output buffer and the row count.
 * NOTE(review): the case labels are not part of this excerpt, so the mapping
 * from colour space to routine cannot be confirmed here.
 * jsimd_ycc_extrgb_convert_neon is assigned twice; the last assignment is
 * presumably the default branch. */
251 jsimd_ycc_rgb_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
252 JDIMENSION input_row, JSAMPARRAY output_buf,
255 void (*neonfct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
257 switch (cinfo->out_color_space) {
259 neonfct = jsimd_ycc_extrgb_convert_neon;
263 neonfct = jsimd_ycc_extrgbx_convert_neon;
266 neonfct = jsimd_ycc_extbgr_convert_neon;
270 neonfct = jsimd_ycc_extbgrx_convert_neon;
274 neonfct = jsimd_ycc_extxbgr_convert_neon;
278 neonfct = jsimd_ycc_extxrgb_convert_neon;
281 neonfct = jsimd_ycc_extrgb_convert_neon;
285 neonfct(cinfo->output_width, input_buf, input_row, output_buf, num_rows);
/* Forwards directly to jsimd_ycc_rgb565_convert_neon; the only field read
 * from cinfo on the visible lines is output_width. */
289 jsimd_ycc_rgb565_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
290 JDIMENSION input_row, JSAMPARRAY output_buf,
293 jsimd_ycc_rgb565_convert_neon(cinfo->output_width, input_buf, input_row,
294 output_buf, num_rows);
298 jsimd_can_h2v2_downsample(void)
304 jsimd_can_h2v1_downsample(void)
310 jsimd_h2v2_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
311 JSAMPARRAY input_data, JSAMPARRAY output_data)
316 jsimd_h2v1_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
317 JSAMPARRAY input_data, JSAMPARRAY output_data)
322 jsimd_can_h2v2_upsample(void)
328 jsimd_can_h2v1_upsample(void)
334 jsimd_h2v2_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
335 JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
340 jsimd_h2v1_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
341 JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
346 jsimd_can_h2v2_fancy_upsample(void)
/* Capability check.  Visible tests: 8-bit samples, 4-byte JDIMENSION, then
 * the JSIMD_NEON bit of simd_support.
 * NOTE(review): the statements controlled by these tests are not part of this
 * excerpt. */
352 jsimd_can_h2v1_fancy_upsample(void)
356 /* The code is optimised for these values only */
357 if (BITS_IN_JSAMPLE != 8)
359 if (sizeof(JDIMENSION) != 4)
362 if (simd_support & JSIMD_NEON)
369 jsimd_h2v2_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
370 JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
/* Forwards to jsimd_h2v1_fancy_upsample_neon, passing
 * cinfo->max_v_samp_factor, compptr->downsampled_width and input_data.
 * NOTE(review): the end of the argument list is cut off in this excerpt. */
375 jsimd_h2v1_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
376 JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
378 jsimd_h2v1_fancy_upsample_neon(cinfo->max_v_samp_factor,
379 compptr->downsampled_width, input_data,
384 jsimd_can_h2v2_merged_upsample(void)
390 jsimd_can_h2v1_merged_upsample(void)
396 jsimd_h2v2_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
397 JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)
402 jsimd_h2v1_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
403 JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)
/* Capability check.  Visible tests: 8-bit samples, 4-byte JDIMENSION, 2-byte
 * DCTELEM, then the JSIMD_NEON bit of simd_support.
 * NOTE(review): the statements controlled by these tests, and any test on the
 * omitted lines between them, are not part of this excerpt. */
408 jsimd_can_convsamp(void)
412 /* The code is optimised for these values only */
415 if (BITS_IN_JSAMPLE != 8)
417 if (sizeof(JDIMENSION) != 4)
419 if (sizeof(DCTELEM) != 2)
422 if (simd_support & JSIMD_NEON)
429 jsimd_can_convsamp_float(void)
/* Forwards its three arguments unchanged to jsimd_convsamp_neon.
 * NOTE(review): the rest of the parameter list is cut off in this excerpt. */
435 jsimd_convsamp(JSAMPARRAY sample_data, JDIMENSION start_col,
438 jsimd_convsamp_neon(sample_data, start_col, workspace);
442 jsimd_convsamp_float(JSAMPARRAY sample_data, JDIMENSION start_col,
443 FAST_FLOAT *workspace)
448 jsimd_can_fdct_islow(void)
/* Capability check.  Visible tests: 2-byte DCTELEM, then the JSIMD_NEON bit
 * of simd_support.
 * NOTE(review): the statements controlled by these tests, and any test on the
 * omitted lines between them, are not part of this excerpt. */
454 jsimd_can_fdct_ifast(void)
458 /* The code is optimised for these values only */
461 if (sizeof(DCTELEM) != 2)
464 if (simd_support & JSIMD_NEON)
471 jsimd_can_fdct_float(void)
477 jsimd_fdct_islow(DCTELEM *data)
/* Forwards the coefficient pointer unchanged to jsimd_fdct_ifast_neon. */
482 jsimd_fdct_ifast(DCTELEM *data)
484 jsimd_fdct_ifast_neon(data);
488 jsimd_fdct_float(FAST_FLOAT *data)
/* Capability check.  Visible tests: 2-byte JCOEF, 2-byte DCTELEM, then the
 * JSIMD_NEON bit of simd_support.
 * NOTE(review): the statements controlled by these tests, and any test on the
 * omitted lines between them, are not part of this excerpt. */
493 jsimd_can_quantize(void)
497 /* The code is optimised for these values only */
500 if (sizeof(JCOEF) != 2)
502 if (sizeof(DCTELEM) != 2)
505 if (simd_support & JSIMD_NEON)
512 jsimd_can_quantize_float(void)
/* Forwards its three arguments unchanged to jsimd_quantize_neon. */
518 jsimd_quantize(JCOEFPTR coef_block, DCTELEM *divisors, DCTELEM *workspace)
520 jsimd_quantize_neon(coef_block, divisors, workspace);
524 jsimd_quantize_float(JCOEFPTR coef_block, FAST_FLOAT *divisors,
525 FAST_FLOAT *workspace)
/* Capability check.  Visible tests: 2-byte JCOEF, 8-bit samples, 4-byte
 * JDIMENSION, 2-byte ISLOW_MULT_TYPE, then the JSIMD_NEON bit of
 * simd_support.
 * NOTE(review): the statements controlled by these tests, and any test on the
 * omitted lines between them, are not part of this excerpt. */
530 jsimd_can_idct_2x2(void)
534 /* The code is optimised for these values only */
537 if (sizeof(JCOEF) != 2)
539 if (BITS_IN_JSAMPLE != 8)
541 if (sizeof(JDIMENSION) != 4)
543 if (sizeof(ISLOW_MULT_TYPE) != 2)
546 if (simd_support & JSIMD_NEON)
/* Capability check.  Visible tests match jsimd_can_idct_2x2(): 2-byte JCOEF,
 * 8-bit samples, 4-byte JDIMENSION, 2-byte ISLOW_MULT_TYPE, then the
 * JSIMD_NEON bit of simd_support.
 * NOTE(review): the statements controlled by these tests, and any test on the
 * omitted lines between them, are not part of this excerpt. */
553 jsimd_can_idct_4x4(void)
557 /* The code is optimised for these values only */
560 if (sizeof(JCOEF) != 2)
562 if (BITS_IN_JSAMPLE != 8)
564 if (sizeof(JDIMENSION) != 4)
566 if (sizeof(ISLOW_MULT_TYPE) != 2)
569 if (simd_support & JSIMD_NEON)
/* Forwards to jsimd_idct_2x2_neon with the component's dct_table, the
 * coefficient block, the output buffer and the output column.  cinfo is not
 * used on the visible lines. */
576 jsimd_idct_2x2(j_decompress_ptr cinfo, jpeg_component_info *compptr,
577 JCOEFPTR coef_block, JSAMPARRAY output_buf,
578 JDIMENSION output_col)
580 jsimd_idct_2x2_neon(compptr->dct_table, coef_block, output_buf, output_col);
/* Forwards to jsimd_idct_4x4_neon with the component's dct_table, the
 * coefficient block, the output buffer and the output column.  cinfo is not
 * used on the visible lines. */
584 jsimd_idct_4x4(j_decompress_ptr cinfo, jpeg_component_info *compptr,
585 JCOEFPTR coef_block, JSAMPARRAY output_buf,
586 JDIMENSION output_col)
588 jsimd_idct_4x4_neon(compptr->dct_table, coef_block, output_buf, output_col);
/* Capability check.  Visible tests: 2-byte JCOEF, 8-bit samples, 4-byte
 * JDIMENSION, 2-byte ISLOW_MULT_TYPE, then the JSIMD_NEON bit of
 * simd_support.
 * NOTE(review): the statements controlled by these tests, and any test on the
 * omitted lines between them, are not part of this excerpt. */
592 jsimd_can_idct_islow(void)
596 /* The code is optimised for these values only */
599 if (sizeof(JCOEF) != 2)
601 if (BITS_IN_JSAMPLE != 8)
603 if (sizeof(JDIMENSION) != 4)
605 if (sizeof(ISLOW_MULT_TYPE) != 2)
608 if (simd_support & JSIMD_NEON)
/* Capability check.  Visible tests: 2-byte JCOEF, 8-bit samples, 4-byte
 * JDIMENSION, 2-byte IFAST_MULT_TYPE, IFAST_SCALE_BITS equal to 2, then the
 * JSIMD_NEON bit of simd_support.
 * NOTE(review): the statements controlled by these tests, and any test on the
 * omitted lines between them, are not part of this excerpt. */
615 jsimd_can_idct_ifast(void)
619 /* The code is optimised for these values only */
622 if (sizeof(JCOEF) != 2)
624 if (BITS_IN_JSAMPLE != 8)
626 if (sizeof(JDIMENSION) != 4)
628 if (sizeof(IFAST_MULT_TYPE) != 2)
630 if (IFAST_SCALE_BITS != 2)
633 if (simd_support & JSIMD_NEON)
640 jsimd_can_idct_float(void)
/* Forwards to jsimd_idct_islow_neon, passing the component's dct_table, the
 * coefficient block and the output buffer.
 * NOTE(review): the end of the argument list is cut off in this excerpt. */
646 jsimd_idct_islow(j_decompress_ptr cinfo, jpeg_component_info *compptr,
647 JCOEFPTR coef_block, JSAMPARRAY output_buf,
648 JDIMENSION output_col)
650 jsimd_idct_islow_neon(compptr->dct_table, coef_block, output_buf,
/* Forwards to jsimd_idct_ifast_neon, passing the component's dct_table, the
 * coefficient block and the output buffer.
 * NOTE(review): the end of the argument list is cut off in this excerpt. */
655 jsimd_idct_ifast(j_decompress_ptr cinfo, jpeg_component_info *compptr,
656 JCOEFPTR coef_block, JSAMPARRAY output_buf,
657 JDIMENSION output_col)
659 jsimd_idct_ifast_neon(compptr->dct_table, coef_block, output_buf,
664 jsimd_idct_float(j_decompress_ptr cinfo, jpeg_component_info *compptr,
665 JCOEFPTR coef_block, JSAMPARRAY output_buf,
666 JDIMENSION output_col)
/* Capability check.  Visible tests: 2-byte JCOEF, then the JSIMD_NEON bit of
 * simd_support combined with the simd_huffman flag.
 * NOTE(review): the statements controlled by these tests, and any test on the
 * omitted lines between them, are not part of this excerpt. */
671 jsimd_can_huff_encode_one_block(void)
677 if (sizeof(JCOEF) != 2)
/* '&' binds tighter than '&&': this reads
 * (simd_support & JSIMD_NEON) && simd_huffman. */
680 if (simd_support & JSIMD_NEON && simd_huffman)
/* Returns whatever jsimd_huff_encode_one_block_neon returns, passing state,
 * buffer, block and last_dc_val through unchanged.
 * NOTE(review): the end of the argument list is cut off in this excerpt. */
687 jsimd_huff_encode_one_block(void *state, JOCTET *buffer, JCOEFPTR block,
688 int last_dc_val, c_derived_tbl *dctbl,
689 c_derived_tbl *actbl)
691 return jsimd_huff_encode_one_block_neon(state, buffer, block, last_dc_val,
696 jsimd_can_encode_mcu_AC_first_prepare(void)
702 jsimd_encode_mcu_AC_first_prepare(const JCOEF *block,
703 const int *jpeg_natural_order_start, int Sl,
704 int Al, JCOEF *values, size_t *zerobits)
709 jsimd_can_encode_mcu_AC_refine_prepare(void)
715 jsimd_encode_mcu_AC_refine_prepare(const JCOEF *block,
716 const int *jpeg_natural_order_start, int Sl,
717 int Al, JCOEF *absvalues, size_t *bits)