4 * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
5 * Copyright 2009-2011 D. R. Commander
7 * Based on the x86 SIMD extension for IJG JPEG library,
8 * Copyright (C) 1999-2006, MIYASAKA Masaru.
9 * For conditions of distribution and use, see copyright notice in jsimdext.inc
11 * This file contains the interface between the "normal" portions
12 * of the library and the SIMD implementations when running on
15 * Based on the stubs from 'jsimd_none.c'
18 #define JPEG_INTERNALS
19 #include "../jinclude.h"
20 #include "../jpeglib.h"
23 #include "../jsimddct.h"
30 static unsigned int simd_support = ~0;
32 #if defined(__linux__) || defined(ANDROID) || defined(__ANDROID__)
34 #define SOMEWHAT_SANE_PROC_CPUINFO_SIZE_LIMIT (1024 * 1024)
37 check_feature (char *buffer, char *feature)
42 if (strncmp(buffer, "Features", 8) != 0)
45 while (isspace((unsigned char)*buffer)) /* cast: <ctype.h> requires unsigned char values */
48 /* Check if 'feature' is present in the buffer as a separate, whitespace-delimited word */
49 while ((p = strstr(buffer, feature))) {
50 if (p > buffer && !isspace((unsigned char)*(p - 1))) {
55 if (*p != 0 && !isspace((unsigned char)*p)) {
65 parse_proc_cpuinfo (int bufsize)
67 char *buffer = (char *)malloc(bufsize);
74 fd = fopen("/proc/cpuinfo", "r");
76 while (fgets(buffer, bufsize, fd)) {
77 if (!strchr(buffer, '\n') && !feof(fd)) {
78 /* "impossible" happened - insufficient size of the buffer! */
83 if (check_feature(buffer, "neon"))
84 simd_support |= JSIMD_ARM_NEON;
95 * Check what SIMD accelerations are supported.
97 * FIXME: This code is racy under a multi-threaded environment.
103 #if !defined(__ARM_NEON__) && (defined(__linux__) || defined(ANDROID) || defined(__ANDROID__))
104 int bufsize = 1024; /* initial guess for the line buffer size; only used by the /proc/cpuinfo path */
107 if (simd_support != ~0U)
112 #if defined(__ARM_NEON__)
113 simd_support |= JSIMD_ARM_NEON;
114 #elif defined(__linux__) || defined(ANDROID) || defined(__ANDROID__)
115 /* Even if the -mcpu/-mfpu options passed to the compiler did not enable
116 * NEON globally, it may still be usable: detect it at run time by
117 * parsing /proc/cpuinfo on Linux/Android. */
118 while (!parse_proc_cpuinfo(bufsize)) {
120 if (bufsize > SOMEWHAT_SANE_PROC_CPUINFO_SIZE_LIMIT)
125 /* Force different settings through environment variables (each applies only when set to "1") */
126 env = getenv("JSIMD_FORCE_ARM_NEON");
127 if ((env != NULL) && (strcmp(env, "1") == 0))
128 simd_support = JSIMD_ARM_NEON; /* assign, not '&=': masking can never turn NEON on */
129 env = getenv("JSIMD_FORCE_NO_SIMD");
130 if ((env != NULL) && (strcmp(env, "1") == 0))
135 jsimd_can_rgb_ycc (void)
139 /* The code is optimised for these values only */
140 if (BITS_IN_JSAMPLE != 8)
142 if (sizeof(JDIMENSION) != 4)
144 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
147 if (simd_support & JSIMD_ARM_NEON)
154 jsimd_can_rgb_gray (void)
162 jsimd_can_ycc_rgb (void)
166 /* The code is optimised for these values only */
167 if (BITS_IN_JSAMPLE != 8)
169 if (sizeof(JDIMENSION) != 4)
171 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
173 if (simd_support & JSIMD_ARM_NEON)
180 jsimd_rgb_ycc_convert (j_compress_ptr cinfo,
181 JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
182 JDIMENSION output_row, int num_rows)
184 void (*neonfct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
186 switch(cinfo->in_color_space)
189 neonfct=jsimd_extrgb_ycc_convert_neon;
193 neonfct=jsimd_extrgbx_ycc_convert_neon;
196 neonfct=jsimd_extbgr_ycc_convert_neon;
200 neonfct=jsimd_extbgrx_ycc_convert_neon;
204 neonfct=jsimd_extxbgr_ycc_convert_neon;
208 neonfct=jsimd_extxrgb_ycc_convert_neon;
211 neonfct=jsimd_extrgb_ycc_convert_neon;
215 if (simd_support & JSIMD_ARM_NEON)
216 neonfct(cinfo->image_width, input_buf,
217 output_buf, output_row, num_rows);
221 jsimd_rgb_gray_convert (j_compress_ptr cinfo,
222 JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
223 JDIMENSION output_row, int num_rows)
228 jsimd_ycc_rgb_convert (j_decompress_ptr cinfo,
229 JSAMPIMAGE input_buf, JDIMENSION input_row,
230 JSAMPARRAY output_buf, int num_rows)
232 void (*neonfct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
234 switch(cinfo->out_color_space)
237 neonfct=jsimd_ycc_extrgb_convert_neon;
241 neonfct=jsimd_ycc_extrgbx_convert_neon;
244 neonfct=jsimd_ycc_extbgr_convert_neon;
248 neonfct=jsimd_ycc_extbgrx_convert_neon;
252 neonfct=jsimd_ycc_extxbgr_convert_neon;
256 neonfct=jsimd_ycc_extxrgb_convert_neon;
259 neonfct=jsimd_ycc_extrgb_convert_neon;
263 if (simd_support & JSIMD_ARM_NEON)
264 neonfct(cinfo->output_width, input_buf,
265 input_row, output_buf, num_rows);
269 jsimd_can_h2v2_downsample (void)
277 jsimd_can_h2v1_downsample (void)
285 jsimd_h2v2_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
286 JSAMPARRAY input_data, JSAMPARRAY output_data)
291 jsimd_h2v1_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
292 JSAMPARRAY input_data, JSAMPARRAY output_data)
297 jsimd_can_h2v2_upsample (void)
305 jsimd_can_h2v1_upsample (void)
313 jsimd_h2v2_upsample (j_decompress_ptr cinfo,
314 jpeg_component_info * compptr,
315 JSAMPARRAY input_data,
316 JSAMPARRAY * output_data_ptr)
321 jsimd_h2v1_upsample (j_decompress_ptr cinfo,
322 jpeg_component_info * compptr,
323 JSAMPARRAY input_data,
324 JSAMPARRAY * output_data_ptr)
329 jsimd_can_h2v2_fancy_upsample (void)
337 jsimd_can_h2v1_fancy_upsample (void)
345 jsimd_h2v2_fancy_upsample (j_decompress_ptr cinfo,
346 jpeg_component_info * compptr,
347 JSAMPARRAY input_data,
348 JSAMPARRAY * output_data_ptr)
353 jsimd_h2v1_fancy_upsample (j_decompress_ptr cinfo,
354 jpeg_component_info * compptr,
355 JSAMPARRAY input_data,
356 JSAMPARRAY * output_data_ptr)
361 jsimd_can_h2v2_merged_upsample (void)
369 jsimd_can_h2v1_merged_upsample (void)
377 jsimd_h2v2_merged_upsample (j_decompress_ptr cinfo,
378 JSAMPIMAGE input_buf,
379 JDIMENSION in_row_group_ctr,
380 JSAMPARRAY output_buf)
385 jsimd_h2v1_merged_upsample (j_decompress_ptr cinfo,
386 JSAMPIMAGE input_buf,
387 JDIMENSION in_row_group_ctr,
388 JSAMPARRAY output_buf)
393 jsimd_can_convsamp (void)
397 /* The code is optimised for these values only */
400 if (BITS_IN_JSAMPLE != 8)
402 if (sizeof(JDIMENSION) != 4)
404 if (sizeof(DCTELEM) != 2)
407 if (simd_support & JSIMD_ARM_NEON)
414 jsimd_can_convsamp_float (void)
422 jsimd_convsamp (JSAMPARRAY sample_data, JDIMENSION start_col,
425 if (simd_support & JSIMD_ARM_NEON)
426 jsimd_convsamp_neon(sample_data, start_col, workspace);
430 jsimd_convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col,
431 FAST_FLOAT * workspace)
436 jsimd_can_fdct_islow (void)
444 jsimd_can_fdct_ifast (void)
448 /* The code is optimised for these values only */
451 if (sizeof(DCTELEM) != 2)
454 if (simd_support & JSIMD_ARM_NEON)
461 jsimd_can_fdct_float (void)
469 jsimd_fdct_islow (DCTELEM * data)
474 jsimd_fdct_ifast (DCTELEM * data)
476 if (simd_support & JSIMD_ARM_NEON)
477 jsimd_fdct_ifast_neon(data);
481 jsimd_fdct_float (FAST_FLOAT * data)
486 jsimd_can_quantize (void)
490 /* The code is optimised for these values only */
493 if (sizeof(JCOEF) != 2)
495 if (sizeof(DCTELEM) != 2)
498 if (simd_support & JSIMD_ARM_NEON)
505 jsimd_can_quantize_float (void)
513 jsimd_quantize (JCOEFPTR coef_block, DCTELEM * divisors,
516 if (simd_support & JSIMD_ARM_NEON)
517 jsimd_quantize_neon(coef_block, divisors, workspace);
521 jsimd_quantize_float (JCOEFPTR coef_block, FAST_FLOAT * divisors,
522 FAST_FLOAT * workspace)
527 jsimd_can_idct_2x2 (void)
531 /* The code is optimised for these values only */
534 if (sizeof(JCOEF) != 2)
536 if (BITS_IN_JSAMPLE != 8)
538 if (sizeof(JDIMENSION) != 4)
540 if (sizeof(ISLOW_MULT_TYPE) != 2)
543 if ((simd_support & JSIMD_ARM_NEON))
550 jsimd_can_idct_4x4 (void)
554 /* The code is optimised for these values only */
557 if (sizeof(JCOEF) != 2)
559 if (BITS_IN_JSAMPLE != 8)
561 if (sizeof(JDIMENSION) != 4)
563 if (sizeof(ISLOW_MULT_TYPE) != 2)
566 if ((simd_support & JSIMD_ARM_NEON))
573 jsimd_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
574 JCOEFPTR coef_block, JSAMPARRAY output_buf,
575 JDIMENSION output_col)
577 if ((simd_support & JSIMD_ARM_NEON))
578 jsimd_idct_2x2_neon(compptr->dct_table, coef_block, output_buf, output_col);
582 jsimd_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
583 JCOEFPTR coef_block, JSAMPARRAY output_buf,
584 JDIMENSION output_col)
586 if ((simd_support & JSIMD_ARM_NEON))
587 jsimd_idct_4x4_neon(compptr->dct_table, coef_block, output_buf, output_col);
591 jsimd_can_idct_islow (void)
595 /* The code is optimised for these values only */
598 if (sizeof(JCOEF) != 2)
600 if (BITS_IN_JSAMPLE != 8)
602 if (sizeof(JDIMENSION) != 4)
604 if (sizeof(ISLOW_MULT_TYPE) != 2)
607 if (simd_support & JSIMD_ARM_NEON)
614 jsimd_can_idct_ifast (void)
618 /* The code is optimised for these values only */
621 if (sizeof(JCOEF) != 2)
623 if (BITS_IN_JSAMPLE != 8)
625 if (sizeof(JDIMENSION) != 4)
627 if (sizeof(IFAST_MULT_TYPE) != 2)
629 if (IFAST_SCALE_BITS != 2)
632 if ((simd_support & JSIMD_ARM_NEON))
639 jsimd_can_idct_float (void)
647 jsimd_idct_islow (j_decompress_ptr cinfo, jpeg_component_info * compptr,
648 JCOEFPTR coef_block, JSAMPARRAY output_buf,
649 JDIMENSION output_col)
651 if ((simd_support & JSIMD_ARM_NEON))
652 jsimd_idct_islow_neon(compptr->dct_table, coef_block, output_buf, output_col);
656 jsimd_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info * compptr,
657 JCOEFPTR coef_block, JSAMPARRAY output_buf,
658 JDIMENSION output_col)
660 if ((simd_support & JSIMD_ARM_NEON))
661 jsimd_idct_ifast_neon(compptr->dct_table, coef_block, output_buf, output_col);
665 jsimd_idct_float (j_decompress_ptr cinfo, jpeg_component_info * compptr,
666 JCOEFPTR coef_block, JSAMPARRAY output_buf,
667 JDIMENSION output_col)