4 * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
5 * Copyright (C) 2009-2011, 2014-2016, 2018, D. R. Commander.
6 * Copyright (C) 2015-2016, 2018, Matthieu Darbois.
8 * Based on the x86 SIMD extension for IJG JPEG library,
9 * Copyright (C) 1999-2006, MIYASAKA Masaru.
10 * For conditions of distribution and use, see copyright notice in jsimdext.inc
12 * This file contains the interface between the "normal" portions
13 * of the library and the SIMD implementations when running on a
14 * PowerPC architecture.
18 /* This must be included first; otherwise it re-defines GLOBAL */
19 #include <proto/exec.h>
22 #define JPEG_INTERNALS
23 #include "../../jinclude.h"
24 #include "../../jpeglib.h"
25 #include "../../jsimd.h"
26 #include "../../jdct.h"
27 #include "../../jsimddct.h"
32 #if defined(__APPLE__)
33 #include <sys/types.h>
34 #include <sys/sysctl.h>
35 #elif defined(__OpenBSD__)
36 #include <sys/param.h>
37 #include <sys/sysctl.h>
38 #include <machine/cpu.h>
39 #elif defined(__FreeBSD__)
40 #include <machine/cpu.h>
/* Bit mask of SIMD features.  It starts out as ~0 (all bits set); the
 * detection code later in this file compares it against ~0U. */
44 static unsigned int simd_support = ~0;
/* The code guarded by this condition is built only when __ALTIVEC__ is not
 * defined and the target is Linux or Android. */
46 #if !defined(__ALTIVEC__) && (defined(__linux__) || defined(ANDROID) || defined(__ANDROID__))
/* 1 MiB bound that the /proc/cpuinfo line-buffer size is compared against. */
48 #define SOMEWHAT_SANE_PROC_CPUINFO_SIZE_LIMIT (1024 * 1024)
/*
 * check_feature: searches the text in `buffer` for `feature` as a separate
 * word.  Both arguments are NUL-terminated strings (they are handed to
 * strncmp()/strstr()).
 * NOTE(review): the return type, the return statements and several interior
 * lines are not visible in this excerpt -- confirm the exact result values
 * against the full file.
 */
51 check_feature(char *buffer, char *feature)
/* Test whether the text starts with the three characters "cpu" */
57 if (strncmp(buffer, "cpu", 3) != 0)
/* Loop over leading whitespace (loop body not visible here) */
60 while (isspace(*buffer))
63 /* Check if 'feature' is present in the buffer as a separate word */
64 while ((p = strstr(buffer, feature))) {
/* A hit that is preceded by a non-space character does not start a word */
65 if (p > buffer && !isspace(*(p - 1))) {
/* Presumably p has been moved past the match by lines not shown here; a
 * non-space, non-NUL character at p means the word continues -- TODO confirm */
70 if (*p != 0 && !isspace(*p)) {
/*
 * parse_proc_cpuinfo: reads /proc/cpuinfo line by line through a heap buffer
 * of `bufsize` bytes and sets JSIMD_ALTIVEC in simd_support when
 * check_feature() reports "altivec" on a line.
 * NOTE(review): the return type, the return statements and the cleanup code
 * are not visible in this excerpt.  The caller loops while this function
 * yields zero, so a zero result presumably signals "buffer too small".
 */
80 parse_proc_cpuinfo(int bufsize)
/* Line buffer sized by the caller's current guess */
82 char *buffer = (char *)malloc(bufsize);
90 fd = fopen("/proc/cpuinfo", "r");
92 while (fgets(buffer, bufsize, fd)) {
/* No newline and not at end of file: fgets() stopped because the buffer
 * was full, i.e. the line did not fit */
93 if (!strchr(buffer, '\n') && !feof(fd)) {
94 /* "impossible" happened - insufficient size of the buffer! */
99 if (check_feature(buffer, "altivec"))
100 simd_support |= JSIMD_ALTIVEC;
111 * Check what SIMD accelerations are supported.
113 * FIXME: This code is racy under a multi-threaded environment.
/*
 * Body of the SIMD feature-detection routine.
 * NOTE(review): the function's signature line is not visible in this excerpt
 * (presumably init_simd() -- confirm), and neither are several declarations,
 * return statements and #endif lines.
 *
 * The first preprocessor chain declares the locals each platform needs.
 */
121 #if !defined(__ALTIVEC__) && (defined(__linux__) || defined(ANDROID) || defined(__ANDROID__))
122 int bufsize = 1024; /* an initial guess for the line buffer size limit */
123 #elif defined(__amigaos4__)
125 #elif defined(__APPLE__)
126 int mib[2] = { CTL_HW, HW_VECTORUNIT };
128 size_t len = sizeof(altivec);
129 #elif defined(__OpenBSD__)
130 int mib[2] = { CTL_MACHDEP, CPU_ALTIVEC };
132 size_t len = sizeof(altivec);
133 #elif defined(__FreeBSD__)
134 unsigned long cpufeatures = 0;
/* simd_support is initialised to ~0, so any other value means a result has
 * already been stored (the statement controlled by this test is not shown) */
137 if (simd_support != ~0U)
/* AltiVec enabled at compile time: set the flag without a run-time probe */
142 #if defined(__ALTIVEC__)
143 simd_support |= JSIMD_ALTIVEC;
144 #elif defined(__linux__) || defined(ANDROID) || defined(__ANDROID__)
/* Keep calling parse_proc_cpuinfo() while it yields zero; bufsize is checked
 * against the sanity limit (the code that changes bufsize is not shown) */
145 while (!parse_proc_cpuinfo(bufsize)) {
147 if (bufsize > SOMEWHAT_SANE_PROC_CPUINFO_SIZE_LIMIT)
150 #elif defined(__amigaos4__)
/* AmigaOS 4: query the vector unit type through IExec */
151 IExec->GetCPUInfoTags(GCIT_VectorUnit, &altivec, TAG_DONE);
152 if (altivec == VECTORTYPE_ALTIVEC)
153 simd_support |= JSIMD_ALTIVEC;
154 #elif defined(__APPLE__) || defined(__OpenBSD__)
/* sysctl() must succeed and report a non-zero value */
155 if (sysctl(mib, 2, &altivec, &len, NULL, 0) == 0 && altivec != 0)
156 simd_support |= JSIMD_ALTIVEC;
157 #elif defined(__FreeBSD__)
/* FreeBSD: read the AT_HWCAP auxiliary vector entry and test its AltiVec bit */
158 elf_aux_info(AT_HWCAP, &cpufeatures, sizeof(cpufeatures));
159 if (cpufeatures & PPC_FEATURE_HAS_ALTIVEC)
160 simd_support |= JSIMD_ALTIVEC;
164 /* Force different settings through environment variables */
/* JSIMD_FORCEALTIVEC=1 overwrites the detected mask with exactly JSIMD_ALTIVEC */
165 env = getenv("JSIMD_FORCEALTIVEC");
166 if ((env != NULL) && (strcmp(env, "1") == 0))
167 simd_support = JSIMD_ALTIVEC;
/* JSIMD_FORCENONE=1: the statement controlled by this test is not shown */
168 env = getenv("JSIMD_FORCENONE");
169 if ((env != NULL) && (strcmp(env, "1") == 0))
/*
 * jsimd_can_rgb_ycc: capability check (by its name, for RGB -> YCC
 * conversion).  Visible tests: BITS_IN_JSAMPLE against 8, sizeof(JDIMENSION)
 * against 4, RGB_PIXELSIZE against 3 and 4, then the JSIMD_ALTIVEC bit of
 * simd_support.
 * NOTE(review): the return statements are not visible in this excerpt.
 */
175 jsimd_can_rgb_ycc(void)
179 /* The code is optimised for these values only */
180 if (BITS_IN_JSAMPLE != 8)
182 if (sizeof(JDIMENSION) != 4)
184 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
187 if (simd_support & JSIMD_ALTIVEC)
/*
 * jsimd_can_rgb_gray: capability check (by its name, for RGB -> grayscale
 * conversion).  Visible tests: BITS_IN_JSAMPLE against 8, sizeof(JDIMENSION)
 * against 4, RGB_PIXELSIZE against 3 and 4, then the JSIMD_ALTIVEC bit of
 * simd_support.
 * NOTE(review): the return statements are not visible in this excerpt.
 */
194 jsimd_can_rgb_gray(void)
198 /* The code is optimised for these values only */
199 if (BITS_IN_JSAMPLE != 8)
201 if (sizeof(JDIMENSION) != 4)
203 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
206 if (simd_support & JSIMD_ALTIVEC)
/*
 * jsimd_can_ycc_rgb: capability check (by its name, for YCC -> RGB
 * conversion).  Visible tests: BITS_IN_JSAMPLE against 8, sizeof(JDIMENSION)
 * against 4, RGB_PIXELSIZE against 3 and 4, then the JSIMD_ALTIVEC bit of
 * simd_support.
 * NOTE(review): the return statements are not visible in this excerpt.
 */
213 jsimd_can_ycc_rgb(void)
217 /* The code is optimised for these values only */
218 if (BITS_IN_JSAMPLE != 8)
220 if (sizeof(JDIMENSION) != 4)
222 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
225 if (simd_support & JSIMD_ALTIVEC)
/* jsimd_can_ycc_rgb565: only the signature is visible in this excerpt; no
 * simd_support test appears for it here.  NOTE(review): confirm the body in
 * the full file. */
232 jsimd_can_ycc_rgb565(void)
/*
 * jsimd_rgb_ycc_convert: picks one AltiVec RGB -> YCC routine in a switch on
 * cinfo->in_color_space, stores it in the function pointer `altivecfct`, and
 * calls it with cinfo->image_width, input_buf, output_buf, output_row and
 * num_rows.
 * NOTE(review): the case labels and the end of the parameter list are not
 * visible in this excerpt; the last assignment is presumably the default.
 */
238 jsimd_rgb_ycc_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
239 JSAMPIMAGE output_buf, JDIMENSION output_row,
/* All candidate routines share this signature */
242 void (*altivecfct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
244 switch (cinfo->in_color_space) {
246 altivecfct = jsimd_extrgb_ycc_convert_altivec;
250 altivecfct = jsimd_extrgbx_ycc_convert_altivec;
253 altivecfct = jsimd_extbgr_ycc_convert_altivec;
257 altivecfct = jsimd_extbgrx_ycc_convert_altivec;
261 altivecfct = jsimd_extxbgr_ycc_convert_altivec;
265 altivecfct = jsimd_extxrgb_ycc_convert_altivec;
268 altivecfct = jsimd_rgb_ycc_convert_altivec;
/* Dispatch to the selected routine */
272 altivecfct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
/*
 * jsimd_rgb_gray_convert: picks one AltiVec RGB -> grayscale routine in a
 * switch on cinfo->in_color_space, stores it in `altivecfct`, and calls it
 * with cinfo->image_width, input_buf, output_buf, output_row and num_rows.
 * NOTE(review): the case labels and the end of the parameter list are not
 * visible in this excerpt; the last assignment is presumably the default.
 */
276 jsimd_rgb_gray_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
277 JSAMPIMAGE output_buf, JDIMENSION output_row,
/* All candidate routines share this signature */
280 void (*altivecfct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
282 switch (cinfo->in_color_space) {
284 altivecfct = jsimd_extrgb_gray_convert_altivec;
288 altivecfct = jsimd_extrgbx_gray_convert_altivec;
291 altivecfct = jsimd_extbgr_gray_convert_altivec;
295 altivecfct = jsimd_extbgrx_gray_convert_altivec;
299 altivecfct = jsimd_extxbgr_gray_convert_altivec;
303 altivecfct = jsimd_extxrgb_gray_convert_altivec;
306 altivecfct = jsimd_rgb_gray_convert_altivec;
/* Dispatch to the selected routine */
310 altivecfct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
/*
 * jsimd_ycc_rgb_convert: picks one AltiVec YCC -> RGB routine in a switch on
 * cinfo->out_color_space, stores it in `altivecfct`, and calls it with
 * cinfo->output_width, input_buf, input_row, output_buf and num_rows.
 * NOTE(review): the case labels and the end of the parameter list are not
 * visible in this excerpt; the last assignment is presumably the default.
 */
314 jsimd_ycc_rgb_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
315 JDIMENSION input_row, JSAMPARRAY output_buf,
/* All candidate routines share this signature */
318 void (*altivecfct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
320 switch (cinfo->out_color_space) {
322 altivecfct = jsimd_ycc_extrgb_convert_altivec;
326 altivecfct = jsimd_ycc_extrgbx_convert_altivec;
329 altivecfct = jsimd_ycc_extbgr_convert_altivec;
333 altivecfct = jsimd_ycc_extbgrx_convert_altivec;
337 altivecfct = jsimd_ycc_extxbgr_convert_altivec;
341 altivecfct = jsimd_ycc_extxrgb_convert_altivec;
344 altivecfct = jsimd_ycc_rgb_convert_altivec;
/* Dispatch to the selected routine */
348 altivecfct(cinfo->output_width, input_buf, input_row, output_buf, num_rows);
/* jsimd_ycc_rgb565_convert: only part of the signature is visible in this
 * excerpt and no call to an *_altivec routine appears for it here.
 * NOTE(review): confirm the body in the full file. */
352 jsimd_ycc_rgb565_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
353 JDIMENSION input_row, JSAMPARRAY output_buf,
/*
 * jsimd_can_h2v2_downsample: capability check (by its name, for h2v2
 * downsampling).  Visible tests: BITS_IN_JSAMPLE against 8,
 * sizeof(JDIMENSION) against 4, then the JSIMD_ALTIVEC bit of simd_support.
 * NOTE(review): the return statements are not visible in this excerpt.
 */
359 jsimd_can_h2v2_downsample(void)
363 /* The code is optimised for these values only */
364 if (BITS_IN_JSAMPLE != 8)
366 if (sizeof(JDIMENSION) != 4)
369 if (simd_support & JSIMD_ALTIVEC)
/*
 * jsimd_can_h2v1_downsample: capability check (by its name, for h2v1
 * downsampling).  Visible tests: BITS_IN_JSAMPLE against 8,
 * sizeof(JDIMENSION) against 4, then the JSIMD_ALTIVEC bit of simd_support.
 * NOTE(review): the return statements are not visible in this excerpt.
 */
376 jsimd_can_h2v1_downsample(void)
380 /* The code is optimised for these values only */
381 if (BITS_IN_JSAMPLE != 8)
383 if (sizeof(JDIMENSION) != 4)
386 if (simd_support & JSIMD_ALTIVEC)
/*
 * jsimd_h2v2_downsample: forwards to jsimd_h2v2_downsample_altivec(), passing
 * cinfo->image_width, cinfo->max_v_samp_factor, compptr->v_samp_factor,
 * compptr->width_in_blocks and input_data.
 * NOTE(review): the end of the argument list is not visible in this excerpt.
 */
393 jsimd_h2v2_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
394 JSAMPARRAY input_data, JSAMPARRAY output_data)
396 jsimd_h2v2_downsample_altivec(cinfo->image_width, cinfo->max_v_samp_factor,
397 compptr->v_samp_factor,
398 compptr->width_in_blocks, input_data,
/*
 * jsimd_h2v1_downsample: forwards to jsimd_h2v1_downsample_altivec(), passing
 * cinfo->image_width, cinfo->max_v_samp_factor, compptr->v_samp_factor,
 * compptr->width_in_blocks and input_data.
 * NOTE(review): the end of the argument list is not visible in this excerpt.
 */
403 jsimd_h2v1_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
404 JSAMPARRAY input_data, JSAMPARRAY output_data)
406 jsimd_h2v1_downsample_altivec(cinfo->image_width, cinfo->max_v_samp_factor,
407 compptr->v_samp_factor,
408 compptr->width_in_blocks, input_data,
/*
 * jsimd_can_h2v2_upsample: capability check (by its name, for h2v2
 * upsampling).  Visible tests: BITS_IN_JSAMPLE against 8, sizeof(JDIMENSION)
 * against 4, then the JSIMD_ALTIVEC bit of simd_support.
 * NOTE(review): the return statements are not visible in this excerpt.
 */
413 jsimd_can_h2v2_upsample(void)
417 /* The code is optimised for these values only */
418 if (BITS_IN_JSAMPLE != 8)
420 if (sizeof(JDIMENSION) != 4)
423 if (simd_support & JSIMD_ALTIVEC)
/*
 * jsimd_can_h2v1_upsample: capability check (by its name, for h2v1
 * upsampling).  Visible tests: BITS_IN_JSAMPLE against 8, sizeof(JDIMENSION)
 * against 4, then the JSIMD_ALTIVEC bit of simd_support.
 * NOTE(review): the return statements are not visible in this excerpt.
 */
430 jsimd_can_h2v1_upsample(void)
434 /* The code is optimised for these values only */
435 if (BITS_IN_JSAMPLE != 8)
437 if (sizeof(JDIMENSION) != 4)
440 if (simd_support & JSIMD_ALTIVEC)
/*
 * jsimd_h2v2_upsample: forwards to jsimd_h2v2_upsample_altivec() with
 * cinfo->max_v_samp_factor, cinfo->output_width, input_data and
 * output_data_ptr.  `compptr` does not appear in the visible call.
 */
447 jsimd_h2v2_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
448 JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
450 jsimd_h2v2_upsample_altivec(cinfo->max_v_samp_factor, cinfo->output_width,
451 input_data, output_data_ptr);
/*
 * jsimd_h2v1_upsample: forwards to jsimd_h2v1_upsample_altivec() with
 * cinfo->max_v_samp_factor, cinfo->output_width, input_data and
 * output_data_ptr.  `compptr` does not appear in the visible call.
 */
455 jsimd_h2v1_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
456 JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
458 jsimd_h2v1_upsample_altivec(cinfo->max_v_samp_factor, cinfo->output_width,
459 input_data, output_data_ptr);
/*
 * jsimd_can_h2v2_fancy_upsample: capability check (by its name, for h2v2
 * fancy upsampling).  Visible tests: BITS_IN_JSAMPLE against 8,
 * sizeof(JDIMENSION) against 4, then the JSIMD_ALTIVEC bit of simd_support.
 * NOTE(review): the return statements are not visible in this excerpt.
 */
463 jsimd_can_h2v2_fancy_upsample(void)
467 /* The code is optimised for these values only */
468 if (BITS_IN_JSAMPLE != 8)
470 if (sizeof(JDIMENSION) != 4)
473 if (simd_support & JSIMD_ALTIVEC)
/*
 * jsimd_can_h2v1_fancy_upsample: capability check (by its name, for h2v1
 * fancy upsampling).  Visible tests: BITS_IN_JSAMPLE against 8,
 * sizeof(JDIMENSION) against 4, then the JSIMD_ALTIVEC bit of simd_support.
 * NOTE(review): the return statements are not visible in this excerpt.
 */
480 jsimd_can_h2v1_fancy_upsample(void)
484 /* The code is optimised for these values only */
485 if (BITS_IN_JSAMPLE != 8)
487 if (sizeof(JDIMENSION) != 4)
490 if (simd_support & JSIMD_ALTIVEC)
/*
 * jsimd_h2v2_fancy_upsample: forwards to jsimd_h2v2_fancy_upsample_altivec(),
 * passing cinfo->max_v_samp_factor, compptr->downsampled_width and
 * input_data.
 * NOTE(review): the end of the argument list is not visible in this excerpt.
 */
497 jsimd_h2v2_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
498 JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
500 jsimd_h2v2_fancy_upsample_altivec(cinfo->max_v_samp_factor,
501 compptr->downsampled_width, input_data,
/*
 * jsimd_h2v1_fancy_upsample: forwards to jsimd_h2v1_fancy_upsample_altivec(),
 * passing cinfo->max_v_samp_factor, compptr->downsampled_width and
 * input_data.
 * NOTE(review): the end of the argument list is not visible in this excerpt.
 */
506 jsimd_h2v1_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
507 JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
509 jsimd_h2v1_fancy_upsample_altivec(cinfo->max_v_samp_factor,
510 compptr->downsampled_width, input_data,
/*
 * jsimd_can_h2v2_merged_upsample: capability check (by its name, for h2v2
 * merged upsampling).  Visible tests: BITS_IN_JSAMPLE against 8,
 * sizeof(JDIMENSION) against 4, then the JSIMD_ALTIVEC bit of simd_support.
 * NOTE(review): the return statements are not visible in this excerpt.
 */
515 jsimd_can_h2v2_merged_upsample(void)
519 /* The code is optimised for these values only */
520 if (BITS_IN_JSAMPLE != 8)
522 if (sizeof(JDIMENSION) != 4)
525 if (simd_support & JSIMD_ALTIVEC)
/*
 * jsimd_can_h2v1_merged_upsample: capability check (by its name, for h2v1
 * merged upsampling).  Visible tests: BITS_IN_JSAMPLE against 8,
 * sizeof(JDIMENSION) against 4, then the JSIMD_ALTIVEC bit of simd_support.
 * NOTE(review): the return statements are not visible in this excerpt.
 */
532 jsimd_can_h2v1_merged_upsample(void)
536 /* The code is optimised for these values only */
537 if (BITS_IN_JSAMPLE != 8)
539 if (sizeof(JDIMENSION) != 4)
542 if (simd_support & JSIMD_ALTIVEC)
/*
 * jsimd_h2v2_merged_upsample: picks one AltiVec h2v2 merged-upsample routine
 * in a switch on cinfo->out_color_space, stores it in `altivecfct`, and calls
 * it with cinfo->output_width, input_buf, in_row_group_ctr and output_buf.
 * NOTE(review): the case labels are not visible in this excerpt; the last
 * assignment is presumably the default.
 */
549 jsimd_h2v2_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
550 JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)
/* All candidate routines share this signature */
552 void (*altivecfct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
554 switch (cinfo->out_color_space) {
556 altivecfct = jsimd_h2v2_extrgb_merged_upsample_altivec;
560 altivecfct = jsimd_h2v2_extrgbx_merged_upsample_altivec;
563 altivecfct = jsimd_h2v2_extbgr_merged_upsample_altivec;
567 altivecfct = jsimd_h2v2_extbgrx_merged_upsample_altivec;
571 altivecfct = jsimd_h2v2_extxbgr_merged_upsample_altivec;
575 altivecfct = jsimd_h2v2_extxrgb_merged_upsample_altivec;
578 altivecfct = jsimd_h2v2_merged_upsample_altivec;
/* Dispatch to the selected routine */
582 altivecfct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
/*
 * jsimd_h2v1_merged_upsample: picks one AltiVec h2v1 merged-upsample routine
 * in a switch on cinfo->out_color_space, stores it in `altivecfct`, and calls
 * it with cinfo->output_width, input_buf, in_row_group_ctr and output_buf.
 * NOTE(review): the case labels are not visible in this excerpt; the last
 * assignment is presumably the default.
 */
586 jsimd_h2v1_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
587 JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)
/* All candidate routines share this signature */
589 void (*altivecfct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
591 switch (cinfo->out_color_space) {
593 altivecfct = jsimd_h2v1_extrgb_merged_upsample_altivec;
597 altivecfct = jsimd_h2v1_extrgbx_merged_upsample_altivec;
600 altivecfct = jsimd_h2v1_extbgr_merged_upsample_altivec;
604 altivecfct = jsimd_h2v1_extbgrx_merged_upsample_altivec;
608 altivecfct = jsimd_h2v1_extxbgr_merged_upsample_altivec;
612 altivecfct = jsimd_h2v1_extxrgb_merged_upsample_altivec;
615 altivecfct = jsimd_h2v1_merged_upsample_altivec;
/* Dispatch to the selected routine */
619 altivecfct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
/*
 * jsimd_can_convsamp: capability check (by its name, for sample conversion).
 * Visible tests: BITS_IN_JSAMPLE against 8, sizeof(JDIMENSION) against 4,
 * sizeof(DCTELEM) against 2, then the JSIMD_ALTIVEC bit of simd_support.
 * NOTE(review): the return statements, and any tests on lines missing from
 * this excerpt, are not visible.
 */
623 jsimd_can_convsamp(void)
627 /* The code is optimised for these values only */
630 if (BITS_IN_JSAMPLE != 8)
632 if (sizeof(JDIMENSION) != 4)
634 if (sizeof(DCTELEM) != 2)
637 if (simd_support & JSIMD_ALTIVEC)
/* jsimd_can_convsamp_float: only the signature is visible in this excerpt; no
 * simd_support test appears for it here.  NOTE(review): confirm the body in
 * the full file. */
644 jsimd_can_convsamp_float(void)
/*
 * jsimd_convsamp: forwards sample_data, start_col and workspace to
 * jsimd_convsamp_altivec().
 * NOTE(review): the end of the parameter list (including the declaration of
 * `workspace`) is not visible in this excerpt.
 */
650 jsimd_convsamp(JSAMPARRAY sample_data, JDIMENSION start_col,
653 jsimd_convsamp_altivec(sample_data, start_col, workspace);
/* jsimd_convsamp_float: only the signature is visible in this excerpt and no
 * call to an *_altivec routine appears for it here.  NOTE(review): confirm
 * the body in the full file. */
657 jsimd_convsamp_float(JSAMPARRAY sample_data, JDIMENSION start_col,
658 FAST_FLOAT *workspace)
/*
 * jsimd_can_fdct_islow: capability check (by its name, for the "islow"
 * forward DCT).  Visible tests: sizeof(DCTELEM) against 2, then the
 * JSIMD_ALTIVEC bit of simd_support.
 * NOTE(review): the return statements, and any tests on lines missing from
 * this excerpt, are not visible.
 */
663 jsimd_can_fdct_islow(void)
667 /* The code is optimised for these values only */
670 if (sizeof(DCTELEM) != 2)
673 if (simd_support & JSIMD_ALTIVEC)
/*
 * jsimd_can_fdct_ifast: capability check (by its name, for the "ifast"
 * forward DCT).  Visible tests: sizeof(DCTELEM) against 2, then the
 * JSIMD_ALTIVEC bit of simd_support.
 * NOTE(review): the return statements, and any tests on lines missing from
 * this excerpt, are not visible.
 */
680 jsimd_can_fdct_ifast(void)
684 /* The code is optimised for these values only */
687 if (sizeof(DCTELEM) != 2)
690 if (simd_support & JSIMD_ALTIVEC)
/* jsimd_can_fdct_float: only the signature is visible in this excerpt; no
 * simd_support test appears for it here.  NOTE(review): confirm the body in
 * the full file. */
697 jsimd_can_fdct_float(void)
/* jsimd_fdct_islow: passes `data` straight to jsimd_fdct_islow_altivec(). */
703 jsimd_fdct_islow(DCTELEM *data)
705 jsimd_fdct_islow_altivec(data);
/* jsimd_fdct_ifast: passes `data` straight to jsimd_fdct_ifast_altivec(). */
709 jsimd_fdct_ifast(DCTELEM *data)
711 jsimd_fdct_ifast_altivec(data);
/* jsimd_fdct_float: only the signature is visible in this excerpt and no call
 * to an *_altivec routine appears for it here.  NOTE(review): confirm the
 * body in the full file. */
715 jsimd_fdct_float(FAST_FLOAT *data)
/*
 * jsimd_can_quantize: capability check (by its name, for quantization).
 * Visible tests: sizeof(JCOEF) against 2, sizeof(DCTELEM) against 2, then the
 * JSIMD_ALTIVEC bit of simd_support.
 * NOTE(review): the return statements, and any tests on lines missing from
 * this excerpt, are not visible.
 */
720 jsimd_can_quantize(void)
724 /* The code is optimised for these values only */
727 if (sizeof(JCOEF) != 2)
729 if (sizeof(DCTELEM) != 2)
732 if (simd_support & JSIMD_ALTIVEC)
/* jsimd_can_quantize_float: only the signature is visible in this excerpt; no
 * simd_support test appears for it here.  NOTE(review): confirm the body in
 * the full file. */
739 jsimd_can_quantize_float(void)
/* jsimd_quantize: passes coef_block, divisors and workspace straight to
 * jsimd_quantize_altivec(). */
745 jsimd_quantize(JCOEFPTR coef_block, DCTELEM *divisors, DCTELEM *workspace)
747 jsimd_quantize_altivec(coef_block, divisors, workspace);
/* jsimd_quantize_float: only the signature is visible in this excerpt and no
 * call to an *_altivec routine appears for it here.  NOTE(review): confirm
 * the body in the full file. */
751 jsimd_quantize_float(JCOEFPTR coef_block, FAST_FLOAT *divisors,
752 FAST_FLOAT *workspace)
/* jsimd_can_idct_2x2: only the signature is visible in this excerpt; no
 * simd_support test appears for it here.  NOTE(review): confirm the body in
 * the full file. */
757 jsimd_can_idct_2x2(void)
/* jsimd_can_idct_4x4: only the signature is visible in this excerpt; no
 * simd_support test appears for it here.  NOTE(review): confirm the body in
 * the full file. */
763 jsimd_can_idct_4x4(void)
/* jsimd_idct_2x2: only the signature is visible in this excerpt and no call
 * to an *_altivec routine appears for it here.  NOTE(review): confirm the
 * body in the full file. */
769 jsimd_idct_2x2(j_decompress_ptr cinfo, jpeg_component_info *compptr,
770 JCOEFPTR coef_block, JSAMPARRAY output_buf,
771 JDIMENSION output_col)
/* jsimd_idct_4x4: only the signature is visible in this excerpt and no call
 * to an *_altivec routine appears for it here.  NOTE(review): confirm the
 * body in the full file. */
776 jsimd_idct_4x4(j_decompress_ptr cinfo, jpeg_component_info *compptr,
777 JCOEFPTR coef_block, JSAMPARRAY output_buf,
778 JDIMENSION output_col)
/*
 * jsimd_can_idct_islow: capability check (by its name, for the "islow"
 * inverse DCT).  Visible tests: sizeof(JCOEF) against 2, then the
 * JSIMD_ALTIVEC bit of simd_support.
 * NOTE(review): the return statements, and any tests on lines missing from
 * this excerpt, are not visible.
 */
783 jsimd_can_idct_islow(void)
787 /* The code is optimised for these values only */
790 if (sizeof(JCOEF) != 2)
793 if (simd_support & JSIMD_ALTIVEC)
/*
 * jsimd_can_idct_ifast: capability check (by its name, for the "ifast"
 * inverse DCT).  Visible tests: sizeof(JCOEF) against 2, then the
 * JSIMD_ALTIVEC bit of simd_support.
 * NOTE(review): the return statements, and any tests on lines missing from
 * this excerpt, are not visible.
 */
800 jsimd_can_idct_ifast(void)
804 /* The code is optimised for these values only */
807 if (sizeof(JCOEF) != 2)
810 if (simd_support & JSIMD_ALTIVEC)
/* jsimd_can_idct_float: only the signature is visible in this excerpt; no
 * simd_support test appears for it here.  NOTE(review): confirm the body in
 * the full file. */
817 jsimd_can_idct_float(void)
/*
 * jsimd_idct_islow: forwards to jsimd_idct_islow_altivec(), passing
 * compptr->dct_table, coef_block and output_buf.  `cinfo` does not appear in
 * the visible part of the call.
 * NOTE(review): the end of the argument list is not visible in this excerpt.
 */
823 jsimd_idct_islow(j_decompress_ptr cinfo, jpeg_component_info *compptr,
824 JCOEFPTR coef_block, JSAMPARRAY output_buf,
825 JDIMENSION output_col)
827 jsimd_idct_islow_altivec(compptr->dct_table, coef_block, output_buf,
/*
 * jsimd_idct_ifast: forwards to jsimd_idct_ifast_altivec(), passing
 * compptr->dct_table, coef_block and output_buf.  `cinfo` does not appear in
 * the visible part of the call.
 * NOTE(review): the end of the argument list is not visible in this excerpt.
 */
832 jsimd_idct_ifast(j_decompress_ptr cinfo, jpeg_component_info *compptr,
833 JCOEFPTR coef_block, JSAMPARRAY output_buf,
834 JDIMENSION output_col)
836 jsimd_idct_ifast_altivec(compptr->dct_table, coef_block, output_buf,
/* jsimd_idct_float: only the signature is visible in this excerpt and no call
 * to an *_altivec routine appears for it here.  NOTE(review): confirm the
 * body in the full file. */
841 jsimd_idct_float(j_decompress_ptr cinfo, jpeg_component_info *compptr,
842 JCOEFPTR coef_block, JSAMPARRAY output_buf,
843 JDIMENSION output_col)
/* jsimd_can_huff_encode_one_block: only the signature is visible in this
 * excerpt; no simd_support test appears for it here.  NOTE(review): confirm
 * the body in the full file. */
848 jsimd_can_huff_encode_one_block(void)
/* jsimd_huff_encode_one_block: only the signature is visible in this excerpt
 * and no call to an *_altivec routine appears for it here.  NOTE(review):
 * confirm the body in the full file. */
854 jsimd_huff_encode_one_block(void *state, JOCTET *buffer, JCOEFPTR block,
855 int last_dc_val, c_derived_tbl *dctbl,
856 c_derived_tbl *actbl)
/* jsimd_can_encode_mcu_AC_first_prepare: only the signature is visible in
 * this excerpt; no simd_support test appears for it here.  NOTE(review):
 * confirm the body in the full file. */
862 jsimd_can_encode_mcu_AC_first_prepare(void)
/* jsimd_encode_mcu_AC_first_prepare: only the signature is visible in this
 * excerpt and no call to an *_altivec routine appears for it here.
 * NOTE(review): confirm the body in the full file. */
868 jsimd_encode_mcu_AC_first_prepare(const JCOEF *block,
869 const int *jpeg_natural_order_start, int Sl,
870 int Al, JCOEF *values, size_t *zerobits)
/* jsimd_can_encode_mcu_AC_refine_prepare: only the signature is visible in
 * this excerpt; no simd_support test appears for it here.  NOTE(review):
 * confirm the body in the full file. */
875 jsimd_can_encode_mcu_AC_refine_prepare(void)
881 jsimd_encode_mcu_AC_refine_prepare(const JCOEF *block,
882 const int *jpeg_natural_order_start, int Sl,
883 int Al, JCOEF *absvalues, size_t *bits)