From: subhransu mohanty Date: Thu, 27 Dec 2018 03:48:00 +0000 (+0900) Subject: lottie/vector: move pixman code to separate pixman folder. X-Git-Tag: submit/tizen/20190102.045226~4 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=66259e78e5f67a5b249728f104e4881bce59d3d4;p=platform%2Fcore%2Fuifw%2Flottie-player.git lottie/vector: move pixman code to separate pixman folder. Change-Id: Ic2d59825403814849f46cad8496a7b409b6d3fda --- diff --git a/src/vector/CMakeLists.txt b/src/vector/CMakeLists.txt index 4c2acfc..573a33b 100644 --- a/src/vector/CMakeLists.txt +++ b/src/vector/CMakeLists.txt @@ -1,5 +1,6 @@ add_subdirectory(freetype) +add_subdirectory(pixman) target_sources(lottie-player PRIVATE @@ -11,7 +12,6 @@ target_sources(lottie-player "${CMAKE_CURRENT_LIST_DIR}/vdrawhelper.cpp" "${CMAKE_CURRENT_LIST_DIR}/vdrawhelper_sse2.cpp" "${CMAKE_CURRENT_LIST_DIR}/vdrawhelper_neon.cpp" - "${CMAKE_CURRENT_LIST_DIR}/vregion.cpp" "${CMAKE_CURRENT_LIST_DIR}/vrle.cpp" "${CMAKE_CURRENT_LIST_DIR}/vpath.cpp" "${CMAKE_CURRENT_LIST_DIR}/vpathmesure.cpp" @@ -24,14 +24,6 @@ target_sources(lottie-player "${CMAKE_CURRENT_LIST_DIR}/vdrawable.cpp" ) -IF("${ARCH}" STREQUAL "arm") -SET(CMAKE_ASM_FLAGS "${CFLAGS} -x assembler-with-cpp") -target_sources(lottie-player - PRIVATE - "${CMAKE_CURRENT_LIST_DIR}/pixman-arm-neon-asm.S" - ) -ENDIF("${ARCH}" STREQUAL "arm") - target_include_directories(lottie-player PRIVATE "${CMAKE_CURRENT_LIST_DIR}" diff --git a/src/vector/meson.build b/src/vector/meson.build index d56d49c..3246e43 100644 --- a/src/vector/meson.build +++ b/src/vector/meson.build @@ -1,7 +1,9 @@ subdir('freetype') +subdir('pixman') vector_dep = [freetype_dep] +vector_dep += pixman_dep source_file = files('vdasher.cpp') source_file += files('vbrush.cpp') @@ -13,7 +15,6 @@ source_file += files('vdrawhelper_sse2.cpp') source_file += files('vdrawable.cpp') -source_file += files('vregion.cpp') source_file += files('vrle.cpp') source_file += files('vpath.cpp') source_file += files('vpathmesure.cpp') diff --git a/src/vector/pixman-arm-neon-asm.S b/src/vector/pixman-arm-neon-asm.S deleted file mode 100644 index f2203c2..0000000 --- a/src/vector/pixman-arm-neon-asm.S +++ /dev/null @@ -1,497 +0,0 @@ -/* - * Copyright © 2009 Nokia Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - * - * Author: Siarhei Siamashka (siarhei.siamashka@nokia.com) - */ - -/* - * This file contains implementations of NEON optimized pixel processing - * functions. 
There is no full and detailed tutorial, but some functions - * (those which are exposing some new or interesting features) are - * extensively commented and can be used as examples. - * - * You may want to have a look at the comments for following functions: - * - pixman_composite_over_8888_0565_asm_neon - * - pixman_composite_over_n_8_0565_asm_neon - */ - -/* Prevent the stack from becoming executable for no reason... */ -#if defined(__linux__) && defined(__ELF__) -.section .note.GNU-stack,"",%progbits -#endif - - .text - .fpu neon - .arch armv7a - .object_arch armv4 - .eabi_attribute 10, 0 /* suppress Tag_FP_arch */ - .eabi_attribute 12, 0 /* suppress Tag_Advanced_SIMD_arch */ - .arm - .altmacro - .p2align 2 - - -//#include "pixman-arm-asm.h" -/* Supplementary macro for setting function attributes */ -.macro pixman_asm_function fname - .func fname - .global fname -#ifdef __ELF__ - .hidden fname - .type fname, %function -#endif -fname: -.endm - -//#include "pixman-private.h" -/* - * The defines which are shared between C and assembly code - */ - -/* bilinear interpolation precision (must be < 8) */ -#define BILINEAR_INTERPOLATION_BITS 7 -#define BILINEAR_INTERPOLATION_RANGE (1 << BILINEAR_INTERPOLATION_BITS) - -#include "pixman-arm-neon-asm.h" - -/* Global configuration options and preferences */ - -/* - * The code can optionally make use of unaligned memory accesses to improve - * performance of handling leading/trailing pixels for each scanline. - * Configuration variable RESPECT_STRICT_ALIGNMENT can be set to 0 for - * example in linux if unaligned memory accesses are not configured to - * generate.exceptions. - */ -.set RESPECT_STRICT_ALIGNMENT, 1 - -/* - * Set default prefetch type. There is a choice between the following options: - * - * PREFETCH_TYPE_NONE (may be useful for the ARM cores where PLD is set to work - * as NOP to workaround some HW bugs or for whatever other reason) - * - * PREFETCH_TYPE_SIMPLE (may be useful for simple single-issue ARM cores where - * advanced prefetch intruduces heavy overhead) - * - * PREFETCH_TYPE_ADVANCED (useful for superscalar cores such as ARM Cortex-A8 - * which can run ARM and NEON instructions simultaneously so that extra ARM - * instructions do not add (many) extra cycles, but improve prefetch efficiency) - * - * Note: some types of function can't support advanced prefetch and fallback - * to simple one (those which handle 24bpp pixels) - */ -.set PREFETCH_TYPE_DEFAULT, PREFETCH_TYPE_ADVANCED - -/* Prefetch distance in pixels for simple prefetch */ -.set PREFETCH_DISTANCE_SIMPLE, 64 - -/* - * Implementation of pixman_composite_over_8888_0565_asm_neon - * - * This function takes a8r8g8b8 source buffer, r5g6b5 destination buffer and - * performs OVER compositing operation. Function fast_composite_over_8888_0565 - * from pixman-fast-path.c does the same in C and can be used as a reference. - * - * First we need to have some NEON assembly code which can do the actual - * operation on the pixels and provide it to the template macro. - * - * Template macro quite conveniently takes care of emitting all the necessary - * code for memory reading and writing (including quite tricky cases of - * handling unaligned leading/trailing pixels), so we only need to deal with - * the data in NEON registers. 
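As a reference point for the NEON code in this file, here is a minimal scalar C sketch of the per-pixel OVER operation that pixman_composite_over_8888_0565_asm_neon performs (premultiplied a8r8g8b8 source over an r5g6b5 destination). It is illustrative only and not part of this patch; the helper names are made up, and the 565 expansion is simplified (no low-bit replication):

    #include <stdint.h>

    /* rounded multiply of two 0..255 values, approximately (a * b) / 255 */
    static uint8_t mul_un8(uint8_t a, uint8_t b)
    {
        return (uint8_t)((a * b + 127) / 255);
    }

    static uint16_t over_8888_0565_pixel(uint32_t src, uint16_t dst)
    {
        uint8_t sa = src >> 24, sr = src >> 16, sg = src >> 8, sb = src;
        uint8_t ia = 255 - sa;                  /* inverted source alpha */

        /* expand r5g6b5 destination to 8-bit channels (simplified) */
        uint8_t dr = (dst >> 11) << 3;
        uint8_t dg = ((dst >> 5) & 0x3f) << 2;
        uint8_t db = (dst & 0x1f) << 3;

        /* OVER with a premultiplied source: result = src + (1 - sa) * dst
         * (the NEON code uses vqadd, a saturating add, for the '+') */
        uint8_t rr = sr + mul_un8(ia, dr);
        uint8_t rg = sg + mul_un8(ia, dg);
        uint8_t rb = sb + mul_un8(ia, db);

        /* repack to r5g6b5 */
        return (uint16_t)(((rr >> 3) << 11) | ((rg >> 2) << 5) | (rb >> 3));
    }

The NEON implementation performs this same arithmetic on eight pixels at a time, one 64-bit register per colour channel once the data has been deinterleaved.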
- * - * NEON registers allocation in general is recommented to be the following: - * d0, d1, d2, d3 - contain loaded source pixel data - * d4, d5, d6, d7 - contain loaded destination pixels (if they are needed) - * d24, d25, d26, d27 - contain loading mask pixel data (if mask is used) - * d28, d29, d30, d31 - place for storing the result (destination pixels) - * - * As can be seen above, four 64-bit NEON registers are used for keeping - * intermediate pixel data and up to 8 pixels can be processed in one step - * for 32bpp formats (16 pixels for 16bpp, 32 pixels for 8bpp). - * - * This particular function uses the following registers allocation: - * d0, d1, d2, d3 - contain loaded source pixel data - * d4, d5 - contain loaded destination pixels (they are needed) - * d28, d29 - place for storing the result (destination pixels) - */ - -/* - * Step one. We need to have some code to do some arithmetics on pixel data. - * This is implemented as a pair of macros: '*_head' and '*_tail'. When used - * back-to-back, they take pixel data from {d0, d1, d2, d3} and {d4, d5}, - * perform all the needed calculations and write the result to {d28, d29}. - * The rationale for having two macros and not just one will be explained - * later. In practice, any single monolitic function which does the work can - * be split into two parts in any arbitrary way without affecting correctness. - * - * There is one special trick here too. Common template macro can optionally - * make our life a bit easier by doing R, G, B, A color components - * deinterleaving for 32bpp pixel formats (and this feature is used in - * 'pixman_composite_over_8888_0565_asm_neon' function). So it means that - * instead of having 8 packed pixels in {d0, d1, d2, d3} registers, we - * actually use d0 register for blue channel (a vector of eight 8-bit - * values), d1 register for green, d2 for red and d3 for alpha. This - * simple conversion can be also done with a few NEON instructions: - * - * Packed to planar conversion: - * vuzp.8 d0, d1 - * vuzp.8 d2, d3 - * vuzp.8 d1, d3 - * vuzp.8 d0, d2 - * - * Planar to packed conversion: - * vzip.8 d0, d2 - * vzip.8 d1, d3 - * vzip.8 d2, d3 - * vzip.8 d0, d1 - * - * But pixel can be loaded directly in planar format using VLD4.8 NEON - * instruction. It is 1 cycle slower than VLD1.32, so this is not always - * desirable, that's why deinterleaving is optional. - * - * But anyway, here is the code: - */ - -/* - * OK, now we got almost everything that we need. Using the above two - * macros, the work can be done right. But now we want to optimize - * it a bit. ARM Cortex-A8 is an in-order core, and benefits really - * a lot from good code scheduling and software pipelining. - * - * Let's construct some code, which will run in the core main loop. - * Some pseudo-code of the main loop will look like this: - * head - * while (...) { - * tail - * head - * } - * tail - * - * It may look a bit weird, but this setup allows to hide instruction - * latencies better and also utilize dual-issue capability more - * efficiently (make pairs of load-store and ALU instructions). - * - * So what we need now is a '*_tail_head' macro, which will be used - * in the core main loop. A trivial straightforward implementation - * of this macro would look like this: - * - * pixman_composite_over_8888_0565_process_pixblock_tail - * vst1.16 {d28, d29}, [DST_W, :128]! - * vld1.16 {d4, d5}, [DST_R, :128]! - * vld4.32 {d0, d1, d2, d3}, [SRC]! 
- * pixman_composite_over_8888_0565_process_pixblock_head - * cache_preload 8, 8 - * - * Now it also got some VLD/VST instructions. We simply can't move from - * processing one block of pixels to the other one with just arithmetics. - * The previously processed data needs to be written to memory and new - * data needs to be fetched. Fortunately, this main loop does not deal - * with partial leading/trailing pixels and can load/store a full block - * of pixels in a bulk. Additionally, destination buffer is already - * 16 bytes aligned here (which is good for performance). - * - * New things here are DST_R, DST_W, SRC and MASK identifiers. These - * are the aliases for ARM registers which are used as pointers for - * accessing data. We maintain separate pointers for reading and writing - * destination buffer (DST_R and DST_W). - * - * Another new thing is 'cache_preload' macro. It is used for prefetching - * data into CPU L2 cache and improve performance when dealing with large - * images which are far larger than cache size. It uses one argument - * (actually two, but they need to be the same here) - number of pixels - * in a block. Looking into 'pixman-arm-neon-asm.h' can provide some - * details about this macro. Moreover, if good performance is needed - * the code from this macro needs to be copied into '*_tail_head' macro - * and mixed with the rest of code for optimal instructions scheduling. - * We are actually doing it below. - * - * Now after all the explanations, here is the optimized code. - * Different instruction streams (originaling from '*_head', '*_tail' - * and 'cache_preload' macro) use different indentation levels for - * better readability. Actually taking the code from one of these - * indentation levels and ignoring a few VLD/VST instructions would - * result in exactly the code from '*_head', '*_tail' or 'cache_preload' - * macro! - */ - -/* - * And now the final part. We are using 'generate_composite_function' macro - * to put all the stuff together. We are specifying the name of the function - * which we want to get, number of bits per pixel for the source, mask and - * destination (0 if unused, like mask in this case). Next come some bit - * flags: - * FLAG_DST_READWRITE - tells that the destination buffer is both read - * and written, for write-only buffer we would use - * FLAG_DST_WRITEONLY flag instead - * FLAG_DEINTERLEAVE_32BPP - tells that we prefer to work with planar data - * and separate color channels for 32bpp format. - * The next things are: - * - the number of pixels processed per iteration (8 in this case, because - * that's the maximum what can fit into four 64-bit NEON registers). - * - prefetch distance, measured in pixel blocks. In this case it is 5 times - * by 8 pixels. That would be 40 pixels, or up to 160 bytes. Optimal - * prefetch distance can be selected by running some benchmarks. - * - * After that we specify some macros, these are 'default_init', - * 'default_cleanup' here which are empty (but it is possible to have custom - * init/cleanup macros to be able to save/restore some extra NEON registers - * like d8-d15 or do anything else) followed by - * 'pixman_composite_over_8888_0565_process_pixblock_head', - * 'pixman_composite_over_8888_0565_process_pixblock_tail' and - * 'pixman_composite_over_8888_0565_process_pixblock_tail_head' - * which we got implemented above. - * - * The last part is the NEON registers allocation scheme. 
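To make the head / tail / tail_head pipelining described above easier to follow, here is a schematic C rendering of the main-loop structure. It is illustrative only; the stub functions stand in for the pixblock macros and the real NEON loads and stores, and a block is one pixblock of pixels:

    static void load_block(int i)  { (void)i; }  /* vld4.8 / vld1.*          */
    static void store_block(int i) { (void)i; }  /* vst4.8 / vst1.*          */
    static void head(int i)        { (void)i; }  /* *_process_pixblock_head  */
    static void tail(int i)        { (void)i; }  /* *_process_pixblock_tail  */

    /* assumes nblocks >= 1 */
    static void pipelined_scanline(int nblocks)
    {
        load_block(0);
        head(0);                          /* start arithmetic for block 0   */
        for (int i = 1; i < nblocks; i++) {
            /* this is what the fused '*_tail_head' macro interleaves:      */
            tail(i - 1);                  /* finish the previous block      */
            store_block(i - 1);           /* write its pixels out           */
            load_block(i);                /* fetch the next block           */
            head(i);                      /* start its arithmetic           */
        }
        tail(nblocks - 1);
        store_block(nblocks - 1);
    }

Overlapping the tail of one block with the head of the next is what lets load/store instructions pair with ALU work on the dual-issue Cortex-A8.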
- */ - -/******************************************************************************/ - -/******************************************************************************/ - .macro pixman_composite_out_reverse_8888_8888_process_pixblock_head - vmvn.8 d24, d3 /* get inverted alpha */ - /* do alpha blending */ - vmull.u8 q8, d24, d4 - vmull.u8 q9, d24, d5 - vmull.u8 q10, d24, d6 - vmull.u8 q11, d24, d7 - .endm - - .macro pixman_composite_out_reverse_8888_8888_process_pixblock_tail - vrshr.u16 q14, q8, #8 - vrshr.u16 q15, q9, #8 - vrshr.u16 q12, q10, #8 - vrshr.u16 q13, q11, #8 - vraddhn.u16 d28, q14, q8 - vraddhn.u16 d29, q15, q9 - vraddhn.u16 d30, q12, q10 - vraddhn.u16 d31, q13, q11 - .endm - -/******************************************************************************/ - -.macro pixman_composite_over_8888_8888_process_pixblock_head - pixman_composite_out_reverse_8888_8888_process_pixblock_head -.endm - -.macro pixman_composite_over_8888_8888_process_pixblock_tail - pixman_composite_out_reverse_8888_8888_process_pixblock_tail - vqadd.u8 q14, q0, q14 - vqadd.u8 q15, q1, q15 -.endm - -.macro pixman_composite_over_8888_8888_process_pixblock_tail_head - vld4.8 {d4, d5, d6, d7}, [DST_R, :128]! - vrshr.u16 q14, q8, #8 - PF add PF_X, PF_X, #8 - PF tst PF_CTL, #0xF - vrshr.u16 q15, q9, #8 - vrshr.u16 q12, q10, #8 - vrshr.u16 q13, q11, #8 - PF addne PF_X, PF_X, #8 - PF subne PF_CTL, PF_CTL, #1 - vraddhn.u16 d28, q14, q8 - vraddhn.u16 d29, q15, q9 - PF cmp PF_X, ORIG_W - vraddhn.u16 d30, q12, q10 - vraddhn.u16 d31, q13, q11 - vqadd.u8 q14, q0, q14 - vqadd.u8 q15, q1, q15 - fetch_src_pixblock - PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift] - vmvn.8 d22, d3 - PF pld, [PF_DST, PF_X, lsl #dst_bpp_shift] - vst4.8 {d28, d29, d30, d31}, [DST_W, :128]! - PF subge PF_X, PF_X, ORIG_W - vmull.u8 q8, d22, d4 - PF subges PF_CTL, PF_CTL, #0x10 - vmull.u8 q9, d22, d5 - PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]! - vmull.u8 q10, d22, d6 - PF ldrgeb DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]! 
- vmull.u8 q11, d22, d7 -.endm - -generate_composite_function \ - pixman_composite_over_8888_8888_asm_neon, 32, 0, 32, \ - FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ - 8, /* number of pixels, processed in a single block */ \ - 5, /* prefetch distance */ \ - default_init, \ - default_cleanup, \ - pixman_composite_over_8888_8888_process_pixblock_head, \ - pixman_composite_over_8888_8888_process_pixblock_tail, \ - pixman_composite_over_8888_8888_process_pixblock_tail_head - -generate_composite_function_single_scanline \ - pixman_composite_scanline_over_asm_neon, 32, 0, 32, \ - FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ - 8, /* number of pixels, processed in a single block */ \ - default_init, \ - default_cleanup, \ - pixman_composite_over_8888_8888_process_pixblock_head, \ - pixman_composite_over_8888_8888_process_pixblock_tail, \ - pixman_composite_over_8888_8888_process_pixblock_tail_head - -/******************************************************************************/ - -.macro pixman_composite_over_n_8888_process_pixblock_head - /* deinterleaved source pixels in {d0, d1, d2, d3} */ - /* inverted alpha in {d24} */ - /* destination pixels in {d4, d5, d6, d7} */ - vmull.u8 q8, d24, d4 - vmull.u8 q9, d24, d5 - vmull.u8 q10, d24, d6 - vmull.u8 q11, d24, d7 -.endm - -.macro pixman_composite_over_n_8888_process_pixblock_tail - vrshr.u16 q14, q8, #8 - vrshr.u16 q15, q9, #8 - vrshr.u16 q2, q10, #8 - vrshr.u16 q3, q11, #8 - vraddhn.u16 d28, q14, q8 - vraddhn.u16 d29, q15, q9 - vraddhn.u16 d30, q2, q10 - vraddhn.u16 d31, q3, q11 - vqadd.u8 q14, q0, q14 - vqadd.u8 q15, q1, q15 -.endm - -.macro pixman_composite_over_n_8888_process_pixblock_tail_head - vrshr.u16 q14, q8, #8 - vrshr.u16 q15, q9, #8 - vrshr.u16 q2, q10, #8 - vrshr.u16 q3, q11, #8 - vraddhn.u16 d28, q14, q8 - vraddhn.u16 d29, q15, q9 - vraddhn.u16 d30, q2, q10 - vraddhn.u16 d31, q3, q11 - vld4.8 {d4, d5, d6, d7}, [DST_R, :128]! - vqadd.u8 q14, q0, q14 - PF add PF_X, PF_X, #8 - PF tst PF_CTL, #0x0F - PF addne PF_X, PF_X, #8 - PF subne PF_CTL, PF_CTL, #1 - vqadd.u8 q15, q1, q15 - PF cmp PF_X, ORIG_W - vmull.u8 q8, d24, d4 - PF pld, [PF_DST, PF_X, lsl #dst_bpp_shift] - vmull.u8 q9, d24, d5 - PF subge PF_X, PF_X, ORIG_W - vmull.u8 q10, d24, d6 - PF subges PF_CTL, PF_CTL, #0x10 - vmull.u8 q11, d24, d7 - PF ldrgeb DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]! - vst4.8 {d28, d29, d30, d31}, [DST_W, :128]! -.endm - -.macro pixman_composite_over_n_8888_init - add DUMMY, sp, #ARGS_STACK_OFFSET - vld1.32 {d3[0]}, [DUMMY] - vdup.8 d0, d3[0] - vdup.8 d1, d3[1] - vdup.8 d2, d3[2] - vdup.8 d3, d3[3] - vmvn.8 d24, d3 /* get inverted alpha */ -.endm - -generate_composite_function \ - pixman_composite_over_n_8888_asm_neon, 0, 0, 32, \ - FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ - 8, /* number of pixels, processed in a single block */ \ - 5, /* prefetch distance */ \ - pixman_composite_over_n_8888_init, \ - default_cleanup, \ - pixman_composite_over_8888_8888_process_pixblock_head, \ - pixman_composite_over_8888_8888_process_pixblock_tail, \ - pixman_composite_over_n_8888_process_pixblock_tail_head - -/******************************************************************************/ - -.macro pixman_composite_src_n_8888_process_pixblock_head -.endm - -.macro pixman_composite_src_n_8888_process_pixblock_tail -.endm - -.macro pixman_composite_src_n_8888_process_pixblock_tail_head - vst1.32 {d0, d1, d2, d3}, [DST_W, :128]! 
-.endm - -.macro pixman_composite_src_n_8888_init - add DUMMY, sp, #ARGS_STACK_OFFSET - vld1.32 {d0[0]}, [DUMMY] - vsli.u64 d0, d0, #32 - vorr d1, d0, d0 - vorr q1, q0, q0 -.endm - -.macro pixman_composite_src_n_8888_cleanup -.endm - -generate_composite_function \ - pixman_composite_src_n_8888_asm_neon, 0, 0, 32, \ - FLAG_DST_WRITEONLY, \ - 8, /* number of pixels, processed in a single block */ \ - 0, /* prefetch distance */ \ - pixman_composite_src_n_8888_init, \ - pixman_composite_src_n_8888_cleanup, \ - pixman_composite_src_n_8888_process_pixblock_head, \ - pixman_composite_src_n_8888_process_pixblock_tail, \ - pixman_composite_src_n_8888_process_pixblock_tail_head, \ - 0, /* dst_w_basereg */ \ - 0, /* dst_r_basereg */ \ - 0, /* src_basereg */ \ - 0 /* mask_basereg */ - -/******************************************************************************/ - -.macro pixman_composite_src_8888_8888_process_pixblock_head -.endm - -.macro pixman_composite_src_8888_8888_process_pixblock_tail -.endm - -.macro pixman_composite_src_8888_8888_process_pixblock_tail_head - vst1.32 {d0, d1, d2, d3}, [DST_W, :128]! - fetch_src_pixblock - cache_preload 8, 8 -.endm - -generate_composite_function \ - pixman_composite_src_8888_8888_asm_neon, 32, 0, 32, \ - FLAG_DST_WRITEONLY, \ - 8, /* number of pixels, processed in a single block */ \ - 10, /* prefetch distance */ \ - default_init, \ - default_cleanup, \ - pixman_composite_src_8888_8888_process_pixblock_head, \ - pixman_composite_src_8888_8888_process_pixblock_tail, \ - pixman_composite_src_8888_8888_process_pixblock_tail_head, \ - 0, /* dst_w_basereg */ \ - 0, /* dst_r_basereg */ \ - 0, /* src_basereg */ \ - 0 /* mask_basereg */ - -/******************************************************************************/ diff --git a/src/vector/pixman-arm-neon-asm.h b/src/vector/pixman-arm-neon-asm.h deleted file mode 100644 index 6add220..0000000 --- a/src/vector/pixman-arm-neon-asm.h +++ /dev/null @@ -1,1126 +0,0 @@ -/* - * Copyright © 2009 Nokia Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - * - * Author: Siarhei Siamashka (siarhei.siamashka@nokia.com) - */ - -/* - * This file contains a macro ('generate_composite_function') which can - * construct 2D image processing functions, based on a common template. - * Any combinations of source, destination and mask images with 8bpp, - * 16bpp, 24bpp, 32bpp color formats are supported. 
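A note on the arithmetic used by the blending macros in pixman-arm-neon-asm.S above: after vmull.u8 produces 16-bit per-channel products, each vrshr.u16 #8 / vraddhn.u16 pair divides those products by 255 with rounding, without any division instruction. A scalar C equivalent of what that instruction pair computes (illustrative only):

    #include <stdint.h>

    /* x is a product of two 8-bit values, i.e. 0 .. 255*255 */
    static uint8_t neon_div255(uint16_t x)
    {
        uint16_t t = (uint16_t)((x + 0x80) >> 8);  /* vrshr.u16   q14, q8, #8  */
        return (uint8_t)((x + t + 0x80) >> 8);     /* vraddhn.u16 d28, q14, q8 */
    }

The following vqadd.u8 then adds the source channels with unsigned saturation, completing the OVER operation.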
- * - * This macro takes care of: - * - handling of leading and trailing unaligned pixels - * - doing most of the work related to L2 cache preload - * - encourages the use of software pipelining for better instructions - * scheduling - * - * The user of this macro has to provide some configuration parameters - * (bit depths for the images, prefetch distance, etc.) and a set of - * macros, which should implement basic code chunks responsible for - * pixels processing. See 'pixman-arm-neon-asm.S' file for the usage - * examples. - * - * TODO: - * - try overlapped pixel method (from Ian Rickards) when processing - * exactly two blocks of pixels - * - maybe add an option to do reverse scanline processing - */ - -/* - * Bit flags for 'generate_composite_function' macro which are used - * to tune generated functions behavior. - */ -.set FLAG_DST_WRITEONLY, 0 -.set FLAG_DST_READWRITE, 1 -.set FLAG_DEINTERLEAVE_32BPP, 2 - -/* - * Offset in stack where mask and source pointer/stride can be accessed - * from 'init' macro. This is useful for doing special handling for solid mask. - */ -.set ARGS_STACK_OFFSET, 40 - -/* - * Constants for selecting preferable prefetch type. - */ -.set PREFETCH_TYPE_NONE, 0 /* No prefetch at all */ -.set PREFETCH_TYPE_SIMPLE, 1 /* A simple, fixed-distance-ahead prefetch */ -.set PREFETCH_TYPE_ADVANCED, 2 /* Advanced fine-grained prefetch */ - -/* - * Definitions of supplementary pixld/pixst macros (for partial load/store of - * pixel data). - */ - -.macro pixldst1 op, elem_size, reg1, mem_operand, abits -.if abits > 0 - op&.&elem_size {d®1}, [&mem_operand&, :&abits&]! -.else - op&.&elem_size {d®1}, [&mem_operand&]! -.endif -.endm - -.macro pixldst2 op, elem_size, reg1, reg2, mem_operand, abits -.if abits > 0 - op&.&elem_size {d®1, d®2}, [&mem_operand&, :&abits&]! -.else - op&.&elem_size {d®1, d®2}, [&mem_operand&]! -.endif -.endm - -.macro pixldst4 op, elem_size, reg1, reg2, reg3, reg4, mem_operand, abits -.if abits > 0 - op&.&elem_size {d®1, d®2, d®3, d®4}, [&mem_operand&, :&abits&]! -.else - op&.&elem_size {d®1, d®2, d®3, d®4}, [&mem_operand&]! -.endif -.endm - -.macro pixldst0 op, elem_size, reg1, idx, mem_operand, abits - op&.&elem_size {d®1[idx]}, [&mem_operand&]! -.endm - -.macro pixldst3 op, elem_size, reg1, reg2, reg3, mem_operand - op&.&elem_size {d®1, d®2, d®3}, [&mem_operand&]! -.endm - -.macro pixldst30 op, elem_size, reg1, reg2, reg3, idx, mem_operand - op&.&elem_size {d®1[idx], d®2[idx], d®3[idx]}, [&mem_operand&]! 
-.endm - -.macro pixldst numbytes, op, elem_size, basereg, mem_operand, abits -.if numbytes == 32 - pixldst4 op, elem_size, %(basereg+4), %(basereg+5), \ - %(basereg+6), %(basereg+7), mem_operand, abits -.elseif numbytes == 16 - pixldst2 op, elem_size, %(basereg+2), %(basereg+3), mem_operand, abits -.elseif numbytes == 8 - pixldst1 op, elem_size, %(basereg+1), mem_operand, abits -.elseif numbytes == 4 - .if !RESPECT_STRICT_ALIGNMENT || (elem_size == 32) - pixldst0 op, 32, %(basereg+0), 1, mem_operand, abits - .elseif elem_size == 16 - pixldst0 op, 16, %(basereg+0), 2, mem_operand, abits - pixldst0 op, 16, %(basereg+0), 3, mem_operand, abits - .else - pixldst0 op, 8, %(basereg+0), 4, mem_operand, abits - pixldst0 op, 8, %(basereg+0), 5, mem_operand, abits - pixldst0 op, 8, %(basereg+0), 6, mem_operand, abits - pixldst0 op, 8, %(basereg+0), 7, mem_operand, abits - .endif -.elseif numbytes == 2 - .if !RESPECT_STRICT_ALIGNMENT || (elem_size == 16) - pixldst0 op, 16, %(basereg+0), 1, mem_operand, abits - .else - pixldst0 op, 8, %(basereg+0), 2, mem_operand, abits - pixldst0 op, 8, %(basereg+0), 3, mem_operand, abits - .endif -.elseif numbytes == 1 - pixldst0 op, 8, %(basereg+0), 1, mem_operand, abits -.else - .error "unsupported size: numbytes" -.endif -.endm - -.macro pixld numpix, bpp, basereg, mem_operand, abits=0 -.if bpp > 0 -.if (bpp == 32) && (numpix == 8) && (DEINTERLEAVE_32BPP_ENABLED != 0) - pixldst4 vld4, 8, %(basereg+4), %(basereg+5), \ - %(basereg+6), %(basereg+7), mem_operand, abits -.elseif (bpp == 24) && (numpix == 8) - pixldst3 vld3, 8, %(basereg+3), %(basereg+4), %(basereg+5), mem_operand -.elseif (bpp == 24) && (numpix == 4) - pixldst30 vld3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 4, mem_operand - pixldst30 vld3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 5, mem_operand - pixldst30 vld3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 6, mem_operand - pixldst30 vld3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 7, mem_operand -.elseif (bpp == 24) && (numpix == 2) - pixldst30 vld3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 2, mem_operand - pixldst30 vld3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 3, mem_operand -.elseif (bpp == 24) && (numpix == 1) - pixldst30 vld3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 1, mem_operand -.else - pixldst %(numpix * bpp / 8), vld1, %(bpp), basereg, mem_operand, abits -.endif -.endif -.endm - -.macro pixst numpix, bpp, basereg, mem_operand, abits=0 -.if bpp > 0 -.if (bpp == 32) && (numpix == 8) && (DEINTERLEAVE_32BPP_ENABLED != 0) - pixldst4 vst4, 8, %(basereg+4), %(basereg+5), \ - %(basereg+6), %(basereg+7), mem_operand, abits -.elseif (bpp == 24) && (numpix == 8) - pixldst3 vst3, 8, %(basereg+3), %(basereg+4), %(basereg+5), mem_operand -.elseif (bpp == 24) && (numpix == 4) - pixldst30 vst3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 4, mem_operand - pixldst30 vst3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 5, mem_operand - pixldst30 vst3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 6, mem_operand - pixldst30 vst3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 7, mem_operand -.elseif (bpp == 24) && (numpix == 2) - pixldst30 vst3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 2, mem_operand - pixldst30 vst3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 3, mem_operand -.elseif (bpp == 24) && (numpix == 1) - pixldst30 vst3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 1, mem_operand -.else - pixldst %(numpix * bpp / 8), vst1, %(bpp), basereg, mem_operand, abits -.endif -.endif -.endm - -.macro pixld_a numpix, bpp, basereg, 
mem_operand -.if (bpp * numpix) <= 128 - pixld numpix, bpp, basereg, mem_operand, %(bpp * numpix) -.else - pixld numpix, bpp, basereg, mem_operand, 128 -.endif -.endm - -.macro pixst_a numpix, bpp, basereg, mem_operand -.if (bpp * numpix) <= 128 - pixst numpix, bpp, basereg, mem_operand, %(bpp * numpix) -.else - pixst numpix, bpp, basereg, mem_operand, 128 -.endif -.endm - -/* - * Pixel fetcher for nearest scaling (needs TMP1, TMP2, VX, UNIT_X register - * aliases to be defined) - */ -.macro pixld1_s elem_size, reg1, mem_operand -.if elem_size == 16 - mov TMP1, VX, asr #16 - adds VX, VX, UNIT_X -5: subpls VX, VX, SRC_WIDTH_FIXED - bpl 5b - add TMP1, mem_operand, TMP1, asl #1 - mov TMP2, VX, asr #16 - adds VX, VX, UNIT_X -5: subpls VX, VX, SRC_WIDTH_FIXED - bpl 5b - add TMP2, mem_operand, TMP2, asl #1 - vld1.16 {d®1&[0]}, [TMP1, :16] - mov TMP1, VX, asr #16 - adds VX, VX, UNIT_X -5: subpls VX, VX, SRC_WIDTH_FIXED - bpl 5b - add TMP1, mem_operand, TMP1, asl #1 - vld1.16 {d®1&[1]}, [TMP2, :16] - mov TMP2, VX, asr #16 - adds VX, VX, UNIT_X -5: subpls VX, VX, SRC_WIDTH_FIXED - bpl 5b - add TMP2, mem_operand, TMP2, asl #1 - vld1.16 {d®1&[2]}, [TMP1, :16] - vld1.16 {d®1&[3]}, [TMP2, :16] -.elseif elem_size == 32 - mov TMP1, VX, asr #16 - adds VX, VX, UNIT_X -5: subpls VX, VX, SRC_WIDTH_FIXED - bpl 5b - add TMP1, mem_operand, TMP1, asl #2 - mov TMP2, VX, asr #16 - adds VX, VX, UNIT_X -5: subpls VX, VX, SRC_WIDTH_FIXED - bpl 5b - add TMP2, mem_operand, TMP2, asl #2 - vld1.32 {d®1&[0]}, [TMP1, :32] - vld1.32 {d®1&[1]}, [TMP2, :32] -.else - .error "unsupported" -.endif -.endm - -.macro pixld2_s elem_size, reg1, reg2, mem_operand -.if 0 /* elem_size == 32 */ - mov TMP1, VX, asr #16 - add VX, VX, UNIT_X, asl #1 - add TMP1, mem_operand, TMP1, asl #2 - mov TMP2, VX, asr #16 - sub VX, VX, UNIT_X - add TMP2, mem_operand, TMP2, asl #2 - vld1.32 {d®1&[0]}, [TMP1, :32] - mov TMP1, VX, asr #16 - add VX, VX, UNIT_X, asl #1 - add TMP1, mem_operand, TMP1, asl #2 - vld1.32 {d®2&[0]}, [TMP2, :32] - mov TMP2, VX, asr #16 - add VX, VX, UNIT_X - add TMP2, mem_operand, TMP2, asl #2 - vld1.32 {d®1&[1]}, [TMP1, :32] - vld1.32 {d®2&[1]}, [TMP2, :32] -.else - pixld1_s elem_size, reg1, mem_operand - pixld1_s elem_size, reg2, mem_operand -.endif -.endm - -.macro pixld0_s elem_size, reg1, idx, mem_operand -.if elem_size == 16 - mov TMP1, VX, asr #16 - adds VX, VX, UNIT_X -5: subpls VX, VX, SRC_WIDTH_FIXED - bpl 5b - add TMP1, mem_operand, TMP1, asl #1 - vld1.16 {d®1&[idx]}, [TMP1, :16] -.elseif elem_size == 32 - mov TMP1, VX, asr #16 - adds VX, VX, UNIT_X -5: subpls VX, VX, SRC_WIDTH_FIXED - bpl 5b - add TMP1, mem_operand, TMP1, asl #2 - vld1.32 {d®1&[idx]}, [TMP1, :32] -.endif -.endm - -.macro pixld_s_internal numbytes, elem_size, basereg, mem_operand -.if numbytes == 32 - pixld2_s elem_size, %(basereg+4), %(basereg+5), mem_operand - pixld2_s elem_size, %(basereg+6), %(basereg+7), mem_operand - pixdeinterleave elem_size, %(basereg+4) -.elseif numbytes == 16 - pixld2_s elem_size, %(basereg+2), %(basereg+3), mem_operand -.elseif numbytes == 8 - pixld1_s elem_size, %(basereg+1), mem_operand -.elseif numbytes == 4 - .if elem_size == 32 - pixld0_s elem_size, %(basereg+0), 1, mem_operand - .elseif elem_size == 16 - pixld0_s elem_size, %(basereg+0), 2, mem_operand - pixld0_s elem_size, %(basereg+0), 3, mem_operand - .else - pixld0_s elem_size, %(basereg+0), 4, mem_operand - pixld0_s elem_size, %(basereg+0), 5, mem_operand - pixld0_s elem_size, %(basereg+0), 6, mem_operand - pixld0_s elem_size, %(basereg+0), 7, mem_operand - .endif 
-.elseif numbytes == 2 - .if elem_size == 16 - pixld0_s elem_size, %(basereg+0), 1, mem_operand - .else - pixld0_s elem_size, %(basereg+0), 2, mem_operand - pixld0_s elem_size, %(basereg+0), 3, mem_operand - .endif -.elseif numbytes == 1 - pixld0_s elem_size, %(basereg+0), 1, mem_operand -.else - .error "unsupported size: numbytes" -.endif -.endm - -.macro pixld_s numpix, bpp, basereg, mem_operand -.if bpp > 0 - pixld_s_internal %(numpix * bpp / 8), %(bpp), basereg, mem_operand -.endif -.endm - -.macro vuzp8 reg1, reg2 - vuzp.8 d®1, d®2 -.endm - -.macro vzip8 reg1, reg2 - vzip.8 d®1, d®2 -.endm - -/* deinterleave B, G, R, A channels for eight 32bpp pixels in 4 registers */ -.macro pixdeinterleave bpp, basereg -.if (bpp == 32) && (DEINTERLEAVE_32BPP_ENABLED != 0) - vuzp8 %(basereg+0), %(basereg+1) - vuzp8 %(basereg+2), %(basereg+3) - vuzp8 %(basereg+1), %(basereg+3) - vuzp8 %(basereg+0), %(basereg+2) -.endif -.endm - -/* interleave B, G, R, A channels for eight 32bpp pixels in 4 registers */ -.macro pixinterleave bpp, basereg -.if (bpp == 32) && (DEINTERLEAVE_32BPP_ENABLED != 0) - vzip8 %(basereg+0), %(basereg+2) - vzip8 %(basereg+1), %(basereg+3) - vzip8 %(basereg+2), %(basereg+3) - vzip8 %(basereg+0), %(basereg+1) -.endif -.endm - -/* - * This is a macro for implementing cache preload. The main idea is that - * cache preload logic is mostly independent from the rest of pixels - * processing code. It starts at the top left pixel and moves forward - * across pixels and can jump across scanlines. Prefetch distance is - * handled in an 'incremental' way: it starts from 0 and advances to the - * optimal distance over time. After reaching optimal prefetch distance, - * it is kept constant. There are some checks which prevent prefetching - * unneeded pixel lines below the image (but it still can prefetch a bit - * more data on the right side of the image - not a big issue and may - * be actually helpful when rendering text glyphs). Additional trick is - * the use of LDR instruction for prefetch instead of PLD when moving to - * the next line, the point is that we have a high chance of getting TLB - * miss in this case, and PLD would be useless. - * - * This sounds like it may introduce a noticeable overhead (when working with - * fully cached data). But in reality, due to having a separate pipeline and - * instruction queue for NEON unit in ARM Cortex-A8, normal ARM code can - * execute simultaneously with NEON and be completely shadowed by it. Thus - * we get no performance overhead at all (*). This looks like a very nice - * feature of Cortex-A8, if used wisely. We don't have a hardware prefetcher, - * but still can implement some rather advanced prefetch logic in software - * for almost zero cost! - * - * (*) The overhead of the prefetcher is visible when running some trivial - * pixels processing like simple copy. Anyway, having prefetch is a must - * when working with the graphics data. 
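A schematic C model of the prefetch bookkeeping described above, as implemented by the cache_preload macro below. This is illustrative only: the struct fields mirror the PF_X / PF_CTL / PF_SRC register aliases, prefetch() stands in for PLD (shown here with the GCC/Clang builtin), and touch() stands in for the LDR page-touch:

    #include <stdint.h>

    static void prefetch(const uint8_t *p) { __builtin_prefetch(p); }           /* PLD */
    static void touch(const uint8_t *p)    { (void)*(volatile const uint8_t *)p; } /* LDR */

    typedef struct {
        int            pf_x;    /* prefetch position, in pixels, within the line  */
        int            pf_ctl;  /* (scanlines_left << 4) | distance_growth_budget */
        const uint8_t *pf_src;  /* scanline currently being prefetched            */
    } prefetch_state;

    static void cache_preload_model(prefetch_state *p, int block_pixels,
                                    int width, int stride, int bpp_shift)
    {
        p->pf_x += block_pixels;                  /* std_increment                 */
        if (p->pf_ctl & 0xf) {                    /* distance still growing?       */
            p->pf_x += block_pixels;              /* boost_increment               */
            p->pf_ctl -= 1;
        }
        prefetch(p->pf_src + (p->pf_x << bpp_shift));   /* may run past the edge   */
        if (p->pf_x >= width) {                   /* ran past the end of the line  */
            p->pf_x -= width;
            if ((p->pf_ctl -= 0x10) >= 0) {       /* more scanlines left below?    */
                p->pf_src += stride << bpp_shift; /* step to the next line and     */
                touch(p->pf_src);                 /* LDR it to force the TLB walk  */
            }
        }
    }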
- */ -.macro PF a, x:vararg -.if (PREFETCH_TYPE_CURRENT == PREFETCH_TYPE_ADVANCED) - a x -.endif -.endm - -.macro cache_preload std_increment, boost_increment -.if (src_bpp_shift >= 0) || (dst_r_bpp != 0) || (mask_bpp_shift >= 0) -.if regs_shortage - PF ldr ORIG_W, [sp] /* If we are short on regs, ORIG_W is kept on stack */ -.endif -.if std_increment != 0 - PF add PF_X, PF_X, #std_increment -.endif - PF tst PF_CTL, #0xF - PF addne PF_X, PF_X, #boost_increment - PF subne PF_CTL, PF_CTL, #1 - PF cmp PF_X, ORIG_W -.if src_bpp_shift >= 0 - PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift] -.endif -.if dst_r_bpp != 0 - PF pld, [PF_DST, PF_X, lsl #dst_bpp_shift] -.endif -.if mask_bpp_shift >= 0 - PF pld, [PF_MASK, PF_X, lsl #mask_bpp_shift] -.endif - PF subge PF_X, PF_X, ORIG_W - PF subges PF_CTL, PF_CTL, #0x10 -.if src_bpp_shift >= 0 - PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]! -.endif -.if dst_r_bpp != 0 - PF ldrgeb DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]! -.endif -.if mask_bpp_shift >= 0 - PF ldrgeb DUMMY, [PF_MASK, MASK_STRIDE, lsl #mask_bpp_shift]! -.endif -.endif -.endm - -.macro cache_preload_simple -.if (PREFETCH_TYPE_CURRENT == PREFETCH_TYPE_SIMPLE) -.if src_bpp > 0 - pld [SRC, #(PREFETCH_DISTANCE_SIMPLE * src_bpp / 8)] -.endif -.if dst_r_bpp > 0 - pld [DST_R, #(PREFETCH_DISTANCE_SIMPLE * dst_r_bpp / 8)] -.endif -.if mask_bpp > 0 - pld [MASK, #(PREFETCH_DISTANCE_SIMPLE * mask_bpp / 8)] -.endif -.endif -.endm - -.macro fetch_mask_pixblock - pixld pixblock_size, mask_bpp, \ - (mask_basereg - pixblock_size * mask_bpp / 64), MASK -.endm - -/* - * Macro which is used to process leading pixels until destination - * pointer is properly aligned (at 16 bytes boundary). When destination - * buffer uses 16bpp format, this is unnecessary, or even pointless. - */ -.macro ensure_destination_ptr_alignment process_pixblock_head, \ - process_pixblock_tail, \ - process_pixblock_tail_head -.if dst_w_bpp != 24 - tst DST_R, #0xF - beq 2f - -.irp lowbit, 1, 2, 4, 8, 16 -local skip1 -.if (dst_w_bpp <= (lowbit * 8)) && ((lowbit * 8) < (pixblock_size * dst_w_bpp)) -.if lowbit < 16 /* we don't need more than 16-byte alignment */ - tst DST_R, #lowbit - beq 1f -.endif - pixld_src (lowbit * 8 / dst_w_bpp), src_bpp, src_basereg, SRC - pixld (lowbit * 8 / dst_w_bpp), mask_bpp, mask_basereg, MASK -.if dst_r_bpp > 0 - pixld_a (lowbit * 8 / dst_r_bpp), dst_r_bpp, dst_r_basereg, DST_R -.else - add DST_R, DST_R, #lowbit -.endif - PF add PF_X, PF_X, #(lowbit * 8 / dst_w_bpp) - sub W, W, #(lowbit * 8 / dst_w_bpp) -1: -.endif -.endr - pixdeinterleave src_bpp, src_basereg - pixdeinterleave mask_bpp, mask_basereg - pixdeinterleave dst_r_bpp, dst_r_basereg - - process_pixblock_head - cache_preload 0, pixblock_size - cache_preload_simple - process_pixblock_tail - - pixinterleave dst_w_bpp, dst_w_basereg -.irp lowbit, 1, 2, 4, 8, 16 -.if (dst_w_bpp <= (lowbit * 8)) && ((lowbit * 8) < (pixblock_size * dst_w_bpp)) -.if lowbit < 16 /* we don't need more than 16-byte alignment */ - tst DST_W, #lowbit - beq 1f -.endif - pixst_a (lowbit * 8 / dst_w_bpp), dst_w_bpp, dst_w_basereg, DST_W -1: -.endif -.endr -.endif -2: -.endm - -/* - * Special code for processing up to (pixblock_size - 1) remaining - * trailing pixels. As SIMD processing performs operation on - * pixblock_size pixels, anything smaller than this has to be loaded - * and stored in a special way. 
Loading and storing of pixel data is - * performed in such a way that we fill some 'slots' in the NEON - * registers (some slots naturally are unused), then perform compositing - * operation as usual. In the end, the data is taken from these 'slots' - * and saved to memory. - * - * cache_preload_flag - allows to suppress prefetch if - * set to 0 - * dst_aligned_flag - selects whether destination buffer - * is aligned - */ -.macro process_trailing_pixels cache_preload_flag, \ - dst_aligned_flag, \ - process_pixblock_head, \ - process_pixblock_tail, \ - process_pixblock_tail_head - tst W, #(pixblock_size - 1) - beq 2f -.irp chunk_size, 16, 8, 4, 2, 1 -.if pixblock_size > chunk_size - tst W, #chunk_size - beq 1f - pixld_src chunk_size, src_bpp, src_basereg, SRC - pixld chunk_size, mask_bpp, mask_basereg, MASK -.if dst_aligned_flag != 0 - pixld_a chunk_size, dst_r_bpp, dst_r_basereg, DST_R -.else - pixld chunk_size, dst_r_bpp, dst_r_basereg, DST_R -.endif -.if cache_preload_flag != 0 - PF add PF_X, PF_X, #chunk_size -.endif -1: -.endif -.endr - pixdeinterleave src_bpp, src_basereg - pixdeinterleave mask_bpp, mask_basereg - pixdeinterleave dst_r_bpp, dst_r_basereg - - process_pixblock_head -.if cache_preload_flag != 0 - cache_preload 0, pixblock_size - cache_preload_simple -.endif - process_pixblock_tail - pixinterleave dst_w_bpp, dst_w_basereg -.irp chunk_size, 16, 8, 4, 2, 1 -.if pixblock_size > chunk_size - tst W, #chunk_size - beq 1f -.if dst_aligned_flag != 0 - pixst_a chunk_size, dst_w_bpp, dst_w_basereg, DST_W -.else - pixst chunk_size, dst_w_bpp, dst_w_basereg, DST_W -.endif -1: -.endif -.endr -2: -.endm - -/* - * Macro, which performs all the needed operations to switch to the next - * scanline and start the next loop iteration unless all the scanlines - * are already processed. - */ -.macro advance_to_next_scanline start_of_loop_label -.if regs_shortage - ldrd W, [sp] /* load W and H (width and height) from stack */ -.else - mov W, ORIG_W -.endif - add DST_W, DST_W, DST_STRIDE, lsl #dst_bpp_shift -.if src_bpp != 0 - add SRC, SRC, SRC_STRIDE, lsl #src_bpp_shift -.endif -.if mask_bpp != 0 - add MASK, MASK, MASK_STRIDE, lsl #mask_bpp_shift -.endif -.if (dst_w_bpp != 24) - sub DST_W, DST_W, W, lsl #dst_bpp_shift -.endif -.if (src_bpp != 24) && (src_bpp != 0) - sub SRC, SRC, W, lsl #src_bpp_shift -.endif -.if (mask_bpp != 24) && (mask_bpp != 0) - sub MASK, MASK, W, lsl #mask_bpp_shift -.endif - subs H, H, #1 - mov DST_R, DST_W -.if regs_shortage - str H, [sp, #4] /* save updated height to stack */ -.endif - bge start_of_loop_label -.endm - -/* - * Registers are allocated in the following way by default: - * d0, d1, d2, d3 - reserved for loading source pixel data - * d4, d5, d6, d7 - reserved for loading destination pixel data - * d24, d25, d26, d27 - reserved for loading mask pixel data - * d28, d29, d30, d31 - final destination pixel data for writeback to memory - */ -.macro generate_composite_function fname, \ - src_bpp_, \ - mask_bpp_, \ - dst_w_bpp_, \ - flags, \ - pixblock_size_, \ - prefetch_distance, \ - init, \ - cleanup, \ - process_pixblock_head, \ - process_pixblock_tail, \ - process_pixblock_tail_head, \ - dst_w_basereg_ = 28, \ - dst_r_basereg_ = 4, \ - src_basereg_ = 0, \ - mask_basereg_ = 24 - - pixman_asm_function fname - - push {r4-r12, lr} /* save all registers */ - -/* - * Select prefetch type for this function. If prefetch distance is - * set to 0 or one of the color formats is 24bpp, SIMPLE prefetch - * has to be used instead of ADVANCED. 
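Looking back at process_trailing_pixels above: a remainder of fewer than pixblock_size pixels is consumed by decomposing it into power-of-two chunks (16, 8, 4, 2, 1), so every trailing pixel is loaded and stored exactly once into spare register slots. A small illustrative C model of that decomposition (names are made up for the example):

    #include <stdio.h>

    static void trailing_pixels_model(int w, int pixblock_size)
    {
        int remaining = w & (pixblock_size - 1);   /* tst W, #(pixblock_size - 1) */
        for (int chunk = 16; chunk >= 1; chunk >>= 1)
            if (pixblock_size > chunk && (remaining & chunk))
                printf("load a %d-pixel chunk into its slots\n", chunk);
        printf("run *_head and *_tail once over the gathered slots\n");
        for (int chunk = 16; chunk >= 1; chunk >>= 1)
            if (pixblock_size > chunk && (remaining & chunk))
                printf("store a %d-pixel chunk\n", chunk);
    }

    /* e.g. trailing_pixels_model(13, 8): loads a 4-pixel and a 1-pixel chunk */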
- */ - .set PREFETCH_TYPE_CURRENT, PREFETCH_TYPE_DEFAULT -.if prefetch_distance == 0 - .set PREFETCH_TYPE_CURRENT, PREFETCH_TYPE_NONE -.elseif (PREFETCH_TYPE_CURRENT > PREFETCH_TYPE_SIMPLE) && \ - ((src_bpp_ == 24) || (mask_bpp_ == 24) || (dst_w_bpp_ == 24)) - .set PREFETCH_TYPE_CURRENT, PREFETCH_TYPE_SIMPLE -.endif - -/* - * Make some macro arguments globally visible and accessible - * from other macros - */ - .set src_bpp, src_bpp_ - .set mask_bpp, mask_bpp_ - .set dst_w_bpp, dst_w_bpp_ - .set pixblock_size, pixblock_size_ - .set dst_w_basereg, dst_w_basereg_ - .set dst_r_basereg, dst_r_basereg_ - .set src_basereg, src_basereg_ - .set mask_basereg, mask_basereg_ - - .macro pixld_src x:vararg - pixld x - .endm - .macro fetch_src_pixblock - pixld_src pixblock_size, src_bpp, \ - (src_basereg - pixblock_size * src_bpp / 64), SRC - .endm -/* - * Assign symbolic names to registers - */ - W .req r0 /* width (is updated during processing) */ - H .req r1 /* height (is updated during processing) */ - DST_W .req r2 /* destination buffer pointer for writes */ - DST_STRIDE .req r3 /* destination image stride */ - SRC .req r4 /* source buffer pointer */ - SRC_STRIDE .req r5 /* source image stride */ - DST_R .req r6 /* destination buffer pointer for reads */ - - MASK .req r7 /* mask pointer */ - MASK_STRIDE .req r8 /* mask stride */ - - PF_CTL .req r9 /* combined lines counter and prefetch */ - /* distance increment counter */ - PF_X .req r10 /* pixel index in a scanline for current */ - /* pretetch position */ - PF_SRC .req r11 /* pointer to source scanline start */ - /* for prefetch purposes */ - PF_DST .req r12 /* pointer to destination scanline start */ - /* for prefetch purposes */ - PF_MASK .req r14 /* pointer to mask scanline start */ - /* for prefetch purposes */ -/* - * Check whether we have enough registers for all the local variables. - * If we don't have enough registers, original width and height are - * kept on top of stack (and 'regs_shortage' variable is set to indicate - * this for the rest of code). Even if there are enough registers, the - * allocation scheme may be a bit different depending on whether source - * or mask is not used. 
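Given the register aliases above and the AAPCS calling convention (r0-r3 carry the first four arguments, the rest sit on the stack above the push {r4-r12, lr} frame, i.e. at [sp, #40] upward, which is what ARGS_STACK_OFFSET refers to), the generated functions appear to be called with a signature along these lines. This prototype is an assumption reconstructed from the register and stack usage, with strides expressed in pixels:

    #include <stdint.h>

    void pixman_composite_over_8888_8888_asm_neon(
        int32_t   width,        /* r0: W                     */
        int32_t   height,       /* r1: H                     */
        uint32_t *dst,          /* r2: DST_W                 */
        int32_t   dst_stride,   /* r3: DST_STRIDE, in pixels */
        uint32_t *src,          /* [sp, #40]: SRC            */
        int32_t   src_stride);  /* [sp, #44]: SRC_STRIDE     */

For masked variants the mask pointer and stride follow at [sp, #48] and [sp, #52], which matches the ldr MASK / ldr MASK_STRIDE loads in the macro.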
- */ -.if (PREFETCH_TYPE_CURRENT < PREFETCH_TYPE_ADVANCED) - ORIG_W .req r10 /* saved original width */ - DUMMY .req r12 /* temporary register */ - .set regs_shortage, 0 -.elseif mask_bpp == 0 - ORIG_W .req r7 /* saved original width */ - DUMMY .req r8 /* temporary register */ - .set regs_shortage, 0 -.elseif src_bpp == 0 - ORIG_W .req r4 /* saved original width */ - DUMMY .req r5 /* temporary register */ - .set regs_shortage, 0 -.else - ORIG_W .req r1 /* saved original width */ - DUMMY .req r1 /* temporary register */ - .set regs_shortage, 1 -.endif - - .set mask_bpp_shift, -1 -.if src_bpp == 32 - .set src_bpp_shift, 2 -.elseif src_bpp == 24 - .set src_bpp_shift, 0 -.elseif src_bpp == 16 - .set src_bpp_shift, 1 -.elseif src_bpp == 8 - .set src_bpp_shift, 0 -.elseif src_bpp == 0 - .set src_bpp_shift, -1 -.else - .error "requested src bpp (src_bpp) is not supported" -.endif -.if mask_bpp == 32 - .set mask_bpp_shift, 2 -.elseif mask_bpp == 24 - .set mask_bpp_shift, 0 -.elseif mask_bpp == 8 - .set mask_bpp_shift, 0 -.elseif mask_bpp == 0 - .set mask_bpp_shift, -1 -.else - .error "requested mask bpp (mask_bpp) is not supported" -.endif -.if dst_w_bpp == 32 - .set dst_bpp_shift, 2 -.elseif dst_w_bpp == 24 - .set dst_bpp_shift, 0 -.elseif dst_w_bpp == 16 - .set dst_bpp_shift, 1 -.elseif dst_w_bpp == 8 - .set dst_bpp_shift, 0 -.else - .error "requested dst bpp (dst_w_bpp) is not supported" -.endif - -.if (((flags) & FLAG_DST_READWRITE) != 0) - .set dst_r_bpp, dst_w_bpp -.else - .set dst_r_bpp, 0 -.endif -.if (((flags) & FLAG_DEINTERLEAVE_32BPP) != 0) - .set DEINTERLEAVE_32BPP_ENABLED, 1 -.else - .set DEINTERLEAVE_32BPP_ENABLED, 0 -.endif - -.if prefetch_distance < 0 || prefetch_distance > 15 - .error "invalid prefetch distance (prefetch_distance)" -.endif - -.if src_bpp > 0 - ldr SRC, [sp, #40] -.endif -.if mask_bpp > 0 - ldr MASK, [sp, #48] -.endif - PF mov PF_X, #0 -.if src_bpp > 0 - ldr SRC_STRIDE, [sp, #44] -.endif -.if mask_bpp > 0 - ldr MASK_STRIDE, [sp, #52] -.endif - mov DST_R, DST_W - -.if src_bpp == 24 - sub SRC_STRIDE, SRC_STRIDE, W - sub SRC_STRIDE, SRC_STRIDE, W, lsl #1 -.endif -.if mask_bpp == 24 - sub MASK_STRIDE, MASK_STRIDE, W - sub MASK_STRIDE, MASK_STRIDE, W, lsl #1 -.endif -.if dst_w_bpp == 24 - sub DST_STRIDE, DST_STRIDE, W - sub DST_STRIDE, DST_STRIDE, W, lsl #1 -.endif - -/* - * Setup advanced prefetcher initial state - */ - PF mov PF_SRC, SRC - PF mov PF_DST, DST_R - PF mov PF_MASK, MASK - /* PF_CTL = prefetch_distance | ((h - 1) << 4) */ - PF mov PF_CTL, H, lsl #4 - PF add PF_CTL, #(prefetch_distance - 0x10) - - init -.if regs_shortage - push {r0, r1} -.endif - subs H, H, #1 -.if regs_shortage - str H, [sp, #4] /* save updated height to stack */ -.else - mov ORIG_W, W -.endif - blt 9f - cmp W, #(pixblock_size * 2) - blt 8f -/* - * This is the start of the pipelined loop, which if optimized for - * long scanlines - */ -0: - ensure_destination_ptr_alignment process_pixblock_head, \ - process_pixblock_tail, \ - process_pixblock_tail_head - - /* Implement "head (tail_head) ... 
(tail_head) tail" loop pattern */ - pixld_a pixblock_size, dst_r_bpp, \ - (dst_r_basereg - pixblock_size * dst_r_bpp / 64), DST_R - fetch_src_pixblock - pixld pixblock_size, mask_bpp, \ - (mask_basereg - pixblock_size * mask_bpp / 64), MASK - PF add PF_X, PF_X, #pixblock_size - process_pixblock_head - cache_preload 0, pixblock_size - cache_preload_simple - subs W, W, #(pixblock_size * 2) - blt 2f -1: - process_pixblock_tail_head - cache_preload_simple - subs W, W, #pixblock_size - bge 1b -2: - process_pixblock_tail - pixst_a pixblock_size, dst_w_bpp, \ - (dst_w_basereg - pixblock_size * dst_w_bpp / 64), DST_W - - /* Process the remaining trailing pixels in the scanline */ - process_trailing_pixels 1, 1, \ - process_pixblock_head, \ - process_pixblock_tail, \ - process_pixblock_tail_head - advance_to_next_scanline 0b - -.if regs_shortage - pop {r0, r1} -.endif - cleanup - pop {r4-r12, pc} /* exit */ -/* - * This is the start of the loop, designed to process images with small width - * (less than pixblock_size * 2 pixels). In this case neither pipelining - * nor prefetch are used. - */ -8: - /* Process exactly pixblock_size pixels if needed */ - tst W, #pixblock_size - beq 1f - pixld pixblock_size, dst_r_bpp, \ - (dst_r_basereg - pixblock_size * dst_r_bpp / 64), DST_R - fetch_src_pixblock - pixld pixblock_size, mask_bpp, \ - (mask_basereg - pixblock_size * mask_bpp / 64), MASK - process_pixblock_head - process_pixblock_tail - pixst pixblock_size, dst_w_bpp, \ - (dst_w_basereg - pixblock_size * dst_w_bpp / 64), DST_W -1: - /* Process the remaining trailing pixels in the scanline */ - process_trailing_pixels 0, 0, \ - process_pixblock_head, \ - process_pixblock_tail, \ - process_pixblock_tail_head - advance_to_next_scanline 8b -9: -.if regs_shortage - pop {r0, r1} -.endif - cleanup - pop {r4-r12, pc} /* exit */ - - .purgem fetch_src_pixblock - .purgem pixld_src - - .unreq SRC - .unreq MASK - .unreq DST_R - .unreq DST_W - .unreq ORIG_W - .unreq W - .unreq H - .unreq SRC_STRIDE - .unreq DST_STRIDE - .unreq MASK_STRIDE - .unreq PF_CTL - .unreq PF_X - .unreq PF_SRC - .unreq PF_DST - .unreq PF_MASK - .unreq DUMMY - .endfunc -.endm - -/* - * A simplified variant of function generation template for a single - * scanline processing (for implementing pixman combine functions) - */ -.macro generate_composite_function_scanline use_nearest_scaling, \ - fname, \ - src_bpp_, \ - mask_bpp_, \ - dst_w_bpp_, \ - flags, \ - pixblock_size_, \ - init, \ - cleanup, \ - process_pixblock_head, \ - process_pixblock_tail, \ - process_pixblock_tail_head, \ - dst_w_basereg_ = 28, \ - dst_r_basereg_ = 4, \ - src_basereg_ = 0, \ - mask_basereg_ = 24 - - pixman_asm_function fname - - .set PREFETCH_TYPE_CURRENT, PREFETCH_TYPE_NONE -/* - * Make some macro arguments globally visible and accessible - * from other macros - */ - .set src_bpp, src_bpp_ - .set mask_bpp, mask_bpp_ - .set dst_w_bpp, dst_w_bpp_ - .set pixblock_size, pixblock_size_ - .set dst_w_basereg, dst_w_basereg_ - .set dst_r_basereg, dst_r_basereg_ - .set src_basereg, src_basereg_ - .set mask_basereg, mask_basereg_ - -.if use_nearest_scaling != 0 - /* - * Assign symbolic names to registers for nearest scaling - */ - W .req r0 - DST_W .req r1 - SRC .req r2 - VX .req r3 - UNIT_X .req ip - MASK .req lr - TMP1 .req r4 - TMP2 .req r5 - DST_R .req r6 - SRC_WIDTH_FIXED .req r7 - - .macro pixld_src x:vararg - pixld_s x - .endm - - ldr UNIT_X, [sp] - push {r4-r8, lr} - ldr SRC_WIDTH_FIXED, [sp, #(24 + 4)] - .if mask_bpp != 0 - ldr MASK, [sp, #(24 + 8)] - .endif -.else - 
/* - * Assign symbolic names to registers - */ - W .req r0 /* width (is updated during processing) */ - DST_W .req r1 /* destination buffer pointer for writes */ - SRC .req r2 /* source buffer pointer */ - DST_R .req ip /* destination buffer pointer for reads */ - MASK .req r3 /* mask pointer */ - - .macro pixld_src x:vararg - pixld x - .endm -.endif - -.if (((flags) & FLAG_DST_READWRITE) != 0) - .set dst_r_bpp, dst_w_bpp -.else - .set dst_r_bpp, 0 -.endif -.if (((flags) & FLAG_DEINTERLEAVE_32BPP) != 0) - .set DEINTERLEAVE_32BPP_ENABLED, 1 -.else - .set DEINTERLEAVE_32BPP_ENABLED, 0 -.endif - - .macro fetch_src_pixblock - pixld_src pixblock_size, src_bpp, \ - (src_basereg - pixblock_size * src_bpp / 64), SRC - .endm - - init - mov DST_R, DST_W - - cmp W, #pixblock_size - blt 8f - - ensure_destination_ptr_alignment process_pixblock_head, \ - process_pixblock_tail, \ - process_pixblock_tail_head - - subs W, W, #pixblock_size - blt 7f - - /* Implement "head (tail_head) ... (tail_head) tail" loop pattern */ - pixld_a pixblock_size, dst_r_bpp, \ - (dst_r_basereg - pixblock_size * dst_r_bpp / 64), DST_R - fetch_src_pixblock - pixld pixblock_size, mask_bpp, \ - (mask_basereg - pixblock_size * mask_bpp / 64), MASK - process_pixblock_head - subs W, W, #pixblock_size - blt 2f -1: - process_pixblock_tail_head - subs W, W, #pixblock_size - bge 1b -2: - process_pixblock_tail - pixst_a pixblock_size, dst_w_bpp, \ - (dst_w_basereg - pixblock_size * dst_w_bpp / 64), DST_W -7: - /* Process the remaining trailing pixels in the scanline (dst aligned) */ - process_trailing_pixels 0, 1, \ - process_pixblock_head, \ - process_pixblock_tail, \ - process_pixblock_tail_head - - cleanup -.if use_nearest_scaling != 0 - pop {r4-r8, pc} /* exit */ -.else - bx lr /* exit */ -.endif -8: - /* Process the remaining trailing pixels in the scanline (dst unaligned) */ - process_trailing_pixels 0, 0, \ - process_pixblock_head, \ - process_pixblock_tail, \ - process_pixblock_tail_head - - cleanup - -.if use_nearest_scaling != 0 - pop {r4-r8, pc} /* exit */ - - .unreq DST_R - .unreq SRC - .unreq W - .unreq VX - .unreq UNIT_X - .unreq TMP1 - .unreq TMP2 - .unreq DST_W - .unreq MASK - .unreq SRC_WIDTH_FIXED - -.else - bx lr /* exit */ - - .unreq SRC - .unreq MASK - .unreq DST_R - .unreq DST_W - .unreq W -.endif - - .purgem fetch_src_pixblock - .purgem pixld_src - - .endfunc -.endm - -.macro generate_composite_function_single_scanline x:vararg - generate_composite_function_scanline 0, x -.endm - -.macro generate_composite_function_nearest_scanline x:vararg - generate_composite_function_scanline 1, x -.endm - -/* Default prologue/epilogue, nothing special needs to be done */ - -.macro default_init -.endm - -.macro default_cleanup -.endm - -/* - * Prologue/epilogue variant which additionally saves/restores d8-d15 - * registers (they need to be saved/restored by callee according to ABI). - * This is required if the code needs to use all the NEON registers. 
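Pulling the pieces of generate_composite_function together, the emitted control flow looks roughly like the following C pseudocode. This is illustrative only; the helpers stand in for the labelled blocks in the macro, and the wide/narrow decision is made once per call, not per scanline:

    #define PIXBLOCK_SIZE 8   /* typical value passed to the macro */

    static void align_dst_and_do_leading_pixels(void) {} /* ensure_destination_ptr_alignment */
    static void pipelined_main_loop(void)             {} /* head, tail_head ..., tail        */
    static void single_pixblock_if_any(void)          {} /* label 8: no pipelining           */
    static void do_trailing_pixels(void)              {} /* process_trailing_pixels          */
    static void next_scanline(void)                   {} /* advance_to_next_scanline         */

    static void generated_function_model(int width, int height)
    {
        if (height < 1)
            return;
        if (width >= 2 * PIXBLOCK_SIZE) {
            do {                                  /* label 0: wide-scanline path   */
                align_dst_and_do_leading_pixels();
                pipelined_main_loop();
                do_trailing_pixels();
                next_scanline();
            } while (--height > 0);
        } else {
            do {                                  /* label 8: narrow-scanline path */
                single_pixblock_if_any();
                do_trailing_pixels();
                next_scanline();
            } while (--height > 0);
        }
    }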
- */ - -.macro default_init_need_all_regs - vpush {d8-d15} -.endm - -.macro default_cleanup_need_all_regs - vpop {d8-d15} -.endm - -/******************************************************************************/ diff --git a/src/vector/pixman/CMakeLists.txt b/src/vector/pixman/CMakeLists.txt new file mode 100644 index 0000000..d38a094 --- /dev/null +++ b/src/vector/pixman/CMakeLists.txt @@ -0,0 +1,17 @@ +target_sources(lottie-player + PRIVATE + "${CMAKE_CURRENT_LIST_DIR}/vregion.cpp" + ) + +IF("${ARCH}" STREQUAL "arm") +SET(CMAKE_ASM_FLAGS "${CFLAGS} -x assembler-with-cpp") +target_sources(lottie-player + PRIVATE + "${CMAKE_CURRENT_LIST_DIR}/pixman-arm-neon-asm.S" + ) +ENDIF("${ARCH}" STREQUAL "arm") + +target_include_directories(lottie-player + PRIVATE + "${CMAKE_CURRENT_LIST_DIR}" + ) diff --git a/src/vector/pixman/meson.build b/src/vector/pixman/meson.build new file mode 100644 index 0000000..c45d81a --- /dev/null +++ b/src/vector/pixman/meson.build @@ -0,0 +1,7 @@ + +source_file = files('vregion.cpp') + +pixman_dep = declare_dependency( + include_directories : include_directories('.'), + sources : source_file + ) diff --git a/src/vector/pixman/pixman-arm-neon-asm.S b/src/vector/pixman/pixman-arm-neon-asm.S new file mode 100644 index 0000000..f2203c2 --- /dev/null +++ b/src/vector/pixman/pixman-arm-neon-asm.S @@ -0,0 +1,497 @@ +/* + * Copyright © 2009 Nokia Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Author: Siarhei Siamashka (siarhei.siamashka@nokia.com) + */ + +/* + * This file contains implementations of NEON optimized pixel processing + * functions. There is no full and detailed tutorial, but some functions + * (those which are exposing some new or interesting features) are + * extensively commented and can be used as examples. + * + * You may want to have a look at the comments for following functions: + * - pixman_composite_over_8888_0565_asm_neon + * - pixman_composite_over_n_8_0565_asm_neon + */ + +/* Prevent the stack from becoming executable for no reason... 
*/ +#if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack,"",%progbits +#endif + + .text + .fpu neon + .arch armv7a + .object_arch armv4 + .eabi_attribute 10, 0 /* suppress Tag_FP_arch */ + .eabi_attribute 12, 0 /* suppress Tag_Advanced_SIMD_arch */ + .arm + .altmacro + .p2align 2 + + +//#include "pixman-arm-asm.h" +/* Supplementary macro for setting function attributes */ +.macro pixman_asm_function fname + .func fname + .global fname +#ifdef __ELF__ + .hidden fname + .type fname, %function +#endif +fname: +.endm + +//#include "pixman-private.h" +/* + * The defines which are shared between C and assembly code + */ + +/* bilinear interpolation precision (must be < 8) */ +#define BILINEAR_INTERPOLATION_BITS 7 +#define BILINEAR_INTERPOLATION_RANGE (1 << BILINEAR_INTERPOLATION_BITS) + +#include "pixman-arm-neon-asm.h" + +/* Global configuration options and preferences */ + +/* + * The code can optionally make use of unaligned memory accesses to improve + * performance of handling leading/trailing pixels for each scanline. + * Configuration variable RESPECT_STRICT_ALIGNMENT can be set to 0 for + * example in linux if unaligned memory accesses are not configured to + * generate.exceptions. + */ +.set RESPECT_STRICT_ALIGNMENT, 1 + +/* + * Set default prefetch type. There is a choice between the following options: + * + * PREFETCH_TYPE_NONE (may be useful for the ARM cores where PLD is set to work + * as NOP to workaround some HW bugs or for whatever other reason) + * + * PREFETCH_TYPE_SIMPLE (may be useful for simple single-issue ARM cores where + * advanced prefetch intruduces heavy overhead) + * + * PREFETCH_TYPE_ADVANCED (useful for superscalar cores such as ARM Cortex-A8 + * which can run ARM and NEON instructions simultaneously so that extra ARM + * instructions do not add (many) extra cycles, but improve prefetch efficiency) + * + * Note: some types of function can't support advanced prefetch and fallback + * to simple one (those which handle 24bpp pixels) + */ +.set PREFETCH_TYPE_DEFAULT, PREFETCH_TYPE_ADVANCED + +/* Prefetch distance in pixels for simple prefetch */ +.set PREFETCH_DISTANCE_SIMPLE, 64 + +/* + * Implementation of pixman_composite_over_8888_0565_asm_neon + * + * This function takes a8r8g8b8 source buffer, r5g6b5 destination buffer and + * performs OVER compositing operation. Function fast_composite_over_8888_0565 + * from pixman-fast-path.c does the same in C and can be used as a reference. + * + * First we need to have some NEON assembly code which can do the actual + * operation on the pixels and provide it to the template macro. + * + * Template macro quite conveniently takes care of emitting all the necessary + * code for memory reading and writing (including quite tricky cases of + * handling unaligned leading/trailing pixels), so we only need to deal with + * the data in NEON registers. + * + * NEON registers allocation in general is recommented to be the following: + * d0, d1, d2, d3 - contain loaded source pixel data + * d4, d5, d6, d7 - contain loaded destination pixels (if they are needed) + * d24, d25, d26, d27 - contain loading mask pixel data (if mask is used) + * d28, d29, d30, d31 - place for storing the result (destination pixels) + * + * As can be seen above, four 64-bit NEON registers are used for keeping + * intermediate pixel data and up to 8 pixels can be processed in one step + * for 32bpp formats (16 pixels for 16bpp, 32 pixels for 8bpp). 
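
(The C reference mentioned in the comment above, fast_composite_over_8888_0565 from pixman-fast-path.c, is not part of this tree. As a rough illustrative sketch only — assuming premultiplied alpha and the same rounding that the vrshr.u16/vraddhn.u16 pairs in the over_* macros further down implement — the per-pixel arithmetic amounts to something like this C:)

#include <stdint.h>

/* Hedged scalar sketch of OVER for one a8r8g8b8 source pixel composited
 * onto one r5g6b5 destination pixel (premultiplied alpha assumed).
 * mul_div_255() mirrors the vrshr #8 / vraddhn rounding used by the
 * NEON code. */
static inline uint32_t mul_div_255(uint32_t x)
{
    return (x + ((x + 128) >> 8) + 128) >> 8;   /* ~ x / 255, rounded */
}

static uint16_t over_8888_0565_pixel(uint32_t src, uint16_t dst)
{
    uint32_t sa = src >> 24,        sr = (src >> 16) & 0xff;
    uint32_t sg = (src >> 8) & 0xff, sb = src & 0xff;

    /* expand the r5g6b5 destination to 8 bits per channel */
    uint32_t dr = ((dst >> 11) & 0x1f) << 3; dr |= dr >> 5;
    uint32_t dg = ((dst >>  5) & 0x3f) << 2; dg |= dg >> 6;
    uint32_t db = ( dst        & 0x1f) << 3; db |= db >> 5;

    uint32_t ia = 255 - sa;                   /* inverted source alpha */
    dr = sr + mul_div_255(dr * ia);
    dg = sg + mul_div_255(dg * ia);
    db = sb + mul_div_255(db * ia);

    /* repack to r5g6b5 */
    return (uint16_t)(((dr & 0xf8) << 8) | ((dg & 0xfc) << 3) | (db >> 3));
}
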
+ * + * This particular function uses the following registers allocation: + * d0, d1, d2, d3 - contain loaded source pixel data + * d4, d5 - contain loaded destination pixels (they are needed) + * d28, d29 - place for storing the result (destination pixels) + */ + +/* + * Step one. We need to have some code to do some arithmetics on pixel data. + * This is implemented as a pair of macros: '*_head' and '*_tail'. When used + * back-to-back, they take pixel data from {d0, d1, d2, d3} and {d4, d5}, + * perform all the needed calculations and write the result to {d28, d29}. + * The rationale for having two macros and not just one will be explained + * later. In practice, any single monolitic function which does the work can + * be split into two parts in any arbitrary way without affecting correctness. + * + * There is one special trick here too. Common template macro can optionally + * make our life a bit easier by doing R, G, B, A color components + * deinterleaving for 32bpp pixel formats (and this feature is used in + * 'pixman_composite_over_8888_0565_asm_neon' function). So it means that + * instead of having 8 packed pixels in {d0, d1, d2, d3} registers, we + * actually use d0 register for blue channel (a vector of eight 8-bit + * values), d1 register for green, d2 for red and d3 for alpha. This + * simple conversion can be also done with a few NEON instructions: + * + * Packed to planar conversion: + * vuzp.8 d0, d1 + * vuzp.8 d2, d3 + * vuzp.8 d1, d3 + * vuzp.8 d0, d2 + * + * Planar to packed conversion: + * vzip.8 d0, d2 + * vzip.8 d1, d3 + * vzip.8 d2, d3 + * vzip.8 d0, d1 + * + * But pixel can be loaded directly in planar format using VLD4.8 NEON + * instruction. It is 1 cycle slower than VLD1.32, so this is not always + * desirable, that's why deinterleaving is optional. + * + * But anyway, here is the code: + */ + +/* + * OK, now we got almost everything that we need. Using the above two + * macros, the work can be done right. But now we want to optimize + * it a bit. ARM Cortex-A8 is an in-order core, and benefits really + * a lot from good code scheduling and software pipelining. + * + * Let's construct some code, which will run in the core main loop. + * Some pseudo-code of the main loop will look like this: + * head + * while (...) { + * tail + * head + * } + * tail + * + * It may look a bit weird, but this setup allows to hide instruction + * latencies better and also utilize dual-issue capability more + * efficiently (make pairs of load-store and ALU instructions). + * + * So what we need now is a '*_tail_head' macro, which will be used + * in the core main loop. A trivial straightforward implementation + * of this macro would look like this: + * + * pixman_composite_over_8888_0565_process_pixblock_tail + * vst1.16 {d28, d29}, [DST_W, :128]! + * vld1.16 {d4, d5}, [DST_R, :128]! + * vld4.32 {d0, d1, d2, d3}, [SRC]! + * pixman_composite_over_8888_0565_process_pixblock_head + * cache_preload 8, 8 + * + * Now it also got some VLD/VST instructions. We simply can't move from + * processing one block of pixels to the other one with just arithmetics. + * The previously processed data needs to be written to memory and new + * data needs to be fetched. Fortunately, this main loop does not deal + * with partial leading/trailing pixels and can load/store a full block + * of pixels in a bulk. Additionally, destination buffer is already + * 16 bytes aligned here (which is good for performance). + * + * New things here are DST_R, DST_W, SRC and MASK identifiers. 
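
(The same pipelining pattern expressed as a C skeleton — the helper names are invented stand-ins for the assembler macros, so treat this purely as an illustration of the loop shape described above:)

/* 'head' starts the arithmetic for the current pixel block, 'tail'
 * finishes it, and 'tail_head' fuses the tail of block i with the head
 * of block i+1 plus the interleaved VLD/VST traffic. */
static void head(void)      { /* ... */ }
static void tail(void)      { /* ... */ }
static void tail_head(void) { /* ... */ }

static void pipelined_main_loop_sketch(int blocks)
{
    head();                   /* prologue: start block 0                 */
    while (--blocks > 0)
        tail_head();          /* steady state: finish block i, start i+1 */
    tail();                   /* epilogue: finish the last block         */
}

(For a single block this degenerates to plain head + tail; the fused steady state is what lets loads, stores and arithmetic overlap on an in-order core.)
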
These + * are the aliases for ARM registers which are used as pointers for + * accessing data. We maintain separate pointers for reading and writing + * destination buffer (DST_R and DST_W). + * + * Another new thing is 'cache_preload' macro. It is used for prefetching + * data into CPU L2 cache and improve performance when dealing with large + * images which are far larger than cache size. It uses one argument + * (actually two, but they need to be the same here) - number of pixels + * in a block. Looking into 'pixman-arm-neon-asm.h' can provide some + * details about this macro. Moreover, if good performance is needed + * the code from this macro needs to be copied into '*_tail_head' macro + * and mixed with the rest of code for optimal instructions scheduling. + * We are actually doing it below. + * + * Now after all the explanations, here is the optimized code. + * Different instruction streams (originaling from '*_head', '*_tail' + * and 'cache_preload' macro) use different indentation levels for + * better readability. Actually taking the code from one of these + * indentation levels and ignoring a few VLD/VST instructions would + * result in exactly the code from '*_head', '*_tail' or 'cache_preload' + * macro! + */ + +/* + * And now the final part. We are using 'generate_composite_function' macro + * to put all the stuff together. We are specifying the name of the function + * which we want to get, number of bits per pixel for the source, mask and + * destination (0 if unused, like mask in this case). Next come some bit + * flags: + * FLAG_DST_READWRITE - tells that the destination buffer is both read + * and written, for write-only buffer we would use + * FLAG_DST_WRITEONLY flag instead + * FLAG_DEINTERLEAVE_32BPP - tells that we prefer to work with planar data + * and separate color channels for 32bpp format. + * The next things are: + * - the number of pixels processed per iteration (8 in this case, because + * that's the maximum what can fit into four 64-bit NEON registers). + * - prefetch distance, measured in pixel blocks. In this case it is 5 times + * by 8 pixels. That would be 40 pixels, or up to 160 bytes. Optimal + * prefetch distance can be selected by running some benchmarks. + * + * After that we specify some macros, these are 'default_init', + * 'default_cleanup' here which are empty (but it is possible to have custom + * init/cleanup macros to be able to save/restore some extra NEON registers + * like d8-d15 or do anything else) followed by + * 'pixman_composite_over_8888_0565_process_pixblock_head', + * 'pixman_composite_over_8888_0565_process_pixblock_tail' and + * 'pixman_composite_over_8888_0565_process_pixblock_tail_head' + * which we got implemented above. + * + * The last part is the NEON registers allocation scheme. 
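
(For orientation, the overall control flow that 'generate_composite_function' expands to can be outlined in C roughly as follows. The helper names are illustrative stubs; the real steps are the ensure_destination_ptr_alignment, process_trailing_pixels and advance_to_next_scanline macros defined in pixman-arm-neon-asm.h:)

enum { PIXBLOCK_SIZE = 8 };                              /* 8 px/block here */
static int  process_leading_pixels_until_aligned(void)  { return 0; }
static void process_one_pixblock(void)                  { }
static void process_trailing(int w)                     { (void)w; }
static void advance_to_next_scanline(void)              { }

/* Rough per-image outline of a generated function (illustrative only). */
static void generated_function_outline(int width, int height)
{
    for (int y = 0; y < height; y++) {
        int w = width;
        /* 1. leading pixels until DST is 16-byte aligned               */
        w -= process_leading_pixels_until_aligned();
        /* 2. pipelined main loop over whole pixel blocks               */
        while (w >= PIXBLOCK_SIZE) {
            process_one_pixblock();      /* head / tail_head / tail     */
            w -= PIXBLOCK_SIZE;
        }
        /* 3. up to PIXBLOCK_SIZE-1 trailing pixels                     */
        process_trailing(w);
        /* 4. add strides, rewind to the start of the next scanline     */
        advance_to_next_scanline();
    }
}
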
+ */ + +/******************************************************************************/ + +/******************************************************************************/ + .macro pixman_composite_out_reverse_8888_8888_process_pixblock_head + vmvn.8 d24, d3 /* get inverted alpha */ + /* do alpha blending */ + vmull.u8 q8, d24, d4 + vmull.u8 q9, d24, d5 + vmull.u8 q10, d24, d6 + vmull.u8 q11, d24, d7 + .endm + + .macro pixman_composite_out_reverse_8888_8888_process_pixblock_tail + vrshr.u16 q14, q8, #8 + vrshr.u16 q15, q9, #8 + vrshr.u16 q12, q10, #8 + vrshr.u16 q13, q11, #8 + vraddhn.u16 d28, q14, q8 + vraddhn.u16 d29, q15, q9 + vraddhn.u16 d30, q12, q10 + vraddhn.u16 d31, q13, q11 + .endm + +/******************************************************************************/ + +.macro pixman_composite_over_8888_8888_process_pixblock_head + pixman_composite_out_reverse_8888_8888_process_pixblock_head +.endm + +.macro pixman_composite_over_8888_8888_process_pixblock_tail + pixman_composite_out_reverse_8888_8888_process_pixblock_tail + vqadd.u8 q14, q0, q14 + vqadd.u8 q15, q1, q15 +.endm + +.macro pixman_composite_over_8888_8888_process_pixblock_tail_head + vld4.8 {d4, d5, d6, d7}, [DST_R, :128]! + vrshr.u16 q14, q8, #8 + PF add PF_X, PF_X, #8 + PF tst PF_CTL, #0xF + vrshr.u16 q15, q9, #8 + vrshr.u16 q12, q10, #8 + vrshr.u16 q13, q11, #8 + PF addne PF_X, PF_X, #8 + PF subne PF_CTL, PF_CTL, #1 + vraddhn.u16 d28, q14, q8 + vraddhn.u16 d29, q15, q9 + PF cmp PF_X, ORIG_W + vraddhn.u16 d30, q12, q10 + vraddhn.u16 d31, q13, q11 + vqadd.u8 q14, q0, q14 + vqadd.u8 q15, q1, q15 + fetch_src_pixblock + PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift] + vmvn.8 d22, d3 + PF pld, [PF_DST, PF_X, lsl #dst_bpp_shift] + vst4.8 {d28, d29, d30, d31}, [DST_W, :128]! + PF subge PF_X, PF_X, ORIG_W + vmull.u8 q8, d22, d4 + PF subges PF_CTL, PF_CTL, #0x10 + vmull.u8 q9, d22, d5 + PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]! + vmull.u8 q10, d22, d6 + PF ldrgeb DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]! 
+ vmull.u8 q11, d22, d7 +.endm + +generate_composite_function \ + pixman_composite_over_8888_8888_asm_neon, 32, 0, 32, \ + FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ + 8, /* number of pixels, processed in a single block */ \ + 5, /* prefetch distance */ \ + default_init, \ + default_cleanup, \ + pixman_composite_over_8888_8888_process_pixblock_head, \ + pixman_composite_over_8888_8888_process_pixblock_tail, \ + pixman_composite_over_8888_8888_process_pixblock_tail_head + +generate_composite_function_single_scanline \ + pixman_composite_scanline_over_asm_neon, 32, 0, 32, \ + FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ + 8, /* number of pixels, processed in a single block */ \ + default_init, \ + default_cleanup, \ + pixman_composite_over_8888_8888_process_pixblock_head, \ + pixman_composite_over_8888_8888_process_pixblock_tail, \ + pixman_composite_over_8888_8888_process_pixblock_tail_head + +/******************************************************************************/ + +.macro pixman_composite_over_n_8888_process_pixblock_head + /* deinterleaved source pixels in {d0, d1, d2, d3} */ + /* inverted alpha in {d24} */ + /* destination pixels in {d4, d5, d6, d7} */ + vmull.u8 q8, d24, d4 + vmull.u8 q9, d24, d5 + vmull.u8 q10, d24, d6 + vmull.u8 q11, d24, d7 +.endm + +.macro pixman_composite_over_n_8888_process_pixblock_tail + vrshr.u16 q14, q8, #8 + vrshr.u16 q15, q9, #8 + vrshr.u16 q2, q10, #8 + vrshr.u16 q3, q11, #8 + vraddhn.u16 d28, q14, q8 + vraddhn.u16 d29, q15, q9 + vraddhn.u16 d30, q2, q10 + vraddhn.u16 d31, q3, q11 + vqadd.u8 q14, q0, q14 + vqadd.u8 q15, q1, q15 +.endm + +.macro pixman_composite_over_n_8888_process_pixblock_tail_head + vrshr.u16 q14, q8, #8 + vrshr.u16 q15, q9, #8 + vrshr.u16 q2, q10, #8 + vrshr.u16 q3, q11, #8 + vraddhn.u16 d28, q14, q8 + vraddhn.u16 d29, q15, q9 + vraddhn.u16 d30, q2, q10 + vraddhn.u16 d31, q3, q11 + vld4.8 {d4, d5, d6, d7}, [DST_R, :128]! + vqadd.u8 q14, q0, q14 + PF add PF_X, PF_X, #8 + PF tst PF_CTL, #0x0F + PF addne PF_X, PF_X, #8 + PF subne PF_CTL, PF_CTL, #1 + vqadd.u8 q15, q1, q15 + PF cmp PF_X, ORIG_W + vmull.u8 q8, d24, d4 + PF pld, [PF_DST, PF_X, lsl #dst_bpp_shift] + vmull.u8 q9, d24, d5 + PF subge PF_X, PF_X, ORIG_W + vmull.u8 q10, d24, d6 + PF subges PF_CTL, PF_CTL, #0x10 + vmull.u8 q11, d24, d7 + PF ldrgeb DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]! + vst4.8 {d28, d29, d30, d31}, [DST_W, :128]! +.endm + +.macro pixman_composite_over_n_8888_init + add DUMMY, sp, #ARGS_STACK_OFFSET + vld1.32 {d3[0]}, [DUMMY] + vdup.8 d0, d3[0] + vdup.8 d1, d3[1] + vdup.8 d2, d3[2] + vdup.8 d3, d3[3] + vmvn.8 d24, d3 /* get inverted alpha */ +.endm + +generate_composite_function \ + pixman_composite_over_n_8888_asm_neon, 0, 0, 32, \ + FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ + 8, /* number of pixels, processed in a single block */ \ + 5, /* prefetch distance */ \ + pixman_composite_over_n_8888_init, \ + default_cleanup, \ + pixman_composite_over_8888_8888_process_pixblock_head, \ + pixman_composite_over_8888_8888_process_pixblock_tail, \ + pixman_composite_over_n_8888_process_pixblock_tail_head + +/******************************************************************************/ + +.macro pixman_composite_src_n_8888_process_pixblock_head +.endm + +.macro pixman_composite_src_n_8888_process_pixblock_tail +.endm + +.macro pixman_composite_src_n_8888_process_pixblock_tail_head + vst1.32 {d0, d1, d2, d3}, [DST_W, :128]! 
+.endm + +.macro pixman_composite_src_n_8888_init + add DUMMY, sp, #ARGS_STACK_OFFSET + vld1.32 {d0[0]}, [DUMMY] + vsli.u64 d0, d0, #32 + vorr d1, d0, d0 + vorr q1, q0, q0 +.endm + +.macro pixman_composite_src_n_8888_cleanup +.endm + +generate_composite_function \ + pixman_composite_src_n_8888_asm_neon, 0, 0, 32, \ + FLAG_DST_WRITEONLY, \ + 8, /* number of pixels, processed in a single block */ \ + 0, /* prefetch distance */ \ + pixman_composite_src_n_8888_init, \ + pixman_composite_src_n_8888_cleanup, \ + pixman_composite_src_n_8888_process_pixblock_head, \ + pixman_composite_src_n_8888_process_pixblock_tail, \ + pixman_composite_src_n_8888_process_pixblock_tail_head, \ + 0, /* dst_w_basereg */ \ + 0, /* dst_r_basereg */ \ + 0, /* src_basereg */ \ + 0 /* mask_basereg */ + +/******************************************************************************/ + +.macro pixman_composite_src_8888_8888_process_pixblock_head +.endm + +.macro pixman_composite_src_8888_8888_process_pixblock_tail +.endm + +.macro pixman_composite_src_8888_8888_process_pixblock_tail_head + vst1.32 {d0, d1, d2, d3}, [DST_W, :128]! + fetch_src_pixblock + cache_preload 8, 8 +.endm + +generate_composite_function \ + pixman_composite_src_8888_8888_asm_neon, 32, 0, 32, \ + FLAG_DST_WRITEONLY, \ + 8, /* number of pixels, processed in a single block */ \ + 10, /* prefetch distance */ \ + default_init, \ + default_cleanup, \ + pixman_composite_src_8888_8888_process_pixblock_head, \ + pixman_composite_src_8888_8888_process_pixblock_tail, \ + pixman_composite_src_8888_8888_process_pixblock_tail_head, \ + 0, /* dst_w_basereg */ \ + 0, /* dst_r_basereg */ \ + 0, /* src_basereg */ \ + 0 /* mask_basereg */ + +/******************************************************************************/ diff --git a/src/vector/pixman/pixman-arm-neon-asm.h b/src/vector/pixman/pixman-arm-neon-asm.h new file mode 100644 index 0000000..6add220 --- /dev/null +++ b/src/vector/pixman/pixman-arm-neon-asm.h @@ -0,0 +1,1126 @@ +/* + * Copyright © 2009 Nokia Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Author: Siarhei Siamashka (siarhei.siamashka@nokia.com) + */ + +/* + * This file contains a macro ('generate_composite_function') which can + * construct 2D image processing functions, based on a common template. + * Any combinations of source, destination and mask images with 8bpp, + * 16bpp, 24bpp, 32bpp color formats are supported. 
+ * + * This macro takes care of: + * - handling of leading and trailing unaligned pixels + * - doing most of the work related to L2 cache preload + * - encourages the use of software pipelining for better instructions + * scheduling + * + * The user of this macro has to provide some configuration parameters + * (bit depths for the images, prefetch distance, etc.) and a set of + * macros, which should implement basic code chunks responsible for + * pixels processing. See 'pixman-arm-neon-asm.S' file for the usage + * examples. + * + * TODO: + * - try overlapped pixel method (from Ian Rickards) when processing + * exactly two blocks of pixels + * - maybe add an option to do reverse scanline processing + */ + +/* + * Bit flags for 'generate_composite_function' macro which are used + * to tune generated functions behavior. + */ +.set FLAG_DST_WRITEONLY, 0 +.set FLAG_DST_READWRITE, 1 +.set FLAG_DEINTERLEAVE_32BPP, 2 + +/* + * Offset in stack where mask and source pointer/stride can be accessed + * from 'init' macro. This is useful for doing special handling for solid mask. + */ +.set ARGS_STACK_OFFSET, 40 + +/* + * Constants for selecting preferable prefetch type. + */ +.set PREFETCH_TYPE_NONE, 0 /* No prefetch at all */ +.set PREFETCH_TYPE_SIMPLE, 1 /* A simple, fixed-distance-ahead prefetch */ +.set PREFETCH_TYPE_ADVANCED, 2 /* Advanced fine-grained prefetch */ + +/* + * Definitions of supplementary pixld/pixst macros (for partial load/store of + * pixel data). + */ + +.macro pixldst1 op, elem_size, reg1, mem_operand, abits +.if abits > 0 + op&.&elem_size {d®1}, [&mem_operand&, :&abits&]! +.else + op&.&elem_size {d®1}, [&mem_operand&]! +.endif +.endm + +.macro pixldst2 op, elem_size, reg1, reg2, mem_operand, abits +.if abits > 0 + op&.&elem_size {d®1, d®2}, [&mem_operand&, :&abits&]! +.else + op&.&elem_size {d®1, d®2}, [&mem_operand&]! +.endif +.endm + +.macro pixldst4 op, elem_size, reg1, reg2, reg3, reg4, mem_operand, abits +.if abits > 0 + op&.&elem_size {d®1, d®2, d®3, d®4}, [&mem_operand&, :&abits&]! +.else + op&.&elem_size {d®1, d®2, d®3, d®4}, [&mem_operand&]! +.endif +.endm + +.macro pixldst0 op, elem_size, reg1, idx, mem_operand, abits + op&.&elem_size {d®1[idx]}, [&mem_operand&]! +.endm + +.macro pixldst3 op, elem_size, reg1, reg2, reg3, mem_operand + op&.&elem_size {d®1, d®2, d®3}, [&mem_operand&]! +.endm + +.macro pixldst30 op, elem_size, reg1, reg2, reg3, idx, mem_operand + op&.&elem_size {d®1[idx], d®2[idx], d®3[idx]}, [&mem_operand&]! 
+.endm + +.macro pixldst numbytes, op, elem_size, basereg, mem_operand, abits +.if numbytes == 32 + pixldst4 op, elem_size, %(basereg+4), %(basereg+5), \ + %(basereg+6), %(basereg+7), mem_operand, abits +.elseif numbytes == 16 + pixldst2 op, elem_size, %(basereg+2), %(basereg+3), mem_operand, abits +.elseif numbytes == 8 + pixldst1 op, elem_size, %(basereg+1), mem_operand, abits +.elseif numbytes == 4 + .if !RESPECT_STRICT_ALIGNMENT || (elem_size == 32) + pixldst0 op, 32, %(basereg+0), 1, mem_operand, abits + .elseif elem_size == 16 + pixldst0 op, 16, %(basereg+0), 2, mem_operand, abits + pixldst0 op, 16, %(basereg+0), 3, mem_operand, abits + .else + pixldst0 op, 8, %(basereg+0), 4, mem_operand, abits + pixldst0 op, 8, %(basereg+0), 5, mem_operand, abits + pixldst0 op, 8, %(basereg+0), 6, mem_operand, abits + pixldst0 op, 8, %(basereg+0), 7, mem_operand, abits + .endif +.elseif numbytes == 2 + .if !RESPECT_STRICT_ALIGNMENT || (elem_size == 16) + pixldst0 op, 16, %(basereg+0), 1, mem_operand, abits + .else + pixldst0 op, 8, %(basereg+0), 2, mem_operand, abits + pixldst0 op, 8, %(basereg+0), 3, mem_operand, abits + .endif +.elseif numbytes == 1 + pixldst0 op, 8, %(basereg+0), 1, mem_operand, abits +.else + .error "unsupported size: numbytes" +.endif +.endm + +.macro pixld numpix, bpp, basereg, mem_operand, abits=0 +.if bpp > 0 +.if (bpp == 32) && (numpix == 8) && (DEINTERLEAVE_32BPP_ENABLED != 0) + pixldst4 vld4, 8, %(basereg+4), %(basereg+5), \ + %(basereg+6), %(basereg+7), mem_operand, abits +.elseif (bpp == 24) && (numpix == 8) + pixldst3 vld3, 8, %(basereg+3), %(basereg+4), %(basereg+5), mem_operand +.elseif (bpp == 24) && (numpix == 4) + pixldst30 vld3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 4, mem_operand + pixldst30 vld3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 5, mem_operand + pixldst30 vld3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 6, mem_operand + pixldst30 vld3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 7, mem_operand +.elseif (bpp == 24) && (numpix == 2) + pixldst30 vld3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 2, mem_operand + pixldst30 vld3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 3, mem_operand +.elseif (bpp == 24) && (numpix == 1) + pixldst30 vld3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 1, mem_operand +.else + pixldst %(numpix * bpp / 8), vld1, %(bpp), basereg, mem_operand, abits +.endif +.endif +.endm + +.macro pixst numpix, bpp, basereg, mem_operand, abits=0 +.if bpp > 0 +.if (bpp == 32) && (numpix == 8) && (DEINTERLEAVE_32BPP_ENABLED != 0) + pixldst4 vst4, 8, %(basereg+4), %(basereg+5), \ + %(basereg+6), %(basereg+7), mem_operand, abits +.elseif (bpp == 24) && (numpix == 8) + pixldst3 vst3, 8, %(basereg+3), %(basereg+4), %(basereg+5), mem_operand +.elseif (bpp == 24) && (numpix == 4) + pixldst30 vst3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 4, mem_operand + pixldst30 vst3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 5, mem_operand + pixldst30 vst3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 6, mem_operand + pixldst30 vst3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 7, mem_operand +.elseif (bpp == 24) && (numpix == 2) + pixldst30 vst3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 2, mem_operand + pixldst30 vst3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 3, mem_operand +.elseif (bpp == 24) && (numpix == 1) + pixldst30 vst3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 1, mem_operand +.else + pixldst %(numpix * bpp / 8), vst1, %(bpp), basereg, mem_operand, abits +.endif +.endif +.endm + +.macro pixld_a numpix, bpp, basereg, 
mem_operand +.if (bpp * numpix) <= 128 + pixld numpix, bpp, basereg, mem_operand, %(bpp * numpix) +.else + pixld numpix, bpp, basereg, mem_operand, 128 +.endif +.endm + +.macro pixst_a numpix, bpp, basereg, mem_operand +.if (bpp * numpix) <= 128 + pixst numpix, bpp, basereg, mem_operand, %(bpp * numpix) +.else + pixst numpix, bpp, basereg, mem_operand, 128 +.endif +.endm + +/* + * Pixel fetcher for nearest scaling (needs TMP1, TMP2, VX, UNIT_X register + * aliases to be defined) + */ +.macro pixld1_s elem_size, reg1, mem_operand +.if elem_size == 16 + mov TMP1, VX, asr #16 + adds VX, VX, UNIT_X +5: subpls VX, VX, SRC_WIDTH_FIXED + bpl 5b + add TMP1, mem_operand, TMP1, asl #1 + mov TMP2, VX, asr #16 + adds VX, VX, UNIT_X +5: subpls VX, VX, SRC_WIDTH_FIXED + bpl 5b + add TMP2, mem_operand, TMP2, asl #1 + vld1.16 {d®1&[0]}, [TMP1, :16] + mov TMP1, VX, asr #16 + adds VX, VX, UNIT_X +5: subpls VX, VX, SRC_WIDTH_FIXED + bpl 5b + add TMP1, mem_operand, TMP1, asl #1 + vld1.16 {d®1&[1]}, [TMP2, :16] + mov TMP2, VX, asr #16 + adds VX, VX, UNIT_X +5: subpls VX, VX, SRC_WIDTH_FIXED + bpl 5b + add TMP2, mem_operand, TMP2, asl #1 + vld1.16 {d®1&[2]}, [TMP1, :16] + vld1.16 {d®1&[3]}, [TMP2, :16] +.elseif elem_size == 32 + mov TMP1, VX, asr #16 + adds VX, VX, UNIT_X +5: subpls VX, VX, SRC_WIDTH_FIXED + bpl 5b + add TMP1, mem_operand, TMP1, asl #2 + mov TMP2, VX, asr #16 + adds VX, VX, UNIT_X +5: subpls VX, VX, SRC_WIDTH_FIXED + bpl 5b + add TMP2, mem_operand, TMP2, asl #2 + vld1.32 {d®1&[0]}, [TMP1, :32] + vld1.32 {d®1&[1]}, [TMP2, :32] +.else + .error "unsupported" +.endif +.endm + +.macro pixld2_s elem_size, reg1, reg2, mem_operand +.if 0 /* elem_size == 32 */ + mov TMP1, VX, asr #16 + add VX, VX, UNIT_X, asl #1 + add TMP1, mem_operand, TMP1, asl #2 + mov TMP2, VX, asr #16 + sub VX, VX, UNIT_X + add TMP2, mem_operand, TMP2, asl #2 + vld1.32 {d®1&[0]}, [TMP1, :32] + mov TMP1, VX, asr #16 + add VX, VX, UNIT_X, asl #1 + add TMP1, mem_operand, TMP1, asl #2 + vld1.32 {d®2&[0]}, [TMP2, :32] + mov TMP2, VX, asr #16 + add VX, VX, UNIT_X + add TMP2, mem_operand, TMP2, asl #2 + vld1.32 {d®1&[1]}, [TMP1, :32] + vld1.32 {d®2&[1]}, [TMP2, :32] +.else + pixld1_s elem_size, reg1, mem_operand + pixld1_s elem_size, reg2, mem_operand +.endif +.endm + +.macro pixld0_s elem_size, reg1, idx, mem_operand +.if elem_size == 16 + mov TMP1, VX, asr #16 + adds VX, VX, UNIT_X +5: subpls VX, VX, SRC_WIDTH_FIXED + bpl 5b + add TMP1, mem_operand, TMP1, asl #1 + vld1.16 {d®1&[idx]}, [TMP1, :16] +.elseif elem_size == 32 + mov TMP1, VX, asr #16 + adds VX, VX, UNIT_X +5: subpls VX, VX, SRC_WIDTH_FIXED + bpl 5b + add TMP1, mem_operand, TMP1, asl #2 + vld1.32 {d®1&[idx]}, [TMP1, :32] +.endif +.endm + +.macro pixld_s_internal numbytes, elem_size, basereg, mem_operand +.if numbytes == 32 + pixld2_s elem_size, %(basereg+4), %(basereg+5), mem_operand + pixld2_s elem_size, %(basereg+6), %(basereg+7), mem_operand + pixdeinterleave elem_size, %(basereg+4) +.elseif numbytes == 16 + pixld2_s elem_size, %(basereg+2), %(basereg+3), mem_operand +.elseif numbytes == 8 + pixld1_s elem_size, %(basereg+1), mem_operand +.elseif numbytes == 4 + .if elem_size == 32 + pixld0_s elem_size, %(basereg+0), 1, mem_operand + .elseif elem_size == 16 + pixld0_s elem_size, %(basereg+0), 2, mem_operand + pixld0_s elem_size, %(basereg+0), 3, mem_operand + .else + pixld0_s elem_size, %(basereg+0), 4, mem_operand + pixld0_s elem_size, %(basereg+0), 5, mem_operand + pixld0_s elem_size, %(basereg+0), 6, mem_operand + pixld0_s elem_size, %(basereg+0), 7, mem_operand + .endif 
+.elseif numbytes == 2 + .if elem_size == 16 + pixld0_s elem_size, %(basereg+0), 1, mem_operand + .else + pixld0_s elem_size, %(basereg+0), 2, mem_operand + pixld0_s elem_size, %(basereg+0), 3, mem_operand + .endif +.elseif numbytes == 1 + pixld0_s elem_size, %(basereg+0), 1, mem_operand +.else + .error "unsupported size: numbytes" +.endif +.endm + +.macro pixld_s numpix, bpp, basereg, mem_operand +.if bpp > 0 + pixld_s_internal %(numpix * bpp / 8), %(bpp), basereg, mem_operand +.endif +.endm + +.macro vuzp8 reg1, reg2 + vuzp.8 d®1, d®2 +.endm + +.macro vzip8 reg1, reg2 + vzip.8 d®1, d®2 +.endm + +/* deinterleave B, G, R, A channels for eight 32bpp pixels in 4 registers */ +.macro pixdeinterleave bpp, basereg +.if (bpp == 32) && (DEINTERLEAVE_32BPP_ENABLED != 0) + vuzp8 %(basereg+0), %(basereg+1) + vuzp8 %(basereg+2), %(basereg+3) + vuzp8 %(basereg+1), %(basereg+3) + vuzp8 %(basereg+0), %(basereg+2) +.endif +.endm + +/* interleave B, G, R, A channels for eight 32bpp pixels in 4 registers */ +.macro pixinterleave bpp, basereg +.if (bpp == 32) && (DEINTERLEAVE_32BPP_ENABLED != 0) + vzip8 %(basereg+0), %(basereg+2) + vzip8 %(basereg+1), %(basereg+3) + vzip8 %(basereg+2), %(basereg+3) + vzip8 %(basereg+0), %(basereg+1) +.endif +.endm + +/* + * This is a macro for implementing cache preload. The main idea is that + * cache preload logic is mostly independent from the rest of pixels + * processing code. It starts at the top left pixel and moves forward + * across pixels and can jump across scanlines. Prefetch distance is + * handled in an 'incremental' way: it starts from 0 and advances to the + * optimal distance over time. After reaching optimal prefetch distance, + * it is kept constant. There are some checks which prevent prefetching + * unneeded pixel lines below the image (but it still can prefetch a bit + * more data on the right side of the image - not a big issue and may + * be actually helpful when rendering text glyphs). Additional trick is + * the use of LDR instruction for prefetch instead of PLD when moving to + * the next line, the point is that we have a high chance of getting TLB + * miss in this case, and PLD would be useless. + * + * This sounds like it may introduce a noticeable overhead (when working with + * fully cached data). But in reality, due to having a separate pipeline and + * instruction queue for NEON unit in ARM Cortex-A8, normal ARM code can + * execute simultaneously with NEON and be completely shadowed by it. Thus + * we get no performance overhead at all (*). This looks like a very nice + * feature of Cortex-A8, if used wisely. We don't have a hardware prefetcher, + * but still can implement some rather advanced prefetch logic in software + * for almost zero cost! + * + * (*) The overhead of the prefetcher is visible when running some trivial + * pixels processing like simple copy. Anyway, having prefetch is a must + * when working with the graphics data. 
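
(A conceptual C model of this prefetcher, with field names mirroring the register aliases used by the cache_preload macro below: PF_X, PF_CTL, PF_SRC, PF_DST, ORIG_W. The conditional execution and bpp shifts of the real code are dropped, so this is a sketch only:)

#include <stdint.h>

struct prefetcher {
    int pf_x;                      /* prefetch position in current line  */
    int pf_ctl;                    /* low nibble: distance ramp budget,  */
                                   /* upper bits: scanlines left         */
    const uint8_t *pf_src, *pf_dst;
    int orig_w, src_stride, dst_stride;
};

static void cache_preload_model(struct prefetcher *p, int pixblock_size)
{
    p->pf_x += pixblock_size;              /* keep pace with rendering   */
    if (p->pf_ctl & 0x0f) {                /* still ramping the distance */
        p->pf_x += pixblock_size;
        p->pf_ctl -= 1;
    }
    __builtin_prefetch(p->pf_src + p->pf_x);   /* PLD equivalents        */
    __builtin_prefetch(p->pf_dst + p->pf_x);
    if (p->pf_x >= p->orig_w) {            /* crossed the scanline end   */
        p->pf_x   -= p->orig_w;
        p->pf_ctl -= 0x10;                 /* one prefetched line done   */
        if (p->pf_ctl >= 0) {
            p->pf_src += p->src_stride;    /* the asm uses a plain LDR   */
            p->pf_dst += p->dst_stride;    /* here so the TLB is warmed  */
            (void)*(volatile const uint8_t *)p->pf_src;
            (void)*(volatile const uint8_t *)p->pf_dst;
        }
    }
}
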
+ */ +.macro PF a, x:vararg +.if (PREFETCH_TYPE_CURRENT == PREFETCH_TYPE_ADVANCED) + a x +.endif +.endm + +.macro cache_preload std_increment, boost_increment +.if (src_bpp_shift >= 0) || (dst_r_bpp != 0) || (mask_bpp_shift >= 0) +.if regs_shortage + PF ldr ORIG_W, [sp] /* If we are short on regs, ORIG_W is kept on stack */ +.endif +.if std_increment != 0 + PF add PF_X, PF_X, #std_increment +.endif + PF tst PF_CTL, #0xF + PF addne PF_X, PF_X, #boost_increment + PF subne PF_CTL, PF_CTL, #1 + PF cmp PF_X, ORIG_W +.if src_bpp_shift >= 0 + PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift] +.endif +.if dst_r_bpp != 0 + PF pld, [PF_DST, PF_X, lsl #dst_bpp_shift] +.endif +.if mask_bpp_shift >= 0 + PF pld, [PF_MASK, PF_X, lsl #mask_bpp_shift] +.endif + PF subge PF_X, PF_X, ORIG_W + PF subges PF_CTL, PF_CTL, #0x10 +.if src_bpp_shift >= 0 + PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]! +.endif +.if dst_r_bpp != 0 + PF ldrgeb DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]! +.endif +.if mask_bpp_shift >= 0 + PF ldrgeb DUMMY, [PF_MASK, MASK_STRIDE, lsl #mask_bpp_shift]! +.endif +.endif +.endm + +.macro cache_preload_simple +.if (PREFETCH_TYPE_CURRENT == PREFETCH_TYPE_SIMPLE) +.if src_bpp > 0 + pld [SRC, #(PREFETCH_DISTANCE_SIMPLE * src_bpp / 8)] +.endif +.if dst_r_bpp > 0 + pld [DST_R, #(PREFETCH_DISTANCE_SIMPLE * dst_r_bpp / 8)] +.endif +.if mask_bpp > 0 + pld [MASK, #(PREFETCH_DISTANCE_SIMPLE * mask_bpp / 8)] +.endif +.endif +.endm + +.macro fetch_mask_pixblock + pixld pixblock_size, mask_bpp, \ + (mask_basereg - pixblock_size * mask_bpp / 64), MASK +.endm + +/* + * Macro which is used to process leading pixels until destination + * pointer is properly aligned (at 16 bytes boundary). When destination + * buffer uses 16bpp format, this is unnecessary, or even pointless. + */ +.macro ensure_destination_ptr_alignment process_pixblock_head, \ + process_pixblock_tail, \ + process_pixblock_tail_head +.if dst_w_bpp != 24 + tst DST_R, #0xF + beq 2f + +.irp lowbit, 1, 2, 4, 8, 16 +local skip1 +.if (dst_w_bpp <= (lowbit * 8)) && ((lowbit * 8) < (pixblock_size * dst_w_bpp)) +.if lowbit < 16 /* we don't need more than 16-byte alignment */ + tst DST_R, #lowbit + beq 1f +.endif + pixld_src (lowbit * 8 / dst_w_bpp), src_bpp, src_basereg, SRC + pixld (lowbit * 8 / dst_w_bpp), mask_bpp, mask_basereg, MASK +.if dst_r_bpp > 0 + pixld_a (lowbit * 8 / dst_r_bpp), dst_r_bpp, dst_r_basereg, DST_R +.else + add DST_R, DST_R, #lowbit +.endif + PF add PF_X, PF_X, #(lowbit * 8 / dst_w_bpp) + sub W, W, #(lowbit * 8 / dst_w_bpp) +1: +.endif +.endr + pixdeinterleave src_bpp, src_basereg + pixdeinterleave mask_bpp, mask_basereg + pixdeinterleave dst_r_bpp, dst_r_basereg + + process_pixblock_head + cache_preload 0, pixblock_size + cache_preload_simple + process_pixblock_tail + + pixinterleave dst_w_bpp, dst_w_basereg +.irp lowbit, 1, 2, 4, 8, 16 +.if (dst_w_bpp <= (lowbit * 8)) && ((lowbit * 8) < (pixblock_size * dst_w_bpp)) +.if lowbit < 16 /* we don't need more than 16-byte alignment */ + tst DST_W, #lowbit + beq 1f +.endif + pixst_a (lowbit * 8 / dst_w_bpp), dst_w_bpp, dst_w_basereg, DST_W +1: +.endif +.endr +.endif +2: +.endm + +/* + * Special code for processing up to (pixblock_size - 1) remaining + * trailing pixels. As SIMD processing performs operation on + * pixblock_size pixels, anything smaller than this has to be loaded + * and stored in a special way. 
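
(The chunk decomposition performed here can be modelled in C as below — a hedged sketch; the real macro also deals with channel (de)interleaving and with the aligned versus unaligned load/store variants:)

/* Decompose the remaining W & (pixblock_size - 1) trailing pixels into
 * power-of-two chunks, matching the .irp over 16, 8, 4, 2, 1 below. */
static int trailing_chunks_sketch(int remaining, int pixblock_size)
{
    int loaded = 0;
    for (int chunk = 16; chunk >= 1; chunk >>= 1) {
        if (pixblock_size > chunk && (remaining & chunk)) {
            /* pixld of 'chunk' pixels into the next free register slots */
            loaded += chunk;
        }
    }
    /* head/tail processing then runs once over the partially filled
     * registers, and a mirror loop stores the same chunks back */
    return loaded;   /* == remaining & (pixblock_size - 1) */
}
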
Loading and storing of pixel data is + * performed in such a way that we fill some 'slots' in the NEON + * registers (some slots naturally are unused), then perform compositing + * operation as usual. In the end, the data is taken from these 'slots' + * and saved to memory. + * + * cache_preload_flag - allows to suppress prefetch if + * set to 0 + * dst_aligned_flag - selects whether destination buffer + * is aligned + */ +.macro process_trailing_pixels cache_preload_flag, \ + dst_aligned_flag, \ + process_pixblock_head, \ + process_pixblock_tail, \ + process_pixblock_tail_head + tst W, #(pixblock_size - 1) + beq 2f +.irp chunk_size, 16, 8, 4, 2, 1 +.if pixblock_size > chunk_size + tst W, #chunk_size + beq 1f + pixld_src chunk_size, src_bpp, src_basereg, SRC + pixld chunk_size, mask_bpp, mask_basereg, MASK +.if dst_aligned_flag != 0 + pixld_a chunk_size, dst_r_bpp, dst_r_basereg, DST_R +.else + pixld chunk_size, dst_r_bpp, dst_r_basereg, DST_R +.endif +.if cache_preload_flag != 0 + PF add PF_X, PF_X, #chunk_size +.endif +1: +.endif +.endr + pixdeinterleave src_bpp, src_basereg + pixdeinterleave mask_bpp, mask_basereg + pixdeinterleave dst_r_bpp, dst_r_basereg + + process_pixblock_head +.if cache_preload_flag != 0 + cache_preload 0, pixblock_size + cache_preload_simple +.endif + process_pixblock_tail + pixinterleave dst_w_bpp, dst_w_basereg +.irp chunk_size, 16, 8, 4, 2, 1 +.if pixblock_size > chunk_size + tst W, #chunk_size + beq 1f +.if dst_aligned_flag != 0 + pixst_a chunk_size, dst_w_bpp, dst_w_basereg, DST_W +.else + pixst chunk_size, dst_w_bpp, dst_w_basereg, DST_W +.endif +1: +.endif +.endr +2: +.endm + +/* + * Macro, which performs all the needed operations to switch to the next + * scanline and start the next loop iteration unless all the scanlines + * are already processed. + */ +.macro advance_to_next_scanline start_of_loop_label +.if regs_shortage + ldrd W, [sp] /* load W and H (width and height) from stack */ +.else + mov W, ORIG_W +.endif + add DST_W, DST_W, DST_STRIDE, lsl #dst_bpp_shift +.if src_bpp != 0 + add SRC, SRC, SRC_STRIDE, lsl #src_bpp_shift +.endif +.if mask_bpp != 0 + add MASK, MASK, MASK_STRIDE, lsl #mask_bpp_shift +.endif +.if (dst_w_bpp != 24) + sub DST_W, DST_W, W, lsl #dst_bpp_shift +.endif +.if (src_bpp != 24) && (src_bpp != 0) + sub SRC, SRC, W, lsl #src_bpp_shift +.endif +.if (mask_bpp != 24) && (mask_bpp != 0) + sub MASK, MASK, W, lsl #mask_bpp_shift +.endif + subs H, H, #1 + mov DST_R, DST_W +.if regs_shortage + str H, [sp, #4] /* save updated height to stack */ +.endif + bge start_of_loop_label +.endm + +/* + * Registers are allocated in the following way by default: + * d0, d1, d2, d3 - reserved for loading source pixel data + * d4, d5, d6, d7 - reserved for loading destination pixel data + * d24, d25, d26, d27 - reserved for loading mask pixel data + * d28, d29, d30, d31 - final destination pixel data for writeback to memory + */ +.macro generate_composite_function fname, \ + src_bpp_, \ + mask_bpp_, \ + dst_w_bpp_, \ + flags, \ + pixblock_size_, \ + prefetch_distance, \ + init, \ + cleanup, \ + process_pixblock_head, \ + process_pixblock_tail, \ + process_pixblock_tail_head, \ + dst_w_basereg_ = 28, \ + dst_r_basereg_ = 4, \ + src_basereg_ = 0, \ + mask_basereg_ = 24 + + pixman_asm_function fname + + push {r4-r12, lr} /* save all registers */ + +/* + * Select prefetch type for this function. If prefetch distance is + * set to 0 or one of the color formats is 24bpp, SIMPLE prefetch + * has to be used instead of ADVANCED. 
+ */ + .set PREFETCH_TYPE_CURRENT, PREFETCH_TYPE_DEFAULT +.if prefetch_distance == 0 + .set PREFETCH_TYPE_CURRENT, PREFETCH_TYPE_NONE +.elseif (PREFETCH_TYPE_CURRENT > PREFETCH_TYPE_SIMPLE) && \ + ((src_bpp_ == 24) || (mask_bpp_ == 24) || (dst_w_bpp_ == 24)) + .set PREFETCH_TYPE_CURRENT, PREFETCH_TYPE_SIMPLE +.endif + +/* + * Make some macro arguments globally visible and accessible + * from other macros + */ + .set src_bpp, src_bpp_ + .set mask_bpp, mask_bpp_ + .set dst_w_bpp, dst_w_bpp_ + .set pixblock_size, pixblock_size_ + .set dst_w_basereg, dst_w_basereg_ + .set dst_r_basereg, dst_r_basereg_ + .set src_basereg, src_basereg_ + .set mask_basereg, mask_basereg_ + + .macro pixld_src x:vararg + pixld x + .endm + .macro fetch_src_pixblock + pixld_src pixblock_size, src_bpp, \ + (src_basereg - pixblock_size * src_bpp / 64), SRC + .endm +/* + * Assign symbolic names to registers + */ + W .req r0 /* width (is updated during processing) */ + H .req r1 /* height (is updated during processing) */ + DST_W .req r2 /* destination buffer pointer for writes */ + DST_STRIDE .req r3 /* destination image stride */ + SRC .req r4 /* source buffer pointer */ + SRC_STRIDE .req r5 /* source image stride */ + DST_R .req r6 /* destination buffer pointer for reads */ + + MASK .req r7 /* mask pointer */ + MASK_STRIDE .req r8 /* mask stride */ + + PF_CTL .req r9 /* combined lines counter and prefetch */ + /* distance increment counter */ + PF_X .req r10 /* pixel index in a scanline for current */ + /* pretetch position */ + PF_SRC .req r11 /* pointer to source scanline start */ + /* for prefetch purposes */ + PF_DST .req r12 /* pointer to destination scanline start */ + /* for prefetch purposes */ + PF_MASK .req r14 /* pointer to mask scanline start */ + /* for prefetch purposes */ +/* + * Check whether we have enough registers for all the local variables. + * If we don't have enough registers, original width and height are + * kept on top of stack (and 'regs_shortage' variable is set to indicate + * this for the rest of code). Even if there are enough registers, the + * allocation scheme may be a bit different depending on whether source + * or mask is not used. 
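
(Judging from the register table above and the ARGS_STACK_OFFSET / 'ldr SRC, [sp, #40]' sequence further down, the generated functions appear to be callable from C with a prototype along these lines. This is an inference for illustration only, not a declaration copied from the pixman headers:)

#include <stdint.h>

/* Hypothetical C-level view: r0 = width, r1 = height, r2/r3 = destination
 * pointer and stride, source (and mask, when used) pointer/stride passed
 * on the stack; strides seem to be counted in pixels, since they are
 * scaled by the bpp shift when advancing scanlines. */
void pixman_composite_over_8888_8888_asm_neon(int32_t   width,
                                              int32_t   height,
                                              uint32_t *dst,
                                              int32_t   dst_stride,
                                              uint32_t *src,
                                              int32_t   src_stride);
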
+ */ +.if (PREFETCH_TYPE_CURRENT < PREFETCH_TYPE_ADVANCED) + ORIG_W .req r10 /* saved original width */ + DUMMY .req r12 /* temporary register */ + .set regs_shortage, 0 +.elseif mask_bpp == 0 + ORIG_W .req r7 /* saved original width */ + DUMMY .req r8 /* temporary register */ + .set regs_shortage, 0 +.elseif src_bpp == 0 + ORIG_W .req r4 /* saved original width */ + DUMMY .req r5 /* temporary register */ + .set regs_shortage, 0 +.else + ORIG_W .req r1 /* saved original width */ + DUMMY .req r1 /* temporary register */ + .set regs_shortage, 1 +.endif + + .set mask_bpp_shift, -1 +.if src_bpp == 32 + .set src_bpp_shift, 2 +.elseif src_bpp == 24 + .set src_bpp_shift, 0 +.elseif src_bpp == 16 + .set src_bpp_shift, 1 +.elseif src_bpp == 8 + .set src_bpp_shift, 0 +.elseif src_bpp == 0 + .set src_bpp_shift, -1 +.else + .error "requested src bpp (src_bpp) is not supported" +.endif +.if mask_bpp == 32 + .set mask_bpp_shift, 2 +.elseif mask_bpp == 24 + .set mask_bpp_shift, 0 +.elseif mask_bpp == 8 + .set mask_bpp_shift, 0 +.elseif mask_bpp == 0 + .set mask_bpp_shift, -1 +.else + .error "requested mask bpp (mask_bpp) is not supported" +.endif +.if dst_w_bpp == 32 + .set dst_bpp_shift, 2 +.elseif dst_w_bpp == 24 + .set dst_bpp_shift, 0 +.elseif dst_w_bpp == 16 + .set dst_bpp_shift, 1 +.elseif dst_w_bpp == 8 + .set dst_bpp_shift, 0 +.else + .error "requested dst bpp (dst_w_bpp) is not supported" +.endif + +.if (((flags) & FLAG_DST_READWRITE) != 0) + .set dst_r_bpp, dst_w_bpp +.else + .set dst_r_bpp, 0 +.endif +.if (((flags) & FLAG_DEINTERLEAVE_32BPP) != 0) + .set DEINTERLEAVE_32BPP_ENABLED, 1 +.else + .set DEINTERLEAVE_32BPP_ENABLED, 0 +.endif + +.if prefetch_distance < 0 || prefetch_distance > 15 + .error "invalid prefetch distance (prefetch_distance)" +.endif + +.if src_bpp > 0 + ldr SRC, [sp, #40] +.endif +.if mask_bpp > 0 + ldr MASK, [sp, #48] +.endif + PF mov PF_X, #0 +.if src_bpp > 0 + ldr SRC_STRIDE, [sp, #44] +.endif +.if mask_bpp > 0 + ldr MASK_STRIDE, [sp, #52] +.endif + mov DST_R, DST_W + +.if src_bpp == 24 + sub SRC_STRIDE, SRC_STRIDE, W + sub SRC_STRIDE, SRC_STRIDE, W, lsl #1 +.endif +.if mask_bpp == 24 + sub MASK_STRIDE, MASK_STRIDE, W + sub MASK_STRIDE, MASK_STRIDE, W, lsl #1 +.endif +.if dst_w_bpp == 24 + sub DST_STRIDE, DST_STRIDE, W + sub DST_STRIDE, DST_STRIDE, W, lsl #1 +.endif + +/* + * Setup advanced prefetcher initial state + */ + PF mov PF_SRC, SRC + PF mov PF_DST, DST_R + PF mov PF_MASK, MASK + /* PF_CTL = prefetch_distance | ((h - 1) << 4) */ + PF mov PF_CTL, H, lsl #4 + PF add PF_CTL, #(prefetch_distance - 0x10) + + init +.if regs_shortage + push {r0, r1} +.endif + subs H, H, #1 +.if regs_shortage + str H, [sp, #4] /* save updated height to stack */ +.else + mov ORIG_W, W +.endif + blt 9f + cmp W, #(pixblock_size * 2) + blt 8f +/* + * This is the start of the pipelined loop, which if optimized for + * long scanlines + */ +0: + ensure_destination_ptr_alignment process_pixblock_head, \ + process_pixblock_tail, \ + process_pixblock_tail_head + + /* Implement "head (tail_head) ... 
(tail_head) tail" loop pattern */ + pixld_a pixblock_size, dst_r_bpp, \ + (dst_r_basereg - pixblock_size * dst_r_bpp / 64), DST_R + fetch_src_pixblock + pixld pixblock_size, mask_bpp, \ + (mask_basereg - pixblock_size * mask_bpp / 64), MASK + PF add PF_X, PF_X, #pixblock_size + process_pixblock_head + cache_preload 0, pixblock_size + cache_preload_simple + subs W, W, #(pixblock_size * 2) + blt 2f +1: + process_pixblock_tail_head + cache_preload_simple + subs W, W, #pixblock_size + bge 1b +2: + process_pixblock_tail + pixst_a pixblock_size, dst_w_bpp, \ + (dst_w_basereg - pixblock_size * dst_w_bpp / 64), DST_W + + /* Process the remaining trailing pixels in the scanline */ + process_trailing_pixels 1, 1, \ + process_pixblock_head, \ + process_pixblock_tail, \ + process_pixblock_tail_head + advance_to_next_scanline 0b + +.if regs_shortage + pop {r0, r1} +.endif + cleanup + pop {r4-r12, pc} /* exit */ +/* + * This is the start of the loop, designed to process images with small width + * (less than pixblock_size * 2 pixels). In this case neither pipelining + * nor prefetch are used. + */ +8: + /* Process exactly pixblock_size pixels if needed */ + tst W, #pixblock_size + beq 1f + pixld pixblock_size, dst_r_bpp, \ + (dst_r_basereg - pixblock_size * dst_r_bpp / 64), DST_R + fetch_src_pixblock + pixld pixblock_size, mask_bpp, \ + (mask_basereg - pixblock_size * mask_bpp / 64), MASK + process_pixblock_head + process_pixblock_tail + pixst pixblock_size, dst_w_bpp, \ + (dst_w_basereg - pixblock_size * dst_w_bpp / 64), DST_W +1: + /* Process the remaining trailing pixels in the scanline */ + process_trailing_pixels 0, 0, \ + process_pixblock_head, \ + process_pixblock_tail, \ + process_pixblock_tail_head + advance_to_next_scanline 8b +9: +.if regs_shortage + pop {r0, r1} +.endif + cleanup + pop {r4-r12, pc} /* exit */ + + .purgem fetch_src_pixblock + .purgem pixld_src + + .unreq SRC + .unreq MASK + .unreq DST_R + .unreq DST_W + .unreq ORIG_W + .unreq W + .unreq H + .unreq SRC_STRIDE + .unreq DST_STRIDE + .unreq MASK_STRIDE + .unreq PF_CTL + .unreq PF_X + .unreq PF_SRC + .unreq PF_DST + .unreq PF_MASK + .unreq DUMMY + .endfunc +.endm + +/* + * A simplified variant of function generation template for a single + * scanline processing (for implementing pixman combine functions) + */ +.macro generate_composite_function_scanline use_nearest_scaling, \ + fname, \ + src_bpp_, \ + mask_bpp_, \ + dst_w_bpp_, \ + flags, \ + pixblock_size_, \ + init, \ + cleanup, \ + process_pixblock_head, \ + process_pixblock_tail, \ + process_pixblock_tail_head, \ + dst_w_basereg_ = 28, \ + dst_r_basereg_ = 4, \ + src_basereg_ = 0, \ + mask_basereg_ = 24 + + pixman_asm_function fname + + .set PREFETCH_TYPE_CURRENT, PREFETCH_TYPE_NONE +/* + * Make some macro arguments globally visible and accessible + * from other macros + */ + .set src_bpp, src_bpp_ + .set mask_bpp, mask_bpp_ + .set dst_w_bpp, dst_w_bpp_ + .set pixblock_size, pixblock_size_ + .set dst_w_basereg, dst_w_basereg_ + .set dst_r_basereg, dst_r_basereg_ + .set src_basereg, src_basereg_ + .set mask_basereg, mask_basereg_ + +.if use_nearest_scaling != 0 + /* + * Assign symbolic names to registers for nearest scaling + */ + W .req r0 + DST_W .req r1 + SRC .req r2 + VX .req r3 + UNIT_X .req ip + MASK .req lr + TMP1 .req r4 + TMP2 .req r5 + DST_R .req r6 + SRC_WIDTH_FIXED .req r7 + + .macro pixld_src x:vararg + pixld_s x + .endm + + ldr UNIT_X, [sp] + push {r4-r8, lr} + ldr SRC_WIDTH_FIXED, [sp, #(24 + 4)] + .if mask_bpp != 0 + ldr MASK, [sp, #(24 + 8)] + .endif +.else + 
/* + * Assign symbolic names to registers + */ + W .req r0 /* width (is updated during processing) */ + DST_W .req r1 /* destination buffer pointer for writes */ + SRC .req r2 /* source buffer pointer */ + DST_R .req ip /* destination buffer pointer for reads */ + MASK .req r3 /* mask pointer */ + + .macro pixld_src x:vararg + pixld x + .endm +.endif + +.if (((flags) & FLAG_DST_READWRITE) != 0) + .set dst_r_bpp, dst_w_bpp +.else + .set dst_r_bpp, 0 +.endif +.if (((flags) & FLAG_DEINTERLEAVE_32BPP) != 0) + .set DEINTERLEAVE_32BPP_ENABLED, 1 +.else + .set DEINTERLEAVE_32BPP_ENABLED, 0 +.endif + + .macro fetch_src_pixblock + pixld_src pixblock_size, src_bpp, \ + (src_basereg - pixblock_size * src_bpp / 64), SRC + .endm + + init + mov DST_R, DST_W + + cmp W, #pixblock_size + blt 8f + + ensure_destination_ptr_alignment process_pixblock_head, \ + process_pixblock_tail, \ + process_pixblock_tail_head + + subs W, W, #pixblock_size + blt 7f + + /* Implement "head (tail_head) ... (tail_head) tail" loop pattern */ + pixld_a pixblock_size, dst_r_bpp, \ + (dst_r_basereg - pixblock_size * dst_r_bpp / 64), DST_R + fetch_src_pixblock + pixld pixblock_size, mask_bpp, \ + (mask_basereg - pixblock_size * mask_bpp / 64), MASK + process_pixblock_head + subs W, W, #pixblock_size + blt 2f +1: + process_pixblock_tail_head + subs W, W, #pixblock_size + bge 1b +2: + process_pixblock_tail + pixst_a pixblock_size, dst_w_bpp, \ + (dst_w_basereg - pixblock_size * dst_w_bpp / 64), DST_W +7: + /* Process the remaining trailing pixels in the scanline (dst aligned) */ + process_trailing_pixels 0, 1, \ + process_pixblock_head, \ + process_pixblock_tail, \ + process_pixblock_tail_head + + cleanup +.if use_nearest_scaling != 0 + pop {r4-r8, pc} /* exit */ +.else + bx lr /* exit */ +.endif +8: + /* Process the remaining trailing pixels in the scanline (dst unaligned) */ + process_trailing_pixels 0, 0, \ + process_pixblock_head, \ + process_pixblock_tail, \ + process_pixblock_tail_head + + cleanup + +.if use_nearest_scaling != 0 + pop {r4-r8, pc} /* exit */ + + .unreq DST_R + .unreq SRC + .unreq W + .unreq VX + .unreq UNIT_X + .unreq TMP1 + .unreq TMP2 + .unreq DST_W + .unreq MASK + .unreq SRC_WIDTH_FIXED + +.else + bx lr /* exit */ + + .unreq SRC + .unreq MASK + .unreq DST_R + .unreq DST_W + .unreq W +.endif + + .purgem fetch_src_pixblock + .purgem pixld_src + + .endfunc +.endm + +.macro generate_composite_function_single_scanline x:vararg + generate_composite_function_scanline 0, x +.endm + +.macro generate_composite_function_nearest_scanline x:vararg + generate_composite_function_scanline 1, x +.endm + +/* Default prologue/epilogue, nothing special needs to be done */ + +.macro default_init +.endm + +.macro default_cleanup +.endm + +/* + * Prologue/epilogue variant which additionally saves/restores d8-d15 + * registers (they need to be saved/restored by callee according to ABI). + * This is required if the code needs to use all the NEON registers. 
+ */ + +.macro default_init_need_all_regs + vpush {d8-d15} +.endm + +.macro default_cleanup_need_all_regs + vpop {d8-d15} +.endm + +/******************************************************************************/ diff --git a/src/vector/pixman/vregion.cpp b/src/vector/pixman/vregion.cpp new file mode 100644 index 0000000..5944b63 --- /dev/null +++ b/src/vector/pixman/vregion.cpp @@ -0,0 +1,2086 @@ +/* + * Copyright 1987, 1988, 1989, 1998 The Open Group + * + * Permission to use, copy, modify, distribute, and sell this software and its + * documentation for any purpose is hereby granted without fee, provided that + * the above copyright notice appear in all copies and that both that + * copyright notice and this permission notice appear in supporting + * documentation. + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * OPEN GROUP BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * Except as contained in this notice, the name of The Open Group shall not be + * used in advertising or otherwise to promote the sale, use or other dealings + * in this Software without prior written authorization from The Open Group. + * + * Copyright 1987, 1988, 1989 by + * Digital Equipment Corporation, Maynard, Massachusetts. + * + * All Rights Reserved + * + * Permission to use, copy, modify, and distribute this software and its + * documentation for any purpose and without fee is hereby granted, + * provided that the above copyright notice appear in all copies and that + * both that copyright notice and this permission notice appear in + * supporting documentation, and that the name of Digital not be + * used in advertising or publicity pertaining to distribution of the + * software without specific, written prior permission. + * + * DIGITAL DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING + * ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL + * DIGITAL BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR + * ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, + * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, + * ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS + * SOFTWARE. + * + * Copyright © 1998 Keith Packard + * + * Permission to use, copy, modify, distribute, and sell this software and its + * documentation for any purpose is hereby granted without fee, provided that + * the above copyright notice appear in all copies and that both that + * copyright notice and this permission notice appear in supporting + * documentation, and that the name of Keith Packard not be used in + * advertising or publicity pertaining to distribution of the software without + * specific, written prior permission. Keith Packard makes no + * representations about the suitability of this software for any purpose. It + * is provided "as is" without express or implied warranty. 
+ * + * KEITH PACKARD DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, + * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO + * EVENT SHALL KEITH PACKARD BE LIABLE FOR ANY SPECIAL, INDIRECT OR + * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, + * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER + * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR + * PERFORMANCE OF THIS SOFTWARE. + */ + +#include +#include +#include +#include +#include + +#define critical_if_fail assert +#define PIXMAN_EXPORT static +#define FALSE 0 +#define TRUE 1 +#define FUNC "" +#define MIN(a, b) (a) < (b) ? (a) : (b) +#define MAX(a, b) (a) > (b) ? (a) : (b) + +typedef int pixman_bool_t; + +typedef struct pixman_rectangle pixman_rectangle_t; + +typedef struct pixman_box box_type_t; +typedef struct pixman_region_data region_data_type_t; +typedef struct pixman_region region_type_t; +typedef int64_t overflow_int_t; + +#define PREFIX(x) pixman_region##x + +#define PIXMAN_REGION_MAX INT32_MAX +#define PIXMAN_REGION_MIN INT32_MIN + +typedef struct { + int x, y; +} point_type_t; + +struct pixman_region_data { + long size; + long numRects; + /* box_type_t rects[size]; in memory but not explicitly declared */ +}; + +struct pixman_rectangle { + int32_t x, y; + uint32_t width, height; +}; + +struct pixman_box { + int32_t x1, y1, x2, y2; +}; + +struct pixman_region { + box_type_t extents; + region_data_type_t *data; +}; + +typedef enum { + PIXMAN_REGION_OUT, + PIXMAN_REGION_IN, + PIXMAN_REGION_PART +} pixman_region_overlap_t; + +static void _pixman_log_error(const char *function, const char *message) +{ + fprintf(stderr, + "*** BUG ***\n" + "In %s: %s\n" + "Set a breakpoint on '_pixman_log_error' to debug\n\n", + function, message); +} + +#define PIXREGION_NIL(reg) ((reg)->data && !(reg)->data->numRects) +/* not a region */ +#define PIXREGION_NAR(reg) ((reg)->data == pixman_broken_data) +#define PIXREGION_NUMRECTS(reg) ((reg)->data ? (reg)->data->numRects : 1) +#define PIXREGION_SIZE(reg) ((reg)->data ? (reg)->data->size : 0) +#define PIXREGION_RECTS(reg) \ + ((reg)->data ? 
(box_type_t *)((reg)->data + 1) : &(reg)->extents) +#define PIXREGION_BOXPTR(reg) ((box_type_t *)((reg)->data + 1)) +#define PIXREGION_BOX(reg, i) (&PIXREGION_BOXPTR(reg)[i]) +#define PIXREGION_TOP(reg) PIXREGION_BOX(reg, (reg)->data->numRects) +#define PIXREGION_END(reg) PIXREGION_BOX(reg, (reg)->data->numRects - 1) + +#define GOOD_RECT(rect) ((rect)->x1 < (rect)->x2 && (rect)->y1 < (rect)->y2) +#define BAD_RECT(rect) ((rect)->x1 > (rect)->x2 || (rect)->y1 > (rect)->y2) + +#ifdef DEBUG + +#define GOOD(reg) \ + do { \ + if (!PREFIX(_selfcheck(reg))) \ + _pixman_log_error(FUNC, "Malformed region " #reg); \ + } while (0) + +#else + +#define GOOD(reg) + +#endif + +static const box_type_t PREFIX(_empty_box_) = {0, 0, 0, 0}; +static const region_data_type_t PREFIX(_empty_data_) = {0, 0}; +#if defined(__llvm__) && !defined(__clang__) +static const volatile region_data_type_t PREFIX(_broken_data_) = {0, 0}; +#else +static const region_data_type_t PREFIX(_broken_data_) = {0, 0}; +#endif + +static box_type_t *pixman_region_empty_box = (box_type_t *)&PREFIX(_empty_box_); +static region_data_type_t *pixman_region_empty_data = + (region_data_type_t *)&PREFIX(_empty_data_); +static region_data_type_t *pixman_broken_data = + (region_data_type_t *)&PREFIX(_broken_data_); + +static pixman_bool_t pixman_break(region_type_t *region); + +/* + * The functions in this file implement the Region abstraction used extensively + * throughout the X11 sample server. A Region is simply a set of disjoint + * (non-overlapping) rectangles, plus an "extent" rectangle which is the + * smallest single rectangle that contains all the non-overlapping rectangles. + * + * A Region is implemented as a "y-x-banded" array of rectangles. This array + * imposes two degrees of order. First, all rectangles are sorted by top side + * y coordinate first (y1), and then by left side x coordinate (x1). + * + * Furthermore, the rectangles are grouped into "bands". Each rectangle in a + * band has the same top y coordinate (y1), and each has the same bottom y + * coordinate (y2). Thus all rectangles in a band differ only in their left + * and right side (x1 and x2). Bands are implicit in the array of rectangles: + * there is no separate list of band start pointers. + * + * The y-x band representation does not minimize rectangles. In particular, + * if a rectangle vertically crosses a band (the rectangle has scanlines in + * the y1 to y2 area spanned by the band), then the rectangle may be broken + * down into two or more smaller rectangles stacked one atop the other. + * + * ----------- ----------- + * | | | | band 0 + * | | -------- ----------- -------- + * | | | | in y-x banded | | | | band 1 + * | | | | form is | | | | + * ----------- | | ----------- -------- + * | | | | band 2 + * -------- -------- + * + * An added constraint on the rectangles is that they must cover as much + * horizontal area as possible: no two rectangles within a band are allowed + * to touch. + * + * Whenever possible, bands will be merged together to cover a greater vertical + * distance (and thus reduce the number of rectangles). Two bands can be merged + * only if the bottom of one touches the top of the other and they have + * rectangles in the same places (of the same width, of course). + * + * Adam de Boor wrote most of the original region code. Joel McCormack + * substantially modified or rewrote most of the core arithmetic routines, and + * added pixman_region_validate in order to support several speed improvements + * to pixman_region_validate_tree. 
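
(A concrete illustration of that banding, with invented coordinates: a tall rectangle on the left (x 0..10, y 0..20) overlapping in y with a rectangle on the right (x 15..25, y 5..30) is stored as four boxes in three bands:)

/* Illustrative only; box_type_t is the {x1, y1, x2, y2} struct above. */
static const box_type_t banded_example[] = {
    {  0,  0, 10,  5 },   /* band 0: y 0..5,   left box only             */
    {  0,  5, 10, 20 },   /* band 1: y 5..20,  left box ...              */
    { 15,  5, 25, 20 },   /* band 1:           ... and right box (by x1) */
    { 15, 20, 25, 30 },   /* band 2: y 20..30, right box only            */
};
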
Bob Scheifler changed the representation + * to be more compact when empty or a single rectangle, and did a bunch of + * gratuitous reformatting. Carl Worth did further gratuitous reformatting + * while re-merging the server and client region code into libpixregion. + * Soren Sandmann did even more gratuitous reformatting. + */ + +/* true iff two Boxes overlap */ +#define EXTENTCHECK(r1, r2) \ + (!(((r1)->x2 <= (r2)->x1) || ((r1)->x1 >= (r2)->x2) || \ + ((r1)->y2 <= (r2)->y1) || ((r1)->y1 >= (r2)->y2))) + +/* true iff (x,y) is in Box */ +#define INBOX(r, x, y) \ + (((r)->x2 > x) && ((r)->x1 <= x) && ((r)->y2 > y) && ((r)->y1 <= y)) + +/* true iff Box r1 contains Box r2 */ +#define SUBSUMES(r1, r2) \ + (((r1)->x1 <= (r2)->x1) && ((r1)->x2 >= (r2)->x2) && \ + ((r1)->y1 <= (r2)->y1) && ((r1)->y2 >= (r2)->y2)) + +static size_t PIXREGION_SZOF(size_t n) +{ + size_t size = n * sizeof(box_type_t); + + if (n > UINT32_MAX / sizeof(box_type_t)) return 0; + + if (sizeof(region_data_type_t) > UINT32_MAX - size) return 0; + + return size + sizeof(region_data_type_t); +} + +static region_data_type_t *alloc_data(size_t n) +{ + size_t sz = PIXREGION_SZOF(n); + + if (!sz) return NULL; + + return (region_data_type_t *)malloc(sz); +} + +#define FREE_DATA(reg) \ + if ((reg)->data && (reg)->data->size) free((reg)->data) + +#define RECTALLOC_BAIL(region, n, bail) \ + do { \ + if (!(region)->data || \ + (((region)->data->numRects + (n)) > (region)->data->size)) { \ + if (!pixman_rect_alloc(region, n)) goto bail; \ + } \ + } while (0) + +#define RECTALLOC(region, n) \ + do { \ + if (!(region)->data || \ + (((region)->data->numRects + (n)) > (region)->data->size)) { \ + if (!pixman_rect_alloc(region, n)) { \ + return FALSE; \ + } \ + } \ + } while (0) + +#define ADDRECT(next_rect, nx1, ny1, nx2, ny2) \ + do { \ + next_rect->x1 = nx1; \ + next_rect->y1 = ny1; \ + next_rect->x2 = nx2; \ + next_rect->y2 = ny2; \ + next_rect++; \ + } while (0) + +#define NEWRECT(region, next_rect, nx1, ny1, nx2, ny2) \ + do { \ + if (!(region)->data || \ + ((region)->data->numRects == (region)->data->size)) { \ + if (!pixman_rect_alloc(region, 1)) return FALSE; \ + next_rect = PIXREGION_TOP(region); \ + } \ + ADDRECT(next_rect, nx1, ny1, nx2, ny2); \ + region->data->numRects++; \ + critical_if_fail(region->data->numRects <= region->data->size); \ + } while (0) + +#define DOWNSIZE(reg, numRects) \ + do { \ + if (((numRects) < ((reg)->data->size >> 1)) && \ + ((reg)->data->size > 50)) { \ + region_data_type_t *new_data; \ + size_t data_size = PIXREGION_SZOF(numRects); \ + \ + if (!data_size) { \ + new_data = NULL; \ + } else { \ + new_data = \ + (region_data_type_t *)realloc((reg)->data, data_size); \ + } \ + \ + if (new_data) { \ + new_data->size = (numRects); \ + (reg)->data = new_data; \ + } \ + } \ + } while (0) + +PIXMAN_EXPORT pixman_bool_t PREFIX(_equal)(region_type_t *reg1, + region_type_t *reg2) +{ + int i; + box_type_t *rects1; + box_type_t *rects2; + + if (reg1->extents.x1 != reg2->extents.x1) return FALSE; + + if (reg1->extents.x2 != reg2->extents.x2) return FALSE; + + if (reg1->extents.y1 != reg2->extents.y1) return FALSE; + + if (reg1->extents.y2 != reg2->extents.y2) return FALSE; + + if (PIXREGION_NUMRECTS(reg1) != PIXREGION_NUMRECTS(reg2)) return FALSE; + + rects1 = PIXREGION_RECTS(reg1); + rects2 = PIXREGION_RECTS(reg2); + + for (i = 0; i != PIXREGION_NUMRECTS(reg1); i++) { + if (rects1[i].x1 != rects2[i].x1) return FALSE; + + if (rects1[i].x2 != rects2[i].x2) return FALSE; + + if (rects1[i].y1 != rects2[i].y1) return 
FALSE; + + if (rects1[i].y2 != rects2[i].y2) return FALSE; + } + + return TRUE; +} + +// returns true if both region intersects +PIXMAN_EXPORT pixman_bool_t PREFIX(_intersects)(region_type_t *reg1, + region_type_t *reg2) +{ + box_type_t *rects1 = PIXREGION_RECTS(reg1); + box_type_t *rects2 = PIXREGION_RECTS(reg2); + for (int i = 0; i != PIXREGION_NUMRECTS(reg1); i++) { + for (int j = 0; j != PIXREGION_NUMRECTS(reg2); j++) { + if (EXTENTCHECK(rects1 + i, rects2 + j)) return TRUE; + } + } + return FALSE; +} + +int PREFIX(_print)(region_type_t *rgn) +{ + int num, size; + int i; + box_type_t *rects; + + num = PIXREGION_NUMRECTS(rgn); + size = PIXREGION_SIZE(rgn); + rects = PIXREGION_RECTS(rgn); + + fprintf(stderr, "num: %d size: %d\n", num, size); + fprintf(stderr, "extents: %d %d %d %d\n", rgn->extents.x1, rgn->extents.y1, + rgn->extents.x2, rgn->extents.y2); + + for (i = 0; i < num; i++) { + fprintf(stderr, "%d %d %d %d \n", rects[i].x1, rects[i].y1, rects[i].x2, + rects[i].y2); + } + + fprintf(stderr, "\n"); + + return (num); +} + +PIXMAN_EXPORT void PREFIX(_init)(region_type_t *region) +{ + region->extents = *pixman_region_empty_box; + region->data = pixman_region_empty_data; +} + +PIXMAN_EXPORT pixman_bool_t PREFIX(_union_rect)(region_type_t *dest, + region_type_t *source, int x, + int y, unsigned int width, + unsigned int height); +PIXMAN_EXPORT void PREFIX(_init_rect)(region_type_t *region, int x, int y, + unsigned int width, unsigned int height) +{ + PREFIX(_init)(region); + PREFIX(_union_rect)(region, region, x, y, width, height); +} + +PIXMAN_EXPORT void PREFIX(_fini)(region_type_t *region) +{ + GOOD(region); + FREE_DATA(region); +} + +PIXMAN_EXPORT int PREFIX(_n_rects)(region_type_t *region) +{ + return PIXREGION_NUMRECTS(region); +} + +static pixman_bool_t pixman_break(region_type_t *region) +{ + FREE_DATA(region); + + region->extents = *pixman_region_empty_box; + region->data = pixman_broken_data; + + return FALSE; +} + +static pixman_bool_t pixman_rect_alloc(region_type_t *region, int n) +{ + region_data_type_t *data; + + if (!region->data) { + n++; + region->data = alloc_data(n); + + if (!region->data) return pixman_break(region); + + region->data->numRects = 1; + *PIXREGION_BOXPTR(region) = region->extents; + } else if (!region->data->size) { + region->data = alloc_data(n); + + if (!region->data) return pixman_break(region); + + region->data->numRects = 0; + } else { + size_t data_size; + + if (n == 1) { + n = region->data->numRects; + if (n > 500) /* XXX pick numbers out of a hat */ + n = 250; + } + + n += region->data->numRects; + data_size = PIXREGION_SZOF(n); + + if (!data_size) { + data = NULL; + } else { + data = + (region_data_type_t *)realloc(region->data, PIXREGION_SZOF(n)); + } + + if (!data) return pixman_break(region); + + region->data = data; + } + + region->data->size = n; + + return TRUE; +} + +PIXMAN_EXPORT pixman_bool_t PREFIX(_copy)(region_type_t *dst, + region_type_t *src) +{ + GOOD(dst); + GOOD(src); + + if (dst == src) return TRUE; + + dst->extents = src->extents; + + if (!src->data || !src->data->size) { + FREE_DATA(dst); + dst->data = src->data; + return TRUE; + } + + if (!dst->data || (dst->data->size < src->data->numRects)) { + FREE_DATA(dst); + + dst->data = alloc_data(src->data->numRects); + + if (!dst->data) return pixman_break(dst); + + dst->data->size = src->data->numRects; + } + + dst->data->numRects = src->data->numRects; + + memmove((char *)PIXREGION_BOXPTR(dst), (char *)PIXREGION_BOXPTR(src), + dst->data->numRects * sizeof(box_type_t)); + + 
return TRUE; +} + +/*====================================================================== + * Generic Region Operator + *====================================================================*/ + +/*- + *----------------------------------------------------------------------- + * pixman_coalesce -- + * Attempt to merge the boxes in the current band with those in the + * previous one. We are guaranteed that the current band extends to + * the end of the rects array. Used only by pixman_op. + * + * Results: + * The new index for the previous band. + * + * Side Effects: + * If coalescing takes place: + * - rectangles in the previous band will have their y2 fields + * altered. + * - region->data->numRects will be decreased. + * + *----------------------------------------------------------------------- + */ +static inline int pixman_coalesce( + region_type_t *region, /* Region to coalesce */ + int prev_start, /* Index of start of previous band */ + int cur_start) /* Index of start of current band */ +{ + box_type_t *prev_box; /* Current box in previous band */ + box_type_t *cur_box; /* Current box in current band */ + int numRects; /* Number rectangles in both bands */ + int y2; /* Bottom of current band */ + + /* + * Figure out how many rectangles are in the band. + */ + numRects = cur_start - prev_start; + critical_if_fail(numRects == region->data->numRects - cur_start); + + if (!numRects) return cur_start; + + /* + * The bands may only be coalesced if the bottom of the previous + * matches the top scanline of the current. + */ + prev_box = PIXREGION_BOX(region, prev_start); + cur_box = PIXREGION_BOX(region, cur_start); + if (prev_box->y2 != cur_box->y1) return cur_start; + + /* + * Make sure the bands have boxes in the same places. This + * assumes that boxes have been added in such a way that they + * cover the most area possible. I.e. two boxes in a band must + * have some horizontal space between them. + */ + y2 = cur_box->y2; + + do { + if ((prev_box->x1 != cur_box->x1) || (prev_box->x2 != cur_box->x2)) + return (cur_start); + + prev_box++; + cur_box++; + numRects--; + } while (numRects); + + /* + * The bands may be merged, so set the bottom y of each box + * in the previous band to the bottom y of the current band. + */ + numRects = cur_start - prev_start; + region->data->numRects -= numRects; + + do { + prev_box--; + prev_box->y2 = y2; + numRects--; + } while (numRects); + + return prev_start; +} + +/* Quicky macro to avoid trivial reject procedure calls to pixman_coalesce */ + +#define COALESCE(new_reg, prev_band, cur_band) \ + do { \ + if (cur_band - prev_band == new_reg->data->numRects - cur_band) \ + prev_band = pixman_coalesce(new_reg, prev_band, cur_band); \ + else \ + prev_band = cur_band; \ + } while (0) + +/*- + *----------------------------------------------------------------------- + * pixman_region_append_non_o -- + * Handle a non-overlapping band for the union and subtract operations. + * Just adds the (top/bottom-clipped) rectangles into the region. + * Doesn't have to check for subsumption or anything. + * + * Results: + * None. + * + * Side Effects: + * region->data->numRects is incremented and the rectangles overwritten + * with the rectangles we're passed. 
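+ *
+ * A hand-worked call, for illustration only (the values are made up, not
+ * taken from any caller): appending the band r = { x:[0,10), x:[20,30) }
+ * clipped to y1 = 4 and y2 = 9 adds the boxes (0,4)-(10,9) and
+ * (20,4)-(30,9) and increases numRects by two.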
+ * + *----------------------------------------------------------------------- + */ +static inline pixman_bool_t pixman_region_append_non_o(region_type_t *region, + box_type_t * r, + box_type_t * r_end, + int y1, int y2) +{ + box_type_t *next_rect; + int new_rects; + + new_rects = r_end - r; + + critical_if_fail(y1 < y2); + critical_if_fail(new_rects != 0); + + /* Make sure we have enough space for all rectangles to be added */ + RECTALLOC(region, new_rects); + next_rect = PIXREGION_TOP(region); + region->data->numRects += new_rects; + + do { + critical_if_fail(r->x1 < r->x2); + ADDRECT(next_rect, r->x1, y1, r->x2, y2); + r++; + } while (r != r_end); + + return TRUE; +} + +#define FIND_BAND(r, r_band_end, r_end, ry1) \ + do { \ + ry1 = r->y1; \ + r_band_end = r + 1; \ + while ((r_band_end != r_end) && (r_band_end->y1 == ry1)) { \ + r_band_end++; \ + } \ + } while (0) + +#define APPEND_REGIONS(new_reg, r, r_end) \ + do { \ + int new_rects; \ + if ((new_rects = r_end - r)) { \ + RECTALLOC_BAIL(new_reg, new_rects, bail); \ + memmove((char *)PIXREGION_TOP(new_reg), (char *)r, \ + new_rects * sizeof(box_type_t)); \ + new_reg->data->numRects += new_rects; \ + } \ + } while (0) + +/*- + *----------------------------------------------------------------------- + * pixman_op -- + * Apply an operation to two regions. Called by pixman_region_union, + *pixman_region_inverse, pixman_region_subtract, pixman_region_intersect.... + *Both regions MUST have at least one rectangle, and cannot be the same object. + * + * Results: + * TRUE if successful. + * + * Side Effects: + * The new region is overwritten. + * overlap set to TRUE if overlap_func ever returns TRUE. + * + * Notes: + * The idea behind this function is to view the two regions as sets. + * Together they cover a rectangle of area that this function divides + * into horizontal bands where points are covered only by one region + * or by both. For the first case, the non_overlap_func is called with + * each the band and the band's upper and lower extents. For the + * second, the overlap_func is called to process the entire band. It + * is responsible for clipping the rectangles in the band, though + * this function provides the boundaries. + * At the end of each band, the new region is coalesced, if possible, + * to reduce the number of rectangles in the region. + * + *----------------------------------------------------------------------- + */ + +typedef pixman_bool_t (*overlap_proc_ptr)(region_type_t *region, box_type_t *r1, + box_type_t *r1_end, box_type_t *r2, + box_type_t *r2_end, int y1, int y2); + +static pixman_bool_t pixman_op( + region_type_t * new_reg, /* Place to store result */ + region_type_t * reg1, /* First region in operation */ + region_type_t * reg2, /* 2d region in operation */ + overlap_proc_ptr overlap_func, /* Function to call for over- + * lapping bands */ + int append_non1, /* Append non-overlapping bands + * in region 1 ? + */ + int append_non2 /* Append non-overlapping bands + * in region 2 ? 
+ */ +) +{ + box_type_t * r1; /* Pointer into first region */ + box_type_t * r2; /* Pointer into 2d region */ + box_type_t * r1_end; /* End of 1st region */ + box_type_t * r2_end; /* End of 2d region */ + int ybot; /* Bottom of intersection */ + int ytop; /* Top of intersection */ + region_data_type_t *old_data; /* Old data for new_reg */ + int prev_band; /* Index of start of + * previous band in new_reg */ + int cur_band; /* Index of start of current + * band in new_reg */ + box_type_t *r1_band_end; /* End of current band in r1 */ + box_type_t *r2_band_end; /* End of current band in r2 */ + int top; /* Top of non-overlapping band */ + int bot; /* Bottom of non-overlapping band*/ + int r1y1; /* Temps for r1->y1 and r2->y1 */ + int r2y1; + int new_size; + int numRects; + + /* + * Break any region computed from a broken region + */ + if (PIXREGION_NAR(reg1) || PIXREGION_NAR(reg2)) + return pixman_break(new_reg); + + /* + * Initialization: + * set r1, r2, r1_end and r2_end appropriately, save the rectangles + * of the destination region until the end in case it's one of + * the two source regions, then mark the "new" region empty, allocating + * another array of rectangles for it to use. + */ + + r1 = PIXREGION_RECTS(reg1); + new_size = PIXREGION_NUMRECTS(reg1); + r1_end = r1 + new_size; + + numRects = PIXREGION_NUMRECTS(reg2); + r2 = PIXREGION_RECTS(reg2); + r2_end = r2 + numRects; + + critical_if_fail(r1 != r1_end); + critical_if_fail(r2 != r2_end); + + old_data = (region_data_type_t *)NULL; + + if (((new_reg == reg1) && (new_size > 1)) || + ((new_reg == reg2) && (numRects > 1))) { + old_data = new_reg->data; + new_reg->data = pixman_region_empty_data; + } + + /* guess at new size */ + if (numRects > new_size) new_size = numRects; + + new_size <<= 1; + + if (!new_reg->data) + new_reg->data = pixman_region_empty_data; + else if (new_reg->data->size) + new_reg->data->numRects = 0; + + if (new_size > new_reg->data->size) { + if (!pixman_rect_alloc(new_reg, new_size)) { + free(old_data); + return FALSE; + } + } + + /* + * Initialize ybot. + * In the upcoming loop, ybot and ytop serve different functions depending + * on whether the band being handled is an overlapping or non-overlapping + * band. + * In the case of a non-overlapping band (only one of the regions + * has points in the band), ybot is the bottom of the most recent + * intersection and thus clips the top of the rectangles in that band. + * ytop is the top of the next intersection between the two regions and + * serves to clip the bottom of the rectangles in the current band. + * For an overlapping band (where the two regions intersect), ytop clips + * the top of the rectangles of both regions and ybot clips the bottoms. + */ + + ybot = MIN(r1->y1, r2->y1); + + /* + * prev_band serves to mark the start of the previous band so rectangles + * can be coalesced into larger rectangles. qv. pixman_coalesce, above. + * In the beginning, there is no previous band, so prev_band == cur_band + * (cur_band is set later on, of course, but the first band will always + * start at index 0). prev_band and cur_band must be indices because of + * the possible expansion, and resultant moving, of the new region's + * array of rectangles. + */ + prev_band = 0; + + do { + /* + * This algorithm proceeds one source-band (as opposed to a + * destination band, which is determined by where the two regions + * intersect) at a time. 
r1_band_end and r2_band_end serve to mark the + * rectangle after the last one in the current band for their + * respective regions. + */ + critical_if_fail(r1 != r1_end); + critical_if_fail(r2 != r2_end); + + FIND_BAND(r1, r1_band_end, r1_end, r1y1); + FIND_BAND(r2, r2_band_end, r2_end, r2y1); + + /* + * First handle the band that doesn't intersect, if any. + * + * Note that attention is restricted to one band in the + * non-intersecting region at once, so if a region has n + * bands between the current position and the next place it overlaps + * the other, this entire loop will be passed through n times. + */ + if (r1y1 < r2y1) { + if (append_non1) { + top = MAX(r1y1, ybot); + bot = MIN(r1->y2, r2y1); + if (top != bot) { + cur_band = new_reg->data->numRects; + if (!pixman_region_append_non_o(new_reg, r1, r1_band_end, + top, bot)) + goto bail; + COALESCE(new_reg, prev_band, cur_band); + } + } + ytop = r2y1; + } else if (r2y1 < r1y1) { + if (append_non2) { + top = MAX(r2y1, ybot); + bot = MIN(r2->y2, r1y1); + + if (top != bot) { + cur_band = new_reg->data->numRects; + + if (!pixman_region_append_non_o(new_reg, r2, r2_band_end, + top, bot)) + goto bail; + + COALESCE(new_reg, prev_band, cur_band); + } + } + ytop = r1y1; + } else { + ytop = r1y1; + } + + /* + * Now see if we've hit an intersecting band. The two bands only + * intersect if ybot > ytop + */ + ybot = MIN(r1->y2, r2->y2); + if (ybot > ytop) { + cur_band = new_reg->data->numRects; + + if (!(*overlap_func)(new_reg, r1, r1_band_end, r2, r2_band_end, + ytop, ybot)) { + goto bail; + } + + COALESCE(new_reg, prev_band, cur_band); + } + + /* + * If we've finished with a band (y2 == ybot) we skip forward + * in the region to the next band. + */ + if (r1->y2 == ybot) r1 = r1_band_end; + + if (r2->y2 == ybot) r2 = r2_band_end; + + } while (r1 != r1_end && r2 != r2_end); + + /* + * Deal with whichever region (if any) still has rectangles left. + * + * We only need to worry about banding and coalescing for the very first + * band left. After that, we can just group all remaining boxes, + * regardless of how many bands, into one final append to the list. 
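+ *
+ * For reference, the callers later in this file drive pixman_op in only
+ * three configurations (overlap_func, append_non1, append_non2):
+ *
+ *     intersect: pixman_region_intersect_o, FALSE, FALSE
+ *     union:     pixman_region_union_o,     TRUE,  TRUE
+ *     subtract:  pixman_region_subtract_o,  TRUE,  FALSE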
+ */ + + if ((r1 != r1_end) && append_non1) { + /* Do first non_overlap1Func call, which may be able to coalesce */ + FIND_BAND(r1, r1_band_end, r1_end, r1y1); + + cur_band = new_reg->data->numRects; + + if (!pixman_region_append_non_o(new_reg, r1, r1_band_end, + MAX(r1y1, ybot), r1->y2)) { + goto bail; + } + + COALESCE(new_reg, prev_band, cur_band); + + /* Just append the rest of the boxes */ + APPEND_REGIONS(new_reg, r1_band_end, r1_end); + } else if ((r2 != r2_end) && append_non2) { + /* Do first non_overlap2Func call, which may be able to coalesce */ + FIND_BAND(r2, r2_band_end, r2_end, r2y1); + + cur_band = new_reg->data->numRects; + + if (!pixman_region_append_non_o(new_reg, r2, r2_band_end, + MAX(r2y1, ybot), r2->y2)) { + goto bail; + } + + COALESCE(new_reg, prev_band, cur_band); + + /* Append rest of boxes */ + APPEND_REGIONS(new_reg, r2_band_end, r2_end); + } + + free(old_data); + + if (!(numRects = new_reg->data->numRects)) { + FREE_DATA(new_reg); + new_reg->data = pixman_region_empty_data; + } else if (numRects == 1) { + new_reg->extents = *PIXREGION_BOXPTR(new_reg); + FREE_DATA(new_reg); + new_reg->data = (region_data_type_t *)NULL; + } else { + DOWNSIZE(new_reg, numRects); + } + + return TRUE; + +bail: + free(old_data); + + return pixman_break(new_reg); +} + +/*- + *----------------------------------------------------------------------- + * pixman_set_extents -- + * Reset the extents of a region to what they should be. Called by + * pixman_region_subtract and pixman_region_intersect as they can't + * figure it out along the way or do so easily, as pixman_region_union can. + * + * Results: + * None. + * + * Side Effects: + * The region's 'extents' structure is overwritten. + * + *----------------------------------------------------------------------- + */ +static void pixman_set_extents(region_type_t *region) +{ + box_type_t *box, *box_end; + + if (!region->data) return; + + if (!region->data->size) { + region->extents.x2 = region->extents.x1; + region->extents.y2 = region->extents.y1; + return; + } + + box = PIXREGION_BOXPTR(region); + box_end = PIXREGION_END(region); + + /* + * Since box is the first rectangle in the region, it must have the + * smallest y1 and since box_end is the last rectangle in the region, + * it must have the largest y2, because of banding. Initialize x1 and + * x2 from box and box_end, resp., as good things to initialize them + * to... + */ + region->extents.x1 = box->x1; + region->extents.y1 = box->y1; + region->extents.x2 = box_end->x2; + region->extents.y2 = box_end->y2; + + critical_if_fail(region->extents.y1 < region->extents.y2); + + while (box <= box_end) { + if (box->x1 < region->extents.x1) region->extents.x1 = box->x1; + if (box->x2 > region->extents.x2) region->extents.x2 = box->x2; + box++; + } + + critical_if_fail(region->extents.x1 < region->extents.x2); +} + +/*====================================================================== + * Region Intersection + *====================================================================*/ +/*- + *----------------------------------------------------------------------- + * pixman_region_intersect_o -- + * Handle an overlapping band for pixman_region_intersect. + * + * Results: + * TRUE if successful. + * + * Side Effects: + * Rectangles may be added to the region. 
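+ *
+ * A hand-worked band, for illustration only (coordinates are made up): with
+ *     r1 = { x:[0,10), x:[20,30) }   and   r2 = { x:[5,25) },
+ * the boxes added for this band are x:[5,10) and x:[20,25), both spanning
+ * the y1..y2 that pixman_op passes in.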
+ * + *----------------------------------------------------------------------- + */ +/*ARGSUSED*/ +static pixman_bool_t pixman_region_intersect_o( + region_type_t *region, box_type_t *r1, box_type_t *r1_end, box_type_t *r2, + box_type_t *r2_end, int y1, int y2) +{ + int x1; + int x2; + box_type_t *next_rect; + + next_rect = PIXREGION_TOP(region); + + critical_if_fail(y1 < y2); + critical_if_fail(r1 != r1_end && r2 != r2_end); + + do { + x1 = MAX(r1->x1, r2->x1); + x2 = MIN(r1->x2, r2->x2); + + /* + * If there's any overlap between the two rectangles, add that + * overlap to the new region. + */ + if (x1 < x2) NEWRECT(region, next_rect, x1, y1, x2, y2); + + /* + * Advance the pointer(s) with the leftmost right side, since the next + * rectangle on that list may still overlap the other region's + * current rectangle. + */ + if (r1->x2 == x2) { + r1++; + } + if (r2->x2 == x2) { + r2++; + } + } while ((r1 != r1_end) && (r2 != r2_end)); + + return TRUE; +} + +PIXMAN_EXPORT pixman_bool_t PREFIX(_intersect)(region_type_t *new_reg, + region_type_t *reg1, + region_type_t *reg2) +{ + GOOD(reg1); + GOOD(reg2); + GOOD(new_reg); + + /* check for trivial reject */ + if (PIXREGION_NIL(reg1) || PIXREGION_NIL(reg2) || + !EXTENTCHECK(®1->extents, ®2->extents)) { + /* Covers about 20% of all cases */ + FREE_DATA(new_reg); + new_reg->extents.x2 = new_reg->extents.x1; + new_reg->extents.y2 = new_reg->extents.y1; + if (PIXREGION_NAR(reg1) || PIXREGION_NAR(reg2)) { + new_reg->data = pixman_broken_data; + return FALSE; + } else { + new_reg->data = pixman_region_empty_data; + } + } else if (!reg1->data && !reg2->data) { + /* Covers about 80% of cases that aren't trivially rejected */ + new_reg->extents.x1 = MAX(reg1->extents.x1, reg2->extents.x1); + new_reg->extents.y1 = MAX(reg1->extents.y1, reg2->extents.y1); + new_reg->extents.x2 = MIN(reg1->extents.x2, reg2->extents.x2); + new_reg->extents.y2 = MIN(reg1->extents.y2, reg2->extents.y2); + + FREE_DATA(new_reg); + + new_reg->data = (region_data_type_t *)NULL; + } else if (!reg2->data && SUBSUMES(®2->extents, ®1->extents)) { + return PREFIX(_copy)(new_reg, reg1); + } else if (!reg1->data && SUBSUMES(®1->extents, ®2->extents)) { + return PREFIX(_copy)(new_reg, reg2); + } else if (reg1 == reg2) { + return PREFIX(_copy)(new_reg, reg1); + } else { + /* General purpose intersection */ + + if (!pixman_op(new_reg, reg1, reg2, pixman_region_intersect_o, FALSE, + FALSE)) + return FALSE; + + pixman_set_extents(new_reg); + } + + GOOD(new_reg); + return (TRUE); +} + +#define MERGERECT(r) \ + do { \ + if (r->x1 <= x2) { \ + /* Merge with current rectangle */ \ + if (x2 < r->x2) x2 = r->x2; \ + } else { \ + /* Add current rectangle, start new one */ \ + NEWRECT(region, next_rect, x1, y1, x2, y2); \ + x1 = r->x1; \ + x2 = r->x2; \ + } \ + r++; \ + } while (0) + +/*====================================================================== + * Region Union + *====================================================================*/ + +/*- + *----------------------------------------------------------------------- + * pixman_region_union_o -- + * Handle an overlapping band for the union operation. Picks the + * left-most rectangle each time and merges it into the region. + * + * Results: + * TRUE if successful. + * + * Side Effects: + * region is overwritten. + * overlap is set to TRUE if any boxes overlap. 
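+ *
+ * A hand-worked band, for illustration only (coordinates are made up):
+ * merging r1 = { x:[0,10) } with r2 = { x:[5,15), x:[20,25) } yields
+ * x:[0,15) and x:[20,25) for this band; [0,10) and [5,15) are folded into
+ * one rectangle because they overlap in x.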
+ * + *----------------------------------------------------------------------- + */ +static pixman_bool_t pixman_region_union_o(region_type_t *region, + box_type_t *r1, box_type_t *r1_end, + box_type_t *r2, box_type_t *r2_end, + int y1, int y2) +{ + box_type_t *next_rect; + int x1; /* left and right side of current union */ + int x2; + + critical_if_fail(y1 < y2); + critical_if_fail(r1 != r1_end && r2 != r2_end); + + next_rect = PIXREGION_TOP(region); + + /* Start off current rectangle */ + if (r1->x1 < r2->x1) { + x1 = r1->x1; + x2 = r1->x2; + r1++; + } else { + x1 = r2->x1; + x2 = r2->x2; + r2++; + } + while (r1 != r1_end && r2 != r2_end) { + if (r1->x1 < r2->x1) + MERGERECT(r1); + else + MERGERECT(r2); + } + + /* Finish off whoever (if any) is left */ + if (r1 != r1_end) { + do { + MERGERECT(r1); + } while (r1 != r1_end); + } else if (r2 != r2_end) { + do { + MERGERECT(r2); + } while (r2 != r2_end); + } + + /* Add current rectangle */ + NEWRECT(region, next_rect, x1, y1, x2, y2); + + return TRUE; +} + +PIXMAN_EXPORT pixman_bool_t PREFIX(_intersect_rect)(region_type_t *dest, + region_type_t *source, + int x, int y, + unsigned int width, + unsigned int height) +{ + region_type_t region; + + region.data = NULL; + region.extents.x1 = x; + region.extents.y1 = y; + region.extents.x2 = x + width; + region.extents.y2 = y + height; + + return PREFIX(_intersect)(dest, source, ®ion); +} + +PIXMAN_EXPORT pixman_bool_t PREFIX(_union)(region_type_t *new_reg, + region_type_t *reg1, + region_type_t *reg2); + +/* Convenience function for performing union of region with a + * single rectangle + */ +PIXMAN_EXPORT pixman_bool_t PREFIX(_union_rect)(region_type_t *dest, + region_type_t *source, int x, + int y, unsigned int width, + unsigned int height) +{ + region_type_t region; + + region.extents.x1 = x; + region.extents.y1 = y; + region.extents.x2 = x + width; + region.extents.y2 = y + height; + + if (!GOOD_RECT(®ion.extents)) { + if (BAD_RECT(®ion.extents)) + _pixman_log_error(FUNC, "Invalid rectangle passed"); + return PREFIX(_copy)(dest, source); + } + + region.data = NULL; + + return PREFIX(_union)(dest, source, ®ion); +} + +PIXMAN_EXPORT pixman_bool_t PREFIX(_union)(region_type_t *new_reg, + region_type_t *reg1, + region_type_t *reg2) +{ + /* Return TRUE if some overlap + * between reg1, reg2 + */ + GOOD(reg1); + GOOD(reg2); + GOOD(new_reg); + + /* checks all the simple cases */ + + /* + * Region 1 and 2 are the same + */ + if (reg1 == reg2) return PREFIX(_copy)(new_reg, reg1); + + /* + * Region 1 is empty + */ + if (PIXREGION_NIL(reg1)) { + if (PIXREGION_NAR(reg1)) return pixman_break(new_reg); + + if (new_reg != reg2) return PREFIX(_copy)(new_reg, reg2); + + return TRUE; + } + + /* + * Region 2 is empty + */ + if (PIXREGION_NIL(reg2)) { + if (PIXREGION_NAR(reg2)) return pixman_break(new_reg); + + if (new_reg != reg1) return PREFIX(_copy)(new_reg, reg1); + + return TRUE; + } + + /* + * Region 1 completely subsumes region 2 + */ + if (!reg1->data && SUBSUMES(®1->extents, ®2->extents)) { + if (new_reg != reg1) return PREFIX(_copy)(new_reg, reg1); + + return TRUE; + } + + /* + * Region 2 completely subsumes region 1 + */ + if (!reg2->data && SUBSUMES(®2->extents, ®1->extents)) { + if (new_reg != reg2) return PREFIX(_copy)(new_reg, reg2); + + return TRUE; + } + + if (!pixman_op(new_reg, reg1, reg2, pixman_region_union_o, TRUE, TRUE)) + return FALSE; + + new_reg->extents.x1 = MIN(reg1->extents.x1, reg2->extents.x1); + new_reg->extents.y1 = MIN(reg1->extents.y1, reg2->extents.y1); + new_reg->extents.x2 = 
MAX(reg1->extents.x2, reg2->extents.x2); + new_reg->extents.y2 = MAX(reg1->extents.y2, reg2->extents.y2); + + GOOD(new_reg); + + return TRUE; +} + +/*====================================================================== + * Region Subtraction + *====================================================================*/ + +/*- + *----------------------------------------------------------------------- + * pixman_region_subtract_o -- + * Overlapping band subtraction. x1 is the left-most point not yet + * checked. + * + * Results: + * TRUE if successful. + * + * Side Effects: + * region may have rectangles added to it. + * + *----------------------------------------------------------------------- + */ +/*ARGSUSED*/ +static pixman_bool_t pixman_region_subtract_o( + region_type_t *region, box_type_t *r1, box_type_t *r1_end, box_type_t *r2, + box_type_t *r2_end, int y1, int y2) +{ + box_type_t *next_rect; + int x1; + + x1 = r1->x1; + + critical_if_fail(y1 < y2); + critical_if_fail(r1 != r1_end && r2 != r2_end); + + next_rect = PIXREGION_TOP(region); + + do { + if (r2->x2 <= x1) { + /* + * Subtrahend entirely to left of minuend: go to next subtrahend. + */ + r2++; + } else if (r2->x1 <= x1) { + /* + * Subtrahend precedes minuend: nuke left edge of minuend. + */ + x1 = r2->x2; + if (x1 >= r1->x2) { + /* + * Minuend completely covered: advance to next minuend and + * reset left fence to edge of new minuend. + */ + r1++; + if (r1 != r1_end) x1 = r1->x1; + } else { + /* + * Subtrahend now used up since it doesn't extend beyond + * minuend + */ + r2++; + } + } else if (r2->x1 < r1->x2) { + /* + * Left part of subtrahend covers part of minuend: add uncovered + * part of minuend to region and skip to next subtrahend. + */ + critical_if_fail(x1 < r2->x1); + NEWRECT(region, next_rect, x1, y1, r2->x1, y2); + + x1 = r2->x2; + if (x1 >= r1->x2) { + /* + * Minuend used up: advance to new... + */ + r1++; + if (r1 != r1_end) x1 = r1->x1; + } else { + /* + * Subtrahend used up + */ + r2++; + } + } else { + /* + * Minuend used up: add any remaining piece before advancing. + */ + if (r1->x2 > x1) NEWRECT(region, next_rect, x1, y1, r1->x2, y2); + + r1++; + + if (r1 != r1_end) x1 = r1->x1; + } + } while ((r1 != r1_end) && (r2 != r2_end)); + + /* + * Add remaining minuend rectangles to region. + */ + while (r1 != r1_end) { + critical_if_fail(x1 < r1->x2); + + NEWRECT(region, next_rect, x1, y1, r1->x2, y2); + + r1++; + if (r1 != r1_end) x1 = r1->x1; + } + return TRUE; +} + +/*- + *----------------------------------------------------------------------- + * pixman_region_subtract -- + * Subtract reg_s from reg_m and leave the result in reg_d. + * S stands for subtrahend, M for minuend and D for difference. + * + * Results: + * TRUE if successful. + * + * Side Effects: + * reg_d is overwritten. 
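+ *
+ * A rough usage sketch (the names are this file's PREFIX()-expanded static
+ * helpers; the coordinates are made up for illustration):
+ *
+ *     region_type_t m, s, d;
+ *     pixman_region_init_rect(&m, 0, 0, 100, 100);
+ *     pixman_region_init_rect(&s, 25, 25, 50, 50);
+ *     pixman_region_init(&d);
+ *     pixman_region_subtract(&d, &m, &s);
+ *     // d is now the 100x100 square minus the centred 50x50 hole,
+ *     // stored as three y-x bands (four rectangles in total).
+ *     pixman_region_fini(&d);
+ *     pixman_region_fini(&m);
+ *     pixman_region_fini(&s);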
+ * + *----------------------------------------------------------------------- + */ +PIXMAN_EXPORT pixman_bool_t PREFIX(_subtract)(region_type_t *reg_d, + region_type_t *reg_m, + region_type_t *reg_s) +{ + GOOD(reg_m); + GOOD(reg_s); + GOOD(reg_d); + + /* check for trivial rejects */ + if (PIXREGION_NIL(reg_m) || PIXREGION_NIL(reg_s) || + !EXTENTCHECK(®_m->extents, ®_s->extents)) { + if (PIXREGION_NAR(reg_s)) return pixman_break(reg_d); + + return PREFIX(_copy)(reg_d, reg_m); + } else if (reg_m == reg_s) { + FREE_DATA(reg_d); + reg_d->extents.x2 = reg_d->extents.x1; + reg_d->extents.y2 = reg_d->extents.y1; + reg_d->data = pixman_region_empty_data; + + return TRUE; + } + + /* Add those rectangles in region 1 that aren't in region 2, + do yucky subtraction for overlaps, and + just throw away rectangles in region 2 that aren't in region 1 */ + if (!pixman_op(reg_d, reg_m, reg_s, pixman_region_subtract_o, TRUE, FALSE)) + return FALSE; + + /* + * Can't alter reg_d's extents before we call pixman_op because + * it might be one of the source regions and pixman_op depends + * on the extents of those regions being unaltered. Besides, this + * way there's no checking against rectangles that will be nuked + * due to coalescing, so we have to examine fewer rectangles. + */ + pixman_set_extents(reg_d); + GOOD(reg_d); + return TRUE; +} +#if 0 +/*====================================================================== + * Region Inversion + *====================================================================*/ + +/*- + *----------------------------------------------------------------------- + * pixman_region_inverse -- + * Take a region and a box and return a region that is everything + * in the box but not in the region. The careful reader will note + * that this is the same as subtracting the region from the box... + * + * Results: + * TRUE. + * + * Side Effects: + * new_reg is overwritten. + * + *----------------------------------------------------------------------- + */ +PIXMAN_EXPORT pixman_bool_t +PREFIX (_inverse) (region_type_t *new_reg, /* Destination region */ + region_type_t *reg1, /* Region to invert */ + box_type_t * inv_rect) /* Bounding box for inversion */ +{ + region_type_t inv_reg; /* Quick and dirty region made from the + * bounding box */ + GOOD (reg1); + GOOD (new_reg); + + /* check for trivial rejects */ + if (PIXREGION_NIL (reg1) || !EXTENTCHECK (inv_rect, ®1->extents)) + { + if (PIXREGION_NAR (reg1)) + return pixman_break (new_reg); + + new_reg->extents = *inv_rect; + FREE_DATA (new_reg); + new_reg->data = (region_data_type_t *)NULL; + + return TRUE; + } + + /* Add those rectangles in region 1 that aren't in region 2, + * do yucky subtraction for overlaps, and + * just throw away rectangles in region 2 that aren't in region 1 + */ + inv_reg.extents = *inv_rect; + inv_reg.data = (region_data_type_t *)NULL; + if (!pixman_op (new_reg, &inv_reg, reg1, pixman_region_subtract_o, TRUE, FALSE)) + return FALSE; + + /* + * Can't alter new_reg's extents before we call pixman_op because + * it might be one of the source regions and pixman_op depends + * on the extents of those regions being unaltered. Besides, this + * way there's no checking against rectangles that will be nuked + * due to coalescing, so we have to examine fewer rectangles. + */ + pixman_set_extents (new_reg); + GOOD (new_reg); + return TRUE; +} +#endif +/* In time O(log n), locate the first box whose y2 is greater than y. + * Return @end if no such box exists. 
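+ *
+ * In effect this is the binary-search form of the obvious linear scan
+ * (sketch, same arguments as the function below):
+ *
+ *     box_type_t *b = begin;
+ *     while (b != end && b->y2 <= y)
+ *         b++;
+ *     // b is what find_box_for_y(begin, end, y) returns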
+ */ +static box_type_t *find_box_for_y(box_type_t *begin, box_type_t *end, int y) +{ + box_type_t *mid; + + if (end == begin) return end; + + if (end - begin == 1) { + if (begin->y2 > y) + return begin; + else + return end; + } + + mid = begin + (end - begin) / 2; + if (mid->y2 > y) { + /* If no box is found in [begin, mid], the function + * will return @mid, which is then known to be the + * correct answer. + */ + return find_box_for_y(begin, mid, y); + } else { + return find_box_for_y(mid, end, y); + } +} + +/* + * rect_in(region, rect) + * This routine takes a pointer to a region and a pointer to a box + * and determines if the box is outside/inside/partly inside the region. + * + * The idea is to travel through the list of rectangles trying to cover the + * passed box with them. Anytime a piece of the rectangle isn't covered + * by a band of rectangles, part_out is set TRUE. Any time a rectangle in + * the region covers part of the box, part_in is set TRUE. The process ends + * when either the box has been completely covered (we reached a band that + * doesn't overlap the box, part_in is TRUE and part_out is false), the + * box has been partially covered (part_in == part_out == TRUE -- because of + * the banding, the first time this is true we know the box is only + * partially in the region) or is outside the region (we reached a band + * that doesn't overlap the box at all and part_in is false) + */ +PIXMAN_EXPORT pixman_region_overlap_t + PREFIX(_contains_rectangle)(region_type_t *region, box_type_t *prect) +{ + box_type_t *pbox; + box_type_t *pbox_end; + int part_in, part_out; + int numRects; + int x, y; + + GOOD(region); + + numRects = PIXREGION_NUMRECTS(region); + + /* useful optimization */ + if (!numRects || !EXTENTCHECK(®ion->extents, prect)) + return (PIXMAN_REGION_OUT); + + if (numRects == 1) { + /* We know that it must be PIXMAN_REGION_IN or PIXMAN_REGION_PART */ + if (SUBSUMES(®ion->extents, prect)) + return (PIXMAN_REGION_IN); + else + return (PIXMAN_REGION_PART); + } + + part_out = FALSE; + part_in = FALSE; + + /* (x,y) starts at upper left of rect, moving to the right and down */ + x = prect->x1; + y = prect->y1; + + /* can stop when both part_out and part_in are TRUE, or we reach prect->y2 + */ + for (pbox = PIXREGION_BOXPTR(region), pbox_end = pbox + numRects; + pbox != pbox_end; pbox++) { + /* getting up to speed or skipping remainder of band */ + if (pbox->y2 <= y) { + if ((pbox = find_box_for_y(pbox, pbox_end, y)) == pbox_end) break; + } + + if (pbox->y1 > y) { + part_out = TRUE; /* missed part of rectangle above */ + if (part_in || (pbox->y1 >= prect->y2)) break; + y = pbox->y1; /* x guaranteed to be == prect->x1 */ + } + + if (pbox->x2 <= x) continue; /* not far enough over yet */ + + if (pbox->x1 > x) { + part_out = TRUE; /* missed part of rectangle to left */ + if (part_in) break; + } + + if (pbox->x1 < prect->x2) { + part_in = TRUE; /* definitely overlap */ + if (part_out) break; + } + + if (pbox->x2 >= prect->x2) { + y = pbox->y2; /* finished with this band */ + if (y >= prect->y2) break; + x = prect->x1; /* reset x out to left again */ + } else { + /* + * Because boxes in a band are maximal width, if the first box + * to overlap the rectangle doesn't completely cover it in that + * band, the rectangle must be partially out, since some of it + * will be uncovered in that band. part_in will have been set true + * by now... 
+ */ + part_out = TRUE; + break; + } + } + + if (part_in) { + if (y < prect->y2) + return PIXMAN_REGION_PART; + else + return PIXMAN_REGION_IN; + } else { + return PIXMAN_REGION_OUT; + } +} + +/* PREFIX(_translate) (region, x, y) + * translates in place + */ + +PIXMAN_EXPORT void PREFIX(_translate)(region_type_t *region, int x, int y) +{ + overflow_int_t x1, x2, y1, y2; + int nbox; + box_type_t * pbox; + + GOOD(region); + region->extents.x1 = x1 = region->extents.x1 + x; + region->extents.y1 = y1 = region->extents.y1 + y; + region->extents.x2 = x2 = region->extents.x2 + x; + region->extents.y2 = y2 = region->extents.y2 + y; + + if (((x1 - PIXMAN_REGION_MIN) | (y1 - PIXMAN_REGION_MIN) | + (PIXMAN_REGION_MAX - x2) | (PIXMAN_REGION_MAX - y2)) >= 0) { + if (region->data && (nbox = region->data->numRects)) { + for (pbox = PIXREGION_BOXPTR(region); nbox--; pbox++) { + pbox->x1 += x; + pbox->y1 += y; + pbox->x2 += x; + pbox->y2 += y; + } + } + return; + } + + if (((x2 - PIXMAN_REGION_MIN) | (y2 - PIXMAN_REGION_MIN) | + (PIXMAN_REGION_MAX - x1) | (PIXMAN_REGION_MAX - y1)) <= 0) { + region->extents.x2 = region->extents.x1; + region->extents.y2 = region->extents.y1; + FREE_DATA(region); + region->data = pixman_region_empty_data; + return; + } + + if (x1 < PIXMAN_REGION_MIN) + region->extents.x1 = PIXMAN_REGION_MIN; + else if (x2 > PIXMAN_REGION_MAX) + region->extents.x2 = PIXMAN_REGION_MAX; + + if (y1 < PIXMAN_REGION_MIN) + region->extents.y1 = PIXMAN_REGION_MIN; + else if (y2 > PIXMAN_REGION_MAX) + region->extents.y2 = PIXMAN_REGION_MAX; + + if (region->data && (nbox = region->data->numRects)) { + box_type_t *pbox_out; + + for (pbox_out = pbox = PIXREGION_BOXPTR(region); nbox--; pbox++) { + pbox_out->x1 = x1 = pbox->x1 + x; + pbox_out->y1 = y1 = pbox->y1 + y; + pbox_out->x2 = x2 = pbox->x2 + x; + pbox_out->y2 = y2 = pbox->y2 + y; + + if (((x2 - PIXMAN_REGION_MIN) | (y2 - PIXMAN_REGION_MIN) | + (PIXMAN_REGION_MAX - x1) | (PIXMAN_REGION_MAX - y1)) <= 0) { + region->data->numRects--; + continue; + } + + if (x1 < PIXMAN_REGION_MIN) + pbox_out->x1 = PIXMAN_REGION_MIN; + else if (x2 > PIXMAN_REGION_MAX) + pbox_out->x2 = PIXMAN_REGION_MAX; + + if (y1 < PIXMAN_REGION_MIN) + pbox_out->y1 = PIXMAN_REGION_MIN; + else if (y2 > PIXMAN_REGION_MAX) + pbox_out->y2 = PIXMAN_REGION_MAX; + + pbox_out++; + } + + if (pbox_out != pbox) { + if (region->data->numRects == 1) { + region->extents = *PIXREGION_BOXPTR(region); + FREE_DATA(region); + region->data = (region_data_type_t *)NULL; + } else { + pixman_set_extents(region); + } + } + } + + GOOD(region); +} + +PIXMAN_EXPORT int PREFIX(_not_empty)(region_type_t *region) +{ + GOOD(region); + + return (!PIXREGION_NIL(region)); +} + +PIXMAN_EXPORT box_type_t *PREFIX(_extents)(region_type_t *region) +{ + GOOD(region); + + return (®ion->extents); +} + +typedef region_type_t VRegionPrivate; + +#include "vregion.h" + +V_BEGIN_NAMESPACE + +static VRegionPrivate regionPrivate = {{0, 0, 0, 0}, NULL}; + +struct VRegionData { + VRegionData() : ref(-1), rgn(®ionPrivate) {} + RefCount ref; + VRegionPrivate *rgn; +}; + +const VRegionData shared_empty; + +inline VRect box_to_rect(box_type_t *box) +{ + return {box->x1, box->y1, box->x2 - box->x1, box->y2 - box->y1}; +} + +void VRegion::cleanUp(VRegionData *x) +{ + if (x->rgn) { + PREFIX(_fini)(x->rgn); + delete x->rgn; + } + delete x; +} + +void VRegion::detach() +{ + if (d->ref.isShared()) *this = copy(); +} + +VRegion VRegion::copy() const +{ + VRegion r; + + r.d = new VRegionData; + r.d->rgn = new VRegionPrivate; + 
r.d->ref.setOwned(); + PREFIX(_init)(r.d->rgn); + if (d != &shared_empty) PREFIX(_copy)(r.d->rgn, d->rgn); + return r; +} + +VRegion::VRegion() : d(const_cast(&shared_empty)) {} + +VRegion::VRegion(int x, int y, int w, int h) +{ + VRegion tmp(VRect(x, y, w, h)); + tmp.d->ref.ref(); + d = tmp.d; +} + +VRegion::VRegion(const VRect &r) +{ + if (r.empty()) { + d = const_cast(&shared_empty); + } else { + d = new VRegionData; + d->rgn = new VRegionPrivate; + d->ref.setOwned(); + PREFIX(_init_rect)(d->rgn, r.left(), r.top(), r.width(), r.height()); + } +} + +VRegion::VRegion(const VRegion &r) +{ + d = r.d; + d->ref.ref(); +} + +VRegion::VRegion(VRegion &&other) : d(other.d) +{ + other.d = const_cast(&shared_empty); +} + +VRegion &VRegion::operator=(const VRegion &r) +{ + r.d->ref.ref(); + if (!d->ref.deref()) cleanUp(d); + + d = r.d; + return *this; +} + +inline VRegion &VRegion::operator=(VRegion &&other) +{ + if (!d->ref.deref()) cleanUp(d); + d = other.d; + other.d = const_cast(&shared_empty); + return *this; +} + +VRegion::~VRegion() +{ + if (!d->ref.deref()) cleanUp(d); +} + +bool VRegion::empty() const +{ + return d == &shared_empty || !PREFIX(_not_empty)(d->rgn); +} + +void VRegion::translate(const VPoint &p) +{ + if (p == VPoint() || empty()) return; + + detach(); + PREFIX(_translate)(d->rgn, p.x(), p.y()); +} + +VRegion VRegion::translated(const VPoint &p) const +{ + VRegion ret(*this); + ret.translate(p); + return ret; +} + +/* + * Returns \c true if this region is guaranteed to be fully contained in r. + */ +bool VRegion::within(const VRect &r1) const +{ + box_type_t *r2 = PREFIX(_extents)(d->rgn); + + return r2->x1 >= r1.left() && r2->x2 <= r1.right() && r2->y1 >= r1.top() && + r2->y2 <= r1.bottom(); +} + +bool VRegion::contains(const VRect &r) const +{ + box_type_t box = {r.left(), r.top(), r.right(), r.bottom()}; + + pixman_region_overlap_t res = PREFIX(_contains_rectangle)(d->rgn, &box); + if (res == PIXMAN_REGION_IN) return true; + return false; +} + +VRegion VRegion::united(const VRect &r) const +{ + if (empty()) return r; + + if (contains(r)) { + return *this; + } else if (within(r)) { + return r; + } else { + VRegion result; + result.detach(); + PREFIX(_union_rect) + (result.d->rgn, d->rgn, r.left(), r.top(), r.width(), r.height()); + return result; + } +} + +VRegion VRegion::united(const VRegion &r) const +{ + if (empty()) return r; + if (r.empty()) return *this; + if (d == r.d || PREFIX(_equal)(d->rgn, r.d->rgn)) return *this; + VRegion result; + result.detach(); + PREFIX(_union)(result.d->rgn, d->rgn, r.d->rgn); + return result; +} + +VRegion VRegion::intersected(const VRect &r) const +{ + if (empty() || r.empty()) return VRegion(); + + /* this is fully contained in r */ + if (within(r)) return *this; + + /* r is fully contained in this */ + if (contains(r)) return r; + + VRegion result; + result.detach(); + PREFIX(_intersect_rect) + (result.d->rgn, d->rgn, r.left(), r.top(), r.width(), r.height()); + return result; +} + +VRegion VRegion::intersected(const VRegion &r) const +{ + if (empty() || r.empty()) return VRegion(); + + VRegion result; + result.detach(); + PREFIX(_intersect)(result.d->rgn, d->rgn, r.d->rgn); + + return result; +} + +VRegion VRegion::subtracted(const VRegion &r) const +{ + if (empty() || r.empty()) return *this; + if (d == r.d || PREFIX(_equal)(d->rgn, r.d->rgn)) return VRegion(); + + VRegion result; + result.detach(); + PREFIX(_subtract)(result.d->rgn, d->rgn, r.d->rgn); + return result; +} + +int VRegion::rectCount() const +{ + if (empty()) return 0; 
+ return PREFIX(_n_rects)(d->rgn); +} + +VRect VRegion::rectAt(int index) const +{ + VRegionPrivate *reg = d->rgn; + if (!reg) return {}; + + box_type_t *box = PIXREGION_RECTS(reg) + index; + + return box_to_rect(box); +} + +VRegion VRegion::operator+(const VRect &r) const +{ + return united(r); +} + +VRegion VRegion::operator+(const VRegion &r) const +{ + return united(r); +} + +VRegion VRegion::operator-(const VRegion &r) const +{ + return subtracted(r); +} + +VRegion &VRegion::operator+=(const VRect &r) +{ + if (empty()) return *this = r; + if (r.empty()) return *this; + + if (contains(r)) { + return *this; + } else if (within(r)) { + return *this = r; + } else { + detach(); + PREFIX(_union_rect) + (d->rgn, d->rgn, r.left(), r.top(), r.width(), r.height()); + return *this; + } +} + +VRegion &VRegion::operator+=(const VRegion &r) +{ + if (empty()) return *this = r; + if (r.empty()) return *this; + if (d == r.d || PREFIX(_equal)(d->rgn, r.d->rgn)) return *this; + + detach(); + PREFIX(_union)(d->rgn, d->rgn, r.d->rgn); + return *this; +} + +VRegion &VRegion::operator-=(const VRegion &r) +{ + return *this = *this - r; +} + +bool VRegion::operator==(const VRegion &r) const +{ + if (empty()) return r.empty(); + if (r.empty()) return empty(); + + if (d == r.d) + return true; + else + return PREFIX(_equal)(d->rgn, r.d->rgn); +} + +VRect VRegion::boundingRect() const noexcept +{ + if (empty()) return {}; + return box_to_rect(&d->rgn->extents); +} + +inline bool rect_intersects(const VRect &r1, const VRect &r2) +{ + return (r1.right() >= r2.left() && r1.left() <= r2.right() && + r1.bottom() >= r2.top() && r1.top() <= r2.bottom()); +} + +bool VRegion::intersects(const VRegion &r) const +{ + if (empty() || r.empty()) return false; + + return PREFIX(_intersects)(d->rgn, r.d->rgn); +} + +VDebug &operator<<(VDebug &os, const VRegion &o) +{ + os << "[REGION: " + << "[bbox = " << o.boundingRect() << "]"; + os << "[rectCount = " << o.rectCount() << "]"; + os << "[rects = "; + for (int i = 0; i < o.rectCount(); i++) { + os << o.rectAt(i); + } + os << "]" + << "]"; + return os; +} + +V_END_NAMESPACE diff --git a/src/vector/pixman/vregion.h b/src/vector/pixman/vregion.h new file mode 100644 index 0000000..508b6c3 --- /dev/null +++ b/src/vector/pixman/vregion.h @@ -0,0 +1,87 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All rights reserved. + * + * Licensed under the Flora License, Version 1.1 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://floralicense.org/license/ + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef VREGION_H +#define VREGION_H +#include +#include +#include +#include +#include "vdebug.h" + +V_BEGIN_NAMESPACE + +struct VRegionData; + +class VRegion { +public: + VRegion(); + VRegion(int x, int y, int w, int h); + VRegion(const VRect &r); + VRegion(const VRegion ®ion); + VRegion(VRegion &&other); + ~VRegion(); + VRegion & operator=(const VRegion &); + VRegion & operator=(VRegion &&); + bool empty() const; + bool contains(const VRect &r) const; + VRegion united(const VRect &r) const; + VRegion united(const VRegion &r) const; + VRegion intersected(const VRect &r) const; + VRegion intersected(const VRegion &r) const; + VRegion subtracted(const VRegion &r) const; + void translate(const VPoint &p); + inline void translate(int dx, int dy); + VRegion translated(const VPoint &p) const; + inline VRegion translated(int dx, int dy) const; + int rectCount() const; + VRect rectAt(int index) const; + + VRegion operator+(const VRect &r) const; + VRegion operator+(const VRegion &r) const; + VRegion operator-(const VRegion &r) const; + VRegion &operator+=(const VRect &r); + VRegion &operator+=(const VRegion &r); + VRegion &operator-=(const VRegion &r); + + VRect boundingRect() const noexcept; + bool intersects(const VRegion ®ion) const; + + bool operator==(const VRegion &r) const; + inline bool operator!=(const VRegion &r) const { return !(operator==(r)); } + friend VDebug &operator<<(VDebug &os, const VRegion &o); + +private: + bool within(const VRect &r) const; + VRegion copy() const; + void detach(); + void cleanUp(VRegionData *x); + + struct VRegionData *d; +}; +inline void VRegion::translate(int dx, int dy) +{ + translate(VPoint(dx, dy)); +} + +inline VRegion VRegion::translated(int dx, int dy) const +{ + return translated(VPoint(dx, dy)); +} + +V_END_NAMESPACE + +#endif // VREGION_H diff --git a/src/vector/vregion.cpp b/src/vector/vregion.cpp deleted file mode 100644 index 5944b63..0000000 --- a/src/vector/vregion.cpp +++ /dev/null @@ -1,2086 +0,0 @@ -/* - * Copyright 1987, 1988, 1989, 1998 The Open Group - * - * Permission to use, copy, modify, distribute, and sell this software and its - * documentation for any purpose is hereby granted without fee, provided that - * the above copyright notice appear in all copies and that both that - * copyright notice and this permission notice appear in supporting - * documentation. - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * OPEN GROUP BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN - * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * Except as contained in this notice, the name of The Open Group shall not be - * used in advertising or otherwise to promote the sale, use or other dealings - * in this Software without prior written authorization from The Open Group. - * - * Copyright 1987, 1988, 1989 by - * Digital Equipment Corporation, Maynard, Massachusetts. 
- * - * All Rights Reserved - * - * Permission to use, copy, modify, and distribute this software and its - * documentation for any purpose and without fee is hereby granted, - * provided that the above copyright notice appear in all copies and that - * both that copyright notice and this permission notice appear in - * supporting documentation, and that the name of Digital not be - * used in advertising or publicity pertaining to distribution of the - * software without specific, written prior permission. - * - * DIGITAL DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING - * ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL - * DIGITAL BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR - * ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, - * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, - * ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS - * SOFTWARE. - * - * Copyright © 1998 Keith Packard - * - * Permission to use, copy, modify, distribute, and sell this software and its - * documentation for any purpose is hereby granted without fee, provided that - * the above copyright notice appear in all copies and that both that - * copyright notice and this permission notice appear in supporting - * documentation, and that the name of Keith Packard not be used in - * advertising or publicity pertaining to distribution of the software without - * specific, written prior permission. Keith Packard makes no - * representations about the suitability of this software for any purpose. It - * is provided "as is" without express or implied warranty. - * - * KEITH PACKARD DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, - * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO - * EVENT SHALL KEITH PACKARD BE LIABLE FOR ANY SPECIAL, INDIRECT OR - * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, - * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER - * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR - * PERFORMANCE OF THIS SOFTWARE. - */ - -#include -#include -#include -#include -#include - -#define critical_if_fail assert -#define PIXMAN_EXPORT static -#define FALSE 0 -#define TRUE 1 -#define FUNC "" -#define MIN(a, b) (a) < (b) ? (a) : (b) -#define MAX(a, b) (a) > (b) ? 
(a) : (b) - -typedef int pixman_bool_t; - -typedef struct pixman_rectangle pixman_rectangle_t; - -typedef struct pixman_box box_type_t; -typedef struct pixman_region_data region_data_type_t; -typedef struct pixman_region region_type_t; -typedef int64_t overflow_int_t; - -#define PREFIX(x) pixman_region##x - -#define PIXMAN_REGION_MAX INT32_MAX -#define PIXMAN_REGION_MIN INT32_MIN - -typedef struct { - int x, y; -} point_type_t; - -struct pixman_region_data { - long size; - long numRects; - /* box_type_t rects[size]; in memory but not explicitly declared */ -}; - -struct pixman_rectangle { - int32_t x, y; - uint32_t width, height; -}; - -struct pixman_box { - int32_t x1, y1, x2, y2; -}; - -struct pixman_region { - box_type_t extents; - region_data_type_t *data; -}; - -typedef enum { - PIXMAN_REGION_OUT, - PIXMAN_REGION_IN, - PIXMAN_REGION_PART -} pixman_region_overlap_t; - -static void _pixman_log_error(const char *function, const char *message) -{ - fprintf(stderr, - "*** BUG ***\n" - "In %s: %s\n" - "Set a breakpoint on '_pixman_log_error' to debug\n\n", - function, message); -} - -#define PIXREGION_NIL(reg) ((reg)->data && !(reg)->data->numRects) -/* not a region */ -#define PIXREGION_NAR(reg) ((reg)->data == pixman_broken_data) -#define PIXREGION_NUMRECTS(reg) ((reg)->data ? (reg)->data->numRects : 1) -#define PIXREGION_SIZE(reg) ((reg)->data ? (reg)->data->size : 0) -#define PIXREGION_RECTS(reg) \ - ((reg)->data ? (box_type_t *)((reg)->data + 1) : &(reg)->extents) -#define PIXREGION_BOXPTR(reg) ((box_type_t *)((reg)->data + 1)) -#define PIXREGION_BOX(reg, i) (&PIXREGION_BOXPTR(reg)[i]) -#define PIXREGION_TOP(reg) PIXREGION_BOX(reg, (reg)->data->numRects) -#define PIXREGION_END(reg) PIXREGION_BOX(reg, (reg)->data->numRects - 1) - -#define GOOD_RECT(rect) ((rect)->x1 < (rect)->x2 && (rect)->y1 < (rect)->y2) -#define BAD_RECT(rect) ((rect)->x1 > (rect)->x2 || (rect)->y1 > (rect)->y2) - -#ifdef DEBUG - -#define GOOD(reg) \ - do { \ - if (!PREFIX(_selfcheck(reg))) \ - _pixman_log_error(FUNC, "Malformed region " #reg); \ - } while (0) - -#else - -#define GOOD(reg) - -#endif - -static const box_type_t PREFIX(_empty_box_) = {0, 0, 0, 0}; -static const region_data_type_t PREFIX(_empty_data_) = {0, 0}; -#if defined(__llvm__) && !defined(__clang__) -static const volatile region_data_type_t PREFIX(_broken_data_) = {0, 0}; -#else -static const region_data_type_t PREFIX(_broken_data_) = {0, 0}; -#endif - -static box_type_t *pixman_region_empty_box = (box_type_t *)&PREFIX(_empty_box_); -static region_data_type_t *pixman_region_empty_data = - (region_data_type_t *)&PREFIX(_empty_data_); -static region_data_type_t *pixman_broken_data = - (region_data_type_t *)&PREFIX(_broken_data_); - -static pixman_bool_t pixman_break(region_type_t *region); - -/* - * The functions in this file implement the Region abstraction used extensively - * throughout the X11 sample server. A Region is simply a set of disjoint - * (non-overlapping) rectangles, plus an "extent" rectangle which is the - * smallest single rectangle that contains all the non-overlapping rectangles. - * - * A Region is implemented as a "y-x-banded" array of rectangles. This array - * imposes two degrees of order. First, all rectangles are sorted by top side - * y coordinate first (y1), and then by left side x coordinate (x1). - * - * Furthermore, the rectangles are grouped into "bands". Each rectangle in a - * band has the same top y coordinate (y1), and each has the same bottom y - * coordinate (y2). 
Thus all rectangles in a band differ only in their left - * and right side (x1 and x2). Bands are implicit in the array of rectangles: - * there is no separate list of band start pointers. - * - * The y-x band representation does not minimize rectangles. In particular, - * if a rectangle vertically crosses a band (the rectangle has scanlines in - * the y1 to y2 area spanned by the band), then the rectangle may be broken - * down into two or more smaller rectangles stacked one atop the other. - * - * ----------- ----------- - * | | | | band 0 - * | | -------- ----------- -------- - * | | | | in y-x banded | | | | band 1 - * | | | | form is | | | | - * ----------- | | ----------- -------- - * | | | | band 2 - * -------- -------- - * - * An added constraint on the rectangles is that they must cover as much - * horizontal area as possible: no two rectangles within a band are allowed - * to touch. - * - * Whenever possible, bands will be merged together to cover a greater vertical - * distance (and thus reduce the number of rectangles). Two bands can be merged - * only if the bottom of one touches the top of the other and they have - * rectangles in the same places (of the same width, of course). - * - * Adam de Boor wrote most of the original region code. Joel McCormack - * substantially modified or rewrote most of the core arithmetic routines, and - * added pixman_region_validate in order to support several speed improvements - * to pixman_region_validate_tree. Bob Scheifler changed the representation - * to be more compact when empty or a single rectangle, and did a bunch of - * gratuitous reformatting. Carl Worth did further gratuitous reformatting - * while re-merging the server and client region code into libpixregion. - * Soren Sandmann did even more gratuitous reformatting. 
- */ - -/* true iff two Boxes overlap */ -#define EXTENTCHECK(r1, r2) \ - (!(((r1)->x2 <= (r2)->x1) || ((r1)->x1 >= (r2)->x2) || \ - ((r1)->y2 <= (r2)->y1) || ((r1)->y1 >= (r2)->y2))) - -/* true iff (x,y) is in Box */ -#define INBOX(r, x, y) \ - (((r)->x2 > x) && ((r)->x1 <= x) && ((r)->y2 > y) && ((r)->y1 <= y)) - -/* true iff Box r1 contains Box r2 */ -#define SUBSUMES(r1, r2) \ - (((r1)->x1 <= (r2)->x1) && ((r1)->x2 >= (r2)->x2) && \ - ((r1)->y1 <= (r2)->y1) && ((r1)->y2 >= (r2)->y2)) - -static size_t PIXREGION_SZOF(size_t n) -{ - size_t size = n * sizeof(box_type_t); - - if (n > UINT32_MAX / sizeof(box_type_t)) return 0; - - if (sizeof(region_data_type_t) > UINT32_MAX - size) return 0; - - return size + sizeof(region_data_type_t); -} - -static region_data_type_t *alloc_data(size_t n) -{ - size_t sz = PIXREGION_SZOF(n); - - if (!sz) return NULL; - - return (region_data_type_t *)malloc(sz); -} - -#define FREE_DATA(reg) \ - if ((reg)->data && (reg)->data->size) free((reg)->data) - -#define RECTALLOC_BAIL(region, n, bail) \ - do { \ - if (!(region)->data || \ - (((region)->data->numRects + (n)) > (region)->data->size)) { \ - if (!pixman_rect_alloc(region, n)) goto bail; \ - } \ - } while (0) - -#define RECTALLOC(region, n) \ - do { \ - if (!(region)->data || \ - (((region)->data->numRects + (n)) > (region)->data->size)) { \ - if (!pixman_rect_alloc(region, n)) { \ - return FALSE; \ - } \ - } \ - } while (0) - -#define ADDRECT(next_rect, nx1, ny1, nx2, ny2) \ - do { \ - next_rect->x1 = nx1; \ - next_rect->y1 = ny1; \ - next_rect->x2 = nx2; \ - next_rect->y2 = ny2; \ - next_rect++; \ - } while (0) - -#define NEWRECT(region, next_rect, nx1, ny1, nx2, ny2) \ - do { \ - if (!(region)->data || \ - ((region)->data->numRects == (region)->data->size)) { \ - if (!pixman_rect_alloc(region, 1)) return FALSE; \ - next_rect = PIXREGION_TOP(region); \ - } \ - ADDRECT(next_rect, nx1, ny1, nx2, ny2); \ - region->data->numRects++; \ - critical_if_fail(region->data->numRects <= region->data->size); \ - } while (0) - -#define DOWNSIZE(reg, numRects) \ - do { \ - if (((numRects) < ((reg)->data->size >> 1)) && \ - ((reg)->data->size > 50)) { \ - region_data_type_t *new_data; \ - size_t data_size = PIXREGION_SZOF(numRects); \ - \ - if (!data_size) { \ - new_data = NULL; \ - } else { \ - new_data = \ - (region_data_type_t *)realloc((reg)->data, data_size); \ - } \ - \ - if (new_data) { \ - new_data->size = (numRects); \ - (reg)->data = new_data; \ - } \ - } \ - } while (0) - -PIXMAN_EXPORT pixman_bool_t PREFIX(_equal)(region_type_t *reg1, - region_type_t *reg2) -{ - int i; - box_type_t *rects1; - box_type_t *rects2; - - if (reg1->extents.x1 != reg2->extents.x1) return FALSE; - - if (reg1->extents.x2 != reg2->extents.x2) return FALSE; - - if (reg1->extents.y1 != reg2->extents.y1) return FALSE; - - if (reg1->extents.y2 != reg2->extents.y2) return FALSE; - - if (PIXREGION_NUMRECTS(reg1) != PIXREGION_NUMRECTS(reg2)) return FALSE; - - rects1 = PIXREGION_RECTS(reg1); - rects2 = PIXREGION_RECTS(reg2); - - for (i = 0; i != PIXREGION_NUMRECTS(reg1); i++) { - if (rects1[i].x1 != rects2[i].x1) return FALSE; - - if (rects1[i].x2 != rects2[i].x2) return FALSE; - - if (rects1[i].y1 != rects2[i].y1) return FALSE; - - if (rects1[i].y2 != rects2[i].y2) return FALSE; - } - - return TRUE; -} - -// returns true if both region intersects -PIXMAN_EXPORT pixman_bool_t PREFIX(_intersects)(region_type_t *reg1, - region_type_t *reg2) -{ - box_type_t *rects1 = PIXREGION_RECTS(reg1); - box_type_t *rects2 = PIXREGION_RECTS(reg2); - for (int 
i = 0; i != PIXREGION_NUMRECTS(reg1); i++) { - for (int j = 0; j != PIXREGION_NUMRECTS(reg2); j++) { - if (EXTENTCHECK(rects1 + i, rects2 + j)) return TRUE; - } - } - return FALSE; -} - -int PREFIX(_print)(region_type_t *rgn) -{ - int num, size; - int i; - box_type_t *rects; - - num = PIXREGION_NUMRECTS(rgn); - size = PIXREGION_SIZE(rgn); - rects = PIXREGION_RECTS(rgn); - - fprintf(stderr, "num: %d size: %d\n", num, size); - fprintf(stderr, "extents: %d %d %d %d\n", rgn->extents.x1, rgn->extents.y1, - rgn->extents.x2, rgn->extents.y2); - - for (i = 0; i < num; i++) { - fprintf(stderr, "%d %d %d %d \n", rects[i].x1, rects[i].y1, rects[i].x2, - rects[i].y2); - } - - fprintf(stderr, "\n"); - - return (num); -} - -PIXMAN_EXPORT void PREFIX(_init)(region_type_t *region) -{ - region->extents = *pixman_region_empty_box; - region->data = pixman_region_empty_data; -} - -PIXMAN_EXPORT pixman_bool_t PREFIX(_union_rect)(region_type_t *dest, - region_type_t *source, int x, - int y, unsigned int width, - unsigned int height); -PIXMAN_EXPORT void PREFIX(_init_rect)(region_type_t *region, int x, int y, - unsigned int width, unsigned int height) -{ - PREFIX(_init)(region); - PREFIX(_union_rect)(region, region, x, y, width, height); -} - -PIXMAN_EXPORT void PREFIX(_fini)(region_type_t *region) -{ - GOOD(region); - FREE_DATA(region); -} - -PIXMAN_EXPORT int PREFIX(_n_rects)(region_type_t *region) -{ - return PIXREGION_NUMRECTS(region); -} - -static pixman_bool_t pixman_break(region_type_t *region) -{ - FREE_DATA(region); - - region->extents = *pixman_region_empty_box; - region->data = pixman_broken_data; - - return FALSE; -} - -static pixman_bool_t pixman_rect_alloc(region_type_t *region, int n) -{ - region_data_type_t *data; - - if (!region->data) { - n++; - region->data = alloc_data(n); - - if (!region->data) return pixman_break(region); - - region->data->numRects = 1; - *PIXREGION_BOXPTR(region) = region->extents; - } else if (!region->data->size) { - region->data = alloc_data(n); - - if (!region->data) return pixman_break(region); - - region->data->numRects = 0; - } else { - size_t data_size; - - if (n == 1) { - n = region->data->numRects; - if (n > 500) /* XXX pick numbers out of a hat */ - n = 250; - } - - n += region->data->numRects; - data_size = PIXREGION_SZOF(n); - - if (!data_size) { - data = NULL; - } else { - data = - (region_data_type_t *)realloc(region->data, PIXREGION_SZOF(n)); - } - - if (!data) return pixman_break(region); - - region->data = data; - } - - region->data->size = n; - - return TRUE; -} - -PIXMAN_EXPORT pixman_bool_t PREFIX(_copy)(region_type_t *dst, - region_type_t *src) -{ - GOOD(dst); - GOOD(src); - - if (dst == src) return TRUE; - - dst->extents = src->extents; - - if (!src->data || !src->data->size) { - FREE_DATA(dst); - dst->data = src->data; - return TRUE; - } - - if (!dst->data || (dst->data->size < src->data->numRects)) { - FREE_DATA(dst); - - dst->data = alloc_data(src->data->numRects); - - if (!dst->data) return pixman_break(dst); - - dst->data->size = src->data->numRects; - } - - dst->data->numRects = src->data->numRects; - - memmove((char *)PIXREGION_BOXPTR(dst), (char *)PIXREGION_BOXPTR(src), - dst->data->numRects * sizeof(box_type_t)); - - return TRUE; -} - -/*====================================================================== - * Generic Region Operator - *====================================================================*/ - -/*- - *----------------------------------------------------------------------- - * pixman_coalesce -- - * Attempt to merge the boxes 
in the current band with those in the - * previous one. We are guaranteed that the current band extends to - * the end of the rects array. Used only by pixman_op. - * - * Results: - * The new index for the previous band. - * - * Side Effects: - * If coalescing takes place: - * - rectangles in the previous band will have their y2 fields - * altered. - * - region->data->numRects will be decreased. - * - *----------------------------------------------------------------------- - */ -static inline int pixman_coalesce( - region_type_t *region, /* Region to coalesce */ - int prev_start, /* Index of start of previous band */ - int cur_start) /* Index of start of current band */ -{ - box_type_t *prev_box; /* Current box in previous band */ - box_type_t *cur_box; /* Current box in current band */ - int numRects; /* Number rectangles in both bands */ - int y2; /* Bottom of current band */ - - /* - * Figure out how many rectangles are in the band. - */ - numRects = cur_start - prev_start; - critical_if_fail(numRects == region->data->numRects - cur_start); - - if (!numRects) return cur_start; - - /* - * The bands may only be coalesced if the bottom of the previous - * matches the top scanline of the current. - */ - prev_box = PIXREGION_BOX(region, prev_start); - cur_box = PIXREGION_BOX(region, cur_start); - if (prev_box->y2 != cur_box->y1) return cur_start; - - /* - * Make sure the bands have boxes in the same places. This - * assumes that boxes have been added in such a way that they - * cover the most area possible. I.e. two boxes in a band must - * have some horizontal space between them. - */ - y2 = cur_box->y2; - - do { - if ((prev_box->x1 != cur_box->x1) || (prev_box->x2 != cur_box->x2)) - return (cur_start); - - prev_box++; - cur_box++; - numRects--; - } while (numRects); - - /* - * The bands may be merged, so set the bottom y of each box - * in the previous band to the bottom y of the current band. - */ - numRects = cur_start - prev_start; - region->data->numRects -= numRects; - - do { - prev_box--; - prev_box->y2 = y2; - numRects--; - } while (numRects); - - return prev_start; -} - -/* Quicky macro to avoid trivial reject procedure calls to pixman_coalesce */ - -#define COALESCE(new_reg, prev_band, cur_band) \ - do { \ - if (cur_band - prev_band == new_reg->data->numRects - cur_band) \ - prev_band = pixman_coalesce(new_reg, prev_band, cur_band); \ - else \ - prev_band = cur_band; \ - } while (0) - -/*- - *----------------------------------------------------------------------- - * pixman_region_append_non_o -- - * Handle a non-overlapping band for the union and subtract operations. - * Just adds the (top/bottom-clipped) rectangles into the region. - * Doesn't have to check for subsumption or anything. - * - * Results: - * None. - * - * Side Effects: - * region->data->numRects is incremented and the rectangles overwritten - * with the rectangles we're passed. 
- * - *----------------------------------------------------------------------- - */ -static inline pixman_bool_t pixman_region_append_non_o(region_type_t *region, - box_type_t * r, - box_type_t * r_end, - int y1, int y2) -{ - box_type_t *next_rect; - int new_rects; - - new_rects = r_end - r; - - critical_if_fail(y1 < y2); - critical_if_fail(new_rects != 0); - - /* Make sure we have enough space for all rectangles to be added */ - RECTALLOC(region, new_rects); - next_rect = PIXREGION_TOP(region); - region->data->numRects += new_rects; - - do { - critical_if_fail(r->x1 < r->x2); - ADDRECT(next_rect, r->x1, y1, r->x2, y2); - r++; - } while (r != r_end); - - return TRUE; -} - -#define FIND_BAND(r, r_band_end, r_end, ry1) \ - do { \ - ry1 = r->y1; \ - r_band_end = r + 1; \ - while ((r_band_end != r_end) && (r_band_end->y1 == ry1)) { \ - r_band_end++; \ - } \ - } while (0) - -#define APPEND_REGIONS(new_reg, r, r_end) \ - do { \ - int new_rects; \ - if ((new_rects = r_end - r)) { \ - RECTALLOC_BAIL(new_reg, new_rects, bail); \ - memmove((char *)PIXREGION_TOP(new_reg), (char *)r, \ - new_rects * sizeof(box_type_t)); \ - new_reg->data->numRects += new_rects; \ - } \ - } while (0) - -/*- - *----------------------------------------------------------------------- - * pixman_op -- - * Apply an operation to two regions. Called by pixman_region_union, - *pixman_region_inverse, pixman_region_subtract, pixman_region_intersect.... - *Both regions MUST have at least one rectangle, and cannot be the same object. - * - * Results: - * TRUE if successful. - * - * Side Effects: - * The new region is overwritten. - * overlap set to TRUE if overlap_func ever returns TRUE. - * - * Notes: - * The idea behind this function is to view the two regions as sets. - * Together they cover a rectangle of area that this function divides - * into horizontal bands where points are covered only by one region - * or by both. For the first case, the non_overlap_func is called with - * each the band and the band's upper and lower extents. For the - * second, the overlap_func is called to process the entire band. It - * is responsible for clipping the rectangles in the band, though - * this function provides the boundaries. - * At the end of each band, the new region is coalesced, if possible, - * to reduce the number of rectangles in the region. - * - *----------------------------------------------------------------------- - */ - -typedef pixman_bool_t (*overlap_proc_ptr)(region_type_t *region, box_type_t *r1, - box_type_t *r1_end, box_type_t *r2, - box_type_t *r2_end, int y1, int y2); - -static pixman_bool_t pixman_op( - region_type_t * new_reg, /* Place to store result */ - region_type_t * reg1, /* First region in operation */ - region_type_t * reg2, /* 2d region in operation */ - overlap_proc_ptr overlap_func, /* Function to call for over- - * lapping bands */ - int append_non1, /* Append non-overlapping bands - * in region 1 ? - */ - int append_non2 /* Append non-overlapping bands - * in region 2 ? 
- */ -) -{ - box_type_t * r1; /* Pointer into first region */ - box_type_t * r2; /* Pointer into 2d region */ - box_type_t * r1_end; /* End of 1st region */ - box_type_t * r2_end; /* End of 2d region */ - int ybot; /* Bottom of intersection */ - int ytop; /* Top of intersection */ - region_data_type_t *old_data; /* Old data for new_reg */ - int prev_band; /* Index of start of - * previous band in new_reg */ - int cur_band; /* Index of start of current - * band in new_reg */ - box_type_t *r1_band_end; /* End of current band in r1 */ - box_type_t *r2_band_end; /* End of current band in r2 */ - int top; /* Top of non-overlapping band */ - int bot; /* Bottom of non-overlapping band*/ - int r1y1; /* Temps for r1->y1 and r2->y1 */ - int r2y1; - int new_size; - int numRects; - - /* - * Break any region computed from a broken region - */ - if (PIXREGION_NAR(reg1) || PIXREGION_NAR(reg2)) - return pixman_break(new_reg); - - /* - * Initialization: - * set r1, r2, r1_end and r2_end appropriately, save the rectangles - * of the destination region until the end in case it's one of - * the two source regions, then mark the "new" region empty, allocating - * another array of rectangles for it to use. - */ - - r1 = PIXREGION_RECTS(reg1); - new_size = PIXREGION_NUMRECTS(reg1); - r1_end = r1 + new_size; - - numRects = PIXREGION_NUMRECTS(reg2); - r2 = PIXREGION_RECTS(reg2); - r2_end = r2 + numRects; - - critical_if_fail(r1 != r1_end); - critical_if_fail(r2 != r2_end); - - old_data = (region_data_type_t *)NULL; - - if (((new_reg == reg1) && (new_size > 1)) || - ((new_reg == reg2) && (numRects > 1))) { - old_data = new_reg->data; - new_reg->data = pixman_region_empty_data; - } - - /* guess at new size */ - if (numRects > new_size) new_size = numRects; - - new_size <<= 1; - - if (!new_reg->data) - new_reg->data = pixman_region_empty_data; - else if (new_reg->data->size) - new_reg->data->numRects = 0; - - if (new_size > new_reg->data->size) { - if (!pixman_rect_alloc(new_reg, new_size)) { - free(old_data); - return FALSE; - } - } - - /* - * Initialize ybot. - * In the upcoming loop, ybot and ytop serve different functions depending - * on whether the band being handled is an overlapping or non-overlapping - * band. - * In the case of a non-overlapping band (only one of the regions - * has points in the band), ybot is the bottom of the most recent - * intersection and thus clips the top of the rectangles in that band. - * ytop is the top of the next intersection between the two regions and - * serves to clip the bottom of the rectangles in the current band. - * For an overlapping band (where the two regions intersect), ytop clips - * the top of the rectangles of both regions and ybot clips the bottoms. - */ - - ybot = MIN(r1->y1, r2->y1); - - /* - * prev_band serves to mark the start of the previous band so rectangles - * can be coalesced into larger rectangles. qv. pixman_coalesce, above. - * In the beginning, there is no previous band, so prev_band == cur_band - * (cur_band is set later on, of course, but the first band will always - * start at index 0). prev_band and cur_band must be indices because of - * the possible expansion, and resultant moving, of the new region's - * array of rectangles. - */ - prev_band = 0; - - do { - /* - * This algorithm proceeds one source-band (as opposed to a - * destination band, which is determined by where the two regions - * intersect) at a time. 
r1_band_end and r2_band_end serve to mark the - * rectangle after the last one in the current band for their - * respective regions. - */ - critical_if_fail(r1 != r1_end); - critical_if_fail(r2 != r2_end); - - FIND_BAND(r1, r1_band_end, r1_end, r1y1); - FIND_BAND(r2, r2_band_end, r2_end, r2y1); - - /* - * First handle the band that doesn't intersect, if any. - * - * Note that attention is restricted to one band in the - * non-intersecting region at once, so if a region has n - * bands between the current position and the next place it overlaps - * the other, this entire loop will be passed through n times. - */ - if (r1y1 < r2y1) { - if (append_non1) { - top = MAX(r1y1, ybot); - bot = MIN(r1->y2, r2y1); - if (top != bot) { - cur_band = new_reg->data->numRects; - if (!pixman_region_append_non_o(new_reg, r1, r1_band_end, - top, bot)) - goto bail; - COALESCE(new_reg, prev_band, cur_band); - } - } - ytop = r2y1; - } else if (r2y1 < r1y1) { - if (append_non2) { - top = MAX(r2y1, ybot); - bot = MIN(r2->y2, r1y1); - - if (top != bot) { - cur_band = new_reg->data->numRects; - - if (!pixman_region_append_non_o(new_reg, r2, r2_band_end, - top, bot)) - goto bail; - - COALESCE(new_reg, prev_band, cur_band); - } - } - ytop = r1y1; - } else { - ytop = r1y1; - } - - /* - * Now see if we've hit an intersecting band. The two bands only - * intersect if ybot > ytop - */ - ybot = MIN(r1->y2, r2->y2); - if (ybot > ytop) { - cur_band = new_reg->data->numRects; - - if (!(*overlap_func)(new_reg, r1, r1_band_end, r2, r2_band_end, - ytop, ybot)) { - goto bail; - } - - COALESCE(new_reg, prev_band, cur_band); - } - - /* - * If we've finished with a band (y2 == ybot) we skip forward - * in the region to the next band. - */ - if (r1->y2 == ybot) r1 = r1_band_end; - - if (r2->y2 == ybot) r2 = r2_band_end; - - } while (r1 != r1_end && r2 != r2_end); - - /* - * Deal with whichever region (if any) still has rectangles left. - * - * We only need to worry about banding and coalescing for the very first - * band left. After that, we can just group all remaining boxes, - * regardless of how many bands, into one final append to the list. 
- */ - - if ((r1 != r1_end) && append_non1) { - /* Do first non_overlap1Func call, which may be able to coalesce */ - FIND_BAND(r1, r1_band_end, r1_end, r1y1); - - cur_band = new_reg->data->numRects; - - if (!pixman_region_append_non_o(new_reg, r1, r1_band_end, - MAX(r1y1, ybot), r1->y2)) { - goto bail; - } - - COALESCE(new_reg, prev_band, cur_band); - - /* Just append the rest of the boxes */ - APPEND_REGIONS(new_reg, r1_band_end, r1_end); - } else if ((r2 != r2_end) && append_non2) { - /* Do first non_overlap2Func call, which may be able to coalesce */ - FIND_BAND(r2, r2_band_end, r2_end, r2y1); - - cur_band = new_reg->data->numRects; - - if (!pixman_region_append_non_o(new_reg, r2, r2_band_end, - MAX(r2y1, ybot), r2->y2)) { - goto bail; - } - - COALESCE(new_reg, prev_band, cur_band); - - /* Append rest of boxes */ - APPEND_REGIONS(new_reg, r2_band_end, r2_end); - } - - free(old_data); - - if (!(numRects = new_reg->data->numRects)) { - FREE_DATA(new_reg); - new_reg->data = pixman_region_empty_data; - } else if (numRects == 1) { - new_reg->extents = *PIXREGION_BOXPTR(new_reg); - FREE_DATA(new_reg); - new_reg->data = (region_data_type_t *)NULL; - } else { - DOWNSIZE(new_reg, numRects); - } - - return TRUE; - -bail: - free(old_data); - - return pixman_break(new_reg); -} - -/*- - *----------------------------------------------------------------------- - * pixman_set_extents -- - * Reset the extents of a region to what they should be. Called by - * pixman_region_subtract and pixman_region_intersect as they can't - * figure it out along the way or do so easily, as pixman_region_union can. - * - * Results: - * None. - * - * Side Effects: - * The region's 'extents' structure is overwritten. - * - *----------------------------------------------------------------------- - */ -static void pixman_set_extents(region_type_t *region) -{ - box_type_t *box, *box_end; - - if (!region->data) return; - - if (!region->data->size) { - region->extents.x2 = region->extents.x1; - region->extents.y2 = region->extents.y1; - return; - } - - box = PIXREGION_BOXPTR(region); - box_end = PIXREGION_END(region); - - /* - * Since box is the first rectangle in the region, it must have the - * smallest y1 and since box_end is the last rectangle in the region, - * it must have the largest y2, because of banding. Initialize x1 and - * x2 from box and box_end, resp., as good things to initialize them - * to... - */ - region->extents.x1 = box->x1; - region->extents.y1 = box->y1; - region->extents.x2 = box_end->x2; - region->extents.y2 = box_end->y2; - - critical_if_fail(region->extents.y1 < region->extents.y2); - - while (box <= box_end) { - if (box->x1 < region->extents.x1) region->extents.x1 = box->x1; - if (box->x2 > region->extents.x2) region->extents.x2 = box->x2; - box++; - } - - critical_if_fail(region->extents.x1 < region->extents.x2); -} - -/*====================================================================== - * Region Intersection - *====================================================================*/ -/*- - *----------------------------------------------------------------------- - * pixman_region_intersect_o -- - * Handle an overlapping band for pixman_region_intersect. - * - * Results: - * TRUE if successful. - * - * Side Effects: - * Rectangles may be added to the region. 
- *
- *-----------------------------------------------------------------------
- */
-/*ARGSUSED*/
-static pixman_bool_t pixman_region_intersect_o(
-    region_type_t *region, box_type_t *r1, box_type_t *r1_end, box_type_t *r2,
-    box_type_t *r2_end, int y1, int y2)
-{
-    int         x1;
-    int         x2;
-    box_type_t *next_rect;
-
-    next_rect = PIXREGION_TOP(region);
-
-    critical_if_fail(y1 < y2);
-    critical_if_fail(r1 != r1_end && r2 != r2_end);
-
-    do {
-        x1 = MAX(r1->x1, r2->x1);
-        x2 = MIN(r1->x2, r2->x2);
-
-        /*
-         * If there's any overlap between the two rectangles, add that
-         * overlap to the new region.
-         */
-        if (x1 < x2) NEWRECT(region, next_rect, x1, y1, x2, y2);
-
-        /*
-         * Advance the pointer(s) with the leftmost right side, since the next
-         * rectangle on that list may still overlap the other region's
-         * current rectangle.
-         */
-        if (r1->x2 == x2) {
-            r1++;
-        }
-        if (r2->x2 == x2) {
-            r2++;
-        }
-    } while ((r1 != r1_end) && (r2 != r2_end));
-
-    return TRUE;
-}
-
-PIXMAN_EXPORT pixman_bool_t PREFIX(_intersect)(region_type_t *new_reg,
-                                               region_type_t *reg1,
-                                               region_type_t *reg2)
-{
-    GOOD(reg1);
-    GOOD(reg2);
-    GOOD(new_reg);
-
-    /* check for trivial reject */
-    if (PIXREGION_NIL(reg1) || PIXREGION_NIL(reg2) ||
-        !EXTENTCHECK(&reg1->extents, &reg2->extents)) {
-        /* Covers about 20% of all cases */
-        FREE_DATA(new_reg);
-        new_reg->extents.x2 = new_reg->extents.x1;
-        new_reg->extents.y2 = new_reg->extents.y1;
-        if (PIXREGION_NAR(reg1) || PIXREGION_NAR(reg2)) {
-            new_reg->data = pixman_broken_data;
-            return FALSE;
-        } else {
-            new_reg->data = pixman_region_empty_data;
-        }
-    } else if (!reg1->data && !reg2->data) {
-        /* Covers about 80% of cases that aren't trivially rejected */
-        new_reg->extents.x1 = MAX(reg1->extents.x1, reg2->extents.x1);
-        new_reg->extents.y1 = MAX(reg1->extents.y1, reg2->extents.y1);
-        new_reg->extents.x2 = MIN(reg1->extents.x2, reg2->extents.x2);
-        new_reg->extents.y2 = MIN(reg1->extents.y2, reg2->extents.y2);
-
-        FREE_DATA(new_reg);
-
-        new_reg->data = (region_data_type_t *)NULL;
-    } else if (!reg2->data && SUBSUMES(&reg2->extents, &reg1->extents)) {
-        return PREFIX(_copy)(new_reg, reg1);
-    } else if (!reg1->data && SUBSUMES(&reg1->extents, &reg2->extents)) {
-        return PREFIX(_copy)(new_reg, reg2);
-    } else if (reg1 == reg2) {
-        return PREFIX(_copy)(new_reg, reg1);
-    } else {
-        /* General purpose intersection */
-
-        if (!pixman_op(new_reg, reg1, reg2, pixman_region_intersect_o, FALSE,
-                       FALSE))
-            return FALSE;
-
-        pixman_set_extents(new_reg);
-    }
-
-    GOOD(new_reg);
-    return (TRUE);
-}
-
-#define MERGERECT(r)                                       \
-    do {                                                   \
-        if (r->x1 <= x2) {                                 \
-            /* Merge with current rectangle */             \
-            if (x2 < r->x2) x2 = r->x2;                    \
-        } else {                                           \
-            /* Add current rectangle, start new one */     \
-            NEWRECT(region, next_rect, x1, y1, x2, y2);    \
-            x1 = r->x1;                                    \
-            x2 = r->x2;                                    \
-        }                                                  \
-        r++;                                               \
-    } while (0)
-
-/*======================================================================
- *                          Region Union
- *====================================================================*/
-
-/*-
- *-----------------------------------------------------------------------
- * pixman_region_union_o --
- *     Handle an overlapping band for the union operation. Picks the
- *     left-most rectangle each time and merges it into the region.
- *
- * Results:
- *     TRUE if successful.
- *
- * Side Effects:
- *     region is overwritten.
- *     overlap is set to TRUE if any boxes overlap.
- *
- *-----------------------------------------------------------------------
- */
-static pixman_bool_t pixman_region_union_o(region_type_t *region,
-                                           box_type_t *r1, box_type_t *r1_end,
-                                           box_type_t *r2, box_type_t *r2_end,
-                                           int y1, int y2)
-{
-    box_type_t *next_rect;
-    int         x1; /* left and right side of current union */
-    int         x2;
-
-    critical_if_fail(y1 < y2);
-    critical_if_fail(r1 != r1_end && r2 != r2_end);
-
-    next_rect = PIXREGION_TOP(region);
-
-    /* Start off current rectangle */
-    if (r1->x1 < r2->x1) {
-        x1 = r1->x1;
-        x2 = r1->x2;
-        r1++;
-    } else {
-        x1 = r2->x1;
-        x2 = r2->x2;
-        r2++;
-    }
-    while (r1 != r1_end && r2 != r2_end) {
-        if (r1->x1 < r2->x1)
-            MERGERECT(r1);
-        else
-            MERGERECT(r2);
-    }
-
-    /* Finish off whoever (if any) is left */
-    if (r1 != r1_end) {
-        do {
-            MERGERECT(r1);
-        } while (r1 != r1_end);
-    } else if (r2 != r2_end) {
-        do {
-            MERGERECT(r2);
-        } while (r2 != r2_end);
-    }
-
-    /* Add current rectangle */
-    NEWRECT(region, next_rect, x1, y1, x2, y2);
-
-    return TRUE;
-}
-
-PIXMAN_EXPORT pixman_bool_t PREFIX(_intersect_rect)(region_type_t *dest,
-                                                    region_type_t *source,
-                                                    int x, int y,
-                                                    unsigned int width,
-                                                    unsigned int height)
-{
-    region_type_t region;
-
-    region.data = NULL;
-    region.extents.x1 = x;
-    region.extents.y1 = y;
-    region.extents.x2 = x + width;
-    region.extents.y2 = y + height;
-
-    return PREFIX(_intersect)(dest, source, &region);
-}
-
-PIXMAN_EXPORT pixman_bool_t PREFIX(_union)(region_type_t *new_reg,
-                                           region_type_t *reg1,
-                                           region_type_t *reg2);
-
-/* Convenience function for performing union of region with a
- * single rectangle
- */
-PIXMAN_EXPORT pixman_bool_t PREFIX(_union_rect)(region_type_t *dest,
-                                                region_type_t *source, int x,
-                                                int y, unsigned int width,
-                                                unsigned int height)
-{
-    region_type_t region;
-
-    region.extents.x1 = x;
-    region.extents.y1 = y;
-    region.extents.x2 = x + width;
-    region.extents.y2 = y + height;
-
-    if (!GOOD_RECT(&region.extents)) {
-        if (BAD_RECT(&region.extents))
-            _pixman_log_error(FUNC, "Invalid rectangle passed");
-        return PREFIX(_copy)(dest, source);
-    }
-
-    region.data = NULL;
-
-    return PREFIX(_union)(dest, source, &region);
-}
-
-PIXMAN_EXPORT pixman_bool_t PREFIX(_union)(region_type_t *new_reg,
-                                           region_type_t *reg1,
-                                           region_type_t *reg2)
-{
-    /* Return TRUE if some overlap
-     * between reg1, reg2
-     */
-    GOOD(reg1);
-    GOOD(reg2);
-    GOOD(new_reg);
-
-    /* checks all the simple cases */
-
-    /*
-     * Region 1 and 2 are the same
-     */
-    if (reg1 == reg2) return PREFIX(_copy)(new_reg, reg1);
-
-    /*
-     * Region 1 is empty
-     */
-    if (PIXREGION_NIL(reg1)) {
-        if (PIXREGION_NAR(reg1)) return pixman_break(new_reg);
-
-        if (new_reg != reg2) return PREFIX(_copy)(new_reg, reg2);
-
-        return TRUE;
-    }
-
-    /*
-     * Region 2 is empty
-     */
-    if (PIXREGION_NIL(reg2)) {
-        if (PIXREGION_NAR(reg2)) return pixman_break(new_reg);
-
-        if (new_reg != reg1) return PREFIX(_copy)(new_reg, reg1);
-
-        return TRUE;
-    }
-
-    /*
-     * Region 1 completely subsumes region 2
-     */
-    if (!reg1->data && SUBSUMES(&reg1->extents, &reg2->extents)) {
-        if (new_reg != reg1) return PREFIX(_copy)(new_reg, reg1);
-
-        return TRUE;
-    }
-
-    /*
-     * Region 2 completely subsumes region 1
-     */
-    if (!reg2->data && SUBSUMES(&reg2->extents, &reg1->extents)) {
-        if (new_reg != reg2) return PREFIX(_copy)(new_reg, reg2);
-
-        return TRUE;
-    }
-
-    if (!pixman_op(new_reg, reg1, reg2, pixman_region_union_o, TRUE, TRUE))
-        return FALSE;
-
-    new_reg->extents.x1 = MIN(reg1->extents.x1, reg2->extents.x1);
-    new_reg->extents.y1 = MIN(reg1->extents.y1, reg2->extents.y1);
-    new_reg->extents.x2 =
MAX(reg1->extents.x2, reg2->extents.x2); - new_reg->extents.y2 = MAX(reg1->extents.y2, reg2->extents.y2); - - GOOD(new_reg); - - return TRUE; -} - -/*====================================================================== - * Region Subtraction - *====================================================================*/ - -/*- - *----------------------------------------------------------------------- - * pixman_region_subtract_o -- - * Overlapping band subtraction. x1 is the left-most point not yet - * checked. - * - * Results: - * TRUE if successful. - * - * Side Effects: - * region may have rectangles added to it. - * - *----------------------------------------------------------------------- - */ -/*ARGSUSED*/ -static pixman_bool_t pixman_region_subtract_o( - region_type_t *region, box_type_t *r1, box_type_t *r1_end, box_type_t *r2, - box_type_t *r2_end, int y1, int y2) -{ - box_type_t *next_rect; - int x1; - - x1 = r1->x1; - - critical_if_fail(y1 < y2); - critical_if_fail(r1 != r1_end && r2 != r2_end); - - next_rect = PIXREGION_TOP(region); - - do { - if (r2->x2 <= x1) { - /* - * Subtrahend entirely to left of minuend: go to next subtrahend. - */ - r2++; - } else if (r2->x1 <= x1) { - /* - * Subtrahend precedes minuend: nuke left edge of minuend. - */ - x1 = r2->x2; - if (x1 >= r1->x2) { - /* - * Minuend completely covered: advance to next minuend and - * reset left fence to edge of new minuend. - */ - r1++; - if (r1 != r1_end) x1 = r1->x1; - } else { - /* - * Subtrahend now used up since it doesn't extend beyond - * minuend - */ - r2++; - } - } else if (r2->x1 < r1->x2) { - /* - * Left part of subtrahend covers part of minuend: add uncovered - * part of minuend to region and skip to next subtrahend. - */ - critical_if_fail(x1 < r2->x1); - NEWRECT(region, next_rect, x1, y1, r2->x1, y2); - - x1 = r2->x2; - if (x1 >= r1->x2) { - /* - * Minuend used up: advance to new... - */ - r1++; - if (r1 != r1_end) x1 = r1->x1; - } else { - /* - * Subtrahend used up - */ - r2++; - } - } else { - /* - * Minuend used up: add any remaining piece before advancing. - */ - if (r1->x2 > x1) NEWRECT(region, next_rect, x1, y1, r1->x2, y2); - - r1++; - - if (r1 != r1_end) x1 = r1->x1; - } - } while ((r1 != r1_end) && (r2 != r2_end)); - - /* - * Add remaining minuend rectangles to region. - */ - while (r1 != r1_end) { - critical_if_fail(x1 < r1->x2); - - NEWRECT(region, next_rect, x1, y1, r1->x2, y2); - - r1++; - if (r1 != r1_end) x1 = r1->x1; - } - return TRUE; -} - -/*- - *----------------------------------------------------------------------- - * pixman_region_subtract -- - * Subtract reg_s from reg_m and leave the result in reg_d. - * S stands for subtrahend, M for minuend and D for difference. - * - * Results: - * TRUE if successful. - * - * Side Effects: - * reg_d is overwritten. 
- * - *----------------------------------------------------------------------- - */ -PIXMAN_EXPORT pixman_bool_t PREFIX(_subtract)(region_type_t *reg_d, - region_type_t *reg_m, - region_type_t *reg_s) -{ - GOOD(reg_m); - GOOD(reg_s); - GOOD(reg_d); - - /* check for trivial rejects */ - if (PIXREGION_NIL(reg_m) || PIXREGION_NIL(reg_s) || - !EXTENTCHECK(®_m->extents, ®_s->extents)) { - if (PIXREGION_NAR(reg_s)) return pixman_break(reg_d); - - return PREFIX(_copy)(reg_d, reg_m); - } else if (reg_m == reg_s) { - FREE_DATA(reg_d); - reg_d->extents.x2 = reg_d->extents.x1; - reg_d->extents.y2 = reg_d->extents.y1; - reg_d->data = pixman_region_empty_data; - - return TRUE; - } - - /* Add those rectangles in region 1 that aren't in region 2, - do yucky subtraction for overlaps, and - just throw away rectangles in region 2 that aren't in region 1 */ - if (!pixman_op(reg_d, reg_m, reg_s, pixman_region_subtract_o, TRUE, FALSE)) - return FALSE; - - /* - * Can't alter reg_d's extents before we call pixman_op because - * it might be one of the source regions and pixman_op depends - * on the extents of those regions being unaltered. Besides, this - * way there's no checking against rectangles that will be nuked - * due to coalescing, so we have to examine fewer rectangles. - */ - pixman_set_extents(reg_d); - GOOD(reg_d); - return TRUE; -} -#if 0 -/*====================================================================== - * Region Inversion - *====================================================================*/ - -/*- - *----------------------------------------------------------------------- - * pixman_region_inverse -- - * Take a region and a box and return a region that is everything - * in the box but not in the region. The careful reader will note - * that this is the same as subtracting the region from the box... - * - * Results: - * TRUE. - * - * Side Effects: - * new_reg is overwritten. - * - *----------------------------------------------------------------------- - */ -PIXMAN_EXPORT pixman_bool_t -PREFIX (_inverse) (region_type_t *new_reg, /* Destination region */ - region_type_t *reg1, /* Region to invert */ - box_type_t * inv_rect) /* Bounding box for inversion */ -{ - region_type_t inv_reg; /* Quick and dirty region made from the - * bounding box */ - GOOD (reg1); - GOOD (new_reg); - - /* check for trivial rejects */ - if (PIXREGION_NIL (reg1) || !EXTENTCHECK (inv_rect, ®1->extents)) - { - if (PIXREGION_NAR (reg1)) - return pixman_break (new_reg); - - new_reg->extents = *inv_rect; - FREE_DATA (new_reg); - new_reg->data = (region_data_type_t *)NULL; - - return TRUE; - } - - /* Add those rectangles in region 1 that aren't in region 2, - * do yucky subtraction for overlaps, and - * just throw away rectangles in region 2 that aren't in region 1 - */ - inv_reg.extents = *inv_rect; - inv_reg.data = (region_data_type_t *)NULL; - if (!pixman_op (new_reg, &inv_reg, reg1, pixman_region_subtract_o, TRUE, FALSE)) - return FALSE; - - /* - * Can't alter new_reg's extents before we call pixman_op because - * it might be one of the source regions and pixman_op depends - * on the extents of those regions being unaltered. Besides, this - * way there's no checking against rectangles that will be nuked - * due to coalescing, so we have to examine fewer rectangles. - */ - pixman_set_extents (new_reg); - GOOD (new_reg); - return TRUE; -} -#endif -/* In time O(log n), locate the first box whose y2 is greater than y. - * Return @end if no such box exists. 
- */ -static box_type_t *find_box_for_y(box_type_t *begin, box_type_t *end, int y) -{ - box_type_t *mid; - - if (end == begin) return end; - - if (end - begin == 1) { - if (begin->y2 > y) - return begin; - else - return end; - } - - mid = begin + (end - begin) / 2; - if (mid->y2 > y) { - /* If no box is found in [begin, mid], the function - * will return @mid, which is then known to be the - * correct answer. - */ - return find_box_for_y(begin, mid, y); - } else { - return find_box_for_y(mid, end, y); - } -} - -/* - * rect_in(region, rect) - * This routine takes a pointer to a region and a pointer to a box - * and determines if the box is outside/inside/partly inside the region. - * - * The idea is to travel through the list of rectangles trying to cover the - * passed box with them. Anytime a piece of the rectangle isn't covered - * by a band of rectangles, part_out is set TRUE. Any time a rectangle in - * the region covers part of the box, part_in is set TRUE. The process ends - * when either the box has been completely covered (we reached a band that - * doesn't overlap the box, part_in is TRUE and part_out is false), the - * box has been partially covered (part_in == part_out == TRUE -- because of - * the banding, the first time this is true we know the box is only - * partially in the region) or is outside the region (we reached a band - * that doesn't overlap the box at all and part_in is false) - */ -PIXMAN_EXPORT pixman_region_overlap_t - PREFIX(_contains_rectangle)(region_type_t *region, box_type_t *prect) -{ - box_type_t *pbox; - box_type_t *pbox_end; - int part_in, part_out; - int numRects; - int x, y; - - GOOD(region); - - numRects = PIXREGION_NUMRECTS(region); - - /* useful optimization */ - if (!numRects || !EXTENTCHECK(®ion->extents, prect)) - return (PIXMAN_REGION_OUT); - - if (numRects == 1) { - /* We know that it must be PIXMAN_REGION_IN or PIXMAN_REGION_PART */ - if (SUBSUMES(®ion->extents, prect)) - return (PIXMAN_REGION_IN); - else - return (PIXMAN_REGION_PART); - } - - part_out = FALSE; - part_in = FALSE; - - /* (x,y) starts at upper left of rect, moving to the right and down */ - x = prect->x1; - y = prect->y1; - - /* can stop when both part_out and part_in are TRUE, or we reach prect->y2 - */ - for (pbox = PIXREGION_BOXPTR(region), pbox_end = pbox + numRects; - pbox != pbox_end; pbox++) { - /* getting up to speed or skipping remainder of band */ - if (pbox->y2 <= y) { - if ((pbox = find_box_for_y(pbox, pbox_end, y)) == pbox_end) break; - } - - if (pbox->y1 > y) { - part_out = TRUE; /* missed part of rectangle above */ - if (part_in || (pbox->y1 >= prect->y2)) break; - y = pbox->y1; /* x guaranteed to be == prect->x1 */ - } - - if (pbox->x2 <= x) continue; /* not far enough over yet */ - - if (pbox->x1 > x) { - part_out = TRUE; /* missed part of rectangle to left */ - if (part_in) break; - } - - if (pbox->x1 < prect->x2) { - part_in = TRUE; /* definitely overlap */ - if (part_out) break; - } - - if (pbox->x2 >= prect->x2) { - y = pbox->y2; /* finished with this band */ - if (y >= prect->y2) break; - x = prect->x1; /* reset x out to left again */ - } else { - /* - * Because boxes in a band are maximal width, if the first box - * to overlap the rectangle doesn't completely cover it in that - * band, the rectangle must be partially out, since some of it - * will be uncovered in that band. part_in will have been set true - * by now... 
- */ - part_out = TRUE; - break; - } - } - - if (part_in) { - if (y < prect->y2) - return PIXMAN_REGION_PART; - else - return PIXMAN_REGION_IN; - } else { - return PIXMAN_REGION_OUT; - } -} - -/* PREFIX(_translate) (region, x, y) - * translates in place - */ - -PIXMAN_EXPORT void PREFIX(_translate)(region_type_t *region, int x, int y) -{ - overflow_int_t x1, x2, y1, y2; - int nbox; - box_type_t * pbox; - - GOOD(region); - region->extents.x1 = x1 = region->extents.x1 + x; - region->extents.y1 = y1 = region->extents.y1 + y; - region->extents.x2 = x2 = region->extents.x2 + x; - region->extents.y2 = y2 = region->extents.y2 + y; - - if (((x1 - PIXMAN_REGION_MIN) | (y1 - PIXMAN_REGION_MIN) | - (PIXMAN_REGION_MAX - x2) | (PIXMAN_REGION_MAX - y2)) >= 0) { - if (region->data && (nbox = region->data->numRects)) { - for (pbox = PIXREGION_BOXPTR(region); nbox--; pbox++) { - pbox->x1 += x; - pbox->y1 += y; - pbox->x2 += x; - pbox->y2 += y; - } - } - return; - } - - if (((x2 - PIXMAN_REGION_MIN) | (y2 - PIXMAN_REGION_MIN) | - (PIXMAN_REGION_MAX - x1) | (PIXMAN_REGION_MAX - y1)) <= 0) { - region->extents.x2 = region->extents.x1; - region->extents.y2 = region->extents.y1; - FREE_DATA(region); - region->data = pixman_region_empty_data; - return; - } - - if (x1 < PIXMAN_REGION_MIN) - region->extents.x1 = PIXMAN_REGION_MIN; - else if (x2 > PIXMAN_REGION_MAX) - region->extents.x2 = PIXMAN_REGION_MAX; - - if (y1 < PIXMAN_REGION_MIN) - region->extents.y1 = PIXMAN_REGION_MIN; - else if (y2 > PIXMAN_REGION_MAX) - region->extents.y2 = PIXMAN_REGION_MAX; - - if (region->data && (nbox = region->data->numRects)) { - box_type_t *pbox_out; - - for (pbox_out = pbox = PIXREGION_BOXPTR(region); nbox--; pbox++) { - pbox_out->x1 = x1 = pbox->x1 + x; - pbox_out->y1 = y1 = pbox->y1 + y; - pbox_out->x2 = x2 = pbox->x2 + x; - pbox_out->y2 = y2 = pbox->y2 + y; - - if (((x2 - PIXMAN_REGION_MIN) | (y2 - PIXMAN_REGION_MIN) | - (PIXMAN_REGION_MAX - x1) | (PIXMAN_REGION_MAX - y1)) <= 0) { - region->data->numRects--; - continue; - } - - if (x1 < PIXMAN_REGION_MIN) - pbox_out->x1 = PIXMAN_REGION_MIN; - else if (x2 > PIXMAN_REGION_MAX) - pbox_out->x2 = PIXMAN_REGION_MAX; - - if (y1 < PIXMAN_REGION_MIN) - pbox_out->y1 = PIXMAN_REGION_MIN; - else if (y2 > PIXMAN_REGION_MAX) - pbox_out->y2 = PIXMAN_REGION_MAX; - - pbox_out++; - } - - if (pbox_out != pbox) { - if (region->data->numRects == 1) { - region->extents = *PIXREGION_BOXPTR(region); - FREE_DATA(region); - region->data = (region_data_type_t *)NULL; - } else { - pixman_set_extents(region); - } - } - } - - GOOD(region); -} - -PIXMAN_EXPORT int PREFIX(_not_empty)(region_type_t *region) -{ - GOOD(region); - - return (!PIXREGION_NIL(region)); -} - -PIXMAN_EXPORT box_type_t *PREFIX(_extents)(region_type_t *region) -{ - GOOD(region); - - return (®ion->extents); -} - -typedef region_type_t VRegionPrivate; - -#include "vregion.h" - -V_BEGIN_NAMESPACE - -static VRegionPrivate regionPrivate = {{0, 0, 0, 0}, NULL}; - -struct VRegionData { - VRegionData() : ref(-1), rgn(®ionPrivate) {} - RefCount ref; - VRegionPrivate *rgn; -}; - -const VRegionData shared_empty; - -inline VRect box_to_rect(box_type_t *box) -{ - return {box->x1, box->y1, box->x2 - box->x1, box->y2 - box->y1}; -} - -void VRegion::cleanUp(VRegionData *x) -{ - if (x->rgn) { - PREFIX(_fini)(x->rgn); - delete x->rgn; - } - delete x; -} - -void VRegion::detach() -{ - if (d->ref.isShared()) *this = copy(); -} - -VRegion VRegion::copy() const -{ - VRegion r; - - r.d = new VRegionData; - r.d->rgn = new VRegionPrivate; - 
    r.d->ref.setOwned();
-    PREFIX(_init)(r.d->rgn);
-    if (d != &shared_empty) PREFIX(_copy)(r.d->rgn, d->rgn);
-    return r;
-}
-
-VRegion::VRegion() : d(const_cast<VRegionData *>(&shared_empty)) {}
-
-VRegion::VRegion(int x, int y, int w, int h)
-{
-    VRegion tmp(VRect(x, y, w, h));
-    tmp.d->ref.ref();
-    d = tmp.d;
-}
-
-VRegion::VRegion(const VRect &r)
-{
-    if (r.empty()) {
-        d = const_cast<VRegionData *>(&shared_empty);
-    } else {
-        d = new VRegionData;
-        d->rgn = new VRegionPrivate;
-        d->ref.setOwned();
-        PREFIX(_init_rect)(d->rgn, r.left(), r.top(), r.width(), r.height());
-    }
-}
-
-VRegion::VRegion(const VRegion &r)
-{
-    d = r.d;
-    d->ref.ref();
-}
-
-VRegion::VRegion(VRegion &&other) : d(other.d)
-{
-    other.d = const_cast<VRegionData *>(&shared_empty);
-}
-
-VRegion &VRegion::operator=(const VRegion &r)
-{
-    r.d->ref.ref();
-    if (!d->ref.deref()) cleanUp(d);
-
-    d = r.d;
-    return *this;
-}
-
-inline VRegion &VRegion::operator=(VRegion &&other)
-{
-    if (!d->ref.deref()) cleanUp(d);
-    d = other.d;
-    other.d = const_cast<VRegionData *>(&shared_empty);
-    return *this;
-}
-
-VRegion::~VRegion()
-{
-    if (!d->ref.deref()) cleanUp(d);
-}
-
-bool VRegion::empty() const
-{
-    return d == &shared_empty || !PREFIX(_not_empty)(d->rgn);
-}
-
-void VRegion::translate(const VPoint &p)
-{
-    if (p == VPoint() || empty()) return;
-
-    detach();
-    PREFIX(_translate)(d->rgn, p.x(), p.y());
-}
-
-VRegion VRegion::translated(const VPoint &p) const
-{
-    VRegion ret(*this);
-    ret.translate(p);
-    return ret;
-}
-
-/*
- * Returns \c true if this region is guaranteed to be fully contained in r.
- */
-bool VRegion::within(const VRect &r1) const
-{
-    box_type_t *r2 = PREFIX(_extents)(d->rgn);
-
-    return r2->x1 >= r1.left() && r2->x2 <= r1.right() && r2->y1 >= r1.top() &&
-           r2->y2 <= r1.bottom();
-}
-
-bool VRegion::contains(const VRect &r) const
-{
-    box_type_t box = {r.left(), r.top(), r.right(), r.bottom()};
-
-    pixman_region_overlap_t res = PREFIX(_contains_rectangle)(d->rgn, &box);
-    if (res == PIXMAN_REGION_IN) return true;
-    return false;
-}
-
-VRegion VRegion::united(const VRect &r) const
-{
-    if (empty()) return r;
-
-    if (contains(r)) {
-        return *this;
-    } else if (within(r)) {
-        return r;
-    } else {
-        VRegion result;
-        result.detach();
-        PREFIX(_union_rect)
-        (result.d->rgn, d->rgn, r.left(), r.top(), r.width(), r.height());
-        return result;
-    }
-}
-
-VRegion VRegion::united(const VRegion &r) const
-{
-    if (empty()) return r;
-    if (r.empty()) return *this;
-    if (d == r.d || PREFIX(_equal)(d->rgn, r.d->rgn)) return *this;
-    VRegion result;
-    result.detach();
-    PREFIX(_union)(result.d->rgn, d->rgn, r.d->rgn);
-    return result;
-}
-
-VRegion VRegion::intersected(const VRect &r) const
-{
-    if (empty() || r.empty()) return VRegion();
-
-    /* this is fully contained in r */
-    if (within(r)) return *this;
-
-    /* r is fully contained in this */
-    if (contains(r)) return r;
-
-    VRegion result;
-    result.detach();
-    PREFIX(_intersect_rect)
-    (result.d->rgn, d->rgn, r.left(), r.top(), r.width(), r.height());
-    return result;
-}
-
-VRegion VRegion::intersected(const VRegion &r) const
-{
-    if (empty() || r.empty()) return VRegion();
-
-    VRegion result;
-    result.detach();
-    PREFIX(_intersect)(result.d->rgn, d->rgn, r.d->rgn);
-
-    return result;
-}
-
-VRegion VRegion::subtracted(const VRegion &r) const
-{
-    if (empty() || r.empty()) return *this;
-    if (d == r.d || PREFIX(_equal)(d->rgn, r.d->rgn)) return VRegion();
-
-    VRegion result;
-    result.detach();
-    PREFIX(_subtract)(result.d->rgn, d->rgn, r.d->rgn);
-    return result;
-}
-
-int VRegion::rectCount() const
-{
-    if (empty()) return 0;
- return PREFIX(_n_rects)(d->rgn); -} - -VRect VRegion::rectAt(int index) const -{ - VRegionPrivate *reg = d->rgn; - if (!reg) return {}; - - box_type_t *box = PIXREGION_RECTS(reg) + index; - - return box_to_rect(box); -} - -VRegion VRegion::operator+(const VRect &r) const -{ - return united(r); -} - -VRegion VRegion::operator+(const VRegion &r) const -{ - return united(r); -} - -VRegion VRegion::operator-(const VRegion &r) const -{ - return subtracted(r); -} - -VRegion &VRegion::operator+=(const VRect &r) -{ - if (empty()) return *this = r; - if (r.empty()) return *this; - - if (contains(r)) { - return *this; - } else if (within(r)) { - return *this = r; - } else { - detach(); - PREFIX(_union_rect) - (d->rgn, d->rgn, r.left(), r.top(), r.width(), r.height()); - return *this; - } -} - -VRegion &VRegion::operator+=(const VRegion &r) -{ - if (empty()) return *this = r; - if (r.empty()) return *this; - if (d == r.d || PREFIX(_equal)(d->rgn, r.d->rgn)) return *this; - - detach(); - PREFIX(_union)(d->rgn, d->rgn, r.d->rgn); - return *this; -} - -VRegion &VRegion::operator-=(const VRegion &r) -{ - return *this = *this - r; -} - -bool VRegion::operator==(const VRegion &r) const -{ - if (empty()) return r.empty(); - if (r.empty()) return empty(); - - if (d == r.d) - return true; - else - return PREFIX(_equal)(d->rgn, r.d->rgn); -} - -VRect VRegion::boundingRect() const noexcept -{ - if (empty()) return {}; - return box_to_rect(&d->rgn->extents); -} - -inline bool rect_intersects(const VRect &r1, const VRect &r2) -{ - return (r1.right() >= r2.left() && r1.left() <= r2.right() && - r1.bottom() >= r2.top() && r1.top() <= r2.bottom()); -} - -bool VRegion::intersects(const VRegion &r) const -{ - if (empty() || r.empty()) return false; - - return PREFIX(_intersects)(d->rgn, r.d->rgn); -} - -VDebug &operator<<(VDebug &os, const VRegion &o) -{ - os << "[REGION: " - << "[bbox = " << o.boundingRect() << "]"; - os << "[rectCount = " << o.rectCount() << "]"; - os << "[rects = "; - for (int i = 0; i < o.rectCount(); i++) { - os << o.rectAt(i); - } - os << "]" - << "]"; - return os; -} - -V_END_NAMESPACE diff --git a/src/vector/vregion.h b/src/vector/vregion.h deleted file mode 100644 index 508b6c3..0000000 --- a/src/vector/vregion.h +++ /dev/null @@ -1,87 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All rights reserved. - * - * Licensed under the Flora License, Version 1.1 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://floralicense.org/license/ - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */
-
-#ifndef VREGION_H
-#define VREGION_H
-#include <vglobal.h>
-#include <vpoint.h>
-#include <vrect.h>
-#include <utility>
-#include "vdebug.h"
-
-V_BEGIN_NAMESPACE
-
-struct VRegionData;
-
-class VRegion {
-public:
-    VRegion();
-    VRegion(int x, int y, int w, int h);
-    VRegion(const VRect &r);
-    VRegion(const VRegion &region);
-    VRegion(VRegion &&other);
-    ~VRegion();
-    VRegion &operator=(const VRegion &);
-    VRegion &operator=(VRegion &&);
-    bool     empty() const;
-    bool     contains(const VRect &r) const;
-    VRegion  united(const VRect &r) const;
-    VRegion  united(const VRegion &r) const;
-    VRegion  intersected(const VRect &r) const;
-    VRegion  intersected(const VRegion &r) const;
-    VRegion  subtracted(const VRegion &r) const;
-    void     translate(const VPoint &p);
-    inline void translate(int dx, int dy);
-    VRegion  translated(const VPoint &p) const;
-    inline VRegion translated(int dx, int dy) const;
-    int      rectCount() const;
-    VRect    rectAt(int index) const;
-
-    VRegion operator+(const VRect &r) const;
-    VRegion operator+(const VRegion &r) const;
-    VRegion operator-(const VRegion &r) const;
-    VRegion &operator+=(const VRect &r);
-    VRegion &operator+=(const VRegion &r);
-    VRegion &operator-=(const VRegion &r);
-
-    VRect boundingRect() const noexcept;
-    bool  intersects(const VRegion &region) const;
-
-    bool operator==(const VRegion &r) const;
-    inline bool operator!=(const VRegion &r) const { return !(operator==(r)); }
-    friend VDebug &operator<<(VDebug &os, const VRegion &o);
-
-private:
-    bool    within(const VRect &r) const;
-    VRegion copy() const;
-    void    detach();
-    void    cleanUp(VRegionData *x);
-
-    struct VRegionData *d;
-};
-inline void VRegion::translate(int dx, int dy)
-{
-    translate(VPoint(dx, dy));
-}
-
-inline VRegion VRegion::translated(int dx, int dy) const
-{
-    return translated(VPoint(dx, dy));
-}
-
-V_END_NAMESPACE
-
-#endif // VREGION_H
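
Note (not part of the patch): the comments in the removed region code describe the y-x banded representation in prose. The following minimal, self-contained C++ sketch restates the two invariants in code; the names Box, sameBand and coalescible are illustrative only and do not exist in the library. It mirrors the rule pixman_coalesce applies before merging a freshly finished band into the previous one.

    // Illustrative only -- mirrors the y-x banded layout described above:
    // boxes are sorted by (y1, x1), every box in a band shares the same
    // y1/y2, and two adjacent bands may be merged only when the first band's
    // bottom touches the second band's top and both hold the same x-spans.
    #include <cstddef>
    #include <cstdio>
    #include <vector>

    struct Box { int x1, y1, x2, y2; };   // same shape as pixman_box

    // True when two boxes belong to the same band.
    static bool sameBand(const Box &a, const Box &b)
    {
        return a.y1 == b.y1 && a.y2 == b.y2;
    }

    // True when band `cur` may be coalesced into band `prev`.
    static bool coalescible(const std::vector<Box> &prev,
                            const std::vector<Box> &cur)
    {
        if (prev.empty() || prev.size() != cur.size()) return false;
        if (prev.front().y2 != cur.front().y1) return false;
        for (std::size_t i = 0; i < prev.size(); i++)
            if (prev[i].x1 != cur[i].x1 || prev[i].x2 != cur[i].x2) return false;
        return true;
    }

    int main()
    {
        // Two one-box bands produced by stacking 0,0-10,10 on top of 0,10-10,20.
        std::vector<Box> band0 = {{0, 0, 10, 10}};
        std::vector<Box> band1 = {{0, 10, 10, 20}};
        std::printf("coalescible: %d\n", coalescible(band0, band1));   // 1
        std::printf("same band:   %d\n", sameBand(band0[0], band1[0])); // 0
        return 0;
    }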
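
Note (not part of the patch): within one band, union reduces to merging two x-sorted interval lists, which is what the removed pixman_region_union_o does via the MERGERECT macro. A rough standalone sketch with invented names (Span, unionBand), assuming touching spans must be merged so that no two spans in a band touch:

    // Illustrative only -- walk the x-sorted spans of both bands, extend the
    // current span while the next one touches or overlaps it, and start a new
    // span when a gap appears.
    #include <algorithm>
    #include <cstdio>
    #include <vector>

    struct Span { int x1, x2; };

    static std::vector<Span> unionBand(const std::vector<Span> &a,
                                       const std::vector<Span> &b)
    {
        std::vector<Span> merged;
        merged.reserve(a.size() + b.size());
        merged.insert(merged.end(), a.begin(), a.end());
        merged.insert(merged.end(), b.begin(), b.end());
        std::sort(merged.begin(), merged.end(),
                  [](const Span &l, const Span &r) { return l.x1 < r.x1; });

        std::vector<Span> out;
        for (const Span &s : merged) {
            if (!out.empty() && s.x1 <= out.back().x2)         // touches/overlaps: extend
                out.back().x2 = std::max(out.back().x2, s.x2);
            else                                               // gap: start a new span
                out.push_back(s);
        }
        return out;   // maximal-width spans; no two spans in the band touch
    }

    int main()
    {
        auto out = unionBand({{0, 4}, {10, 12}}, {{3, 6}, {12, 15}});
        for (const Span &s : out) std::printf("[%d,%d) ", s.x1, s.x2);
        std::printf("\n");   // prints: [0,6) [10,15)
        return 0;
    }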
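
Note (not part of the patch): for reference, a usage sketch of the copy-on-write VRegion wrapper whose header is removed above. It only uses members declared in vregion.h and assumes it is compiled inside the library tree (prior to this change) where that header is available.

    // Usage sketch only -- exercises the VRegion value type declared in the
    // removed vregion.h.
    #include "vregion.h"

    void vregion_demo()
    {
        VRegion a(0, 0, 100, 100);     // single-rectangle region
        VRegion b(50, 50, 100, 100);   // overlapping rectangle

        VRegion u = a.united(b);       // stored internally as y-x bands
        VRegion i = a.intersected(b);  // the 50x50 overlap
        VRegion d = u.subtracted(i);   // union minus the overlap

        for (int n = 0; n < u.rectCount(); n++) {
            VRect r = u.rectAt(n);     // banded rectangles, top to bottom
            (void)r;
        }

        d.translate(10, 10);           // copy-on-write: detaches before mutating
        bool same = (u == a + b);      // operator+ is united()
        (void)same;
    }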