3 * Copyright 2013 Samsung Electronics S.LSI Co. LTD
5 * Licensed under the Apache License, Version 2.0 (the "License")
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
9 * http://www.apache.org/licenses/LICENSE-2.0
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
19 * @file csc_BGRA8888_to_RGBA8888.s
20 * @brief color format converter
21 * @author Hyungdeok Lee (hd0408.lee@samsung.com)
28 * Copies a source BGRA8888 buffer to a destination buffer as RGBA8888.
29 * Uses the NEON interleaved load/store instructions to easily swap the R and B channels.
41 * Only bpp = 4 is considered.
@ ---------------------------------------------------------------------------
@ csc_BGRA8888_RGBA8888_NEON
@
@ Streams pixel data from the address in r11 to the address in r10 in
@ 32-byte steps, using the NEON 4-way de-interleaving load (vld4.8) and the
@ matching interleaving store (vst4.8) on d0-d3.
@
@ Registers as used in the lines visible here:
@   r2      - read as an input size/count (shifted by 5 and by 7 below).
@             Presumably the third C argument - TODO confirm against caller.
@   r9      - r2 >> 5
@   r14     - (r2 >> 5) & 3
@   r12     - r2 >> 7
@   r11     - read pointer, post-incremented by 32 on every vld4.8
@   r10     - write pointer, post-incremented by 32 on every vst4.8
@   d0-d3   - scratch; overwritten by every vld4.8
@   r4-r12  - saved on entry and restored on exit
@
@ NOTE(review): the embedded line numbering skips lines, so this view looks
@ incomplete. In particular, in the lines visible here:
@   - r10 and r11 are never initialised before being dereferenced;
@   - r9, r12 and r14 are computed but never consumed;
@   - no labels or branches are present, although a comment below mentions a
@     "128 byte loop";
@   - nothing between a vld4.8 and its vst4.8 exchanges d0 and d2, so as
@     shown the bytes would be written back in their original order. The
@     file header describes an R/B channel swap; presumably a swap sits on
@     the skipped lines - confirm against the full file.
@ ---------------------------------------------------------------------------
46 .global csc_BGRA8888_RGBA8888_NEON
47 .type csc_BGRA8888_RGBA8888_NEON, %function
48 csc_BGRA8888_RGBA8888_NEON:
@ Push r4-r12 and lr (r14); the matching ldmfd at the end pops them again.
66 stmfd sp!, {r4-r12,r14} @ backup registers
@ Split the size in r2 into counts of 32-unit and 128-unit chunks.
@ Presumably r12 drives the 4x-unrolled (128-byte) section and r14 the
@ leftover 32-byte blocks (0..3) - TODO confirm; their uses are not visible.
70 mov r9, r2, lsr #5 @ r9 = r2 >> 5 (32)
71 and r14, r9, #3 @ r14 = r9 & 3
72 mov r12, r2, lsr #7 @ r12 = r2 >> 7 (128)
77 @ Process d0 to d3 at once. 4 times same operation. := 8 byte * 4 * 4 = 128 byte loop.
79 @pld [r11] @ cache line fill. use this for r11 region set by cachable.
@ Each vld4.8 reads 32 bytes (eight 4-byte groups) from [r11] and
@ de-interleaves them: byte 0 of every group goes to d0, byte 1 to d1,
@ byte 2 to d2 and byte 3 to d3. For BGRA input that would presumably be
@ d0=B, d1=G, d2=R, d3=A. The "!" advances r11 past the bytes read.
@ Each vst4.8 re-interleaves d0-d3 into 32 bytes at [r10] and advances r10.
@ Block 1 of 4 in the unrolled section.
80 vld4.8 {d0, d1, d2, d3}, [r11]!
82 vst4.8 {d0, d1, d2, d3}, [r10]!
@ Block 2 of 4.
84 vld4.8 {d0, d1, d2, d3}, [r11]!
86 vst4.8 {d0, d1, d2, d3}, [r10]!
@ Block 3 of 4.
88 vld4.8 {d0, d1, d2, d3}, [r11]!
90 vst4.8 {d0, d1, d2, d3}, [r10]!
@ Block 4 of 4.
92 vld4.8 {d0, d1, d2, d3}, [r11]!
94 vst4.8 {d0, d1, d2, d3}, [r10]!
@ A fifth, stand-alone 32-byte block. Presumably the body of the remainder
@ loop for data left over after the 128-byte section - TODO confirm.
104 vld4.8 {d0, d1, d2, d3}, [r11]!
106 vst4.8 {d0, d1, d2, d3}, [r10]!
@ Pop r4-r12 and load the saved lr straight into pc (r15): this both
@ restores the registers saved on entry and returns to the caller.
111 ldmfd sp!, {r4-r12,r15} @ restore registers