1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
|
/***************************************************************************
* __________ __ ___.
* Open \______ \ ____ ____ | | _\_ |__ _______ ___
* Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
* Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
* Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
* \/ \/ \/ \/ \/
* $Id$
*
* Copyright (C) 2007 by Andree Buschmann
*
* All files in this archive are subject to the GNU General Public License.
* See the file COPYING in the source tree root for full license agreement.
*
* This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
* KIND, either express or implied.
*
****************************************************************************/
/****************************************************************************
* void lcd_write_yuv420_lines(unsigned char const * const src[3],
* int width,
* int stride);
*
* |R| |1.000000 -0.000001 1.402000| |Y'|
* |G| = |1.000000 -0.344136 -0.714136| |Pb|
* |B| |1.000000 1.772000 0.000000| |Pr|
* Scaled, normalized, rounded and tweaked to yield RGB 565:
* |R| |74 0 101| |Y' - 16| >> 9
* |G| = |74 -24 -51| |Cb - 128| >> 8
* |B| |74 128 0| |Cr - 128| >> 9
*/
/*
 * lcd_write_yuv420_lines: convert two vertically adjacent lines of planar
 * Y'CbCr 4:2:0 to RGB565 and write every pixel as a halfword to the LCD
 * data port. The first line is written completely, then the second one.
 *
 * On entry:
 *   r0 = src    : pointer to three plane pointers { Y', Cb, Cr }
 *   r1 = width  : pixels per line; pixels are processed in pairs, one
 *                 Cb/Cr sample per pair. Returns at once if width <= 0.
 *   r2 = stride : byte offset from the first Y' line to the second one
 *
 * Registers inside the loops:
 *   r0 = bu, r1 = rv, r3 = guv   chroma terms of the current pixel pair
 *   r2, r4, r5, r6               per-pixel scratch
 *   r7  = pixels left in the current line
 *   r8  = cursor into the chroma buffer
 *   r9  = LCD data port address
 *   r10 = Y' read pointer, r11 = Cb read pointer, r12 = Cr read pointer
 *
 * Stack frame below the saved r4-r12:
 *   [sp, #0]   width
 *   [sp, #4]   address of the second Y' line
 *   [sp, #8]   size of this frame in bytes (used to release it)
 *   [sp, #12]  chroma buffer: rv, guv, bu as signed bytes per pixel pair,
 *              filled by the 1st loop and replayed by the 2nd loop
 */

/*
 * Convert one pixel and write it to the LCD.
 *   in:       r10 = Y' pointer (post-incremented by 1)
 *             r1 = rv, r3 = guv, r0 = bu, r9 = LCD data port
 *   clobbers: r2, r4, r5, r6, flags
 * Uses numeric label 15, so it may be expanded several times in a row.
 */
    .macro  yuv420_put_pixel
    ldrb    r5, [r10], #1               /* r5 = *Y'_p++ */
    sub     r5, r5, #16                 /* r5 = (Y' - 16) * 37, i.e. half */
    add     r2, r5, r5, asl #2          /* of the 74 scale, which is why  */
    add     r5, r2, r5, asl #5          /* the shifts below are 8 and 7   */
    add     r6, r1, r5, asr #8          /* r6 = r = (Y*74 >> 9) + rv  */
    add     r2, r3, r5, asr #7          /* r2 = g = (Y*74 >> 8) + guv */
    add     r4, r0, r5, asr #8          /* r4 = b = (Y*74 >> 9) + bu  */
    orr     r5, r6, r4                  /* r | b | (g >> 1) <= 31 unsigned */
    orr     r5, r5, r2, asr #1          /* <=> all three are in range      */
    cmp     r5, #31
    bls     15f                         /* -> no clamp */
    cmp     r6, #31                     /* clamp r: unsigned compare, so   */
    mvnhi   r6, r6, asr #31             /* negative -> 0, too large -> max */
    andhi   r6, r6, #31
    cmp     r2, #63                     /* clamp g */
    mvnhi   r2, r2, asr #31
    andhi   r2, r2, #63
    cmp     r4, #31                     /* clamp b */
    mvnhi   r4, r4, asr #31
    andhi   r4, r4, #31
15:                                     /* no clamp */
    orr     r4, r4, r2, lsl #5          /* pixel = r<<11 | g<<5 | b */
    orr     r4, r4, r6, lsl #11
    strh    r4, [r9]                    /* write pixel */
    .endm

    .section    .icode, "ax", %progbits
    .align      2
    .global     lcd_write_yuv420_lines
    .type       lcd_write_yuv420_lines, %function
lcd_write_yuv420_lines:
    /* r0 = yuv_src */
    /* r1 = width   */
    /* r2 = stride  */
    /* Both loops below test their counter at the bottom and therefore run
     * at least once. With width <= 0 the chroma buffer would be too small
     * (or missing) and the stores would land on the saved registers, so
     * leave before touching the stack or the LCD. */
    cmp     r1, #0
    bxle    lr

    stmfd   sp!, { r4-r12 }             /* save non-scratch */
    ldmia   r0, { r10, r11, r12 }       /* r10 = yuv_src[0] = Y'_p */
                                        /* r11 = yuv_src[1] = Cb_p */
                                        /* r12 = yuv_src[2] = Cr_p */
    add     r2, r10, r2                 /* r2 = &ysrc[stride] */
    add     r3, r1, r1, asl #1          /* 3 * width bytes for the chroma   */
                                        /* buffer: an upper bound, only 3   */
                                        /* bytes per pixel pair are stored  */
    add     r3, r3, #15                 /* plus room for 3 additional words, */
    bic     r3, r3, #3                  /* rounded up to multiples of 4 byte */
    sub     sp, sp, r3                  /* and allocate on stack */
    stmia   sp, { r1, r2, r3 }          /* width, &ysrc[stride], stack_alloc */

    mov     r7, r1                      /* r7 = loop count */
    add     r8, sp, #12                 /* chroma buffer */
    mov     r9, #0x30000000             /* LCD data port */

    /* 1st loop: first line; computes the chroma terms and records them */
10:
    ldrb    r0, [r11], #1               /* r0 = *usrc++ = *Cb_p++ */
    ldrb    r1, [r12], #1               /* r1 = *vsrc++ = *Cr_p++ */

    sub     r0, r0, #128                /* r0 = Cb-128 */
    sub     r1, r1, #128                /* r1 = Cr-128 */

    add     r3, r1, r1, asl #1          /* r3 = Cr*51 + Cb*24 */
    add     r3, r3, r3, asl #4
    add     r3, r3, r0, asl #3
    add     r3, r3, r0, asl #4

    add     r4, r1, r1, asl #2          /* r1 = Cr*101 */
    add     r4, r4, r1, asl #5
    add     r1, r4, r1, asl #6

    add     r1, r1, #256                /* r1 = rv = (r1 + 256) >> 9 */
    mov     r1, r1, asr #9
    strb    r1, [r8], #1                /* store rv to chroma buffer */
    rsb     r3, r3, #128                /* r3 = guv = (-r3 + 128) >> 8 */
    mov     r3, r3, asr #8
    strb    r3, [r8], #1                /* store guv to chroma buffer */
    add     r0, r0, #2                  /* r0 = bu = (Cb*128 + 256) >> 9 */
    mov     r0, r0, asr #2
    strb    r0, [r8], #1                /* store bu to chroma buffer */

    yuv420_put_pixel                    /* 1st loop, first pixel */
    yuv420_put_pixel                    /* 1st loop, second pixel */

    subs    r7, r7, #2                  /* check for loop end */
    bgt     10b                         /* back to beginning */
    /* 1st loop end */

    add     r8, sp, #12                 /* rewind chroma buffer */
    ldmia   sp, { r7, r10 }             /* r7 = loop count */
                                        /* r10 = &ysrc[stride] */

    /* 2nd loop: second line; reuses the recorded chroma terms */
20:
    ldrsb   r1, [r8], #1                /* r1 = rv  (sign-extended) */
    ldrsb   r3, [r8], #1                /* r3 = guv (sign-extended) */
    ldrsb   r0, [r8], #1                /* r0 = bu  (sign-extended) */

    yuv420_put_pixel                    /* 2nd loop, first pixel */
    yuv420_put_pixel                    /* 2nd loop, second pixel */

    subs    r7, r7, #2                  /* check for loop end */
    bgt     20b                         /* back to beginning */
    /* 2nd loop end */

    ldr     r3, [sp, #8]                /* r3 = size of the frame */
    add     sp, sp, r3                  /* deallocate buffer */
    ldmfd   sp!, { r4-r12 }             /* restore registers */
    bx      lr
    .ltorg
    .size   lcd_write_yuv420_lines, .-lcd_write_yuv420_lines

    .purgem yuv420_put_pixel            /* keep the helper macro local */
|