1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
|
/***************************************************************************
* __________ __ ___.
* Open \______ \ ____ ____ | | _\_ |__ _______ ___
* Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
* Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
* Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
* \/ \/ \/ \/ \/
* $Id$
*
* Copyright (C) 2008 by Jens Arnold
*
* based on arm_nrv2e_d8.S -- ARM decompressor for NRV2E
* Copyright (C) 1996-2008 Markus Franz Xaver Johannes Oberhumer
* Copyright (C) 1996-2008 Laszlo Molnar
* Copyright (C) 2000-2008 John F. Reiser
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
* KIND, either express or implied.
*
****************************************************************************/
/*
 * Register aliases used by the decompressor.
 *
 * src, dst and len/cnt live in r4, r5 and r6, the same registers the
 * function pushes on entry as its three arguments (src, dst, dst_len),
 * so the argument values are kept on the stack and the registers are
 * reused as working pointers / counters.
 */
#define src r4
#define dst r5
#define len r6 /* overlaps 'cnt' */
#define cnt r6 /* overlaps 'len' while reading an offset */
#define tmp r7
#define off r0 /* must be r0 because of indexed addressing */
#define bits r1
#define bitmask r2
#define wrnk r3 /* -0x500 -M2_MAX_OFFSET before "wrinkle" */
/*
 * GETBIT - fetch the next bit of the compressed stream into the T flag.
 *
 * 'bits' holds the bits that are still unread, left-aligned, followed by
 * a single 1 marker bit. 'bitmask' is 0x7fffffff, so the tst sets T when
 * nothing but bit 31 is left, i.e. all data bits have been consumed. In
 * that case the bf falls through and get1_n2e is called to load the next
 * source byte; otherwise the bf skips the call.
 *
 * The 'shll bits' at label 1 serves both paths:
 *  - reached through the bf it shifts the next data bit out into T;
 *  - when the bsr is executed it is the bsr delay slot, so it runs before
 *    get1_n2e, shifts the marker bit out and thereby sets T, which is the
 *    entry condition of get1_n2e. get1_n2e returns behind the delay slot
 *    with the new bit already in T.
 *
 * A numeric local label is used so the macro can be expanded many times.
 * Clobbers: bits, T, and PR/src when a refill takes place.
 */
#define GETBIT \
tst bits, bitmask; \
bf 1f; \
bsr get1_n2e; \
1: \
shll bits /* using the delay slot on purpose */
/*
 * getnextb(reg): fetch a bit and shift it into reg from the right
 *                (reg = reg * 2 + bit, done by rotcl through T).
 * jnextb0 label: fetch a bit and branch to label if it is 0 (bf).
 * jnextb1 label: fetch a bit and branch to label if it is 1 (bt).
 * For the two branch macros the branch target is written right after the
 * macro name at the point of use.
 */
#define getnextb(reg) GETBIT; rotcl reg
#define jnextb0 GETBIT; bf
#define jnextb1 GETBIT; bt
/*
 * NRV2E decompressor, reading the control bits from the source stream one
 * byte at a time.
 *
 * In:   r4 = src, r5 = dst, r6 = dst_len (pointer), see prototype below
 * Out:  r0 = number of source bytes consumed
 *       *dst_len = number of bytes written to dst
 * Uses: r0-r7 and the T flag; PR is saved on the stack because the bit
 *       fetch macro calls get1_n2e with bsr.
 *
 * Stack layout after the prologue (lowest address first):
 *       original src, original dst, dst_len pointer, saved PR
 *
 * The code shown here performs no bounds checking on either buffer and
 * has no error exit; it runs until the end-of-stream marker is decoded.
 */
.section .icode,"ax",@progbits
.align 2
.global _ucl_nrv2e_decompress_8
.type _ucl_nrv2e_decompress_8,@function
/* src_len = ucl_nrv2e_decompress_8(const unsigned char *src,
* unsigned char *dst,
* unsigned long *dst_len)
*/
_ucl_nrv2e_decompress_8:
/*
 * Prologue: the pushes of PR and the three arguments are interleaved with
 * the set-up of the constants. wrnk becomes -5 << 8 = -0x500, bitmask
 * becomes 0x7fffffff.
 */
sts.l pr, @-r15
mov #-1, off ! off = -1 initial condition
mov.l r6, @-r15
mov #-5, wrnk
mov.l r5, @-r15
shll8 wrnk ! nrv2e -M2_MAX_OFFSET
mov.l r4, @-r15
mov #-1, bitmask
shlr bitmask ! 0x7fffffff for testing before shifting
/*
 * Enter the main loop. The instruction after the bra is its delay slot:
 * bits = ~0x7fffffff = 0x80000000, i.e. only the marker bit, so the very
 * first bit fetch loads a byte from src.
 */
bra top_n2e
not bitmask, bits ! refill next time (MSB must be set)
/*
 * End of stream: turn the working pointers into byte counts by
 * subtracting the saved start pointers, store the output length through
 * the saved dst_len pointer, restore PR and return. The final mov is in
 * the rts delay slot and sets the return value (source bytes consumed).
 */
eof_n2e:
mov.l @r15+, r0 ! r0 = orig_src
mov.l @r15+, r1 ! r1 = orig_dst
sub r0, src
mov.l @r15+, r2 ! r2 = plen_dst
sub r1, dst
mov.l dst, @r2
lds.l @r15+, pr
rts
mov src, r0
/* Literal: copy one byte from src to dst unchanged. */
lit_n2e:
mov.b @src, tmp
add #1, src ! Need to fill the pipeline latency anyway
mov.b tmp, @dst
add #1, dst
/*
 * Main loop head: a 1 bit announces a literal byte, a 0 bit a match.
 * For a match, cnt is preset to 1 in the bra delay slot.
 */
top_n2e:
jnextb1 lit_n2e
bra getoff_n2e
mov #1, cnt
/*
 * Offset prefix decoding. getoff_n2e appends one stream bit to cnt and
 * then reads a control bit: 1 ends the loop, 0 continues at off_n2e,
 * which decrements cnt, appends one more bit and falls into getoff_n2e
 * again.
 */
off_n2e:
add #-1, cnt
getnextb(cnt)
getoff_n2e:
getnextb(cnt)
jnextb0 off_n2e
/*
 * The loop is left with T = 1, so the addc computes -4 + cnt + 1. The
 * carry out of that addition tells whether cnt was at least 3. len is
 * cleared only after cnt has been consumed because both names refer to
 * the same register; mov does not change T.
 */
mov #-4, tmp ! T=1 on entry, so this does
addc cnt, tmp ! tmp = cnt - 3, T = (cnt >= 3)
mov #0, len ! cnt and len share a reg!
bf offprev_n2e ! cnt was 2
/*
 * New offset: combine (cnt - 3) << 8 with the next source byte. The byte
 * load sign-extends, hence the extu.b. The combined value is inverted; an
 * all-zero result (combined value 0xffffffff) is the end-of-stream
 * marker. Otherwise the arithmetic shift right yields the displacement
 * used by the copy loop (negative, relative to dst) and moves the lowest
 * bit of the inverted value into T, which selects how the match length
 * is decoded.
 */
mov.b @src+, off ! low 7+1 bits
shll8 tmp
extu.b off, off
or tmp, off
not off, off ! off = ~off
tst off, off
bt eof_n2e
shar off
bt lenlast_n2e
bf lenmore_n2e ! always taken if the preceding bt isn't
/*
 * cnt was 2: the previous value of off is reused (len is still 0). A 1
 * bit means only one more length bit follows.
 */
offprev_n2e:
jnextb1 lenlast_n2e
/* len starts at 1; a 1 bit again means one more length bit follows. */
lenmore_n2e:
mov #1, len
jnextb1 lenlast_n2e
/*
 * Long length: keep appending bits to len until a control bit of 1 is
 * read. The length bias of 4 is loaded in the bra delay slot.
 */
len_n2e:
getnextb(len)
jnextb0 len_n2e
bra gotlen_n2e
mov #6-2, tmp
/*
 * Bit buffer refill, called by bsr from the bit fetch macro with T = 1
 * (the marker bit just shifted out of 'bits' in the bsr delay slot).
 * Loads the next source byte, rotates T in as the new marker below it and
 * the top bit out into T as the fetched bit, then moves the remaining
 * seven data bits plus marker up by 24 bits so they are left-aligned.
 * The last shift sits in the rts delay slot; the 8 and 16 bit shifts are
 * relied upon to leave T untouched.
 */
get1_n2e: ! in: T bit set
mov.b @src+, bits ! SH1 sign-extends on load
rotcl bits ! LSB = T, T = MSB
shll16 bits
rts
shll8 bits
/* Short length: append one final bit to len; the length bias is 2. */
lenlast_n2e:
getnextb(len) ! 0,1,2,3
mov #2, tmp
/*
 * len += bias + T, where the signed compare sets T when wrnk > off, i.e.
 * when the match lies more than 0x500 bytes back.
 */
gotlen_n2e:
cmp/gt off, wrnk ! too far away, so minimum match length is 3
addc tmp, len
/*
 * Copy len bytes from dst + off to dst, one byte at a time, so the source
 * range may overlap the bytes being written. T is set by the tst once
 * len has reached zero and is still valid at the two branches because
 * the store and the add in between do not modify it.
 */
copy_n2e:
add #-1, len
mov.b @(off,dst), tmp
tst len, len
mov.b tmp, @dst
add #1, dst
bf copy_n2e
bt top_n2e ! always taken if the preceding bf isn't
.size _ucl_nrv2e_decompress_8, .-_ucl_nrv2e_decompress_8
|