summaryrefslogtreecommitdiff
path: root/firmware/bitswap.S
blob: 08e609b9e530c1cadb900aa20cac99efedff9fbc (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
/***************************************************************************
 *             __________               __   ___.
 *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
 *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
 *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
 *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
 *                     \/            \/     \/    \/            \/
 * $Id$
 *
 * Copyright (C) 2004 by Jens Arnold
 *
 * All files in this archive are subject to the GNU General Public License.
 * See the file COPYING in the source tree root for full license agreement.
 *
 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
 * KIND, either express or implied.
 *
 ****************************************************************************/

    .section    .icode,"ax",@progbits
    .align      2
    .global     _bitswap
    .type       _bitswap,@function

/* Reverses the bit order inside every byte of a memory area (required for
 * mp3 data on the Archos). Each byte is replaced in place by its entry in
 * the lookup table _fliptable. This version is optimized for speed and size.
 *
 * arguments:
 *  r4 - start address
 *  r5 - length in bytes
 *
 * return value: void
 *
 * register usage:
 *  r0 - temporary; index register for every table lookup; combined result
 *  r1 - low byte (after swap) / freshly loaded first word
 *  r2 - high byte (after swap), shifted up to bits 8..15 before combining
 *  r4 - data address - 4
 *  r5 - end address - 4 (end address - 7 while the main loop is running)
 *  r7 - flip table (addressing with signed offset)
 *
 * r0-r2, r4, r5, r7 and the T bit are modified; nothing is saved or restored.
 *
 * Structure:
 *  1. odd start address: flip one byte so that the data address becomes even
 *  2. main loop: flip 4 bytes (two 16 bit words) per pass
 *  3. trailing byte loop: flip whatever is left (0..3 bytes)
 *
 * The instruction order below is devised in a way to utilize the pipelining
 * of the SH1 to the max.
 */

_bitswap:
    mova    _fliptable,r0   /* pc-relative table address, mova targets r0 */
    mov     r0,r7           /* r0 is needed as index register later on */
    add     #-4,r4          /* address is shifted by 4 */
    add     r4,r5           /* r5 = end_address - 4 */
    cmp/hi  r4,r5           /* at least something to do? (unsigned r5 > r4) */
    bf      .exit           /* no, get out of here! */

    add     #-3,r5          /* end offset for flipping 4 bytes per pass */
    mov     r4,r0           /* tst with immediate only works on r0 */
    tst     #1,r0           /* even address? */
    bt      .start2_w       /* yes, jump into main loop */

    /* no, flip first byte */
    mov.b   @(4,r4),r0      /* load byte, sign extension! */
    add     #1,r4           /* early increment */
    mov.b   @(r0,r7),r0     /* fliptable offset is signed */
    bra     .start2_w       /* jump into main loop */
    mov.b   r0,@(3,r4)      /* store byte (delay slot, runs before the jump) */

    /* main loop: flips 2 words per pass
     *
     * The table lookups return sign extended bytes. The low byte is zero
     * extended before combining; the high byte keeps its sign bits above
     * bit 15 after shll8, which does not matter because only the lower
     * 16 bits are stored by mov.w. */
    .align  2
.loop2_w:
    mov.w   @(6,r4),r0      /* load second word */
    add     #4,r4           /* early increment */
    swap.b  r0,r2           /* get high byte (2nd word) */
    exts.b  r0,r0           /* prepare low byte (2nd word) */
    mov.b   @(r0,r7),r1     /* swap low byte (2nd word) */
    exts.b  r2,r0           /* prepare high byte (2nd word) */
    mov.b   @(r0,r7),r2     /* swap high byte (2nd word) */
    extu.b  r1,r0           /* zero extend low byte (2nd word) */
    mov.w   @r4,r1          /* load first word */
    shll8   r2              /* shift high byte (2nd word), low byte zeroed */
    or      r2,r0           /* put low byte (2nd word) in result */
    swap.b  r1,r2           /* get high byte (1st word) */
    mov.w   r0,@(2,r4)      /* store result (2nd word) */
    exts.b  r1,r0           /* prepare low byte (1st word) */
    mov.b   @(r0,r7),r1     /* swap low byte (1st word) */
    exts.b  r2,r0           /* prepare high byte (1st word) */
    mov.b   @(r0,r7),r2     /* swap high byte (1st word) */
    extu.b  r1,r0           /* zero extend low byte (1st word) */
    shll8   r2              /* shift high byte (1st word), low byte zeroed */
    or      r2,r0           /* put low byte (1st word) in result */
    mov.w   r0,@r4          /* store result (1st word) */
.start2_w:
    cmp/hi  r4,r5           /* at least 4 bytes left? (r5 = end address - 7) */
    bt      .loop2_w        /* yes, do another pass */

    bra     .start_b2       /* jump into trailing byte loop */
    add     #3,r5           /* reset end offset (delay slot) */

    /* trailing byte loop: flips 0..3 bytes */
.loop_b2:
    mov.b   @(4,r4),r0      /* load byte, sign extension! */
    add     #1,r4           /* early increment */
    mov.b   @(r0,r7),r0     /* fliptable offset is signed */
    mov.b   r0,@(3,r4)      /* store byte */
.start_b2:
    cmp/hi  r4,r5           /* runs r4 up to end address */
    bt      .loop_b2

.exit:
    rts
    nop                     /* delay slot of rts */

    .align  2
    .global _fliptable

/* Bit reversal lookup table, 256 entries of one byte each.
 *
 * _bitswap indexes this table with sign extended bytes (-128..127), so the
 * _fliptable label sits in the middle of the data: the 128 entries for the
 * input bytes 0x80..0xff come first and are reached with negative offsets,
 * the entries for 0x00..0x7f start at the label itself.
 * Each entry holds its input byte with the bit order mirrored, e.g. the
 * entry for 0x01 is 0x80 and the entry for 0x80 is 0x01.
 *
 * NOTE(review): _bitswap loads the table address with mova, which needs a
 * longword aligned target. Presumably .align 2 means 4 byte alignment for
 * this assembler target; the 128 bytes in front of the label then keep the
 * label aligned as well - confirm before moving or resizing anything here.
 */

    /* offsets -128..-1: input bytes 0x80..0xff */
    .byte   0x01, 0x81, 0x41, 0xc1, 0x21, 0xa1, 0x61, 0xe1
    .byte   0x11, 0x91, 0x51, 0xd1, 0x31, 0xb1, 0x71, 0xf1
    .byte   0x09, 0x89, 0x49, 0xc9, 0x29, 0xa9, 0x69, 0xe9
    .byte   0x19, 0x99, 0x59, 0xd9, 0x39, 0xb9, 0x79, 0xf9
    .byte   0x05, 0x85, 0x45, 0xc5, 0x25, 0xa5, 0x65, 0xe5
    .byte   0x15, 0x95, 0x55, 0xd5, 0x35, 0xb5, 0x75, 0xf5
    .byte   0x0d, 0x8d, 0x4d, 0xcd, 0x2d, 0xad, 0x6d, 0xed
    .byte   0x1d, 0x9d, 0x5d, 0xdd, 0x3d, 0xbd, 0x7d, 0xfd
    .byte   0x03, 0x83, 0x43, 0xc3, 0x23, 0xa3, 0x63, 0xe3
    .byte   0x13, 0x93, 0x53, 0xd3, 0x33, 0xb3, 0x73, 0xf3
    .byte   0x0b, 0x8b, 0x4b, 0xcb, 0x2b, 0xab, 0x6b, 0xeb
    .byte   0x1b, 0x9b, 0x5b, 0xdb, 0x3b, 0xbb, 0x7b, 0xfb
    .byte   0x07, 0x87, 0x47, 0xc7, 0x27, 0xa7, 0x67, 0xe7
    .byte   0x17, 0x97, 0x57, 0xd7, 0x37, 0xb7, 0x77, 0xf7
    .byte   0x0f, 0x8f, 0x4f, 0xcf, 0x2f, 0xaf, 0x6f, 0xef
    .byte   0x1f, 0x9f, 0x5f, 0xdf, 0x3f, 0xbf, 0x7f, 0xff
_fliptable:
    /* offsets 0..127: input bytes 0x00..0x7f */
    .byte   0x00, 0x80, 0x40, 0xc0, 0x20, 0xa0, 0x60, 0xe0
    .byte   0x10, 0x90, 0x50, 0xd0, 0x30, 0xb0, 0x70, 0xf0
    .byte   0x08, 0x88, 0x48, 0xc8, 0x28, 0xa8, 0x68, 0xe8
    .byte   0x18, 0x98, 0x58, 0xd8, 0x38, 0xb8, 0x78, 0xf8
    .byte   0x04, 0x84, 0x44, 0xc4, 0x24, 0xa4, 0x64, 0xe4
    .byte   0x14, 0x94, 0x54, 0xd4, 0x34, 0xb4, 0x74, 0xf4
    .byte   0x0c, 0x8c, 0x4c, 0xcc, 0x2c, 0xac, 0x6c, 0xec
    .byte   0x1c, 0x9c, 0x5c, 0xdc, 0x3c, 0xbc, 0x7c, 0xfc
    .byte   0x02, 0x82, 0x42, 0xc2, 0x22, 0xa2, 0x62, 0xe2
    .byte   0x12, 0x92, 0x52, 0xd2, 0x32, 0xb2, 0x72, 0xf2
    .byte   0x0a, 0x8a, 0x4a, 0xca, 0x2a, 0xaa, 0x6a, 0xea
    .byte   0x1a, 0x9a, 0x5a, 0xda, 0x3a, 0xba, 0x7a, 0xfa
    .byte   0x06, 0x86, 0x46, 0xc6, 0x26, 0xa6, 0x66, 0xe6
    .byte   0x16, 0x96, 0x56, 0xd6, 0x36, 0xb6, 0x76, 0xf6
    .byte   0x0e, 0x8e, 0x4e, 0xce, 0x2e, 0xae, 0x6e, 0xee
    .byte   0x1e, 0x9e, 0x5e, 0xde, 0x3e, 0xbe, 0x7e, 0xfe


/* The size recorded for the _bitswap symbol runs from the function entry up
 * to here, i.e. it covers the code and the complete flip table. */
.end:
    .size   _bitswap,.end-_bitswap