summaryrefslogtreecommitdiff
path: root/firmware/asm/mips/memcpy.S
blob: ec1625bb4f64b6475782919d638a3238d85b59b4 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
/***************************************************************************
 *             __________               __   ___.
 *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
 *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
 *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
 *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
 *                     \/            \/     \/    \/            \/
 * $Id$
 *
 * Copyright (C) 2002, 2003 Free Software Foundation, Inc.
 * This file was originally part of the GNU C Library
 * Contributed to glibc by Hartvig Ekner <hartvige@mips.com>, 2002
 * Adapted for Rockbox by Maurus Cuelenaere, 2009
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
 * KIND, either express or implied.
 *
 ****************************************************************************/

#include "config.h"
#include "mips.h"

/* void *memcpy(void *s1, const void *s2, size_t n);    exit value: s1      */
/* void *mempcpy(void *s1, const void *s2, size_t n);   exit value: s1 + n  */

/*
 * Partial-word access mnemonics, selected by byte order.
 *
 *   LWHI / SWHI   load / store the "high" part of a word
 *   LWLO / SWLO   load / store the "low" part of a word
 *
 * The high part maps to the "left" instructions (lwl/swl) on big-endian
 * and to the "right" instructions (lwr/swr) on little-endian; the low
 * part is the reverse.
 */
#if defined(ROCKBOX_BIG_ENDIAN)
#define LWHI lwl
#define LWLO lwr
#define SWHI swl
#define SWLO swr
#else /* !ROCKBOX_BIG_ENDIAN */
#define LWHI lwr
#define LWLO lwl
#define SWHI swr
#define SWLO swl
#endif /* ROCKBOX_BIG_ENDIAN */

    .section   .icode, "ax", %progbits

    .global    memcpy
    .type      memcpy, %function
    .global    mempcpy
    .type      mempcpy, %function

    .set       noreorder                # Delay slots below are filled by hand
/*
 * mempcpy: performs the same copy as memcpy, but the exit value is s1 + n
 * instead of s1.
 *
 * In:  a0 = s1 (destination), a1 = s2 (source), a2 = n (byte count)
 * Out: v0 = a0 + a2
 *
 * Only the exit value is computed here. The copy itself is done by the code
 * after the memcpy label: entered at last8 when n < 8, otherwise at the
 * local label 1 with t0 = a1 ^ a0 already computed in the delay slot.
 */
mempcpy:
    slti       t0, a2, 8                # Less than 8?
    bne        t0, zero, last8          # Yes: byte loop does the whole copy
     addu      v0, a0, a2               # exit value = s1 + n (delay slot, runs on both paths)
    b          1f                       # Join memcpy just past its entry code
     xor       t0, a1, a0               # Find a0/a1 displacement (fill delay)

/*
 * memcpy: copy n bytes from s2 to s1; the exit value is s1.
 *
 * In:      a0 = s1 (destination), a1 = s2 (source), a2 = n (byte count)
 * Out:     v0 = a0 as it was on entry
 * Scratch: t0-t7 and a3; a0, a1 and a2 are modified while copying.
 *
 * Strategy:
 *   n < 8 .................. byte loop only (last8)
 *   a0/a1 low bits equal ... copy up to a word boundary, then 32-byte
 *                            blocks (lop8w), single words (lop1w) and
 *                            finally the byte tail (last8)
 *   a0/a1 low bits differ .. word-align the destination, then one word at
 *                            a time using an unaligned load and an aligned
 *                            store (shfth), and finally the byte tail
 *
 * This code is assembled under .set noreorder: the instruction following
 * each branch or jump is its delay slot and executes whether or not the
 * branch is taken.
 *
 * NOTE(review): slti and blez treat a2 as a signed value, so a count with
 * the top bit set takes the n < 8 path and copies nothing.
 */
memcpy:
    slti       t0, a2, 8                # Less than 8?
    bne        t0, zero, last8          # Yes: byte loop does the whole copy
    move       v0, a0                   # Setup exit value before too late (delay slot)

    xor        t0, a1, a0               # Find a0/a1 displacement

    /* mempcpy joins here with v0 and t0 already set up. */
1:  andi       t0, 0x3                  # Keep only the low two bits of the displacement
    bne        t0, zero, shift          # Go handle the unaligned case
    subu       t1, zero, a1             # t1 = -a1 (delay slot)
    andi       t1, 0x3                  # a0/a1 are aligned, but are we
    beq        t1, zero, chk8w          # starting in the middle of a word?
    subu       a2, t1                   # a2 -= bytes up to the word boundary (delay slot)
    /*
     * LWHI/SWHI transfer only the bytes from the given address up to the
     * end of the word containing it, i.e. the t1 leading bytes.
     */
    LWHI       t0, 0(a1)                # Yes we are... take care of that
    addu       a1, t1
    SWHI       t0, 0(a0)
    addu       a0, t1

    /* a0 and a1 are both word aligned from here until last8. */
chk8w:
    andi       t0, a2, 0x1f             # 32 or more bytes left?
    beq        t0, a2, chk1w            # No: go try single words
    subu       a3, a2, t0               # Yes; a3 = byte count for the loop (delay slot)
    addu       a3, a1                   # a3 = end address of loop
    move       a2, t0                   # a2 = what will be left after loop
lop8w:
    lw         t0,  0(a1)               # Loop taking 8 words at a time
    lw         t1,  4(a1)
    lw         t2,  8(a1)
    lw         t3, 12(a1)
    lw         t4, 16(a1)
    lw         t5, 20(a1)
    lw         t6, 24(a1)
    lw         t7, 28(a1)
    addiu      a0, 32                   # Advance both pointers before storing,
    addiu      a1, 32                   # hence the negative store offsets
    sw         t0, -32(a0)
    sw         t1, -28(a0)
    sw         t2, -24(a0)
    sw         t3, -20(a0)
    sw         t4, -16(a0)
    sw         t5, -12(a0)
    sw         t6, -8(a0)
    bne        a1, a3, lop8w            # Until a1 reaches the end address
    sw         t7, -4(a0)               # Last store of the block (delay slot)

chk1w:
    andi       t0, a2, 0x3              # 4 or more bytes left?
    beq        t0, a2, last8            # No: only the byte tail remains
    subu       a3, a2, t0               # Yes, handle them one word at a time
    addu       a3, a1                   # a3 again end address
    move       a2, t0                   # a2 = bytes left after the word loop
lop1w:
    lw         t0, 0(a1)
    addiu      a0, 4
    addiu      a1, 4
    bne        a1, a3, lop1w            # Until a1 reaches the end address
    sw         t0, -4(a0)               # Store the word just loaded (delay slot)

    /* Byte tail: a2 = bytes still to copy (may be zero). */
last8:
    blez       a2, lst8e                # Handle last 8 bytes, one at a time
    addu       a3, a2, a1               # a3 = end address (delay slot)
lst8l:
    lb         t0, 0(a1)
    addiu      a0, 1
    addiu      a1, 1
    bne        a1, a3, lst8l            # Until a1 reaches the end address
    sb         t0, -1(a0)               # Store the byte just loaded (delay slot)
lst8e:
    jr         ra                       # Bye, bye
    nop                                 # Nothing useful for the delay slot

    /*
     * a0 and a1 have different low two bits. Reached only with n >= 8, and
     * at most 3 bytes are consumed aligning a0, so the shfth loop always
     * has at least one whole word to copy.
     */
shift:
    subu       a3, zero, a0             # Src and Dest unaligned
    andi       a3, 0x3                  #  (unoptimized case...)
    beq        a3, zero, shft1          # a0 already word aligned?
    subu       a2, a3                   # a2 = bytes left
    LWHI       t0, 0(a1)                # Take care of first odd part
    LWLO       t0, 3(a1)                # t0 = the 4 bytes at a1, any alignment
    addu       a1, a3
    SWHI       t0, 0(a0)                # Store only the a3 bytes up to the word boundary
    addu       a0, a3                   # a0 is now word aligned
shft1:
    andi       t0, a2, 0x3              # t0 = bytes left over after whole words
    subu       a3, a2, t0               # a3 = byte count for the word loop
    addu       a3, a1                   # a3 = end address of loop
shfth:
    LWHI       t1, 0(a1)                # Limp through, word by word
    LWLO       t1, 3(a1)
    addiu      a0, 4
    addiu      a1, 4
    bne        a1, a3, shfth            # Until a1 reaches the end address
    sw         t1, -4(a0)               # Aligned store of the word (delay slot)
    b          last8                    # Handle anything which may be left
    move       a2, t0                   # a2 = remaining byte count (delay slot)

    .set       reorder                  # Let the assembler manage delay slots again