summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAndrew Mahone <andrew.mahone@gmail.com>2009-12-31 08:32:15 +0000
committerAndrew Mahone <andrew.mahone@gmail.com>2009-12-31 08:32:15 +0000
commit822abc12360900030323560b92a440f425b5641a (patch)
tree037ba9d25b25a1ca842ef66ddbfe2ce9470a7c0d
parentbecdbaa12d58850efa65da9a3f623795aed8acfb (diff)
Add 31/31-bit unsigned division in apps/codecs/lib/udiv_arm.S, with 2 cycles / iteration, falling back to previous 32-bit, 3 cycle / iteration code when needed (well under 1% of divisions in sample file). APE normal sample is now 96.90% realtime, approx 1.3% improved vs svn. TODO: unify divisor normalization for both trial subtraction routines, possibly use divisor bits to select 31- vs 32-bit division.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@24130 a1c6a512-1295-4272-9138-f99709370657
-rw-r--r--apps/codecs/lib/udiv32_armv4.S54
1 files changed, 52 insertions, 2 deletions
diff --git a/apps/codecs/lib/udiv32_armv4.S b/apps/codecs/lib/udiv32_armv4.S
index 6b34cae1b3..6921c7fbd1 100644
--- a/apps/codecs/lib/udiv32_armv4.S
+++ b/apps/codecs/lib/udiv32_armv4.S
@@ -8,6 +8,7 @@
* $Id$
*
* Copyright (C) 2008 by Jens Arnold
+ * Copyright (C) 2009 by Andrew Mahone
*
* Optimised unsigned integer division for ARMv4
*
@@ -30,7 +31,48 @@
/* Codecs should not normally do this, but we need to check a macro, and
* codecs.h would confuse the assembler. */
-.macro ARM_DIV_BODY dividend, divisor, result, curbit
+/* Adapted from an algorithm given in ARM System Developer's Guide (7.3.1.2)
+ for dividing a 30-bit value by a 15-bit value, with two operations per
+ iteration by storing quotient and remainder together and adding the previous
+ quotient bit during trial subtraction. Modified to work with any dividend
+ and divisor both less than 1 << 30, and skipping trials by calculating bits
+ in output.
+*/
+.macro ARM_DIV_31_BODY dividend, divisor, result, bits, curbit, quotient
+@ Unsigned divide by trial subtraction: quotient register receives the quotient, dividend register receives the upper (remainder) part of result unless it is the same register as quotient; clobbers divisor, result, bits, curbit and the flags.
+ mov \bits, #1 @ bits = 1 + total left shift applied to the divisor below
+ cmp \divisor, \dividend, lsr #16 @ divisor <= (dividend >> 16)?
+ movls \divisor, \divisor, lsl #16 @ yes: shift the divisor up by 16
+ addls \bits, \bits, #16 @ ...and count those 16 positions
+ cmp \divisor, \dividend, lsr #8 @ same test with a shift of 8
+ movls \divisor, \divisor, lsl #8
+ addls \bits, \bits, #8
+ cmp \divisor, \dividend, lsr #4 @ same test with a shift of 4
+ movls \divisor, \divisor, lsl #4
+ addls \bits, \bits, #4
+ cmp \divisor, \dividend, lsr #2 @ same test with a shift of 2
+ movls \divisor, \divisor, lsl #2
+ addls \bits, \bits, #2
+ cmp \divisor, \dividend, lsr #1 @ same test with a shift of 1
+ movls \divisor, \divisor, lsl #1
+ addls \bits, \bits, #1
+ rsb \divisor, \divisor, #0 @ negate the shifted divisor so each trial subtraction is an add
+ adds \result, \dividend, \divisor @ first trial: dividend - shifted divisor; carry set when there is no borrow
+ subcc \result, \result, \divisor @ borrow (carry clear): undo the trial
+ rsb \curbit, \bits, #31 @ curbit = 31 - bits = number of unrolled steps to skip
+ add pc, pc, \curbit, lsl #3 @ computed jump: each unrolled step is 2 instructions = 8 bytes; pc reads as this instruction + 8
+ nop @ filler: the jump base (pc + 8) is the first unrolled step, just past this slot
+ .rept 30
+ adcs \result, \divisor, \result, lsl #1 @ result = (result << 1) + previous quotient bit (carry) - shifted divisor
+ subcc \result, \result, \divisor @ borrow (carry clear): undo the trial
+ .endr
+ /* shift remainder/quotient left one, add final quotient bit */
+ adc \result, \result, \result
+ mov \dividend, \result, lsr \bits @ dividend = result >> bits (the part above the quotient field)
+ eor \quotient, \result, \dividend, lsl \bits @ clear that upper part, leaving the low bits of result as the quotient
+.endm
+
+.macro ARM_DIV_32_BODY dividend, divisor, result, curbit
mov \result, \dividend
mov \curbit, #90 @ 3 * 30, (calculating branch dest)
@@ -93,8 +135,16 @@ udiv32_arm:
bls 10f
tst r1, r2
beq 30f
+ tst r0, r0
+ /* High bit must be unset, otherwise use ARM_DIV_32_BODY. High bit of
+ divisor is also unset, since dividend has been tested to be >= divisor.
+ */
+ bmi 5f
+ ARM_DIV_31_BODY r0, r1, r2, r3, ip, r0
+ bx lr
- ARM_DIV_BODY r0, r1, r2, r3
+5:
+ ARM_DIV_32_BODY r0, r1, r2, r3
mov r0, r2
bx lr