/***************************************************************************
 *             __________               __   ___.
 *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
 *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
 *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
 *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
 *                     \/            \/     \/    \/            \/
 * $Id$
 *
 * Copyright (C) 2008 by Jens Arnold
 *
 * Optimised unsigned integer division for ARMv4
 *
 * Based on: libgcc routines for ARM cpu.
 * Division routines, written by Richard Earnshaw, (rearnsha@armltd.co.uk)
 * Copyright 1995, 1996, 1998, 1999, 2000, 2003, 2004, 2005
 * Free Software Foundation, Inc.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
 * KIND, either express or implied.
 *
 ****************************************************************************/

#include "config.h"
/* Codecs should not normally do this, but we need to check a macro, and
 * codecs.h would confuse the assembler. */

/*
 * ARM_DIV_BODY dividend, divisor, result, curbit
 *
 * Unrolled shift-and-subtract unsigned division. All four arguments are
 * registers.
 *
 *   dividend  in:  numerator.
 *             out: clobbered. It is NOT the remainder on exit, because the
 *                  final subtraction is deliberately left out.
 *   divisor   in:  denominator. Only read, never written by this macro.
 *   result    out: quotient. Also used as scratch by the preamble.
 *   curbit    scratch: number of instructions to skip at the start of the
 *                  unrolled sequence.
 *   flags     clobbered.
 *
 * The unrolled sequence has one 3-instruction step per quotient bit for
 * shift = 31 .. 1, followed by a 2-instruction step for shift = 0. The
 * preamble estimates how many of the leading steps cannot produce a set
 * quotient bit and jumps over them with a computed branch.
 *
 * The only invocation visible in this file (udiv32_arm) reaches the macro
 * with divisor >= 2, divisor not a power of two, and dividend > divisor.
 *
 * NOTE: the computed branch depends on the exact number of instructions per
 * step (3) and on exactly one instruction sitting between the "add pc" and
 * the first step. Do not insert, remove or reorder instructions here.
 */
.macro ARM_DIV_BODY dividend, divisor, result, curbit

    mov     \result, \dividend
    mov     \curbit, #90                @ 3 * 30, (calculating branch dest)
    @ Start by assuming that only the shift=1 and shift=0 steps are needed,
    @ i.e. skip 30 steps of 3 instructions each. Every test below checks
    @ whether divisor <= (result >> k). If so, k additional steps have to
    @ run: curbit is lowered by 3 * k and result is shifted down so that the
    @ next test examines the remaining upper part.
    cmp     \divisor, \result, lsr #16
    movls   \result,\result, lsr #16
    subls   \curbit, \curbit, #48
    cmp     \divisor, \result, lsr #8
    movls   \result,\result, lsr #8
    subls   \curbit, \curbit, #24
    cmp     \divisor, \result, lsr #4
    movls   \result,\result, lsr #4
    subls   \curbit, \curbit, #12
    cmp     \divisor, \result, lsr #2
    subls   \curbit, \curbit, #6
    @ Calculation is only done down to shift=2, because the shift=1 step
    @ would need 3 more cycles, but would only gain 1.5 cycles on average.
    mov     \result, #0                 @ quotient accumulator starts empty
    @ Computed branch into the unrolled sequence. In ARM state, reading pc
    @ yields the address of the current instruction plus 8, which is the
    @ first step after the nop. curbit counts instructions, lsl #2 turns
    @ that into a byte offset.
    add     pc, pc, \curbit, lsl #2
    nop                                 @ padding so that pc+8 is the first step
    .set    shift, 32
    .rept   31
    .set    shift, shift - 1
    @ One quotient bit: if (divisor << shift) fits into what is left of the
    @ dividend, set the bit in the result and subtract.
    cmp     \divisor, \dividend, lsr #shift
    orrls   \result, \result, #(1 << shift)
    subls   \dividend, \dividend, \divisor, lsl #shift
    .endr
    @ shift==0 in the .rept would cause a warning for lsr #0
    cmp     \divisor, \dividend
    orrls   \result, \result, #1
    @subls  \dividend, \dividend, \divisor  @ correct remainder not needed
.endm

/*
 * ARM_DIV2_ORDER divisor, order
 *
 * Computes the bit position of the single set bit in divisor (log2 of a
 * power of two) and leaves it in order.
 *
 *   divisor   in:  value with exactly one bit set.
 *             out: clobbered (holds the hash value).
 *   order     out: byte loaded from L_ffs_table.
 *
 * The value is multiplied by 0x0450fbaf using shifts, and the top 6 bits of
 * the product index the 64-entry byte table L_ffs_table. The table address
 * is formed with adr, so it is taken relative to pc.
 */
.macro ARM_DIV2_ORDER divisor, order

@ There's exactly one bit set in the divisor, so ffs() can be used
@ This is the ffs algorithm devised by D.Seal and posted to
@ comp.sys.arm on 16 Feb 1994.
    adr     \order, L_ffs_table
    orr     \divisor, \divisor, \divisor, lsl #4    @ = X * 0x11
    orr     \divisor, \divisor, \divisor, lsl #6    @ = X * 0x451
    rsb     \divisor, \divisor, \divisor, lsl #16   @ = X * 0x0450fbaf

    ldrb    \order, [\order, \divisor, lsr #26]     @ top 6 bits select the entry
.endm


/* Place the code in the .icode section when USE_IRAM is defined, otherwise
 * in the regular text section. */
#ifdef USE_IRAM
    .section    .icode,"ax",%progbits
#else
    .text
#endif
    .align
    .global udiv32_arm
    .type   udiv32_arm,%function

/*
 * udiv32_arm - unsigned 32-bit division
 *
 *   In:   r0 = dividend
 *         r1 = divisor
 *   Out:  r0 = quotient
 *
 *   Writes r2 and flags on every path, r3 on the general path and r1 on the
 *   power-of-two path. Does not touch the stack.
 *
 *   NOTE(review): presumably called from C as
 *   unsigned udiv32_arm(unsigned dividend, unsigned divisor) - confirm
 *   against the declaring header, which is not part of this file.
 *
 * Special cases, in the order they are tested:
 *   divisor == 1         -> r0 is returned unchanged
 *   divisor == 0         -> returns 0
 *   dividend == divisor  -> returns 1
 *   dividend <  divisor  -> returns 0
 *   divisor power of two -> returns dividend >> log2(divisor)
 *   anything else        -> ARM_DIV_BODY
 */
udiv32_arm:
    subs    r2, r1, #1              @ r2 = divisor - 1, flags for the next two
    bxeq    lr                      @ divisor == 1: quotient is the dividend
    bcc     20f                     @ borrow: divisor == 0 (Z is clear here)
    cmp     r0, r1
    bls     10f                     @ dividend <= divisor
    tst     r1, r2                  @ divisor & (divisor - 1)
    beq     30f                     @ zero: divisor is a power of two

    ARM_DIV_BODY r0, r1, r2, r3     @ quotient ends up in r2

    mov     r0, r2
    bx      lr

10:
    @ Reached with the flags of "cmp r0, r1" and dividend <= divisor.
    moveq   r0, #1                  @ equal -> 1
20:
    @ Falls through from 10 or is reached from the divisor == 0 check. In
    @ both cases Z clear means the result is 0.
    movne   r0, #0
    bx      lr

30:
    ARM_DIV2_ORDER r1, r2           @ r2 = log2(divisor), r1 is clobbered
    mov     r0, r0, lsr r2
    bx      lr

/*
 * Lookup table for ARM_DIV2_ORDER, indexed by the top 6 bits of
 * X * 0x0450fbaf. Each bit position 0..31 appears exactly once and index 0
 * holds 32. The remaining slots hold 0 and are presumably never selected
 * for a single-bit input.
 */
L_ffs_table:
    @        0   1   2   3   4   5   6   7
    @----------------------------------------------
    .byte   32,  0,  1, 12,  2,  6,  0, 13  @  0- 7
    .byte    3,  0,  7,  0,  0,  0,  0, 14  @  8-15
    .byte   10,  4,  0,  0,  8,  0,  0, 25  @ 16-23
    .byte    0,  0,  0,  0,  0, 21, 27, 15  @ 24-31
    .byte   31, 11,  5,  0,  0,  0,  0,  0  @ 32-39
    .byte    9,  0,  0, 24,  0,  0, 20, 26  @ 40-47
    .byte   30,  0,  0,  0,  0, 23,  0, 19  @ 48-55
    .byte   29,  0, 22, 18, 28, 17, 16,  0  @ 56-63