gbadev.org forum archive

Here's an implementation of the cordic rotator atan2 we've been discussing in this thread.

It would be great if some of the experts here could take a look at it and suggest any fixes or improvements they see.

Code:

/*
sint32 atan2asm(sint32 y, sint32 x);

atan2 using cordic rotator
implemented by eli curtz, eli@nuprometheus.com
based on information from http://www.andraka.com/cordic.htm
and forum comments at http://www.gbadev.org from users
AnthC, DiscoStew, and ecurtz (me)

entry
r0: y
r1: x

exit
r0: angle in the range -256...256, where 256 corresponds to pi (change the final shift to adjust the range)
r1: distance (if uncommented at end of routine)

destroys
r0: y
r1: x
r2: tmp
r3: shift
r12: ang
*/

@ Assemble the routine as ARM code into the .iwram section.
@ The section is declared allocatable + executable ("ax") explicitly,
@ because GNU as gives an unrecognised section name no flags by default.
@ The alignment directive comes AFTER the section switch so that it
@ applies to .iwram (previously it aligned .text and then switched away).
.section .iwram, "ax", %progbits
.arm
.align 2                        @ 2^2 = 4-byte alignment for ARM instructions
.global atan2asm
.type atan2asm, %function

@ ----------------------------------------------------------------------
@ sint32 atan2asm(sint32 y, sint32 x)
@
@ Fixed-point atan2 using an unrolled CORDIC rotator in vectoring mode.
@ The vector (x, y) is rotated towards the positive x axis in eight
@ steps (atan(1), atan(1/2) ... atan(1/128)) plus a final half-step
@ correction; the applied rotations are accumulated in 'ang'.
@
@ In:    r0 = y, r1 = x
@ Out:   r0 = angle, -256...256, where 256 == pi
@              (internally pi == 0x4000000; the final asr #18 sets the range)
@        r1 = distance, only when assembled with ATAN2_WITH_DISTANCE defined
@ Clobb: r0-r3, r12, flags.  r4-r7 are saved and restored on the stack.
@ Range: |x| + |y| should stay below 2^30.  The rotator grows x by a
@        factor of about 1.647 (= 4096/2487), so larger inputs can wrap.
@ ----------------------------------------------------------------------
atan2asm:
        movs    r12, r1, lsr #31        @ ang = sign bit of x; Z set when x >= 0
        beq     endinit                 @ x >= 0: vector already in right half-plane
        rsb     r1, r1, #0              @ x = -x   (rotate the vector by pi)
        rsbs    r0, r0, #0              @ y = -y   (flags = sign of the new y)
        mov     r12, #0x4000000         @ ang = +pi (mov leaves the flags alone)
        rsbpl   r12, r12, #0            @ new y >= 0: ang = -pi
endinit:
        orrs    r2, r1, r0              @ Z: x == 0 && y == 0;  N: sign of y (x >= 0 here)
        bxeq    lr                      @ atan2(0, 0): return 0 (r0 is already 0)

                                        @ Pre-scaling: pick the largest shift that
                                        @ keeps the rotator inside 32 bits.
                                        @ There is no reason to calculate the shift
                                        @ if you know x and y are always small:
                                        @ just use a fixed shift and remove this code.
                                        @
                                        @ tmp = x + |y| bounds the vector length.
                                        @ (x | y was used for y >= 0 before; it can be
                                        @ as small as half of x + y, which allowed the
                                        @ shifted operands to overflow, e.g. for
                                        @ x = y = 0x1FFFFF.)
        addpl   r2, r1, r0              @ y >= 0: tmp = x + y  (flags untouched)
        submi   r2, r1, r0              @ y <  0: tmp = x - y
        mov     r3, #0                  @ shift = 0
        cmp     r2, #0x8000000          @ if (tmp < 0x8000000)
        addmi   r3, r3, #3              @     shift += 3
        cmp     r2, #0x1000000          @ if (tmp < 0x1000000)
        addmi   r3, r3, #3              @     shift += 3
        cmp     r2, #0x200000           @ if (tmp < 0x200000)
        addmi   r3, r3, #3              @     shift += 3
                                        @ end of shift calculation

        stmfd   sp!, {r4-r7}            @ save the registers used for table entries
        adr     r4, atan2table          @ r4 -> angle table
        ldmia   r4!, {r5-r7}            @ r5-r7 = entries 0..2, r4 advanced past them
        mov     r1, r1, lsl r3          @ x <<= shift
        movs    r0, r0, lsl r3          @ y <<= shift (flags = sign of y)
        bpl     ypos0                   @ y >= 0: take the "rotate clockwise" chain

        @ ---- y < 0 chain: rotate counter-clockwise, subtract from ang ----
        @ x and tmp (r1 / r2) swap roles every step so that each step can
        @ still read the previous x after the new one has been written.
yneg0:
        sub     r12, r12, #0x1000000    @ ang -= pi/4
        sub     r2, r1, r0              @ tmp = x - y
        adds    r0, r0, r1              @ y += x
        bpl     ypos1
yneg1:
        sub     r12, r12, r5            @ ang -= table[0]
        sub     r1, r2, r0, asr #1      @ x = tmp - (y >> 1)
        adds    r0, r0, r2, asr #1      @ y += (tmp >> 1)
        bpl     ypos2
yneg2:
        sub     r12, r12, r6            @ ang -= table[1]
        sub     r2, r1, r0, asr #2      @ tmp = x - (y >> 2)
        adds    r0, r0, r1, asr #2      @ y += (x >> 2)
        bpl     ypos3
yneg3:
        sub     r12, r12, r7            @ ang -= table[2]
        sub     r1, r2, r0, asr #3      @ x = tmp - (y >> 3)
        adds    r0, r0, r2, asr #3      @ y += (tmp >> 3)
        bpl     ypos4
yneg4:
        ldmia   r4, {r4-r7}             @ r4-r7 = entries 3..6 (r4 is overwritten,
                                        @ the table pointer is no longer needed)
        sub     r12, r12, r4            @ ang -= table[3]
        sub     r2, r1, r0, asr #4      @ tmp = x - (y >> 4)
        adds    r0, r0, r1, asr #4      @ y += (x >> 4)
        bpl     ypos5
yneg5:
        sub     r12, r12, r5            @ ang -= table[4]
        sub     r1, r2, r0, asr #5      @ x = tmp - (y >> 5)
        adds    r0, r0, r2, asr #5      @ y += (tmp >> 5)
        bpl     ypos6
yneg6:
        sub     r12, r12, r6            @ ang -= table[5]
        sub     r2, r1, r0, asr #6      @ tmp = x - (y >> 6)
        adds    r0, r0, r1, asr #6      @ y += (x >> 6)
        bpl     ypos7
yneg7:
        sub     r12, r12, r7            @ ang -= table[6]
        sub     r1, r2, r0, asr #7      @ x = tmp - (y >> 7)
        adds    r0, r0, r2, asr #7      @ y += (tmp >> 7)
        b       endrotate               @ b does not touch the flags

        @ ---- y >= 0 chain: rotate clockwise, add to ang ----
ypos0:
        add     r12, r12, #0x1000000    @ ang += pi/4
        add     r2, r1, r0              @ tmp = x + y
        subs    r0, r0, r1              @ y -= x
        bmi     yneg1
ypos1:
        add     r12, r12, r5            @ ang += table[0]
        add     r1, r2, r0, asr #1      @ x = tmp + (y >> 1)
        subs    r0, r0, r2, asr #1      @ y -= (tmp >> 1)
        bmi     yneg2
ypos2:
        add     r12, r12, r6            @ ang += table[1]
        add     r2, r1, r0, asr #2      @ tmp = x + (y >> 2)
        subs    r0, r0, r1, asr #2      @ y -= (x >> 2)
        bmi     yneg3
ypos3:
        add     r12, r12, r7            @ ang += table[2]
        add     r1, r2, r0, asr #3      @ x = tmp + (y >> 3)
        subs    r0, r0, r2, asr #3      @ y -= (tmp >> 3)
        bmi     yneg4
ypos4:
        ldmia   r4, {r4-r7}             @ r4-r7 = entries 3..6 (r4 is overwritten,
                                        @ the table pointer is no longer needed)
        add     r12, r12, r4            @ ang += table[3]
        add     r2, r1, r0, asr #4      @ tmp = x + (y >> 4)
        subs    r0, r0, r1, asr #4      @ y -= (x >> 4)
        bmi     yneg5
ypos5:
        add     r12, r12, r5            @ ang += table[4]
        add     r1, r2, r0, asr #5      @ x = tmp + (y >> 5)
        subs    r0, r0, r2, asr #5      @ y -= (tmp >> 5)
        bmi     yneg6
ypos6:
        add     r12, r12, r6            @ ang += table[5]
        add     r2, r1, r0, asr #6      @ tmp = x + (y >> 6)
        subs    r0, r0, r1, asr #6      @ y -= (x >> 6)
        bmi     yneg7
ypos7:
        add     r12, r12, r7            @ ang += table[6]
        add     r1, r2, r0, asr #7      @ x = tmp + (y >> 7)
        subs    r0, r0, r2, asr #7      @ y -= (tmp >> 7)

endrotate:
        @ Final correction by half of the last table entry, in the
        @ direction given by the sign of the remaining y.
        addpl   r12, r12, r7, asr #1    @ y >= 0: ang += table[6] / 2
        submi   r12, r12, r7, asr #1    @ y <  0: ang -= table[6] / 2

        ldmfd   sp!, {r4-r7}            @ restore r4-r7

.ifdef ATAN2_WITH_DISTANCE
        @ Optional: r1 = distance = x / rotator gain ~= x * 2487 / 4096.
        @ mul takes registers only and Rd must differ from Rm, and 2487
        @ (0x9B7) is not an encodable immediate, so it is built in r2.
        @ The 32-bit product wraps once the un-shifted x exceeds about
        @ 863000 (2^31 / 2487).
        mov     r1, r1, asr r3          @ x >>= shift (undo the pre-scaling)
        mov     r2, #0x9B0              @ r2  = 0x9B0
        orr     r2, r2, #0x007          @ r2 |= 7  -> 2487
        mul     r1, r2, r1              @ x *= 2487
        mov     r1, r1, asr #12         @ x >>= 12
.endif

        add     r12, r12, #0x10000      @ ang += (1 << 16)
        movs    r0, r12, asr #18        @ result = ang >> 18  (pi -> 256)
@       addmi   r0, r0, #512            @ uncomment for a 0...511 result instead
        bx      lr                      @ return
.fend1:
.size atan2asm,.fend1-atan2asm

@ CORDIC angle table: atan(2^-k) for k = 1..7, scaled so that
@ pi == 0x4000000 (i.e. value = atan(2^-k) * 2^26 / pi).
@ atan(2^0) = pi/4 is applied as the immediate 0x1000000 in the code.
@ atan2asm reads the first three words with one ldmia and the
@ remaining four with a second one, so the order and count must not change.
atan2table:
.word 0x00972028        @ atan(1/2)    ~ 26.565 deg
.word 0x004fd9c2        @ atan(1/4)    ~ 14.036 deg
.word 0x0028888e        @ atan(1/8)    ~  7.125 deg
.word 0x0014586a        @ atan(1/16)   ~  3.576 deg
.word 0x000a2ebf        @ atan(1/32)   ~  1.790 deg
.word 0x000517b0        @ atan(1/64)   ~  0.895 deg
.word 0x00028be2        @ atan(1/128)  ~  0.448 deg

<edit> Added ".section .iwram" just in case anybody is copy and pasting it into test code. BTW my guesstimate is ~80 cycles for the current version, but I may be totally off due to some beginner mistake. </edit>

I adjusted the shifts slightly. If anybody is going to use this you should check the values against the kind of numbers you'll actually be seeing in your game.

Anybody tried this in their code yet? I'd like to hear how it goes...

I'm working on a good old fashioned tunnel effect, so I will be needing atan for the texture mapping. I probably won't have it running until next weekend at least, but I'll give your routine a shot when I do.
_________________
___________
The best optimization is to do nothing at all.
Therefore a fully optimized program doesn't exist.
-Deku

Sorry for the big bump, but I'm implementing this code and both the distance and the arctan function return excellent values for me to work with :)

But I've got two problems:

1.
This line:

Code:

mul r1, r1, #2478 @ x *= 2487 @

Results in the following error:
cordic.s: Assembler messages:
cordic.s:169: rd and rm should be different in mul
cordic.s:169: Error: bad arguments to instruction -- `mul r1,r1,#2478'

2.
I can only get the r0 (angle) value out of the function, but can I get the value of r1 (distance) out of it at the same time?
_________________
[Images not permitted - Click here to view it]

Yeah, that's a pretty messed-up instruction. The mul instruction only works out of registers and not immediate values, and the destination register needs to be different from the first source register (but it can be used as the second source register).

Even if this statement were okay, 2478 (or 2487, depending on whether the instruction or the comment is correct) is an invalid immediate value, so it couldn't just be mov-ed into another source register; it would have to be loaded in from the constant pool (or or-ed together).

Conclusion: it needs some fixing. :D

Dan.

I also solved the second issue by writing an asm function that returns r1:

Code:

@ Returns whatever value is currently held in r1.
@ NOTE(review): presumably meant to be called right after atan2asm (with
@ its distance code enabled) to fetch the distance left in r1. r1 is a
@ scratch register that callers do not preserve, so this only works if
@ nothing overwrites r1 between the two calls - verify against the
@ compiler's generated code.
getDistance:
mov r0, r1      @ return value = r1
bx lr           @ return to caller

To my amazement it works perfectly, thanks everyone!
_________________
[Images not permitted - Click here to view it]

gbadev.org forum archive

ASM > atan2 implementation

#24599 - ecurtz - Sat Aug 07, 2004 6:17 am

#24712 - ecurtz - Tue Aug 10, 2004 4:26 am

#24714 - DekuTree64 - Tue Aug 10, 2004 4:42 am

#55942 - IRbaboon - Tue Oct 04, 2005 8:09 pm

#55945 - poslundc - Tue Oct 04, 2005 8:39 pm

#56027 - IRbaboon - Wed Oct 05, 2005 3:40 pm