gbadev.org forum archive

This is a read-only mirror of the content originally found on forum.gbadev.org (now offline), salvaged from Wayback machine copies. A new forum can be found here.

ASM > atan2 implementation

#24599 - ecurtz - Sat Aug 07, 2004 6:17 am

Here's an implementation of the cordic rotator atan2 we've been discussing in this thread.

It would be great if some of the experts here could take a look at it and suggest any fixes or improvements they see.

Code:

/*
sint32 atan2asm(sint32 y, sint32 x);

atan2 using cordic rotator
implemented by eli curtz, eli@nuprometheus.com
based on information from http://www.andraka.com/cordic.htm
and forum comments at http://www.gbadev.org from users
AnthC, DiscoStew, and ecurtz (me)

target: ARM state (32-bit), GNU as syntax, placed in section .iwram

entry
r0: y
r1: x

exit
r0: angle (-256...256, i.e. pi == 256) change shift to adjust desired range
    returns 0 (the untouched y) when x == 0 and y == 0
r1: distance (if uncommented at end of routine)

destroys
r0: y
r1: x
r2: tmp
r3: shift
r12: ang
flags
r4-r7 are used for table entries but saved/restored on the stack

internal angle format: pi == 0x4000000, final result is ang >> 18

input range: the inputs are pre-scaled left by 0, 3, 6 or 9 bits based on
|x| + |y|, then grow by the cordic gain (~1.647) during rotation, so
|x| + |y| should stay below roughly 0x4d000000 to avoid 32-bit overflow
*/

.section .iwram,"ax",%progbits   @ flags given explicitly: allocatable + executable
.arm
.align 2                         @ align inside .iwram (after the section switch)
.global atan2asm
.type atan2asm, %function

atan2asm:
   movs   r12, r1, lsr#31      @ ang = (x >= 0) ? 0 : 1
   beq      endinit            @ if (x>= 0) branch else
   rsb      r1, r1, #0         @ x = -x      (rotate the vector by pi so x >= 0)
   rsbs   r0, r0, #0         @ y = -y      (flags = sign of the new y)
   mov      r12, #0x4000000      @ ang = pi
   rsbpl   r12, r12, #0      @ if (y >= 0) ang = -pi
endinit:
   orrs   r2, r1, r0         @ tmp = x | y  (Z: both zero, N: sign of y as x >= 0)
   bxeq   lr               @ return if (x == 0) && (y == 0)
                        @ there is no reason to calculate the shift
                        @ if you know the maximum values for x & y
                        @ are less than 0x00200000, just shift 8
                        @ and remove this code
                        @
                        @ the estimate is |x| + |y| for either sign of y;
                        @ x | y alone can be almost half of x + y, which
                        @ picked too large a shift and overflowed the
                        @ rotation for y >= 0 (e.g. x == y == 0x1fffff)
   addpl   r2, r1, r0         @ if (y >= 0) tmp = x + y
   submi   r2, r1, r0         @ if (y < 0)  tmp = x - y
   mov      r3, #0            @ shift = 0
                        @ unsigned compares: an estimate that wrapped past
                        @ 0x7fffffff counts as "large" and gets no shift
   cmp      r2, #0x8000000      @ if (tmp < 0x8000000)
   addlo   r3, r3, #3         @   shift += 3
   cmp      r2, #0x1000000      @ if (tmp < 0x1000000)
   addlo   r3, r3, #3         @   shift += 3
   cmp      r2, #0x200000      @ if (tmp < 0x200000)
   addlo   r3, r3, #3         @   shift += 3
                             @ end of shift calculation

   stmfd   sp!, {r4-r7}      @ push r4-r7 onto stack
   adr      r4, atan2table      @ r4 = &atan2table (pc-relative, same section)
   ldmia   r4!, {r5-r7}      @ read 3 entries from table, r4 -> 4th entry
   mov      r1, r1, lsl r3      @ x <<= shift
   movs   r0, r0, lsl r3      @ y <<= shift
   bpl      ypos0            @ if (y >= 0) branch

@ The rotation is unrolled into two mirrored chains. Each stage n drives y
@ toward 0: yneg<n> rotates counter-clockwise (ang -= step), ypos<n> rotates
@ clockwise (ang += step); the sign of the new y selects the next chain.
@ x alternates between r1 and r2 from stage to stage, which avoids a copy;
@ after stage 7 it is back in r1.
yneg0:
   sub      r12, r12, #0x1000000@ ang -= pi/4
   sub      r2, r1, r0         @ tmp = x - (y)
   adds   r0, r0, r1         @ y += (x)
   bpl      ypos1
yneg1:
   sub      r12, r12, r5      @ ang -= atantable[n]
   sub      r1, r2, r0, asr#1   @ x = tmp - (y >> 1)
   adds   r0, r0, r2, asr#1   @ y += (tmp >> 1)
   bpl      ypos2
yneg2:
   sub      r12, r12, r6      @ ang -= atantable[n]
   sub      r2, r1, r0, asr#2   @ tmp = x - (y >> 2)
   adds   r0, r0, r1, asr#2   @ y += (x >> 2)
   bpl      ypos3
yneg3:
   sub      r12, r12, r7      @ ang -= atantable[n]
   sub      r1, r2, r0, asr#3   @ x = tmp - (y >> 3)
   adds   r0, r0, r2, asr#3   @ y += (tmp >> 3)
   bpl      ypos4
yneg4:
   ldmia   r4, {r4-r7}         @ read next 4 entries from table
                        @ (r4 is overwritten by the first of them)
   sub      r12, r12, r4      @ ang -= atantable[n]
   sub      r2, r1, r0, asr#4   @ tmp = x - (y >> 4)
   adds   r0, r0, r1, asr#4   @ y += (x >> 4)
   bpl      ypos5
yneg5:
   sub      r12, r12, r5      @ ang -= atantable[n]
   sub      r1, r2, r0, asr#5   @ x = tmp - (y >> 5)
   adds   r0, r0, r2, asr#5   @ y += (tmp >> 5)
   bpl      ypos6
yneg6:
   sub      r12, r12, r6      @ ang -= atantable[n]
   sub      r2, r1, r0, asr#6   @ tmp = x - (y >> 6)
   adds   r0, r0, r1, asr#6   @ y += (x >> 6)
   bpl      ypos7
yneg7:
   sub      r12, r12, r7      @ ang -= atantable[n]
   sub      r1, r2, r0, asr#7   @ x = tmp - (y >> 7)
   adds   r0, r0, r2, asr#7   @ y += (tmp >> 7)
   b      endrotate            @ flags from adds are still live at endrotate

ypos0:
   add      r12, r12, #0x1000000@ ang += pi/4
   add      r2, r1, r0         @ tmp = x + (y)
   subs   r0, r0, r1         @ y -= (x)
   bmi      yneg1
ypos1:
   add      r12, r12, r5      @ ang += atantable[n]
   add      r1, r2, r0, asr#1   @ x = tmp + (y >> 1)
   subs   r0, r0, r2, asr#1   @ y -= (tmp >> 1)
   bmi      yneg2
ypos2:
   add      r12, r12, r6      @ ang += atantable[n]
   add      r2, r1, r0, asr#2   @ tmp = x + (y >> 2)
   subs   r0, r0, r1, asr#2   @ y -= (x >> 2)
   bmi      yneg3
ypos3:
   add      r12, r12, r7      @ ang += atantable[n]
   add      r1, r2, r0, asr#3   @ x = tmp + (y >> 3)
   subs   r0, r0, r2, asr#3   @ y -= (tmp >> 3)
   bmi      yneg4
ypos4:
   ldmia   r4, {r4-r7}         @ read next 4 entries from table
                        @ (r4 is overwritten by the first of them)
   add      r12, r12, r4      @ ang += atantable[n]
   add      r2, r1, r0, asr#4   @ tmp = x + (y >> 4)
   subs   r0, r0, r1, asr#4   @ y -= (x >> 4)
   bmi      yneg5
ypos5:
   add      r12, r12, r5      @ ang += atantable[n]
   add      r1, r2, r0, asr#5   @ x = tmp + (y >> 5)
   subs   r0, r0, r2, asr#5   @ y -= (tmp >> 5)
   bmi      yneg6
ypos6:
   add      r12, r12, r6      @ ang += atantable[n]
   add      r2, r1, r0, asr#6   @ tmp = x + (y >> 6)
   subs   r0, r0, r1, asr#6   @ y -= (x >> 6)
   bmi      yneg7
ypos7:
   add      r12, r12, r7      @ ang += atantable[n]
   add      r1, r2, r0, asr#7   @ x = tmp + (y >> 7)
   subs   r0, r0, r2, asr#7   @ y -= (tmp >> 7)
endrotate:
                        @ last correction reuses the final table entry
                        @ halved instead of loading one more value
   addpl   r12, r12, r7, asr#1 @ if (y >= 0) ang += atantable[n] >> 1
   submi   r12, r12, r7, asr#1 @ else ang -= atantable[n] >> 1

   ldmfd   sp!, {r4-r7}      @ pop r4-r7 off stack
@ correction to distance calculation (uncomment to return distance in r1)
@ mul takes registers only and needs rd != rm, and 2487 (0x9b7) is not a
@ valid single immediate, so it is built in r2 (free at this point).
@ the product is 32 bits wide: it wraps once x exceeds about 863000.
@   mov      r1, r1, asr r3      @ x >>= shift
@   mov      r2, #0x9b0         @ tmp = 0x9b0
@   orr      r2, r2, #0x7       @ tmp |= 7   -> 2487
@   mul      r1, r2, r1         @ x *= 2487
@   mov      r1, r1, asr#12      @ x >>= 12   (2487/4096 ~= 1/1.647 cordic gain)
   add      r12, r12, #0x10000   @ ang += (1 << 16), bias before truncating
   movs   r0, r12, asr#18      @ result = ang >> 18
@   addmi   r0, r0, #512      @ if (ang < 0) ang += 512
   bx      lr               @ return from subroutine
.fend1:
.size   atan2asm,.fend1-atan2asm

@ atan(2^-n) for n = 1..7, scaled so that pi == 0x4000000
@ (the n = 0 step, pi/4 == 0x1000000, is an immediate in the code)
atan2table:
.word   0x00972028           @ atan(1/2)
.word   0x004fd9c2           @ atan(1/4)
.word   0x0028888e           @ atan(1/8)
.word   0x0014586a           @ atan(1/16)
.word   0x000a2ebf           @ atan(1/32)
.word   0x000517b0           @ atan(1/64)
.word   0x00028be2           @ atan(1/128)

<edit> Added ".section .iwram" just in case anybody is copying and pasting it into test code. BTW my guesstimate is ~80 cycles for the current version, but I may be totally off due to some beginner mistake. </edit>

#24712 - ecurtz - Tue Aug 10, 2004 4:26 am

I adjusted the shifts slightly. If anybody is going to use this you should check the values against the kind of numbers you'll actually be seeing in your game.

Anybody tried this in their code yet? I'd like to hear how it goes...

#24714 - DekuTree64 - Tue Aug 10, 2004 4:42 am

I'm working on a good old fashioned tunnel effect, so I will be needing atan for the texture mapping. I probably won't have it running until next weekend at least, but I'll give your routine a shot when I do
_________________
___________
The best optimization is to do nothing at all.
Therefore a fully optimized program doesn't exist.
-Deku

#55942 - IRbaboon - Tue Oct 04, 2005 8:09 pm

Sorry for the big bump, but I'm implementing this code and both the distance and the arctan function return excellent values for me to work with :)

But I've got two problems:

1.
This line:
Code:
   mul      r1, r1, #2478      @ x *= 2487 @

Results in the following error:
cordic.s: Assembler messages:
cordic.s:169: rd and rm should be different in mul
cordic.s:169: Error: bad arguments to instruction -- `mul r1,r1,#2478'


2.
I can only get the r0 (angle) value out of the function, but can I get the value of r1 (distance) out of it at the same time?
_________________
[Images not permitted - Click here to view it]

#55945 - poslundc - Tue Oct 04, 2005 8:39 pm

Yeah, that's a pretty messed-up instruction. The mul instruction only works out of registers and not immediate values, and the destination register needs to be different from the first source register (but it can be used as the second source register).

Even if this statement were okay, 2478 (or 2487, depending on whether the instruction or the comment is correct) is an invalid immediate value, so it couldn't just be mov-ed into another source register; it would have to be loaded in from the constant pool (or or-ed together).

Conclusion: it needs some fixing. :D

Dan.

#56027 - IRbaboon - Wed Oct 05, 2005 3:40 pm

I also solved the second issue by writing an asm function that returns r1:
Code:
@ getDistance: returns whatever value is currently in r1.
@ Intended to be called right after atan2asm (with its distance code
@ enabled) so the distance left in r1 can be read as a normal return value.
@ NOTE(review): r1 is a scratch register under the ARM procedure call
@ standard, so nothing guarantees the compiler leaves it untouched between
@ the two calls - confirm against the generated code at each call site.
getDistance:
mov      r0, r1               @ return value = r1
bx      lr                    @ return to caller

To my amazement it works perfectly, thanks everyone!
_________________
[Images not permitted - Click here to view it]