#24599 - ecurtz - Sat Aug 07, 2004 6:17 am
Here's an implementation of the CORDIC rotator atan2 we've been discussing in this thread.
It would be great if some of the experts here could take a look at it and suggest any fixes or improvements they see.
<edit> Added ".section .iwram" just in case anybody is copying and pasting it into test code. BTW, my guesstimate is ~80 cycles for the current version, but I may be totally off due to some beginner mistake. </edit>
It would be great if some of the experts here could take a look at it and suggest any fixes or improvements they see.
Code: |
/*
 * sint32 atan2asm(sint32 y, sint32 x);
 *
 * atan2 using a CORDIC rotator (vectoring mode, 8 unrolled stages plus a
 * final half-step).
 *
 * Implemented by eli curtz, eli@nuprometheus.com, based on information from
 * http://www.andraka.com/cordic.htm and forum comments at
 * http://www.gbadev.org from users AnthC, DiscoStew, and ecurtz.
 *
 * Syntax : GNU as, ARM state (32-bit instructions), '@' starts a comment.
 *
 * entry    r0: y
 *          r1: x
 * exit     r0: angle (-256...256, i.e. pi == 256);
 *              change ANG_SHIFT to adjust the desired range
 *          r1: distance (only if the optional correction near the end of
 *              the routine is uncommented; otherwise the gain-scaled x)
 * destroys r0: y
 *          r1: x
 *          r2: tmp
 *          r3: shift
 *          r12: ang
 * saves    r4-r7 (pushed/popped; used for arctangent table entries)
 *
 * Special cases:
 *   x == 0 && y == 0 returns 0 immediately.
 *   x == 0x80000000 cannot be negated and is not handled.
 *
 * Internal angle format: pi == ANG_PI (0x4000000); the result is
 * (ang + ANG_ROUND) >> ANG_SHIFT.
 */

        .equ    ANG_PI,     0x4000000   @ pi in internal angle units
        .equ    ANG_PI_4,   0x1000000   @ pi/4 == atan(1), CORDIC stage 0
        .equ    ANG_ROUND,  0x10000     @ bias added before the final shift
        .equ    ANG_SHIFT,  18          @ internal units -> result units

        @ The section is declared allocatable + executable and the alignment
        @ is requested *inside* it, so the routine and its table are word
        @ aligned wherever the linker places .iwram.
        .section .iwram, "ax", %progbits
        .arm
        .align  2
        .global atan2asm
        .type   atan2asm, %function

atan2asm:
        @ ---- fold the left half-plane onto the right half-plane ----------
        movs    r12, r1, lsr #31        @ ang = 0 and Z set when x >= 0
        beq     .Lprescale              @ x >= 0: nothing to fold
        rsb     r1, r1, #0              @ x = -x
        rsbs    r0, r0, #0              @ y = -y (flags describe the new y)
        mov     r12, #ANG_PI            @ ang = +pi
        rsbpl   r12, r12, #0            @ new y >= 0 (old y <= 0): ang = -pi

.Lprescale:
        orrs    r2, r1, r0              @ Z: x == 0 && y == 0; N: y < 0
        bxeq    lr                      @ atan2(0, 0): return 0 (r0 is 0)

        @ ---- choose a left shift that maximises precision -----------------
        @ There is no reason to calculate the shift if the caller guarantees
        @ small inputs (x + |y| < 0x200000): use a constant shift and remove
        @ this code.
        @
        @ The magnitude bound is x + |y| on both paths.  Using x | y for
        @ y >= 0 underestimates the vector length by up to 2x, which lets
        @ the shifted vector overflow once the CORDIC gain (~1.647) is
        @ applied.  The comparisons are unsigned (lo) so a sum that reaches
        @ bit 31 is never mistaken for a small value.
        addpl   r2, r1, r0              @ y >= 0: tmp = x + y
        submi   r2, r1, r0              @ y <  0: tmp = x - y
        mov     r3, #0                  @ shift = 0
        cmp     r2, #0x8000000
        addlo   r3, r3, #3              @ tmp < 2^27: shift += 3
        cmp     r2, #0x1000000
        addlo   r3, r3, #3              @ tmp < 2^24: shift += 3
        cmp     r2, #0x200000
        addlo   r3, r3, #3              @ tmp < 2^21: shift += 3
        @ ---- end of shift calculation --------------------------------------

        stmfd   sp!, {r4-r7}            @ r4-r7 hold table entries below
        adr     r4, atan2table
        ldmia   r4!, {r5-r7}            @ r5..r7 = atan(1/2), (1/4), (1/8)

        mov     r1, r1, lsl r3          @ x <<= shift
        movs    r0, r0, lsl r3          @ y <<= shift
        bpl     .Lpos0                  @ y >= 0: start on the positive chain

        @ ---- y < 0 chain: rotate counter-clockwise, ang decreases ----------
        @ x alternates between r1 and r2 from stage to stage so that the
        @ previous x is still available for the y update.
.Lneg0:
        sub     r12, r12, #ANG_PI_4     @ ang -= atan(1)
        sub     r2, r1, r0              @ tmp = x - y
        adds    r0, r0, r1              @ y  += x
        bpl     .Lpos1
.Lneg1:
        sub     r12, r12, r5            @ ang -= atan(1/2)
        sub     r1, r2, r0, asr #1      @ x   = tmp - (y >> 1)
        adds    r0, r0, r2, asr #1      @ y  += tmp >> 1
        bpl     .Lpos2
.Lneg2:
        sub     r12, r12, r6            @ ang -= atan(1/4)
        sub     r2, r1, r0, asr #2      @ tmp = x - (y >> 2)
        adds    r0, r0, r1, asr #2      @ y  += x >> 2
        bpl     .Lpos3
.Lneg3:
        sub     r12, r12, r7            @ ang -= atan(1/8)
        sub     r1, r2, r0, asr #3      @ x   = tmp - (y >> 3)
        adds    r0, r0, r2, asr #3      @ y  += tmp >> 3
        bpl     .Lpos4
.Lneg4:
        ldmia   r4, {r4-r7}             @ r4..r7 = atan(1/16) .. atan(1/128)
        sub     r12, r12, r4            @ ang -= atan(1/16)
        sub     r2, r1, r0, asr #4      @ tmp = x - (y >> 4)
        adds    r0, r0, r1, asr #4      @ y  += x >> 4
        bpl     .Lpos5
.Lneg5:
        sub     r12, r12, r5            @ ang -= atan(1/32)
        sub     r1, r2, r0, asr #5      @ x   = tmp - (y >> 5)
        adds    r0, r0, r2, asr #5      @ y  += tmp >> 5
        bpl     .Lpos6
.Lneg6:
        sub     r12, r12, r6            @ ang -= atan(1/64)
        sub     r2, r1, r0, asr #6      @ tmp = x - (y >> 6)
        adds    r0, r0, r1, asr #6      @ y  += x >> 6
        bpl     .Lpos7
.Lneg7:
        sub     r12, r12, r7            @ ang -= atan(1/128)
        sub     r1, r2, r0, asr #7      @ x   = tmp - (y >> 7)
        adds    r0, r0, r2, asr #7      @ y  += tmp >> 7
        b       .Lfinish                @ flags from adds are still live

        @ ---- y >= 0 chain: rotate clockwise, ang increases -----------------
.Lpos0:
        add     r12, r12, #ANG_PI_4     @ ang += atan(1)
        add     r2, r1, r0              @ tmp = x + y
        subs    r0, r0, r1              @ y  -= x
        bmi     .Lneg1
.Lpos1:
        add     r12, r12, r5            @ ang += atan(1/2)
        add     r1, r2, r0, asr #1      @ x   = tmp + (y >> 1)
        subs    r0, r0, r2, asr #1      @ y  -= tmp >> 1
        bmi     .Lneg2
.Lpos2:
        add     r12, r12, r6            @ ang += atan(1/4)
        add     r2, r1, r0, asr #2      @ tmp = x + (y >> 2)
        subs    r0, r0, r1, asr #2      @ y  -= x >> 2
        bmi     .Lneg3
.Lpos3:
        add     r12, r12, r7            @ ang += atan(1/8)
        add     r1, r2, r0, asr #3      @ x   = tmp + (y >> 3)
        subs    r0, r0, r2, asr #3      @ y  -= tmp >> 3
        bmi     .Lneg4
.Lpos4:
        ldmia   r4, {r4-r7}             @ r4..r7 = atan(1/16) .. atan(1/128)
        add     r12, r12, r4            @ ang += atan(1/16)
        add     r2, r1, r0, asr #4      @ tmp = x + (y >> 4)
        subs    r0, r0, r1, asr #4      @ y  -= x >> 4
        bmi     .Lneg5
.Lpos5:
        add     r12, r12, r5            @ ang += atan(1/32)
        add     r1, r2, r0, asr #5      @ x   = tmp + (y >> 5)
        subs    r0, r0, r2, asr #5      @ y  -= tmp >> 5
        bmi     .Lneg6
.Lpos6:
        add     r12, r12, r6            @ ang += atan(1/64)
        add     r2, r1, r0, asr #6      @ tmp = x + (y >> 6)
        subs    r0, r0, r1, asr #6      @ y  -= x >> 6
        bmi     .Lneg7
.Lpos7:
        add     r12, r12, r7            @ ang += atan(1/128)
        add     r1, r2, r0, asr #7      @ x   = tmp + (y >> 7)
        subs    r0, r0, r2, asr #7      @ y  -= tmp >> 7

        @ ---- final half-step and result conversion ------------------------
.Lfinish:
        @ No table entry for stage 8: half of the atan(1/128) entry is
        @ applied in the direction given by the sign of the residual y.
        addpl   r12, r12, r7, asr #1    @ y >= 0: ang += atan(1/128) / 2
        submi   r12, r12, r7, asr #1    @ y <  0: ang -= atan(1/128) / 2
        ldmfd   sp!, {r4-r7}            @ restore callee-saved registers

        @ Optional correction to the distance calculation (r1 = x here).
        @ 2487/4096 ~= 1/1.647 removes the CORDIC gain.  MUL takes no
        @ immediate and Rd must differ from Rm on ARMv4, hence r2.
        @ NOTE(review): x * 2487 must fit in 32 bits - confirm input range.
@       mov     r1, r1, asr r3          @ x >>= shift
@       mov     r2, #0x9B0              @ r2 = 2487 (0x9B7), built in
@       orr     r2, r2, #0x007          @      two encodable immediates
@       mul     r1, r2, r1              @ x *= 2487
@       mov     r1, r1, asr #12         @ x >>= 12

        add     r12, r12, #ANG_ROUND    @ ang += (1 << 16)
        movs    r0, r12, asr #ANG_SHIFT @ result = ang >> 18
        @ Uncomment to return 0...511 instead of -256...256:
@       addmi   r0, r0, #512            @ if (result < 0) result += 512
        bx      lr                      @ return from subroutine

        .size   atan2asm, . - atan2asm

        @ atan(2^-n) for n = 1..7 in internal angle units (pi == 0x4000000).
        @ Kept in the same section as the code so that adr can reach it.
        .align  2
atan2table:
        .word   0x00972028              @ atan(1/2)
        .word   0x004fd9c2              @ atan(1/4)
        .word   0x0028888e              @ atan(1/8)
        .word   0x0014586a              @ atan(1/16)
        .word   0x000a2ebf              @ atan(1/32)
        .word   0x000517b0              @ atan(1/64)
        .word   0x00028be2              @ atan(1/128)
<edit> Added ".section .iwram" just in case anybody is copying and pasting it into test code. BTW, my guesstimate is ~80 cycles for the current version, but I may be totally off due to some beginner mistake. </edit>