#152534 - Ruben - Mon Mar 17, 2008 11:30 am
Hi everyone. I was reading up on Deku's sound mixing thingy, optimized it, blah blah blah... but then I though, "I think I should do it in assembler." I came up with this code but I know for a FACT that there's something wrong in it... I just don't know WHAT LMAO!
The externs sndVars, sngVars and sndChannels are as follow:
I know, I know... lots of room for optimization but I just wanna get this working first. Thanks a lot guys.
Code: |
.section .iwram, "ax", %progbits
.align 4 .arm .global SndMix, tmpBuffer .extern sndVars, sngVars, sndChannels @ r0: samples to mix SndMix: stmfd sp!, {r1-r12} @ push {r1-r12} @ Clear the temp buffer add r0, r0, #0x01 @ r0 += 0x01 (to avoid LSR'ing 1) mov r1, #0x04000000 @ r1 = 0x04000000 (REG_BASE) add r1, r1, #0xD4 @ r1 += 0xD4 (DMA3SAD) mov r2, #0x00000000 @ r2 = 0x00000000 (NULL) str r2, [r1, #0x08] @ *(r1 + 0x08) = r2 (DMACNT) ldr r2, =ClrVal @ r2 = &ClrVal ldr r3, =tmpBuffer @ r3 = &sndTmpBuffer mov r4, #0x85000000 @ r4 = 0x85000000 (MEMSET32) add r4, r4, r0, lsr #0x01 @ r4 += r0 >> 1 stmia r1!, {r2-r4} @ *r4++ = r1, *r4++ = r2, *r4++ = r3 @ Get the global volume ldr r1, =sngVars @ r1 = &sngVars ldr r1, [r1] @ r1 = *((int*)r1) (gVol) @ Loop through the channels and mix data mov r2, #0x09 @ r2 = 0x09 ldr r3, =sndChannels+0xE0 @ r3 = &(*sndChannels[8]) .LChnLoop: ldr r4, [r3] @ r4 = *((int*)r3) (CHN_CNT) ands r5, r4, #0x01 @ \ beq .LChnNA @ if(!(r4 & 0x01)) goto .LChnLoopEnd .LChnActive: ldr r5, =tmpBuffer @ r5 = &tmpBuffer mov r6, r4, lsr #0x03 @ r6 = r4 >> 2 (CHN_VOL) muls r7, r6, r1 @ r7 = r6 * r1 | mov r6, r7, lsr #0x06 @ r6 = r7 >> 6 (CHN_VOL) ldr r7, [r3, #0x04]! @ r7 = *(int*)(r3 += 4) (CHN_POS) ldr r8, [r3, #0x04]! @ r8 = *(int*)(r3 += 4) (CHN_INC) ldr r9, [r3, #0x04]! @ r9 = *(int*)(r3 += 4) (CHN_LEN) ldr r10, [r3, #0x08]! @ r10 = *(int*)(r3 += 8) (CHN_SRC) add r3, r3, #0x04 @ r3 += 0x04 (CHN_FRQ) (end) sub r3, r3, #0x1C stmfd sp!, {r0} @ Mix down .LChnActMix: ldrb r11, [r10, r7, lsr #0x0C] @ r12 = *(char*)(r10+r11) mul r11, r12, r6 @ r11 = r12 * r6 (CHN_VOL) ldrsh r12, [r5] add r11, r11, r12 strh r11, [r5] add r5, r5, #0x02 add r7, r7, r8 cmp r7, r9 bge .LChnEnd .LChnLink: subs r0, r0, #0x01 bne .LChnActMix .LChnLinkEnd: ldmfd sp!, {r0} str r7, [r3, #0x04] b .LChnLoopEnd @ goto .LChnLoopEnd .LChnEnd: ands r11, r4, #0x02 beq .LChnSmpEnd .LChnSmpEndLoop: ldr r11, [r3, #0x14] mov r7, r11 b .LChnLink .LChnSmpEnd: mov r7, #0x00 sub r11, r4, #0x02 str r11, [r3] b .LChnLinkEnd .LChnNA: .LChnLoopEnd: sub r3, r3, #0x1C subs r2, r2, #0x01 @ \ bne .LChnLoop @ if(r2-- != 0) goto .LChnLoop .LDownsample: ldr r1, =tmpBuffer-2 ldr r2, =sndVars ldr r2, [r2] @ curmixbuffer sub r2, r2, #0x01 .LDownsampleLoop: ldrh r3, [r1, #0x02]! mov r4, r3, lsr #0x08 strb r4, [r2, #0x01]! subs r0, r0, #0x01 bne .LDownsampleLoop .LEnd: ldmfd sp!, {r1-r12} @ pop {r1-r12} bx lr @ bx to lr .align 4 ClrVal: .word 0x00000000 tmpBuffer: .space 736*2 .end |
The externs sndVars, sngVars and sndChannels are as follow:
Code: |
typedef struct __attribute__ ((aligned(4))) { s8 *mixBufferBase; s8 *curMixBuffer; u8 activeBuffer; u32 smpsTilTick; u32 smpsPerTick; u32 mixFreq; u32 rcpMixFreq; u32 mixBufferSize; } SND_VARS; typedef struct __attribute__ ((aligned(4))) { u32 gVol; u32 mode; u32 state; u8 tickA; u8 tickB; u8 row; ... ... ... } MOD_VARS; typedef struct __attribute__ ((aligned(4))) { u32 cnt; u32 pos; u32 inc; u32 len; u32 loopStart; s8 *dat; u32 freq; } SND_CHANNEL; ... SND_VARS sndVars; MOD_VARS sngVars; SND_CHANNEL sndChannels[9]; |
I know, I know... lots of room for optimization but I just wanna get this working first. Thanks a lot guys.