gbadev.org forum archive

Hi!

I am working on an AdLib emulation running on DS ARM7. It currently can manage 8 channels (each with 2 operators), but the ninth (and final) channel makes it take more CPU cycles than are available. I was hoping you gurus could perhaps take a look at my inner loops and check if I have missed some obvious optimization possibilities.

I am planning to change the rows of ldr and str commands to ldmia and stmia after I reorder the slot struct variables and register usage, but that is only done 9 times per frame while the inner loops are done 256*9 times a frame so any optimization in the for_SLOT1 and for_SLOT2 loops would be much appreciated!

Thanks in advance!

Code:

.for_channel:
@-------
@ Per-channel setup for SLOT1 (the first of the channel's two operators).
@
@ Registers that are live on entry (set up by code outside this listing;
@ roles are inferred from how they are used below and in the loops):
@   r0  = base address of the current slot's data
@   r12 = packed state word:
@           bits  0..7  = LFO_AM value, with the lfo_am_depth flag in bit 0
@           bits  8..15 = flag bits of the slot being processed
@           bits 16..19 = channel number
@           bits 20..   = sample loop counter
@   lr  = output buffer pointer
@-------
@ Load the SLOT-specific data values
@-------
ldrb r1, [r0, #(ch0_slot1_bits-SLOT1)] @ r1 = SLOT1 bits (Feedback(3 bits), Con, AM, Vib, EG type, KSR)
ldr r2, [r0, #(ch0_slot1_wavetable-SLOT1)] @ r2 = SLOT1 wavetable value
ldr r3, [r0, #(ch0_slot1_op1_out-SLOT1)] @ r3 = SLOT1 op1_out value
ldr r4, [r0, #(ch0_slot1_env_sustain-SLOT1)] @ r4 = SLOT1 sustain level (MAX_ATT_INDEX if release phase)
ldr r5, [r0, #(ch0_slot1_envelope-SLOT1)] @ r5 = SLOT1 envelope counter value (bit 0 is tested as the ATTACK flag in the loop)
ldr r6, [r0, #(ch0_slot1_Incr-SLOT1)] @ r6 = SLOT1 Incr value
ldr r7, [r0, #(ch0_slot1_Cnt-SLOT1)] @ r7 = SLOT1 Cnt value
ldr r8, [r0, #(ch0_slot1_volume-SLOT1)] @ r8 = SLOT1 volume value << 16
ldr r9, [r0, #(ch0_slot1_TLL-SLOT1)] @ r9 = SLOT1 TLL value
ldr r10, =sin_tab @ r10 = sin_tab base address
ldr r11, =tl_tab @ r11 = tl_tab base address (not reloaded for SLOT2; the SLOT2 loop reuses it)
add r10, r2 @ r10 = sin_tab + SLOT1->wavetable
@-------
@ tmp = lfo_am_table[ OPL->lfo_am_cnt >> LFO_SH ];
@ if (OPL->lfo_am_depth)
@ LFO_AM = tmp;
@ else
@ LFO_AM = tmp>>2;
@-------

bic r12, #0xFF00 @ Clear the old flag bits held in r12 bits 8..15
orr r12, r1, lsl #8 @ Put all bit values of SLOT1 into r12 second byte
and r1, r12, #0xFF @ r1 = LFO_AM and lfo_am_depth

@ Pick the LFO_AM shift count: 3 when the depth flag (bit 0) is set, 5 when it is clear.
@ The two counts differ by 2, which corresponds to the tmp>>2 of the reference code above.
@ Shifting right by at least 3 also discards the depth flag bit itself.
mov r2, #3
tst r1, #1 @ if (OPL->lfo_am_depth)
moveq r2, #5
tst r12, #(8<<8) @ AM bit set? (bit 3 of the slot bits)
addne r9, r1, lsr r2 @ Now r9 = SLOT1 volume base (0..511?): TLL, plus the scaled LFO_AM only when AM is on
@ orr can only set bits, so the counter field (bits 20 and up) must be zero here.
@ Each sample loop leaves it zero on exit; for the first channel that depends on
@ code outside this listing.
orr r12, #((ADLIB_BUFFER_SAMPLES-1)<<20)
.for_SLOT1: @ for( i=length-1; i >= 0 ; i-- ) {
@-------
@ Calculate the FM part for SLOT 1
@ Output goes always to the output buffer, SLOT2 uses it as phase_modulation or direct output depending on connect
@
@ Register use inside this loop (all loaded before the loop is entered):
@   r1, r2 = scratch
@   r3  = packed operator outputs: low halfword = newest output, high halfword = the one before it
@   r4  = sustain level, r5 = envelope value (bit 0 = ATTACK flag), r8 = volume << 16
@   r6  = Incr, r7 = Cnt (phase accumulator)
@   r9  = volume base (TLL plus optional LFO_AM)
@   r10 = sin_tab + wavetable offset, r11 = tl_tab
@   r12 = packed state (slot bits in bits 8..15, sample counter in bits 20 and up)
@   lr  = output buffer pointer, advanced by one halfword per sample
@
@ Reference C code being implemented:
@
@ FREQ_SH = 16
@ FREQ_MASK = 65535
@ SIN_MASK = 1023
@ ENV_QUIET = 384 (= 6144>>4)
@
@ out = SLOT->op1_out[0] + SLOT->op1_out[1];
@ SLOT->op1_out[0] = SLOT->op1_out[1];
@ SLOT->op1_out[1] = 0;
@ *SLOT->connect1 += SLOT->op1_out[0];
@ env = ((SLOT)->TLL + ((UINT32)(SLOT)->volume) + (LFO_AM & (SLOT)->AMmask));
@ if( env < ENV_QUIET )
@ {
@ if (!SLOT->FB)
@ out = 0;
@ UINT32 p = (env<<4) + sin_tab[SLOT->wavetable + ((((signed int)((SLOT->Cnt & ~FREQ_MASK) + (out<<SLOT->FB))) >> FREQ_SH ) & SIN_MASK) ];
@ if (p < TL_TAB_LEN)
@ SLOT->op1_out[1] = tl_tab[p];
@ }
@
@ NOTE(review): the assembly below has no explicit "env < ENV_QUIET" test; only the
@ "p < TL_TAB_LEN" range check is performed. Presumably the table sizes make the
@ first test redundant - confirm.
@-------
strh r3, [lr], #2 @ Store the low halfword of r3 (newest op1 output) and advance lr; a plain store, not the += of the C code
add r2, r9, r8, lsr #16 @ r2 = env = ((SLOT)->TLL + ((UINT32)(SLOT)->volume) + (LFO_AM & (SLOT)->AMmask));
tst r12, #(7<<(8+5)) @ Feedback = 0? (feedback field = bits 5..7 of the slot bits)
moveq r1, r7, lsr #16 @ Yes, use only SLOT->Cnt
beq .op1_no_feedback
@-------
@ Feedback active, calculate r1 = ((signed int)((SLOT->Cnt & ~FREQ_MASK) + (out<<SLOT->FB)))
@
@ NOTE(review): the code shifts the whole packed r3 right (asr) by the feedback value
@ instead of summing the two outputs and shifting left, and it does not mask Cnt with
@ ~FREQ_MASK. Presumably a deliberate approximation of the C expression - confirm.
@-------
mov r1, r12, lsr #(8+5) @ r1 = feedback value, 1..7, 7 = smallest feedback, 1 = largest
and r1, #7 @ Drop the channel/counter bits that were shifted down along with the field
add r1, r7, r3, asr r1 @ r1 = SLOT1->Cnt + (packed outputs >> feedback value)
lsr r1, #16 @ r1 >>= FREQ_SH
.op1_no_feedback:
bic r1, #0xFC00 @ r1 &= SIN_MASK (r1 has 16 significant bits here, so clearing bits 10..15 leaves 10)
lsl r3, #16 @ r3 = SLOT->op1_out[0] = SLOT->op1_out[1]; SLOT->op1_out[1] = 0;
ldr r1, [r10, r1, lsl #2] @ r1 = sin_tab[SLOT->wavetable + ((((signed int)((SLOT->Cnt & ~FREQ_MASK) + (out<<SLOT->FB))) >> FREQ_SH ) & SIN_MASK) ]; (word-sized entries)
add r1, r2, lsl #5 @ r1 = env<<4 + sin_tab[..], extra << 1 for halfword accessing (presumably sin_tab values are pre-scaled to match - confirm)
cmp r1, #(2*TL_TAB_LEN) @ if (p < TL_TAB_LEN)
ldrloh r1, [r11, r1] @ In range: r1 = tl_tab[p] (halfword load, r1 used as a byte offset)
orrlo r3, r1 @ SLOT->op1_out[1] = tl_tab[p]; out of range leaves the low halfword 0
@-------
@ Calculate envelope for SLOT 1
@
@ op1_attack and op1_sustain are defined outside this listing; presumably they
@ finish by branching back to op1_env_done.
@-------
tst r5, #1 @ Are we in ATTACK phase?
bne op1_attack @ Yes, go handle ATTACK phase volume envelope
add r8, r5, lsr #1 @ Decrease the volume by the envelope counter (lsr #1 drops the ATTACK flag bit)
cmp r8, r4 @ Did we go under the SUSTAIN level?
bhi op1_sustain @ Yep, go adjust the volume
op1_env_done:
@-------
@ Calculate phase generator values for SLOT 1
@
@ Only the "op->Cnt += op->Incr" path of the reference code below is implemented
@ here; there is no vibrato (LFO phase modulation) handling in this loop.
@
@ /* Phase Generator */
@ if(op->vib)
@ {
@ unsigned int block_fnum = CH->block_fnum;
@ unsigned int fnum_lfo = (block_fnum&0x0380) >> 7;
@ signed int lfo_fn_table_index_offset = lfo_pm_table[LFO_PM + 16*fnum_lfo ];
@ if (lfo_fn_table_index_offset) /* LFO phase modulation active */
@ {
@ block_fnum += lfo_fn_table_index_offset;
@ UINT8 block = (block_fnum&0x1c00) >> 10;
@ op->Cnt += (OPL->fn_tab[block_fnum&0x03ff] >> (7-block)) * op->mul;
@ }
@ else /* LFO phase modulation = zero */
@ op->Cnt += op->Incr;
@ }
@ else /* LFO phase modulation disabled for this operator */
@ op->Cnt += op->Incr;
@-------
add r7, r6 @ SLOT1->Cnt += SLOT1->Incr
@-------
@ Loop to next sample
@
@ The sample counter lives in r12 bits 20 and up. Subtracting one step sets the N flag
@ once the counter passes zero; the add afterwards undoes that last step so the counter
@ field is left at zero and the lower bits of r12 are untouched.
@-------
subs r12, #(1<<20)
bpl .for_SLOT1 @ }
add r12, #(1<<20)
@-------
@ Save the final values for SLOT1
@ (only the registers that the sample loop modifies are written back)
@-------
str r3, [r0, #(ch0_slot1_op1_out-SLOT1)] @ r3 = SLOT1 op1_out value
str r4, [r0, #(ch0_slot1_env_sustain-SLOT1)] @ r4 = SLOT1 sustain level (MAX_ATT_INDEX if release phase)
str r5, [r0, #(ch0_slot1_envelope-SLOT1)] @ r5 = SLOT1 envelope counter & mode value
str r7, [r0, #(ch0_slot1_Cnt-SLOT1)] @ r7 = SLOT1 Cnt value
str r8, [r0, #(ch0_slot1_volume-SLOT1)] @ r8 = SLOT1 volume value << 16

@-------
@ Calculate all the 256 samples for SLOT 2
@
@ r0 is advanced by SLOT_SIZE first, so the ch0_slot1_* offsets used below
@ now address the SLOT2 data of the same channel.
@-------
add r0, #SLOT_SIZE
sub lr, #ADLIB_BUFFER_SIZE @ Rewind buffer pointer back to start

ldrb r1, [r0, #(ch0_slot1_bits-SLOT1)] @ r1 = SLOT2 bits (Feedback, Con, AM, Vib, EG type, KSR)
ldr r2, [r0, #(ch0_slot1_wavetable-SLOT1)] @ r2 = SLOT2 wavetable value
ldr r4, [r0, #(ch0_slot1_env_sustain-SLOT1)] @ r4 = SLOT2 sustain level (MAX_ATT_INDEX if release phase)
ldr r5, [r0, #(ch0_slot1_envelope-SLOT1)] @ r5 = SLOT2 envelope value
ldr r6, [r0, #(ch0_slot1_Incr-SLOT1)] @ r6 = SLOT2 Incr value
ldr r7, [r0, #(ch0_slot1_Cnt-SLOT1)] @ r7 = SLOT2 Cnt value
ldr r8, [r0, #(ch0_slot1_volume-SLOT1)] @ r8 = SLOT2 volume value << 16
ldr r9, [r0, #(ch0_slot1_TLL-SLOT1)] @ r9 = SLOT2 TLL value
ldr r10, =sin_tab @ r10 = sin_tab base address (r11 still holds tl_tab from the SLOT1 setup)

and r3, r12, #0xFF @ r3 = LFO_AM and lfo_am_depth
bic r12, #0xFF00 @ Clear the SLOT1 flag bits from r12 bits 8..15

add r10, r2 @ r10 = sin_tab + SLOT2->wavetable

@-------
@ tmp = lfo_am_table[ OPL->lfo_am_cnt >> LFO_SH ];
@ if (OPL->lfo_am_depth)
@ LFO_AM = tmp;
@ else
@ LFO_AM = tmp>>2;
@
@ Shift count is 3 when the depth flag (bit 0) is set and 5 when it is clear;
@ the difference of 2 corresponds to the tmp>>2 above.
@-------
mov r2, #3
tst r3, #1 @ if (OPL->lfo_am_depth)
moveq r2, #5
tst r1, #8 @ AM bit set? (tested directly on the slot bits, before they are merged into r12)
addne r9, r3, lsr r2 @ Now r9 = SLOT2 volume base (0..511?)

orr r12, r1, lsl #8 @ Put all bit values of SLOT2 into r12 second byte

orr r12, #((ADLIB_BUFFER_SAMPLES-1)<<20) @ Sample counter into r12 bits 20 and up (field is zero after the SLOT1 loop)
.for_SLOT2: @ for( i=length-1; i >= 0 ; i-- ) {
@-------
@ Calculate the FM part for SLOT 2
@
@ Register use inside this loop (all loaded before the loop is entered):
@   r1, r2 = scratch, r3 = value read from / written to the buffer
@   r4  = sustain level, r5 = envelope value (bit 0 = ATTACK flag), r8 = volume << 16
@   r6  = Incr, r7 = Cnt (phase accumulator)
@   r9  = volume base (TLL plus optional LFO_AM)
@   r10 = sin_tab + wavetable offset, r11 = tl_tab
@   r12 = packed state (slot bits in bits 8..15, sample counter in bits 20 and up)
@   lr  = buffer pointer; each halfword holds the SLOT1 output and is overwritten with the result
@
@ Reference C code being implemented:
@
@ env = ((SLOT)->TLL + ((UINT32)(SLOT)->volume) + (LFO_AM & (SLOT)->AMmask));
@ if( env < ENV_QUIET )
@ {
@ UINT32 p = (env<<4) + sin_tab[SLOT->wavetable + ((((signed int)((SLOT->Cnt & ~FREQ_MASK) + (phase_modulation<<16))) >> FREQ_SH ) & SIN_MASK) ];
@ if (p >= TL_TAB_LEN)
@ output[0] += 0;
@ else
@ output[0] += tl_tab[p];
@ }
@
@ NOTE(review): as in the SLOT1 loop, only the "p < TL_TAB_LEN" check is performed;
@ there is no explicit "env < ENV_QUIET" test - confirm that this is intended.
@-------
ldrh r3, [lr] @ r3 = either phase_modulation or output[0]
add r2, r9, r8, lsr #16 @ r2 = env = ((SLOT)->TLL + ((UINT32)(SLOT)->volume) + (LFO_AM & (SLOT)->AMmask));
tst r12, #(1<<(8+4)) @ If Con=1, op1 produces sound directly, else use it as phase modulation
addeq r1, r7, r3, lsl #16 @ r1 = ((SLOT->Cnt) + (phase_modulation<<16))
moveq r3, #0 @ Con=0: op1 is not heard directly, so the output starts from 0
movne r1, r7 @ Con=1: no phase modulation; r3 keeps the op1 output to be summed below
lsr r1, #16 @ r1 >>= FREQ_SH
bic r1, #0xFC00 @ r1 &= SIN_MASK
ldr r1, [r10, r1, lsl #2] @ r1 = sin_tab[SLOT->wavetable + ((((signed int)((SLOT->Cnt & ~FREQ_MASK) + (phase_modulation<<16))) >> FREQ_SH ) & SIN_MASK) ];
add r1, r2, lsl #5 @ r1 = env<<4 + sin_tab[..], extra << 1 for halfword accessing
cmp r1, #(2*TL_TAB_LEN) @ if (p < TL_TAB_LEN)
ldrloh r1, [r11, r1] @ In range: r1 = tl_tab[p] (halfword load, r1 used as a byte offset)
addlo r3, r1 @ output[0] += tl_tab[p];

@-------
@ Store the sample to output buffer
@
@ lt = output[0];
@ lt >>= FINAL_SH;
@ /* limit check */
@ lt = limit( lt , MAXOUT, MINOUT );
@ /* store to sound buffer */
@ buf[i] = lt;
@
@ Only the final store of the reference code above is done here: there is no
@ FINAL_SH shift and no limit check, and strh keeps just the low 16 bits of r3.
@-------
strh r3, [lr], #2 @ buf[i] = output[0];

@-------
@ Calculate envelope for SLOT 2
@
@ op2_attack and op2_sustain are defined outside this listing; presumably they
@ finish by branching back to op2_env_done.
@-------
tst r5, #1 @ Are we in ATTACK phase?
bne op2_attack @ Yes, go handle ATTACK phase volume envelope
add r8, r5, lsr #1 @ Decrease the volume by the envelope counter (lsr #1 drops the ATTACK flag bit)
cmp r8, r4 @ Did we go under the SUSTAIN level?
bhi op2_sustain @ Yep, go adjust the volume
op2_env_done:
@-------
@ Calculate phase generator values for SLOT 2
@
@ Only the "op->Cnt += op->Incr" path of the reference code below is implemented
@ here; there is no vibrato (LFO phase modulation) handling in this loop.
@
@ /* Phase Generator */
@ if(op->vib)
@ {
@ unsigned int block_fnum = CH->block_fnum;
@ unsigned int fnum_lfo = (block_fnum&0x0380) >> 7;
@ signed int lfo_fn_table_index_offset = lfo_pm_table[LFO_PM + 16*fnum_lfo ];
@ if (lfo_fn_table_index_offset) /* LFO phase modulation active */
@ {
@ block_fnum += lfo_fn_table_index_offset;
@ UINT8 block = (block_fnum&0x1c00) >> 10;
@ op->Cnt += (OPL->fn_tab[block_fnum&0x03ff] >> (7-block)) * op->mul;
@ }
@ else /* LFO phase modulation = zero */
@ op->Cnt += op->Incr;
@ }
@ else /* LFO phase modulation disabled for this operator */
@ op->Cnt += op->Incr;
@-------
add r7, r6 @ SLOT->Cnt += SLOT->Incr
@-------
@ Loop to next sample
@
@ Same counter scheme as the SLOT1 loop: count down in r12 bits 20 and up until the
@ result goes negative, then add one step back so the counter field ends at zero.
@-------
subs r12, #(1<<20)
bpl .for_SLOT2 @ }
add r12, #(1<<20)
@-------
@ Save the final values for SLOT2
@ (r0 still points at the SLOT2 data, so the ch0_slot1_* offsets address SLOT2 here)
@-------
str r4, [r0, #(ch0_slot1_env_sustain-SLOT1)] @ r4 = SLOT2 sustain level (MAX_ATT_INDEX if release phase)
str r5, [r0, #(ch0_slot1_envelope-SLOT1)] @ r5 = SLOT2 envelope value
str r7, [r0, #(ch0_slot1_Cnt-SLOT1)] @ r7 = SLOT2 Cnt value
str r8, [r0, #(ch0_slot1_volume-SLOT1)] @ r8 = SLOT2 volume value << 16

@-------
@ Go handle the next channel unless this was already the last channel.
@
@ The channel number is kept in r12 bits 16..19. r4 and r5 have just been stored,
@ so they are free to use as scratch for the comparison.
@
@ NOTE(review): with a limit of 0x00080000 and "blt", the loop repeats only while the
@ incremented channel number is below 8, i.e. 8 channels if counting starts at 0,
@ whereas the comment on the moveq says 9. The accompanying post states that only
@ 8 channels currently fit in the CPU budget, so the value is presumably deliberate
@ and the comment describes the goal - confirm.
@-------
add r0, #SLOT_SIZE @ r0 = SLOT1 data of the next channel
@ NOTE(review): the SLOT2 loop has already advanced lr across the buffer it wrote, so
@ this moves lr a further ADLIB_BUFFER_SIZE bytes on. Presumably each channel has
@ another buffer that is skipped here - confirm.
add lr, #(ADLIB_BUFFER_SIZE)
add r12, #0x00010000 @ channel++
and r4, r12, #0x000F0000 @ r4 = channel number, still in bits 16..19
tst r12, #4 @ Do we have rhythm mode on? (bit 2 of r12's low byte - presumably a rhythm flag packed there by the caller)
moveq r5, #0x00080000 @ Nope, handle 9 melodic channels (see NOTE above: this limit is 8)
movne r5, #0x00060000 @ Yep, handle 6 melodic channels
cmp r4, r5
blt .for_channel

Pate
_________________

Now working on DSx86 http://dsx86.patrickaalto.com
Get LineWarsDS from http://linewars.patrickaalto.com

I'm not sure if I'm reading this correctly (as I don't understand what you mean by 'slot') but.. are you calculating the envelopes *during* mixing? If so, that will bloat your code a lot: calculate the final scaling value *before* entering the mix loop.

Also, I see you using a lot of "tst/cmp" inside the mixing loop. If the values in the registers are constant, then I would suggest making separate loops for each condition, to avoid testing during the mixing.

Also, conditionals are t3h r0ckz0rrz ^_^'

Code:

@ Old code
@ Feedback test done with a branch: when the feedback field is zero, r1 is taken
@ from Cnt alone and the four feedback instructions are jumped over.
.for_SLOT1:
strh r3, [lr], #2
add r2, r9, r8, lsr #16
tst r12, #(7<<(8+5)) @ Z set when the 3-bit feedback field is zero
moveq r1, r7, lsr #16
beq .op1_no_feedback
mov r1, r12, lsr #(8+5)
and r1, #7
add r1, r7, r3, asr r1
lsr r1, #16
.op1_no_feedback:

@ New code
@ Same computation with the branch (and its label) replaced by conditional execution.
@ None of the instructions after the tst carries an S suffix, so the flags set by
@ the tst stay valid for every eq/ne instruction that follows.

.for_SLOT1:
strh r3, [lr], #2
add r2, r9, r8, lsr #16
tst r12, #(7<<(8+5)) @ Z set when the 3-bit feedback field is zero
moveq r1, r7, lsr #16 @ No feedback: r1 = Cnt >> 16
movne r1, r12, lsr #(8+5) @ Feedback: move the feedback field down to bit 0
andne r1, #7 @ ... and isolate its 3 bits
addne r1, r7, r3, asr r1 @ r1 = Cnt + (r3 >> feedback value)
lsrne r1, #16 @ r1 >>= 16

Ruben wrote:

I'm not sure if I'm reading this correctly (as I don't understand what you mean by 'slot') but.. are you calculating the envelopes *during* mixing?

Well, the envelope can change during mixing (like going from attack to decay and then to sustain can all happen within 256 samples), but I see what you mean. I could probably handle each stage separately, if needed.

Quote:

Also, I see you using a lot of "tst/cmp" inside the mixing loop. If the values in the registers are constant, then I would suggest making separate loops for each condition, to avoid testing during the mixing.

Ah, of course! Why didn't I think of that.. I most likely need to test the volume for each sample, but the feedback level and connection mode will stay constant, so having 4 different loops (one for each case) will most likely shave off a lot of cycles! Many many thanks for this tip!

Quote:

Also, conditionals are t3h r0ckz0rrz ^_^'

Really? I mean, even 4 commands is faster to do using conditional execution rather than a branch? I thought the limit was somewhere around 2-3.. What is the number for switching to a branch instead?

Much thanks again! I feel confident that even that single change will make my code handle all 9 channels without problems. Looking forward to getting home from work so I can start coding it! :-)

Pate
_________________

Now working on DSx86 http://dsx86.patrickaalto.com
Get LineWarsDS from http://linewars.patrickaalto.com

Well, a branch is 3 cycles. And you've got "tst, moveq, beq" followed by 4 instructions of the opposite condition. If the condition was 0, then it would take 4 cycles + 1 for the tst, and 6 cycles + 1 for the tst in the other case. Without the branch it would be 5 cycles + 1 for the tst, which is in between, so it's a nice eq/ne trade-off.

EDIT:

On further inspection:

After the tst, you've got 4 cycles if it was 0. If it was not 0, then you've got 5 cycles. In short, you can keep the beq to make it faster if it was 0, or keep the conditionals to 'level out' the speed.

EDIT 2:

On further inspection again...

After the test, if the condition was 0, it would take 4 cycles. If it wasn't 0, then you've got *six* cycles, so yes, I would say to get rid of the branch to have a trade-off.

Last edited by Ruben on Thu Sep 17, 2009 7:14 am; edited 1 time in total

I see quite a few mov / tst / lsr type ops. These almost always can be incorporated into arithmetic ops and removed. Also, you are working with halfwords - any way to up that to 32 bits and better take advantage of your CPU (and probably bus size)?

Miked0801 wrote:

I see quite a few mov / tst / lsr type ops. These almost always can be incorporated into arithmetic ops and removed.

Hmm.. Can you give an example? I'm just learning ARM ASM, so all tricks are appreciated!

Quote:

Also, you are working with halfwords - any way to up that to 32 bits and better take advantage of your CPU (and probably bus size)?

Like doing 2 samples at a time? That might be worth trying. I tried to fit everything I need in the inner loops into registers, but I would run out of registers when trying to do two samples at the same time.

Btw, I managed now to have all 9 channels running, after doing the separate loops as suggested by Ruben, and unrolling the inner loops once (so I only test for buffer end once every two samples). I could probably also safely skip the envelope checks and new volume calculation for every second sample.

Anyways, it is starting to look like this might actually work, thanks again for your help!

Pate
_________________

Now working on DSx86 http://dsx86.patrickaalto.com
Get LineWarsDS from http://linewars.patrickaalto.com

Quote:

Hmm.. Can you give an example? I'm just learning ARM ASM, so all tricks are appreciated!

Most of the stuff on there is pretty well optimized in this sense, but he means something like

Code:

@ Eep, slow
@ Two instructions: the shifted value goes through the temporary r0 first.
mov r0, r1, asr #5 @ r0 = r1 >> 5 (arithmetic)
add r3, r0, r3 @ r3 = r0 + r3

@ Yay, one opcode faster ^_^'
@ The shift is folded into the add's second operand, so r0 is not written at all.
add r3, r3, r1, asr #5 @ r3 = r3 + (r1 >> 5)

Quote:

Like doing 2 samples at a time?

Probably. Depends on if you're using stereo or not. If you're not using stereo, then yes, 2 samples at once, thereby avoiding 2 costly stores and replacing them with 1.

Quote:

I could probably also safely skip the envelope checks and new volume calculation for every second sample.

You probably could and if you can, you should: if you have played any Japan-originating GBA games that use the 'Sappy' engine, pay attention to the envelopes: these are only updated once per *frame* and it's barely noticeable, so I think you can 'afford' to do this outside of the mixing loop.

Ruben wrote:

Quote:

Like doing 2 samples at a time?

Probably. Depends on if you're using stereo or not. If you're not using stereo, then yes, 2 samples at once, thereby avoiding 2 costly stores and replacing them with 1.

Yeah, I'm using mono (stereo in the AdLib/SoundBlaster world means having 2 OPL2 chips, each with their own 9 channels/18 operators, so that is pretty much out of reach with ARM7). And, I am already keeping two samples for slot1 in r3 register, where the high halfword is the previous sample and low halfword is the current sample. I'll just need to swap the meaning of those and store the full word, should be a straightforward change. I'll see if something similar could be done with slot2.

Quote:

You probably could and if you can, you should: if you have played any Japan-originating GBA games that use the 'Sappy' engine, pay attention to the envelopes: these are only updated once per *frame* and it's barely noticeable, so I think you can 'afford' to do this outside of the mixing loop.

Yeah, I'll have to see what effect that has. In the original code from DosBox that I am using as an example, the attack envelope does a table lookup and a MULTIPLY operation for each sample!

Edit: Actually, the DosBox emulator does a table lookup and multiply at 50000Hz per slot, so at 16384Hz I should do that 3 times per sample to make my code work exactly like the DosBox one. I don't think I will, though. :-)

Thanks for your tips again, it is very useful to hear other people's optimization ideas, you become so blind to problems in your own code.

Pate
_________________

Now working on DSx86 http://dsx86.patrickaalto.com
Get LineWarsDS from http://linewars.patrickaalto.com

I worked on the code again a bit yesterday, looks like handling two samples at a time is pretty easy for both slots. Thanks again for the idea!

When I want to replace the low halfword of register r3 with the value in r1, is this the optimal code?

Code:

@ Replace the low halfword of r3 with r1, keeping the high halfword of r3.
@ r1 is ORed in as a full word, so its upper 16 bits must already be zero.
lsr r3, #16 @ r3 = old high halfword, moved down to bits 0..15
orr r3, r1, r3, lsl #16 @ r3 = (old high halfword << 16) | r1

How about when I need to add a halfword to the low halfword, without affecting the high halfword. Is this the best that can be done?

Code:

@ Add r1 to the low halfword of r3 without touching the high halfword.
@ The addition is done while that halfword sits in bits 16..31, so any carry
@ out of it falls off the top of the register instead of reaching the other half.
ror r3, #16 @ Swap the halfwords: the target halfword is now on top
add r3, r1, lsl #16 @ Add r1 into the top halfword
ror r3, #16 @ Swap the halfwords back

The latter does not look very efficient, but I couldn't figure out a better way to do it...

Thanks!

Pate
_________________

Now working on DSx86 http://dsx86.patrickaalto.com
Get LineWarsDS from http://linewars.patrickaalto.com

Code:

@ Replace the low halfword of r3 with r1 (r1's upper 16 bits must be zero).
lsr r3, #16 @ r3 = old high halfword, moved down to bits 0..15
orr r3, r1, r3, lsl #16 @ r3 = (old high halfword << 16) | r1

Yes, I think that's the best you can get it to.

Code:

@ Add r1 to the low halfword of r3 without touching the high halfword.
ror r3, #16 @ Swap the halfwords: the target halfword is now on top
add r3, r1, lsl #16 @ Add r1 into the top halfword; a carry out is simply lost
ror r3, #16 @ Swap the halfwords back

Let's look at that again...

Code:

@ Original three-instruction form: add r1 to the low halfword (DDDD) of r3.
ror r3, #16 @ DDDDxxxx
add r3, r1, lsl #16 @ DDDD += r1
ror r3, #16 @ xxxxDDDD

@ Basically, you want to add a value into
@ DDDD, without affecting the upper hword.
@ If you're going to be doing this twice,
@ you can do this...
@
@ Each step rotates r3 as part of the add, so r3 is left with its halfwords
@ swapped after every step and the second rotate of the form above is saved.
@ Consequences worth knowing:
@   - r1 is shifted in place, so its original value is gone after each step.
@   - The first step adds into DDDD; the second step, working on the swapped
@     register, adds into the other halfword (xxxx).
@ The shift is written in the two-operand "lsl r1, #16" form; a mov with a
@ shift but no source register is not a valid operand.

@ Sample 1: r3 swapped to DDDDxxxx
lsl r1, #16 @ r1 = value << 16, lined up with the top halfword
add r3, r1, r3, ror #16 @ r3 = (r3 with halfwords swapped) + r1: DDDD += value

@ Sample 2: r3 swapped back to xxxxDDDD
lsl r1, #16 @ r1 = next value << 16
add r3, r1, r3, ror #16 @ r3 = (r3 swapped again) + r1: xxxx += value

Okay, I guess it's time for an update...

First off, I only noticed last weekend that all the problems I was having were caused by my playing the SAME buffer that I wrote into, not the other buffer! Argh.. So much for that being the easy part. :-)

What threw me off was that for some peculiar reason No$GBA sounds better when writing to the wrong buffer! The real hardware warbled horribly while No$GBA sounded reasonably clean with only a few clicks now and then (which I then assumed was caused by the CPU lagging behind the buffer fill). Last weekend I finally noticed that something was badly wrong when even only 2 channels caused similar problems.

I then finally after many hours of head scratching found that I had the buffers wrong, and when I switched those, real hardware suddenly sounded completely clean, while No$GBA began warbling. Strange...

Anyways, I then added some checks for CPU load and noticed that at 16kHz my code only took 20% of ARM7 power to handle all 9 channels. So, I immediately upped the mixing speed to 32kHz, and now the code takes around 40% CPU, which I think is fine as I want the ARM7 to do other things besides the AdLib emulation as well.

I still have various problems in the code that I need to fix, but looks like I have a reasonable speed margin now to add the missing features.

Thanks again for all your tips!

Pate
_________________

Now working on DSx86 http://dsx86.patrickaalto.com
Get LineWarsDS from http://linewars.patrickaalto.com

gbadev.org forum archive

ASM > AdLib emulation optimization help

#170293 - Pate - Wed Sep 16, 2009 6:17 am

#170295 - Ruben - Wed Sep 16, 2009 6:54 am

#170296 - Pate - Wed Sep 16, 2009 7:09 am

#170297 - Ruben - Wed Sep 16, 2009 7:13 am

#170305 - Miked0801 - Wed Sep 16, 2009 2:46 pm

#170312 - Pate - Wed Sep 16, 2009 3:57 pm

#170314 - Ruben - Wed Sep 16, 2009 4:14 pm

#170335 - Pate - Thu Sep 17, 2009 5:10 am

#170352 - Pate - Fri Sep 18, 2009 4:57 am

#170354 - Ruben - Fri Sep 18, 2009 4:03 pm

#170392 - Pate - Mon Sep 21, 2009 4:58 am