gbadev.org forum archive

This is a read-only mirror of the content originally found on forum.gbadev.org (now offline), salvaged from Wayback Machine copies. A new forum can be found here.

C/C++ > GCC optimisation error

#90325 - Schultz - Thu Jun 29, 2006 6:40 pm

As I compiled my code with the gcc as follows:

gcc DMA.c -c -O1

No mistakes were found while running the code.
However, when I compiled it such as follows:

gcc DMA.c -c -O2

A huge bug took place and when I called my function mem_copy(word *, word *, hword), which was in the above module, the data would not move from source to destination.

Furthermore, the game itself and all the other modules appeared to show no mistakes when I compiled them, even with -O3.

Given that the code has not changed in the slightest, what could the cause of this optimisation-related error be?

Thanks in advance.

#90326 - tepples - Thu Jun 29, 2006 6:55 pm

Optimization "bugs" usually happen when you forget a volatile.
_________________
-- Where is he?
-- Who?
-- You know, the human.
-- I think he moved to Tilwick.

#90478 - Schultz - Fri Jun 30, 2006 11:59 am

Volatiles were not forgotten.
The module in which the error took place was optimised with -O1, which normally causes errors with volatiles.
Furthermore, the module only writes to IO addresses, and a volatile error happens only when a read takes place.

Still, I cannot know the cause of the error.
The code was the following:

DMA.h
Code:

#ifndef      __DMA
#define      __DMA

/*
   Includes
*/
#include   "schultz.h"
#include   "GBA.h"

/*
   Constants
*/
/*   DMA registers   */
#define      DMA_ENABLED            ( 1 << 15 )
#define      DMA_IRQ_ENABLED            ( 1 << 14 )
#define      DMA_WORD_TRANSFER         ( 1 << 10 )

/*
   External
*/
extern   void   DMA_transfer(int, word *, word *, hword, hword);
extern   void   mem_copy(word *, word *, hword);

/*
   Inline Functions
*/

#endif



DMA.c
Code:

#include   "DMA.h"

/*
   DMA_transfer
   Programs one DMA channel by writing its four registers in a fixed
   order: source, destination, count, control.

   channel      index handed to the DMA_* register macros
   src      value stored in the channel's source register
   dest      value stored in the channel's destination register
   size      value stored in the channel's count register
   ctrl      value stored in the channel's control register
*/
void
DMA_transfer(int channel, word *src, word *dest, hword size, hword ctrl)
{
   DMA_SRC(channel)  = src;
   DMA_DEST(channel) = dest;
   DMA_CNT(channel)  = size;

   /*
      Control is written last, after the other three registers hold their
      new values. Presumably a ctrl value carrying DMA_ENABLED is what
      arms the channel - confirm against the hardware documentation.
   */
   DMA_CTRL(channel) = ctrl;
}

/*
   mem_copy
   Programs DMA channel 3 with the given source, destination and count,
   then writes DMA_ENABLED | DMA_WORD_TRANSFER to its control register.

   src      value stored in the channel 3 source register
   dest      value stored in the channel 3 destination register
   size      value stored in the channel 3 count register; presumably a
         number of words, since DMA_WORD_TRANSFER is set - confirm
*/
void
mem_copy(word *src, word *dest, hword size)
{
   const hword   ctrl = DMA_ENABLED | DMA_WORD_TRANSFER;

   DMA_SRC(3)  = src;
   DMA_DEST(3) = dest;
   DMA_CNT(3)  = size;

   /*   Control register last, once the other three are in place.   */
   DMA_CTRL(3) = ctrl;
}



GBA.h
Code:

#ifndef      __GBA
#define      __GBA

/*
   GBA DEFINITIONS
   Schultz, die Zahlberer
*/

/*
   Defines the GBA hardware.
*/

/*      Definitions   */
#include   "schultz.h"

/*
   Memory Units
*/
#define      W            1
#define      KW            1024

/*
   Memory Addresses
*/
#define      BIOS_ROM         ((word *)   0x00000000)
#define      EWRAM            ((word *)   0x02000000)
#define      IWRAM            ((word *)   0x03000000)
#define      IO            ((word *)   0x04000000)
#define      PAL_RAM            ((word *)   0x05000000)
#define      VRAM            ((word *)   0x06000000)
#define      OAM            ((word *)   0x07000000)
#define      CROM            ((word *)   0x08000000)
#define      _CROM(i)         ((word *)   0x08000000 + 0x00100000 * (i))

/*
   IO registers
*/
#define      DISP_CTRL         *((hword *)   (0x04000000))
#define      DISP_STAT         *((hword *)   (0x04000004))
#define      VCOUNT            *((hword *)   (0x04000006))
#define      BG_CTRL(i)         *((hword *)   (0x04000008 + 0x0002 * (i)))
#define      BG_HOFFSET(i)         *((hword *)   (0x04000010 + 0x0002 * (i)))
#define      BG_VOFFSET(i)         *((hword *)   (0x04000016 + 0x0002 * (i)))
#define      BG_ROT(i, a)         *((hword *)   (0x04000020 + 0x0002 * (a) + 0x0010 * ((i) - 2)))
#define      BG_X(i)            *((word *)   (0x04000028 + 0x0010 * ((i) - 2)))
#define      BG_Y(i)            *((word *)   (0x0400002C + 0x0010 * ((i) - 2)))
#define      MOSAIC_CTRL         *((hword *)   (0x0400004C))
#define      DMA_SRC(i)         (word *) (*((word *)   (0x040000B0 + 0x000C * (i))))
#define      DMA_DEST(i)         (word *) (*((word *)   (0x040000B4 + 0x000C * (i))))
#define      DMA_CNT(i)         *((hword *)   (0x040000B8 + 0x000C * (i)))
#define      DMA_CTRL(i)         *((hword *)   (0x040000BA + 0x000C * (i)))
#define      TIMER_CNT(i)         *((hword *)   (0x04000100 + 0x0004 * (i)))
#define      TIMER_CTR(i)         *((hword *)   (0x04000104 + 0x0004 * (i)))
#define      KEYPAD            *((volatile hword *)   (0x04000130))
#define      KEYPAD_CTRL         *((hword *)   (0x04000132))
#define      IE            *((hword *)   (0x04000200))
#define      IF            *((hword *)   (0x04000202))
#define      WAIT_CTRL         *((hword *)   (0x04000204))
#define      IME            *((hword *)   (0x04000208))
#define      HALT_CTRL         *((qword *)   (0x04000301))

#endif



schultz.h

Code:

#ifndef      __SCHULTZ
#define      __SCHULTZ

/*
   SCHULTZ
   Schultz, die Zahlberer
*/

/*
   Basic integer and fixed-point definitions for use with the
   32 bit ARM 7TDMI chip.
*/

/*
   Integer types
*/
typedef   unsigned long      word;      /*   CPU word   */
typedef   unsigned short      hword;      /*   CPU half word   */
typedef   unsigned char      qword;      /*   CPU quarter word   */

/*
   Fixed point
   A real is an int scaled by 256 (8 fractional bits).
   _real(x)   converts an integer to a real by shifting left 8 bits.
   product(a, b)   multiplies two values and shifts the result right
         8 bits, removing the scale factor the multiplication doubled.
*/
typedef   int         real;
#define      _real(x)            ( (x) << 8 )
#define      product(a, b)            ( ( (a) * (b) ) >> 8 )

#endif




That is all there is to it.
Can someone please help?
I am falling into despair.

Thanks in advance.

#90484 - kusma - Fri Jun 30, 2006 1:16 pm

declare all memory-mapped hw-registers as volatile.

#90493 - tepples - Fri Jun 30, 2006 2:01 pm

Schultz wrote:
Volatiles were not forgotten.

[snip]

GBA.h
Code:

#ifndef      __GBA
#define      __GBA

// snip

/* Excerpt of the DMA register macros: each cast is to a plain (word *) or (hword *), with no volatile qualifier. */
#define      DMA_SRC(i)         (word *) (*((word *)   (0x040000B0 + 0x000C * (i))))
#define      DMA_DEST(i)         (word *) (*((word *)   (0x040000B4 + 0x000C * (i))))
#define      DMA_CNT(i)         *((hword *)   (0x040000B8 + 0x000C * (i)))
#define      DMA_CTRL(i)         *((hword *)   (0x040000BA + 0x000C * (i)))



I don't see any volatiles here.
_________________
-- Where is he?
-- Who?
-- You know, the human.
-- I think he moved to Tilwick.

#90531 - Cearn - Fri Jun 30, 2006 5:23 pm

All mem_copy() and DMA_Transfer() do here is write to the (global) registers once, so the volatility of the things is not the issue. That only matters when you read from or write to them multiple times consecutively. The problem is elsewhere.

Your data wouldn't be a byte or halfword array in the same file, would it? (Yes, I am thinking of data alignment)

Also:
Code:
/* The outer (word *) cast makes this expansion a cast expression, which is not an lvalue in standard C, so it cannot be the target of an assignment. */
#define      DMA_SRC(i)         (word *) (*((word *)   (0x040000B0 + 0x000C * (i))))

Copy-paste error? I can't even get it compiled with the outer (word *).

#90552 - col - Fri Jun 30, 2006 7:52 pm

After a vague recollection of some technical gotcha, I found this in Gbatek:

"After changing the Enable bit from 0 to 1, wait 2 clock cycles before accessing any DMA related registers."

Not sure if this is correct, but if so, maybe the optimiser is treating your two 16 bit registers DMA_CNT and DMA_CTRL as a single 32 bit register, and that is causing some weirdness? if you make them both volatile, this will not happen.

#90557 - Schultz - Fri Jun 30, 2006 8:33 pm

I shall try to assume both of them are volatile.
Nevertheless, while programming games for GBA in assembly two months ago, I used to write to both registers at once, causing no mistake.
Thanks anyway, it was of great help.

#90568 - sniper - Fri Jun 30, 2006 9:25 pm

col wrote:
After a vague recollection of some technical gotcha, I found this in Gbatek:

"After changing the Enable bit from 0 to 1, wait 2 clock cycles before accessing any DMA related registers."

Not sure if this is correct, but if so, maybe the optimiser is treating your two 16 bit registers DMA_CNT and DMA_CTRL as a single 32 bit register, and that is causing some weirdness? if you make them both volatile, this will not happen.


Yes, that's right: the hardware is buggy, even the DMA. Take care when handling it.

#90574 - sajiimori - Fri Jun 30, 2006 10:20 pm

Hey Schultz, isn't the word "Zauberer"? Just curious -- I don't know German.

#90575 - Cearn - Fri Jun 30, 2006 10:49 pm

gcc -S -mthumb -O2 wrote:
ldr r3,=0x040000D4	@ r3 = 0x040000B0 + 0x000C * 3, the address DMA_SRC(3) uses
str r0, [r3]	@ word store of r0 (presumably the src argument) to 0x040000D4
add r3, r3, #4	@ r3 = 0x040000D8, the address DMA_DEST(3) uses
str r1, [r3]	@ word store of r1 (presumably the dest argument)
add r3, r3, #4	@ r3 = 0x040000DC, the address DMA_CNT(3) uses
strh r2, [r3]	@ halfword store of r2 (presumably the size argument)
ldr r2,=0x8400	@ 0x8400 = (1 << 15) | (1 << 10) = DMA_ENABLED | DMA_WORD_TRANSFER
add r3, r3, #2	@ r3 = 0x040000DE, the address DMA_CTRL(3) uses
strh r2, [r3]	@ halfword store of the control value, as the last register write
bx lr	@ return to the caller

This is what mem_copy() actually compiles to under devkitadv r5b3 and devkitpro r19a, -O1 and -O2, volatile and non-volatile DMA-regs. As you can see, it's pretty much what you'd expect. The function works fine for me too. The problem is probably how you're using it.

sajiimori wrote:
Hey Schultz, isn't the word "Zauberer"? Just curious -- I don't know German.
Zauberer = wizard/sorcerer
Zahl = number
Zahlberer= mathemagician? Heh, nice.