Como criar programas ARM bare metal e executá-los no QEMU?

Estou tentando entendertutorial funcionar como pretendido sem sucesso (algo falha após obl principal instrução). De acordo com o tutorial, o comando

(qemu) xp /1dw 0xa0000018  

deve resultar na impressão 33 (mas recebo 0x00)

a0000018:         33

Este é o conteúdo dos registros após a chamada principal (consulte startup.s)

(qemu) info registers 
R00=a000001c R01=a000001c R02=00000006 R03=00000000
R04=00000000 R05=00000005 R06=00000006 R07=00000007
R08=00000008 R09=00000009 R10=00000000 R11=a3fffffc
R12=00000000 R13=00000000 R14=0000003c R15=00000004
PSR=800001db N--- A und32
FPSCR: 00000000

Eu tenho os seguintes arquivos

  main.c
  startup.s
  lscript.ld
  Makefile        

E eu estou usando a seguinte cadeia de ferramentas

arm-2013.11-24-arm-none-eabi-i686-pc-linux-gnu

Makefile:

SRCS := main.c startup.s

LINKER_NAME := lscript.ld
ELF_NAME := program.elf
BIN_NAME := program.bin
FLASH_NAME := flash.bin

CC := arm-none-eabi
CFLAGS := -nostdlib
OBJFLAGS ?= -DS
QEMUFLAGS := -M connex -pflash $(FLASH_NAME) -nographic -serial /dev/null

# Allocate 16MB to use as a virtual flash for th qemu
# bs = blocksize -> 4KB
# count = number of block -> 4096
# totalsize = 16MB
setup:
    dd if=/dev/zero of=$(FLASH_NAME) bs=4096 count=4096

# Compile srcs and write to virtual flash
all: clean setup
    $(CC)-gcc $(CFLAGS) -o $(ELF_NAME) -T $(LINKER_NAME) $(SRCS)
    $(CC)-objcopy -O binary $(ELF_NAME) $(BIN_NAME)
    dd if=$(BIN_NAME) of=$(FLASH_NAME) bs=4096 conv=notrunc

objdump:
    $(CC)-objdump $(OBJFLAGS) $(ELF_NAME)

mem-placement:
    $(CC)-nm -n $(ELF_NAME)

qemu:
    qemu-system-arm $(QEMUFLAGS) 

clean:
    rm -rf *.bin
    rm -rf *.elf

main.c:

static int arr[] = { 1, 10, 4, 5, 6, 7 };
static int sum;
static const int n = sizeof(arr) / sizeof(arr[0]);

int main()
{
    int i;

    for (i = 0; i < n; i++){
        sum += arr[i];
    }
    return 0;
}

iniciantes:

.section "vectors"
reset:  b     _start
undef:  b     undef
swi:    b     swi
pabt:   b     pabt
dabt:   b     dabt
    nop
irq:    b     irq
fiq:    b     fiq

    .text
_start:
init:
    @@ Copy data to RAM.
    ldr   r0, =flash_sdata
    ldr   r1, =ram_sdata
    ldr   r2, =data_size

    @@ Handle data_size == 0
    cmp   r2, #0
    beq   init_bss

copy:
    ldrb   r4, [r0], #1
    strb   r4, [r1], #1
    subs   r2, r2, #1
    bne    copy

init_bss:
    @@ Initialize .bss
    ldr   r0, =sbss
    ldr   r1, =ebss
    ldr   r2, =bss_size

    @@ Handle bss_size == 0
    cmp   r2, #0
    beq   init_stack
    mov   r4, #0

zero:
    strb  r4, [r0], #1
    subs  r2, r2, #1
    bne   zero

init_stack:
    @@ Initialize the stack pointer
    ldr   sp, =0xA4000000

    @@ **this call dosent work as expected.. (r13/sp contains 0xA4000000)**
    bl    main

    @@ Dosent return from main
    @@ r0 should now contain 33 
stop:
    b     stop

lscript.ld:

/*
 * Linker for testing purposes 
 * (using 16 MB virtual flash = 0x0100_0000)
 */

MEMORY {
    rom (rx)  : ORIGIN = 0x00000000, LENGTH = 0x01000000
    ram (rwx) : ORIGIN = 0xA0000000, LENGTH = 0x04000000
}


SECTIONS {
    .text : {
          * (vectors);
          * (.text);
    } > rom

    .rodata : {
          * (.rodata);
    } > rom

    flash_sdata = .;
    ram_sdata = ORIGIN(ram);

    .data : AT (flash_sdata) {
          * (.data);
    } > ram

    ram_edata = .;
    data_size = ram_edata - ram_sdata;

    sbss = .;
    .bss : {
         * (.bss);
    } > ram
    ebss = .;
    bss_size = ebss - sbss;

    /DISCARD/ : {
      *(.note*)
      *(.comment)
      *(.ARM*)
      /*
      *(.debug*)
      */
    }
} 

Desmontagem do executável (objdump):

program.elf:     file format elf32-littlearm

Disassembly of section .text:
00000000 <reset>:
0:  ea000023    b   94 <_start>

00000004 <undef>:
4:  eafffffe    b   4 <undef>

00000008 <swi>:
8:  eafffffe    b   8 <swi>

0000000c <pabt>:
c:  eafffffe    b   c <pabt>

00000010 <dabt>:
10: eafffffe    b   10 <dabt>
14: e320f000    nop {0} 

00000018 <irq>:
18: eafffffe    b   18 <irq>

0000001c <fiq>:
1c: eafffffe    b   1c <fiq>

00000020 <main>:
20: e52db004    push    {fp}        ; (str fp, [sp, #-4]!)
24: e28db000    add fp, sp, #0
28: e24dd00c    sub sp, sp, #12
2c: e3a03000    mov r3, #0
30: e50b3008    str r3, [fp, #-8]
34: ea00000d    b   70 <main+0x50>
38: e3003000    movw    r3, #0
3c: e34a3000    movt    r3, #40960  ; 0xa000
40: e51b2008    ldr r2, [fp, #-8]
44: e7932102    ldr r2, [r3, r2, lsl #2]
48: e3003018    movw    r3, #24
4c: e34a3000    movt    r3, #40960  ; 0xa000
50: e5933000    ldr r3, [r3]
54: e0822003    add r2, r2, r3
58: e3003018    movw    r3, #24
5c: e34a3000    movt    r3, #40960  ; 0xa000
60: e5832000    str r2, [r3]
64: e51b3008    ldr r3, [fp, #-8]
68: e2833001    add r3, r3, #1
6c: e50b3008    str r3, [fp, #-8]
70: e3a02006    mov r2, #6
74: e51b3008    ldr r3, [fp, #-8]
78: e1530002    cmp r3, r2
7c: baffffed    blt 38 <main+0x18>
80: e3a03000    mov r3, #0
84: e1a00003    mov r0, r3
88: e24bd000    sub sp, fp, #0
8c: e49db004    pop {fp}        ; (ldr fp, [sp], #4)
90: e12fff1e    bx  lr 

00000094 <_start>:
94: e59f004c    ldr r0, [pc, #76]   ; e8 <stop+0x4>
98: e59f104c    ldr r1, [pc, #76]   ; ec <stop+0x8>
9c: e59f204c    ldr r2, [pc, #76]   ; f0 <stop+0xc>
a0: e3520000    cmp r2, #0
a4: 0a000003    beq b8 <init_bss> 

000000a8 <copy>:
a8: e4d04001    ldrb    r4, [r0], #1
ac: e4c14001    strb    r4, [r1], #1
b0: e2522001    subs    r2, r2, #1
b4: 1afffffb    bne a8 <copy> 

000000b8 <init_bss>:
b8: e59f0034    ldr r0, [pc, #52]   ; f4 <stop+0x10>
bc: e59f1034    ldr r1, [pc, #52]   ; f8 <stop+0x14>
c0: e59f2034    ldr r2, [pc, #52]   ; fc <stop+0x18>
c4: e3520000    cmp r2, #0
c8: 0a000003    beq dc <init_stack>
cc: e3a04000    mov r4, #0

000000d0 <zero>:
d0: e4c04001    strb    r4, [r0], #1
d4: e2522001    subs    r2, r2, #1
d8: 1afffffc    bne d0 <zero>  

000000dc <init_stack>:
dc: e3a0d329    mov sp, #-1543503872    ; 0xa4000000
e0: ebffffce    bl  20 <main> 

000000e4 <stop>:
e4: eafffffe    b   e4 <stop>
e8: 00000104    andeq   r0, r0, r4, lsl #2
ec: a0000000    andge   r0, r0, r0
f0: 00000018    andeq   r0, r0, r8, lsl r0
f4: a0000018    andge   r0, r0, r8, lsl r0
f8: a000001c    andge   r0, r0, ip, lsl r0
fc: 00000004    andeq   r0, r0, r4

Disassembly of section .rodata:
00000100 <n>:
100:    00000006    andeq   r0, r0, r6

Disassembly of section .data:
a0000000 <arr>:
a0000000:   00000001    andeq   r0, r0, r1
a0000004:   0000000a    andeq   r0, r0, sl
a0000008:   00000004    andeq   r0, r0, r4
a000000c:   00000005    andeq   r0, r0, r5
a0000010:   00000006    andeq   r0, r0, r6
a0000014:   00000007    andeq   r0, r0, r7

Disassembly of section .bss: 
a0000018 <sum>:
a0000018:   00000000    andeq   r0, r0, r0

Alguém pode me indicar a direção certa de por que isso não está funcionando de acordo com minhas expectativas?

Obrigado Henrik

questionAnswers(3)

yourAnswerToTheQuestion