链接脚本
以fw_jump
固件类型为例,链接脚本是firmware/fw_jump.elf.ldS
,内容如下:
OUTPUT_ARCH(riscv)
ENTRY(_start)
SECTIONS
{
#include "fw_base.ldS"
PROVIDE(_fw_reloc_end = .);
}
- OUTPUT_ARCH:指定输出文件的处理器架构,这里是
riscv
- ENTRY:用来设置程序的入口,这里是
_start
- SECTIONS:描述输出文件的内存布局,这里又引用了
fw_base.ldS
这个链接脚本 - PROVIDE:定义一个符号的值,这里是指定
_fw_reloc_end
为.
(“.”表示当前位置计数器)
fw_base.ldS
链接脚本内容如下:
. = FW_TEXT_START;
/* Don't add any section between FW_TEXT_START and _fw_start */
PROVIDE(_fw_start = .);
. = ALIGN(0x1000); /* Need this to create proper sections */
/* Beginning of the code section */
.text :
{
PROVIDE(_text_start = .);
*(.entry)
*(.text)
. = ALIGN(8);
PROVIDE(_text_end = .);
}
/* End of the code sections */
. = ALIGN(0x1000); /* Ensure next section is page aligned */
/* Beginning of the read-only data sections */
PROVIDE(_rodata_start = .);
.rodata :
{
*(.rodata .rodata.*)
. = ALIGN(8);
}
.dynsym :
{
*(.dynsym)
}
. = ALIGN(0x1000); /* Ensure next section is page aligned */
.rela.dyn : {
PROVIDE(__rel_dyn_start = .);
*(.rela*)
PROVIDE(__rel_dyn_end = .);
}
PROVIDE(_rodata_end = .);
/* End of the read-only data sections */
/*
* PMP regions must be to be power-of-2. RX/RW will have separate
* regions, so ensure that the split is power-of-2.
*/
. = ALIGN(1 << LOG2CEIL((SIZEOF(.rodata) + SIZEOF(.text)
+ SIZEOF(.dynsym) + SIZEOF(.rela.dyn))));
PROVIDE(_fw_rw_start = .);
/* Beginning of the read-write data sections */
.data :
{
PROVIDE(_data_start = .);
*(.sdata)
*(.sdata.*)
*(.data)
*(.data.*)
*(.readmostly.data)
*(*.data)
. = ALIGN(8);
PROVIDE(_data_end = .);
}
. = ALIGN(0x1000); /* Ensure next section is page aligned */
.bss :
{
PROVIDE(_bss_start = .);
*(.sbss)
*(.sbss.*)
*(.bss)
*(.bss.*)
. = ALIGN(8);
PROVIDE(_bss_end = .);
}
/* End of the read-write data sections */
. = ALIGN(0x1000); /* Need this to create proper sections */
PROVIDE(_fw_end = .);
- FW_TEXT_START:定义OpenSBI固件的链接地址,如果未定义,默认链接到0地址
- ALIGN:基于当前位置,返回下一个与align字节对齐的地址
- .text:定义输出文件的代码段组成
- .rodata:定义输出文件的只读数据段组成
- .data:定义输出文件的读写数据段组成
- .bss:定义输出文件的未初始化的数据段组成
启动汇编
寻找启动HART
根据ENTRY(_start)
定义,复位的入口函数为_start
,在firmware/fw_base.S
中实现:
_start:
/* Find preferred boot HART id */
MOV_3R s0, a0, s1, a1, s2, a2
call fw_boot_hart
add a6, a0, zero
MOV_3R a0, s0, a1, s1, a2, s2
li a7, -1
beq a6, a7, _try_lottery
/* Jump to relocation wait loop if we are not boot hart */
bne a0, a6, _wait_for_boot_hart
_try_lottery:
/* Jump to relocation wait loop if we don't get relocation lottery */
lla a6, _boot_status
li a7, BOOT_STATUS_LOTTERY_DONE
amoswap.w a6, a7, (a6)
bnez a6, _wait_for_boot_hart
上面代码主要是寻找启动硬件线程ID,主要是针对多核启动的情形。在启动的前一阶段可以通过a0
、a1
和a2
寄存器传递参数信息给OpenSBI:
- a0:硬件线程ID
- a1:设备树地址
- a2:动态固件信息的内存地址,即
struct fw_dynamic_info
的地址
函数fw_boot_hart
是确定启动的硬件线程ID,对于fw_jump
和fw_payload
固件类型都是返回-1,而对于fw_dynamic
固件类型返回从struct fw_dynamic_info
读到的硬件线程ID。如果没有找到启动硬件线程ID,需要通过_try_lottery
函数随机抽签选择启动的HART,然后跳到下一步重定位过程(relocate)。如果是从核(非启动核)执行至此,直接跳转_wait_for_boot_hart
函数等待主核启动完成。
/* waiting for boot hart to be done (_boot_status == 2) */
_wait_for_boot_hart:
li t0, BOOT_STATUS_BOOT_HART_DONE
lla t1, _boot_status
REG_L t1, 0(t1)
/* Reduce the bus traffic so that boot hart may proceed faster */
div t2, t2, zero
div t2, t2, zero
div t2, t2, zero
bne t0, t1, _wait_for_boot_hart
加载重定位
如果程序的加载地址和链接地址不一样,需要执行加载重定位,即将代码段从加载地址处拷贝到链接地址,以便程序在链接地址处执行。
/* relocate the global table content */
li t0, FW_TEXT_START /* link start */
lla t1, _fw_start /* load start */
sub t2, t1, t0 /* load offset */
lla t0, __rel_dyn_start
lla t1, __rel_dyn_end
beq t0, t1, _relocate_done
2:
REG_L t5, REGBYTES(t0) /* t5 <-- relocation info:type */
li t3, R_RISCV_RELATIVE /* reloc type R_RISCV_RELATIVE */
bne t5, t3, 3f
REG_L t3, 0(t0)
REG_L t5, (REGBYTES * 2)(t0) /* t5 <-- addend */
add t5, t5, t2
add t3, t3, t2
REG_S t5, 0(t3) /* store runtime address to the GOT entry */
3:
addi t0, t0, (REGBYTES * 3)
blt t0, t1, 2b
初始化
接下来进行一系列初始化和设置操作,如寄存器复位,BSS段清零,建立临时的异常处理程序,设置临时堆栈等。
_relocate_done:
/* At this point we are running from link address */
/* Reset all registers except ra, a0, a1, a2, a3 and a4 for boot HART */
li ra, 0
call _reset_regs
/* Zero-out BSS */
lla s4, _bss_start
lla s5, _bss_end
_bss_zero:
REG_S zero, (s4)
add s4, s4, __SIZEOF_POINTER__
blt s4, s5, _bss_zero
/* Setup temporary trap handler */
lla s4, _start_hang
csrw CSR_MTVEC, s4
/* Setup temporary stack */
lla s4, _fw_end
li s5, (SBI_SCRATCH_SIZE * 2)
add sp, s4, s5
/* Allow main firmware to save info */
MOV_5R s0, a0, s1, a1, s2, a2, s3, a3, s4, a4
call fw_save_info
MOV_5R a0, s0, a1, s1, a2, s2, a3, s3, a4, s4
#ifdef FW_FDT_PATH
/* Override previous arg1 */
lla a1, fw_fdt_bin
#endif
_reset_regs
函数复位所有寄存器(除ra,a0-a4之外)。BSS段清零是_bss_start
和_bss_end
之间的内存空间。临时的异常处理程序为_start_hang
函数。临时的堆栈大小为SBI_SCRATCH_SIZE的2倍,即8KB。fw_save_info
是暂时保存相关信息,这主要是针对fw_dynamic
固件类型的信息,即保存下一阶段的启动地址、运行模式以及OpenSBI库的选项(在运行时是否输出)。
如果OpenSBI定义了设备树固件路径,就将固件地址加载到a1寄存器;否则还是使用上一阶段的设备树地址。
平台初始化
接下来是针对特定平台进行初始化。
/*
* Initialize platform
* Note: The a0 to a4 registers passed to the
* firmware are parameters to this function.
*/
MOV_5R s0, a0, s1, a1, s2, a2, s3, a3, s4, a4
call fw_platform_init
add t0, a0, zero
MOV_5R a0, s0, a1, s1, a2, s2, a3, s3, a4, s4
add a1, t0, zero
首先是调用fw_platform_init
函数进行平台相关的初始化,入参是a0~a4寄存器,即arg0是启动HART id,arg1是上一阶段传递过来的FDT地址,函数返回的是FDT的位置。
对于通用平台,主要是解析设备树FDT,获取平台相关的参数,如平台名,支持的特性,HART数量等,并存储到platform
结构体中,代码如下:
/*
* The fw_platform_init() function is called very early on the boot HART
* OpenSBI reference firmwares so that platform specific code get chance
* to update "platform" instance before it is used.
*
* The arguments passed to fw_platform_init() function are boot time state
* of A0 to A4 register. The "arg0" will be boot HART id and "arg1" will
* be address of FDT passed by previous booting stage.
*
* The return value of fw_platform_init() function is the FDT location. If
* FDT is unchanged (or FDT is modified in-place) then fw_platform_init()
* can always return the original FDT location (i.e. 'arg1') unmodified.
*/
unsigned long fw_platform_init(unsigned long arg0, unsigned long arg1,
unsigned long arg2, unsigned long arg3,
unsigned long arg4)
{
const char *model;
void *fdt = (void *)arg1;
u32 hartid, hart_count = 0;
int rc, root_offset, cpus_offset, cpu_offset, len;
root_offset = fdt_path_offset(fdt, "/");
if (root_offset < 0)
goto fail;
fw_platform_lookup_special(fdt, root_offset);
if (generic_plat && generic_plat->fw_init)
generic_plat->fw_init(fdt, generic_plat_match);
model = fdt_getprop(fdt, root_offset, "model", &len);
if (model)
sbi_strncpy(platform.name, model, sizeof(platform.name) - 1);
if (generic_plat && generic_plat->features)
platform.features = generic_plat->features(generic_plat_match);
cpus_offset = fdt_path_offset(fdt, "/cpus");
if (cpus_offset < 0)
goto fail;
fdt_for_each_subnode(cpu_offset, fdt, cpus_offset) {
rc = fdt_parse_hart_id(fdt, cpu_offset, &hartid);
if (rc)
continue;
if (SBI_HARTMASK_MAX_BITS <= hartid)
continue;
if (!fdt_node_is_enabled(fdt, cpu_offset))
continue;
generic_hart_index2id[hart_count++] = hartid;
}
platform.hart_count = hart_count;
platform.heap_size = fw_platform_calculate_heap_size(hart_count);
platform_has_mlevel_imsic = fdt_check_imsic_mlevel(fdt);
fw_platform_coldboot_harts_init(fdt);
/* Return original FDT pointer */
return arg1;
fail:
while (1)
wfi();
}
接着是从platform
结构体中加载HART的一些参数到通用寄存器:
- s7:HART数量
- s8:HART堆栈大小
- s9:堆大小
- s10:堆偏移
/* Preload HART details
* s7 -> HART Count
* s8 -> HART Stack Size
* s9 -> Heap Size
* s10 -> Heap Offset
*/
lla a4, platform
#if __riscv_xlen > 32
lwu s7, SBI_PLATFORM_HART_COUNT_OFFSET(a4)
lwu s8, SBI_PLATFORM_HART_STACK_SIZE_OFFSET(a4)
lwu s9, SBI_PLATFORM_HEAP_SIZE_OFFSET(a4)
#else
lw s7, SBI_PLATFORM_HART_COUNT_OFFSET(a4)
lw s8, SBI_PLATFORM_HART_STACK_SIZE_OFFSET(a4)
lw s9, SBI_PLATFORM_HEAP_SIZE_OFFSET(a4)
#endif
初始化scratch空间
Scratch空间是临时工作区域,用于存储每个HART(硬件线程)启动和运行时的临时数据。OpenSBI定义的scratch空间大小为4KB,使用sbi_scratch
数据结构,定义如下:
/** Representation of per-HART scratch space */
struct sbi_scratch {
/** Start (or base) address of firmware linked to OpenSBI library */
unsigned long fw_start;
/** Size (in bytes) of firmware linked to OpenSBI library */
unsigned long fw_size;
/** Offset (in bytes) of the R/W section */
unsigned long fw_rw_offset;
/** Offset (in bytes) of the heap area */
unsigned long fw_heap_offset;
/** Size (in bytes) of the heap area */
unsigned long fw_heap_size;
/** Arg1 (or 'a1' register) of next booting stage for this HART */
unsigned long next_arg1;
/** Address of next booting stage for this HART */
unsigned long next_addr;
/** Privilege mode of next booting stage for this HART */
unsigned long next_mode;
/** Warm boot entry point address for this HART */
unsigned long warmboot_addr;
/** Address of sbi_platform */
unsigned long platform_addr;
/** Address of HART ID to sbi_scratch conversion function */
unsigned long hartid_to_scratch;
/** Address of current trap context */
unsigned long trap_context;
/** Temporary storage */
unsigned long tmp0;
/** Options for OpenSBI library */
unsigned long options;
};
下面的代码是循环创建和初始化所有HART的Scratch空间,即填充上面的sbi_scratch
结构体。
/* Setup scratch space for all the HARTs*/
lla tp, _fw_end
mul a5, s7, s8
add tp, tp, a5
/* Setup heap base address */
lla s10, _fw_start
sub s10, tp, s10
add tp, tp, s9
/* Keep a copy of tp */
add t3, tp, zero
/* Counter */
li t2, 1
/* hartid 0 is mandated by ISA */
li t1, 0
_scratch_init:
/*
* The following registers hold values that are computed before
* entering this block, and should remain unchanged.
*
* t3 -> the firmware end address
* s7 -> HART count
* s8 -> HART stack size
* s9 -> Heap Size
* s10 -> Heap Offset
*/
add tp, t3, zero
sub tp, tp, s9
mul a5, s8, t1
sub tp, tp, a5
li a5, SBI_SCRATCH_SIZE
sub tp, tp, a5
/* Initialize scratch space */
/* Store fw_start and fw_size in scratch space */
lla a4, _fw_start
sub a5, t3, a4
REG_S a4, SBI_SCRATCH_FW_START_OFFSET(tp)
REG_S a5, SBI_SCRATCH_FW_SIZE_OFFSET(tp)
/* Store R/W section's offset in scratch space */
lla a5, _fw_rw_start
sub a5, a5, a4
REG_S a5, SBI_SCRATCH_FW_RW_OFFSET(tp)
/* Store fw_heap_offset and fw_heap_size in scratch space */
REG_S s10, SBI_SCRATCH_FW_HEAP_OFFSET(tp)
REG_S s9, SBI_SCRATCH_FW_HEAP_SIZE_OFFSET(tp)
/* Store next arg1 in scratch space */
MOV_3R s0, a0, s1, a1, s2, a2
call fw_next_arg1
REG_S a0, SBI_SCRATCH_NEXT_ARG1_OFFSET(tp)
MOV_3R a0, s0, a1, s1, a2, s2
/* Store next address in scratch space */
MOV_3R s0, a0, s1, a1, s2, a2
call fw_next_addr
REG_S a0, SBI_SCRATCH_NEXT_ADDR_OFFSET(tp)
MOV_3R a0, s0, a1, s1, a2, s2
/* Store next mode in scratch space */
MOV_3R s0, a0, s1, a1, s2, a2
call fw_next_mode
REG_S a0, SBI_SCRATCH_NEXT_MODE_OFFSET(tp)
MOV_3R a0, s0, a1, s1, a2, s2
/* Store warm_boot address in scratch space */
lla a4, _start_warm
REG_S a4, SBI_SCRATCH_WARMBOOT_ADDR_OFFSET(tp)
/* Store platform address in scratch space */
lla a4, platform
REG_S a4, SBI_SCRATCH_PLATFORM_ADDR_OFFSET(tp)
/* Store hartid-to-scratch function address in scratch space */
lla a4, _hartid_to_scratch
REG_S a4, SBI_SCRATCH_HARTID_TO_SCRATCH_OFFSET(tp)
/* Clear trap_context and tmp0 in scratch space */
REG_S zero, SBI_SCRATCH_TRAP_CONTEXT_OFFSET(tp)
REG_S zero, SBI_SCRATCH_TMP0_OFFSET(tp)
/* Store firmware options in scratch space */
MOV_3R s0, a0, s1, a1, s2, a2
#ifdef FW_OPTIONS
li a0, FW_OPTIONS
#else
call fw_options
#endif
REG_S a0, SBI_SCRATCH_OPTIONS_OFFSET(tp)
MOV_3R a0, s0, a1, s1, a2, s2
/* Move to next scratch space */
add t1, t1, t2
blt t1, s7, _scratch_init
下面是scratch空间的示意图,每个HART分配8KB空间,包括4KB的栈空间和4KB的scratch空间。其中Scratch空间前112字节是存储struct sbi_scratch
结构体。
重定位FDT
如果编译OpenSBI时,提供了设备树的文件路径(即FW_FDT_PATH
参数,链接放到rodata
段中,变量为fw_fdt_bin
),或者上一阶段传递了设备数的地址,就需要进行设备树重定位,即将FDT从源地址拷贝到目标地址。
/*
* Relocate Flatened Device Tree (FDT)
* source FDT address = previous arg1
* destination FDT address = next arg1
*
* Note: We will preserve a0 and a1 passed by
* previous booting stage.
*/
beqz a1, _fdt_reloc_done
/* Mask values in a4 */
li a4, 0xff
/* t1 = destination FDT start address */
MOV_3R s0, a0, s1, a1, s2, a2
call fw_next_arg1
add t1, a0, zero
MOV_3R a0, s0, a1, s1, a2, s2
beqz t1, _fdt_reloc_done
beq t1, a1, _fdt_reloc_done
/* t0 = source FDT start address */
add t0, a1, zero
/* t2 = source FDT size in big-endian */
#if __riscv_xlen > 32
lwu t2, 4(t0)
#else
lw t2, 4(t0)
#endif
/* t3 = bit[15:8] of FDT size */
add t3, t2, zero
srli t3, t3, 16
and t3, t3, a4
slli t3, t3, 8
/* t4 = bit[23:16] of FDT size */
add t4, t2, zero
srli t4, t4, 8
and t4, t4, a4
slli t4, t4, 16
/* t5 = bit[31:24] of FDT size */
add t5, t2, zero
and t5, t5, a4
slli t5, t5, 24
/* t2 = bit[7:0] of FDT size */
srli t2, t2, 24
and t2, t2, a4
/* t2 = FDT size in little-endian */
or t2, t2, t3
or t2, t2, t4
or t2, t2, t5
/* t2 = destination FDT end address */
add t2, t1, t2
/* FDT copy loop */
ble t2, t1, _fdt_reloc_done
_fdt_reloc_again:
REG_L t3, 0(t0)
REG_S t3, 0(t1)
add t0, t0, __SIZEOF_POINTER__
add t1, t1, __SIZEOF_POINTER__
blt t1, t2, _fdt_reloc_again
_fdt_reloc_done:
FDT拷贝的目标地址通过fw_next_arg1
获取,以FW_JUMP
固件类型为例:
.section .entry, "ax", %progbits
.align 3
.global fw_next_arg1
/*
* We can only use a0, a1, and a2 registers here.
* The a0, a1, and a2 registers will be same as passed by
* previous booting stage.
* The next arg1 should be returned in 'a0'.
*/
fw_next_arg1:
#ifdef FW_JUMP_FDT_ADDR
li a0, FW_JUMP_FDT_ADDR
#elif defined(FW_JUMP_FDT_OFFSET)
lla a0, _fw_start
li a1, FW_JUMP_FDT_OFFSET
add a0, a0, a1
#else
add a0, a1, zero
#endif
ret
- FW_JUMP_FDT_ADDR:直接定义下一阶段的FDT地址
- FW_JUMP_FDT_OFFSET:基于固件加载地址
_fw_start
的偏移量 - 其他情况:直接使用上一阶段的传递过来的FDT地址
然后获取设备数的大小,由于其是以大端格式存储的,因此需要转换为小端格式,以便在RISC-V(小端架构)上处理。最后将设备数从源地址复制到目标地址中去。
标记启动完成
至此将主核(启动核)标记启动完成。
/* mark boot hart done */
li t0, BOOT_STATUS_BOOT_HART_DONE
lla t1, _boot_status
fence rw, rw
REG_S t0, 0(t1)
j _start_warm
对于从核(非启动核),第一节寻找启动HART讲到其会等待主核启动完成:
/* waiting for boot hart to be done (_boot_status == 2) */
_wait_for_boot_hart:
li t0, BOOT_STATUS_BOOT_HART_DONE
lla t1, _boot_status
REG_L t1, 0(t1)
/* Reduce the bus traffic so that boot hart may proceed faster */
div t2, t2, zero
div t2, t2, zero
div t2, t2, zero
bne t0, t1, _wait_for_boot_hart
热启动
接着是执行启动核和和非启动核的热启动流程,设置运行环境,堆栈,异常处理程序等。
_start_warm:
/* Reset all registers except ra, a0, a1, a2, a3 and a4 for non-boot HART */
li ra, 0
call _reset_regs
/* Disable all interrupts */
csrw CSR_MIE, zero
/* Find HART count and HART stack size */
lla a4, platform
#if __riscv_xlen > 32
lwu s7, SBI_PLATFORM_HART_COUNT_OFFSET(a4)
lwu s8, SBI_PLATFORM_HART_STACK_SIZE_OFFSET(a4)
#else
lw s7, SBI_PLATFORM_HART_COUNT_OFFSET(a4)
lw s8, SBI_PLATFORM_HART_STACK_SIZE_OFFSET(a4)
#endif
REG_L s9, SBI_PLATFORM_HART_INDEX2ID_OFFSET(a4)
/* Find HART id */
csrr s6, CSR_MHARTID
/* Find HART index */
beqz s9, 3f
li a4, 0
1:
#if __riscv_xlen > 32
lwu a5, (s9)
#else
lw a5, (s9)
#endif
beq a5, s6, 2f
add s9, s9, 4
add a4, a4, 1
blt a4, s7, 1b
2: add s6, a4, zero
3: bge s6, s7, _start_hang
/* Find the scratch space based on HART index */
lla tp, _fw_end
mul a5, s7, s8
add tp, tp, a5
mul a5, s8, s6
sub tp, tp, a5
li a5, SBI_SCRATCH_SIZE
sub tp, tp, a5
/* update the mscratch */
csrw CSR_MSCRATCH, tp
/* Setup stack */
add sp, tp, zero
/* Setup trap handler */
lla a4, _trap_handler
csrr a5, CSR_MISA
srli a5, a5, ('H' - 'A')
andi a5, a5, 0x1
beq a5, zero, _skip_trap_handler_hyp
lla a4, _trap_handler_hyp
_skip_trap_handler_hyp:
csrw CSR_MTVEC, a4
/* Initialize SBI runtime */
csrr a0, CSR_MSCRATCH
call sbi_init
/* We don't expect to reach here hence just hang */
j _start_hang
- HART index:由于多核处理器HART ID不一定连续编号,这里需要创建HART索引
- CSR_MSCRATCH:mscratch是一个临时寄存器,这里用于存储scratch space 的地址
- CSR_MTVEC:mtvec是异常向量表的基地址寄存器,这里设置为异常处理程序
_trap_handler
,即后续异常发生时,处理器跳转到_trap_handler
进行异常处理。
最后调用sbi_init
初始化SBI运行时环境,并跳转到下一级镜像,见后续章节。