linux在被bootloader加載到內存後,cpu最初執行的linux內核代碼是boot目錄下header.S文件中的start_of_setup函數,這個函數在做了一些準備工作後會跳轉到boot目錄下main.c文件中的main函數執行。在這個main函數中我們可以第一次看到與內存管理相關的代碼:這段代碼調用detect_memory()函數檢測系統物理內存。
在header.S中執行下面匯編代碼:
# Excerpt of the setup entry point in header.S; the preparation code before
# and after the call is elided by the article (the dotted lines).
start_of_setup:
.....
# Jump to C code (should not return)
calll main
.....
跳到boot目錄下的main.c文件中
/*
 * Excerpt of main() from main.c in the boot directory, reached through the
 * "calll main" in start_of_setup. Only the memory detection call is shown;
 * the surrounding steps are elided by the article.
 */
void main(void)
{
......
/* Detect memory layout */
detect_memory();/* probe the physical memory layout */
......
}
/*
 * Probe the physical memory layout with every available BIOS method.
 *
 * All three probes are always executed, in the order E820, E801, 88.
 * E820 counts as successful when it reports at least one entry; the other
 * two count as successful when they return zero.
 *
 * Returns 0 if at least one probe succeeded, -1 otherwise.
 */
int detect_memory(void)
{
	int any_ok = 0;

	/* No short-circuiting: each probe must run even if an earlier one worked. */
	any_ok |= (detect_memory_e820() > 0);
	any_ok |= (detect_memory_e801() == 0);
	any_ok |= (detect_memory_88() == 0);

	return any_ok ? 0 : -1;
}
由上面的代碼可知,linux內核會分別嘗試調用detect_memory_e820()、detect_memory_e801()、detect_memory_88()獲得系統物理內存布局,這3個函數內部其實都會以內聯匯編的形式調用bios中斷以取得內存信息,該中斷調用形式為int 0x15,調用前分別把AX寄存器設置為0xe820、0xe801,或把AH寄存器設置為0x88,關於0x15號中斷有興趣的可以去查詢相關手冊。下面分析detect_memory_e820()的代碼,其它代碼基本一樣。
#define SMAP 0x534d4150 /* ASCII "SMAP" */

/*
 * Read the physical memory map from the BIOS with int 0x15, AX = 0xe820.
 *
 * For historical reasons some I/O devices occupy part of the physical
 * address space, so usable RAM is not contiguous: it is split into several
 * ranges with different attributes. Each BIOS call reports one range, so the
 * call is repeated, feeding the %ebx value returned by one call into the
 * next, until the BIOS signals the end or boot_params.e820_map is full.
 *
 * Returns the number of ranges stored in boot_params.e820_map; the same
 * value is written to boot_params.e820_entries. The result is 0 when the
 * BIOS stops returning the SMAP signature in the middle of the search.
 */
static int detect_memory_e820(void)
{
	static struct e820entry buf;	/* static so it is zeroed */
	struct e820entry *slot = boot_params.e820_map;
	struct biosregs ireg, oreg;
	int count = 0;			/* ranges accepted so far */

	initregs(&ireg);
	ireg.ax = 0xe820;		/* function number */
	ireg.cx = sizeof buf;		/* size of one struct e820entry */
	ireg.edx = SMAP;		/* signature */
	ireg.di = (size_t)&buf;		/* where the BIOS writes the range */

	/*
	 * Note: at least one BIOS is known which assumes that the
	 * buffer pointed to by one e820 call is the same one as
	 * the previous call, and only changes modified fields. Therefore,
	 * we use a temporary buffer and copy the results entry by entry.
	 *
	 * This routine deliberately does not try to account for
	 * ACPI 3+ extended attributes. This is because there are
	 * BIOSes in the field which report zero for the valid bit for
	 * all ranges, and we don't currently make any use of the
	 * other attribute bits. Revisit this if we see the extended
	 * attribute bits deployed in a meaningful way in the future.
	 */
	for (;;) {
		intcall(0x15, &ireg, &oreg);

		/* Carry the continuation value over to the next call. */
		ireg.ebx = oreg.ebx;

		/*
		 * BIOSes which terminate the chain with CF = 1 as opposed
		 * to %ebx = 0 don't always report the SMAP signature on
		 * the final, failing, probe.
		 */
		if (oreg.eflags & X86_EFLAGS_CF)
			break;

		/*
		 * Some BIOSes stop returning SMAP in the middle of
		 * the search loop. We don't know exactly how the BIOS
		 * screwed up the map at that point, we might have a
		 * partial map, the full map, or complete garbage, so
		 * just return failure.
		 */
		if (oreg.eax != SMAP) {
			count = 0;
			break;
		}

		/* Copy the temporary buffer into the next free map slot. */
		*slot++ = buf;
		count++;

		/* Stop at the end of the chain or when the map is full. */
		if (!ireg.ebx || count >= ARRAY_SIZE(boot_params.e820_map))
			break;
	}

	/* Record the number of ranges and hand it back to the caller. */
	return boot_params.e820_entries = count;
}
其中存放中斷返回值的結構如下:
/*
 * One memory range of the E820 map. detect_memory_e820() has the BIOS fill a
 * buffer of this type and copies it into boot_params.e820_map.
 * The structure is packed, so no padding is inserted between the fields.
 */
struct e820entry {
__u64 addr; /* start of memory segment */
__u64 size; /* size of memory segment */
__u32 type; /* type of memory segment */
} __attribute__((packed));
在內核初始化跳入start_kernel函數後執行以下初始化
start_kernel()->setup_arch()->setup_memory_map()
/*
 * Set up the kernel's E820 memory map.
 *
 * Runs the x86_init.resources.memory_setup() hook, keeps a copy of the
 * resulting e820 map in e820_saved, and prints the map, passing along the
 * string returned by the hook.
 */
void __init setup_memory_map(void)
{
	/* Run the x86 memory_setup hook; it returns the string used when printing. */
	char *origin = x86_init.resources.memory_setup();

	/* Snapshot the current map into e820_saved. */
	memcpy(&e820_saved, &e820, sizeof(struct e820map));

	printk(KERN_INFO "BIOS-provided physical RAM map:\n");
	e820_print_map(origin);
}
在x86_init.c中定義了x86下的memory_setup函數
/*
 * Excerpt of the x86_init operations table. The resources.memory_setup hook
 * is set to default_machine_specific_memory_setup; the other initializers
 * are elided by the article.
 */
struct x86_init_ops x86_init __initdata = {
.resources = {
……
.memory_setup = default_machine_specific_memory_setup,
},
……
};
/*
 * memory_setup hook installed in x86_init: passes the BIOS-supplied map in
 * boot_params.e820_map through sanitize_e820_map(), stores the resulting
 * entry count back into boot_params.e820_entries and appends the entries
 * with append_e820_map().
 *
 * Returns the string held in "who", initialised to "BIOS-e820".
 * NOTE(review): the body of the failure branch is elided in this excerpt, so
 * whether it changes "who" cannot be told from here.
 */
char *__init default_machine_specific_memory_setup(void)
{
char *who = "BIOS-e820";
u32 new_nr;
/*
* Try to copy the BIOS-supplied E820-map.
*
* Otherwise fake a memory map; one section from 0k->640k,
* the next section from 1mb->appropriate_mem_k
*/
new_nr = boot_params.e820_entries;
/* Remove overlapping ranges from the BIOS map */
sanitize_e820_map(boot_params.e820_map,
ARRAY_SIZE(boot_params.e820_map),
&new_nr);
/* Number of entries left after the overlaps were removed */
boot_params.e820_entries = new_nr;
/* Append the entries to the global e820 map; a negative result is the error case */
if (append_e820_map(boot_params.e820_map, boot_params.e820_entries)
< 0) {
……
}
/* In case someone cares... */
return who;
}
append_e820_map調用__append_e820_map實現
/*
 * Add nr_map consecutive entries, starting at biosmap, to the global e820
 * map by calling e820_add_region() for each of them.
 *
 * Returns 0 when every entry was processed, or -1 as soon as an entry is
 * found whose start + size wraps around 64 bits; entries before that one
 * have already been added.
 */
static int __init __append_e820_map(struct e820entry *biosmap, int nr_map)
{
	struct e820entry *entry;

	for (entry = biosmap; nr_map; entry++, nr_map--) {
		u64 start = entry->addr;
		u64 size = entry->size;
		u32 type = entry->type;

		/* Overflow in 64 bits? Ignore the memory map. */
		if (start + size < start)
			return -1;

		e820_add_region(start, size, type);
	}

	return 0;
}
/*
 * Add the range [start, start + size) with the given type to the global
 * e820 map. Thin wrapper around __e820_add_region().
 */
void __init e820_add_region(u64 start, u64 size, int type)
{
	struct e820map *global_map = &e820;

	__e820_add_region(global_map, start, size, type);
}
e820為e820map結構
/*
 * A memory map: a fixed-size array of ranges plus the number of entries
 * currently in use.
 */
struct e820map {
__u32 nr_map; /* number of valid entries in map[] */
struct e820entry map[E820_X_MAX]; /* the ranges; at most E820_X_MAX of them */
};
其中E820_X_MAX大小為128.
/*
 * Append one range to the map pointed to by e820x.
 *
 * @e820x: map to extend
 * @start: start address of the range
 * @size:  size of the range
 * @type:  type of the range
 *
 * If the map is already full, an error is logged and the range is dropped.
 */
static void __init __e820_add_region(struct e820map *e820x, u64 start, u64 size,
					 int type)
{
	int x = e820x->nr_map;	/* index of the first free slot */

	if (x >= ARRAY_SIZE(e820x->map)) {
		printk(KERN_ERR "Ooops! Too many entries in the memory map!\n");
		return;
	}

	/* Fill the free slot and account for it. */
	e820x->map[x].addr = start;
	e820x->map[x].size = size;
	e820x->map[x].type = type;
	e820x->nr_map++;
}
到這裡,物理內存布局就已經從BIOS中讀出來並存放到全局變量e820中,e820是linux內核中用於建立內存管理框架的基礎。在後面我們會看到,建立初始化節點、管理區時會用到它。