歡迎來到Linux教程網
Linux教程網
Linux教程網
Linux教程網
您现在的位置: Linux教程網 >> UnixLinux >  >> Linux基礎 >> 關於Linux

linux啟動內存分配器

linux啟動內存分配器是在伙伴系統、slab機制實現之前,為滿足內核中內存的分配而建立的。它本身的機制比較簡單:使用位圖來標記頁面的分配和釋放,每個頁面對應位圖中的一位。
 
一、數據結構介紹
 
1,保留區間
 
因為在建立啟動內存分配器的時候,會涉及保留內存。也就是說,之前保留給頁表、分配器本身(用於映射的位圖)、io等的內存,在分配器建立後、用它來分配內存空間時,保留出來的那些部分就不能再分配了。linux中對保留內存空間的部分用下列數據結構表示:
 
 
/*
 * Early reserved memory areas.
 */ 
#define MAX_EARLY_RES 20/* number of slots in the early_res[] table */ 
 
struct early_res {/* one early reserved memory range */ 
    /* Range bounds; early_res_to_bootmem() uses end - start as the size
       and treats end == 0 as the end of the used table entries. */
    u64 start, end; 
    /* Label printed by early_res_to_bootmem() when listing reservations. */
    char name[16]; 
    /* Not referenced by any code shown in this file. */
    char overlap_ok; 
}; 
/* Global table of early reservations; the first entry reserves
   [0, PAGE_SIZE) and the empty entry terminates the list. */ 
static struct early_res early_res[MAX_EARLY_RES] __initdata = { 
    { 0, PAGE_SIZE, "BIOS data page" }, /* BIOS data page */ 
    {} 
}; 
2,bootmem分配器
 
/*
 * node_bootmem_map is a map pointer - the bits represent all physical 
 * memory pages (including holes) on the node.
 */ 
 /* Per-node state of the bootmem allocator. */ 
typedef struct bootmem_data { 
    unsigned long node_min_pfn;/* first PFN covered by this node; bitmap index 0 corresponds to it (set from 'start' in init_bootmem_core) */ 
    unsigned long node_low_pfn;/* end PFN of the node (set from 'end' in init_bootmem_core); marking beyond it triggers BUG_ON */ 
    void *node_bootmem_map; /* bitmap, one bit per page: 1 = reserved/allocated, 0 = free */
    unsigned long last_end_off;/* end offset of the previous allocation, in bytes relative to node_min_pfn (set in alloc_bootmem_core) */ 
    unsigned long hint_idx;/* bitmap index where the next allocation search starts: PFN_UP of the last end offset, lowered by __free() */ 
    struct list_head list; /* link in the global bdata_list */
} bootmem_data_t; 
全局鏈表
 
/* Global list of all bootmem_data_t instances; link_bootmem() inserts
   entries in node_min_pfn order and mark_bootmem() walks it. */
static struct list_head bdata_list __initdata = LIST_HEAD_INIT(bdata_list); 
二、啟動分配器的建立
 
啟動分配器的建立主要的流程為:初始化映射位圖(全部置1)、將活動內存區對應的位圖位清0(表示可用)、處理保留內存區域。其中保留區存放在上面介紹的全局數組中,這裡只是將分配器映射位圖中對應的位置為1,表示已經分配。
 
下面我們看內核中具體的初始化流程。
 
start_kernel()->setup_arch()->initmem_init()
 
 
/*
 * Excerpt of setup_arch(): only the call that leads into the bootmem
 * setup is shown; the rest of the function body is elided.
 */
void __init setup_arch(char **cmdline_p) 
{
    /* ... */

    /*
     * initmem_init() records the memory layout and then calls
     * setup_bootmem_allocator() to build the bootmem allocator.
     */
    initmem_init(0, max_pfn); 

    /* ... */
}
 
<span style="font-family: Arial, Verdana, sans-serif; "><span style="white-space: normal; "></span></span> 
 
<span style="font-family: Arial, Verdana, sans-serif; "><span style="white-space: normal; "></span></span><pre name="code" class="cpp">void __init initmem_init(unsigned long start_pfn, 
                  unsigned long end_pfn) 

#ifdef CONFIG_HIGHMEM 
    highstart_pfn = highend_pfn = max_pfn; 
    if (max_pfn > max_low_pfn) 
        highstart_pfn = max_low_pfn; 
    /*將活動內存放到early_node_map中,前面已經分析過了*/ 
    e820_register_active_regions(0, 0, highend_pfn); 
    /*設置上面變量中的內存為當前,在這裡沒有 
    設置相關的宏*/ 
    sparse_memory_present_with_active_regions(0); 
    printk(KERN_NOTICE "%ldMB HIGHMEM available.\n", 
        pages_to_mb(highend_pfn - highstart_pfn)); 
    num_physpages = highend_pfn; 
    /*高端內存開始地址物理*/ 
    high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1; 
#else 
    e820_register_active_regions(0, 0, max_low_pfn); 
    sparse_memory_present_with_active_regions(0); 
    num_physpages = max_low_pfn; 
    high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1; 
#endif 
#ifdef CONFIG_FLATMEM 
    max_mapnr = num_physpages; 
#endif 
    __vmalloc_start_set = true; 
 
    printk(KERN_NOTICE "%ldMB LOWMEM available.\n", 
            pages_to_mb(max_low_pfn)); 
    /*安裝bootmem分配器,此分配器在伙伴系統起來之前 
    用來進行承擔內存的分配等管理*/ 
    setup_bootmem_allocator(); 

 
/*
 * setup_bootmem_allocator - build the bootmem allocator for low memory.
 *
 * Finds room for the allocation bitmap, reserves that room as an early
 * reservation, initialises the bitmap of every online node through
 * setup_node_bootmem(), and finally sets after_bootmem to 1.
 * Panics if no area large enough for the bitmap can be found.
 */
void __init setup_bootmem_allocator(void) 
{
    int nodeid; 
    unsigned long bootmap_size, bootmap; 
    /*
     * Initialize the boot-time allocator (with low memory only):
     */ 
    /* Size of the bitmap in bytes: the helper's result is shifted by
       PAGE_SHIFT (presumably a page count; the bitmap holds one bit per
       page, not one byte per bit). */ 
    bootmap_size = bootmem_bootmap_pages(max_low_pfn)<<PAGE_SHIFT; 
    /* Look for a PAGE_SIZE-aligned area of that size below the mapped limit. */ 
    bootmap = find_e820_area(0, max_pfn_mapped<<PAGE_SHIFT, bootmap_size, 
                 PAGE_SIZE); 
    if (bootmap == -1L) 
        panic("Cannot find bootmem map of size %ld\n", bootmap_size); 
    /* Reserve the pages that will hold the bitmap itself. */ 
    reserve_early(bootmap, bootmap + bootmap_size, "BOOTMAP"); 
 
    printk(KERN_INFO "  mapped low ram: 0 - %08lx\n", 
         max_pfn_mapped<<PAGE_SHIFT); 
    printk(KERN_INFO "  low ram: 0 - %08lx\n", max_low_pfn<<PAGE_SHIFT); 
    /* Set up the allocator on every online node. */ 
    for_each_online_node(nodeid) { 
         unsigned long start_pfn, end_pfn; 
 
#ifdef CONFIG_NEED_MULTIPLE_NODES/*not set*/ 
        start_pfn = node_start_pfn[nodeid]; 
        end_pfn = node_end_pfn[nodeid]; 
        /* Skip nodes entirely above low memory; clamp the rest to it. */
        if (start_pfn > max_low_pfn) 
            continue; 
        if (end_pfn > max_low_pfn) 
            end_pfn = max_low_pfn; 
#else 
        start_pfn = 0; 
        end_pfn = max_low_pfn; 
#endif 
        /* setup_node_bootmem() returns the address just past this node's
           bitmap, which becomes the bitmap start for the next node. */ 
        bootmap = setup_node_bootmem(nodeid, start_pfn, end_pfn, 
                         bootmap); 
    } 
    /* The bootmem allocator is fully set up; flag it. */ 
    after_bootmem = 1; 
}

 
/*
 * setup_node_bootmem - initialise the bootmem bitmap of one node.
 * @nodeid: node being set up
 * @start_pfn: first PFN of the node's range
 * @end_pfn: end PFN of the node's range
 * @bootmap: physical address at which this node's bitmap is placed
 *
 * Initialises the bitmap, frees the active regions in it, and then
 * re-reserves the early reservations that fall inside the range.
 *
 * Returns the address just past this node's bitmap, so the caller can
 * place the next node's bitmap there.
 */
static unsigned long __init setup_node_bootmem(int nodeid, 
                 unsigned long start_pfn, 
                 unsigned long end_pfn, 
                 unsigned long bootmap) 
{
    unsigned long bootmap_size; 
 
    /* don't touch min_low_pfn */ 
    /* Initialise the bitmap; init_bootmem_core() sets every bit to 1. */ 
    bootmap_size = init_bootmem_node(NODE_DATA(nodeid), 
                     bootmap >> PAGE_SHIFT, 
                     start_pfn, end_pfn); 
    printk(KERN_INFO "  node %d low ram: %08lx - %08lx\n", 
        nodeid, start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT); 
    printk(KERN_INFO "  node %d bootmap %08lx - %08lx\n", 
         nodeid, bootmap, bootmap + bootmap_size); 
    /* Clear the bits of the active memory regions: they become allocatable. */ 
    free_bootmem_with_active_regions(nodeid, end_pfn); 
    /* Set the bits of the early reservations again: allocated/unavailable. */ 
    early_res_to_bootmem(start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT); 
    /* End address of this bitmap; the next bitmap can start here. */ 
    return bootmap + bootmap_size; 
}

對於初始化映射位圖,最終調用init_bootmem_core()
 
/*
 * Called once to set up the allocator itself.
 *
 * @bdata: per-node bootmem state to initialise
 * @mapstart: PFN of the page where the bitmap is stored
 * @start: first PFN covered by the node
 * @end: end PFN of the node
 *
 * Records the range in @bdata, links @bdata into the global list and
 * fills the bitmap with 1 bits. Returns the bitmap size in bytes.
 */ 
static unsigned long __init init_bootmem_core(bootmem_data_t *bdata, 
    unsigned long mapstart, unsigned long start, unsigned long end) 
{
    unsigned long mapsize; 
 
    mminit_validate_memmodel_limits(&start, &end); 
    bdata->node_bootmem_map = phys_to_virt(PFN_PHYS(mapstart)); 
    bdata->node_min_pfn = start; 
    bdata->node_low_pfn = end; 
    /* Add bdata to the global list. */ 
    link_bootmem(bdata); 
 
    /*
     * Initially all pages are reserved - setup_arch() has to
     * register free RAM areas explicitly.
     */ 
    /* Bitmap size for (end - start) pages; presumably one bit per page,
       i.e. about pages / 8 bytes — confirm against bootmap_bytes(). */ 
    mapsize = bootmap_bytes(end - start); 
    /* Set every bit in the map. */ 
    memset(bdata->node_bootmem_map, 0xff, mapsize); 
 
    bdebug("nid=%td start=%lx map=%lx end=%lx mapsize=%lx\n", 
        bdata - bootmem_node_data, start, mapstart, end, mapsize); 
 
    return mapsize; 
}

 
/*
 * link bdata in order
 *
 * Inserts @bdata into the global bdata_list in front of the first entry
 * whose node_min_pfn is larger (or at the tail when there is none), so
 * the list stays sorted by ascending node_min_pfn.
 */ 
static void __init link_bootmem(bootmem_data_t *bdata) 
{
    struct list_head *iter; 

    /* Stop at the first entry that starts above bdata. */ 
    list_for_each(iter, &bdata_list) { 
        bootmem_data_t *ent; 
 
        ent = list_entry(iter, bootmem_data_t, list); 
        if (bdata->node_min_pfn < ent->node_min_pfn) 
            break; 
    } 
    /* iter is either that entry or the list head itself. */
    list_add_tail(&bdata->list, iter); 
}

 
/**
 * free_bootmem_with_active_regions - Call free_bootmem_node for each active range
 * @nid: The node to free memory on. If MAX_NUMNODES, all nodes are freed.
 * @max_low_pfn: The highest PFN that will be passed to free_bootmem_node
 *
 * If an architecture guarantees that all ranges registered with
 * add_active_ranges() contain no holes and may be freed, this
 * function may be used instead of calling free_bootmem() manually.
 *
 * Ranges starting at or above @max_low_pfn are skipped and ranges that
 * cross it are clamped, so only low memory is handed to bootmem.
 */ 
void __init free_bootmem_with_active_regions(int nid, 
                        unsigned long max_low_pfn) 
{
    int i; 

    /* Walk the active ranges recorded for this node. */ 
    for_each_active_range_index_in_nid(i, nid) { 
        unsigned long size_pages = 0; 
        unsigned long end_pfn = early_node_map[i].end_pfn; 
 
        if (early_node_map[i].start_pfn >= max_low_pfn) 
            continue; 
 
        if (end_pfn > max_low_pfn) 
            end_pfn = max_low_pfn; 
        /* Number of pages in the (clamped) active range. */ 
        size_pages = end_pfn - early_node_map[i].start_pfn; 
        /* Free the range, i.e. clear its bits in the bitmap. */ 
        free_bootmem_node(NODE_DATA(early_node_map[i].nid), 
                PFN_PHYS(early_node_map[i].start_pfn), 
                size_pages << PAGE_SHIFT); 
    } 
}

 
/**
 * free_bootmem_node - mark a page range as usable
 * @pgdat: node the range resides on
 * @physaddr: starting address of the range
 * @size: size of the range in bytes
 *
 * Partial pages will be considered reserved and left as they are.
 *
 * The range must reside completely on the specified node.
 */ 
void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr, 
                  unsigned long size) 
{
    unsigned long start, end; 

    /* kmemleak bookkeeping hook (debugging aid per the article). */ 
    kmemleak_free_part(__va(physaddr), size); 
 
    /* Round the start up and the end down so that only whole pages
       inside the range are freed. */
    start = PFN_UP(physaddr); 
    end = PFN_DOWN(physaddr + size); 
 
    /* reserve = 0: clear the bits, marking the pages as not allocated. */ 
    mark_bootmem_node(pgdat->bdata, start, end, 0, 0); 
}

 
/*
 * mark_bootmem_node - reserve or free a PFN range on a single node.
 * @bdata: bootmem state of the node
 * @start: first PFN of the range
 * @end: end PFN of the range (exclusive)
 * @reserve: non-zero to reserve the range, zero to free it
 * @flags: reservation flags, passed to __reserve() when reserving
 *
 * The range must lie inside [node_min_pfn, node_low_pfn]; otherwise
 * BUG_ON fires. Returns the result of __reserve() when reserving and
 * 0 when freeing.
 */
static int __init mark_bootmem_node(bootmem_data_t *bdata, 
                unsigned long start, unsigned long end, 
                int reserve, int flags) 
{
    unsigned long sidx, eidx; 
 
    bdebug("nid=%td start=%lx end=%lx reserve=%d flags=%x\n", 
        bdata - bootmem_node_data, start, end, reserve, flags); 
 
    BUG_ON(start < bdata->node_min_pfn); 
    BUG_ON(end > bdata->node_low_pfn); 
    /* Convert PFNs to bitmap indices relative to the node's first PFN. */ 
    sidx = start - bdata->node_min_pfn; 
    eidx = end - bdata->node_min_pfn; 
 
    if (reserve) 
        return __reserve(bdata, sidx, eidx, flags); 
    else 
        __free(bdata, sidx, eidx); 
    return 0; 
}

 
/*
 * __reserve - set the bitmap bits for indices [sidx, eidx).
 * @bdata: bootmem state of the node
 * @sidx: first bitmap index to reserve
 * @eidx: end bitmap index (exclusive)
 * @flags: if BOOTMEM_EXCLUSIVE is set, an already-reserved page is an error
 *
 * Returns 0 on success. In exclusive mode, when a bit is found already
 * set, the bits set so far by this call are cleared again and -EBUSY is
 * returned. Without the flag a double reservation is only logged.
 */ 
static int __init __reserve(bootmem_data_t *bdata, unsigned long sidx, 
            unsigned long eidx, int flags) 
{
    unsigned long idx; 
    int exclusive = flags & BOOTMEM_EXCLUSIVE; 
 
    bdebug("nid=%td start=%lx end=%lx flags=%x\n", 
        bdata - bootmem_node_data, 
        sidx + bdata->node_min_pfn, 
        eidx + bdata->node_min_pfn, 
        flags); 
    /* Mark each page of the range as reserved. */ 
    for (idx = sidx; idx < eidx; idx++) 
        if (test_and_set_bit(idx, bdata->node_bootmem_map)) { 
            if (exclusive) { 
                /* Roll back [sidx, idx), which this call just set. */
                __free(bdata, sidx, idx); 
                return -EBUSY; 
            } 
            bdebug("silent double reserve of PFN %lx\n", 
                idx + bdata->node_min_pfn); 
        } 
    return 0; 
}

 
/*
 * __free - clear the bitmap bits for indices [sidx, eidx).
 * @bdata: bootmem state of the node
 * @sidx: first bitmap index to free
 * @eidx: end bitmap index (exclusive)
 *
 * Every page in the range must currently be marked reserved; freeing a
 * page whose bit is already clear triggers BUG().
 */ 
static void __init __free(bootmem_data_t *bdata, 
            unsigned long sidx, unsigned long eidx) 
{
    unsigned long idx; 
 
    bdebug("nid=%td start=%lx end=%lx\n", bdata - bootmem_node_data, 
        sidx + bdata->node_min_pfn, 
        eidx + bdata->node_min_pfn); 
 
    /* Pull the allocation hint back so later searches see the freed pages. */
    if (bdata->hint_idx > sidx) 
        bdata->hint_idx = sidx; 
 
    for (idx = sidx; idx < eidx; idx++) 
        if (!test_and_clear_bit(idx, bdata->node_bootmem_map)) 
            BUG(); 
}

 
/*
 * early_res_to_bootmem - carry the early reservations over into bootmem.
 * @start: start address of the range being handed to bootmem
 * @end: end address of that range
 *
 * For every used entry of early_res[], the part that overlaps
 * [@start, @end) is reserved in the bootmem allocator. Entries that do
 * not overlap are only printed.
 */
void __init early_res_to_bootmem(u64 start, u64 end) 
{
    int i, count; 
    u64 final_start, final_end; 
 
    /* Count the used entries; an entry with end == 0 ends the table. */
    count  = 0; 
    for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) 
        count++; 
 
    printk(KERN_INFO "(%d early reservations) ==> bootmem [%010llx - %010llx]\n", 
             count, start, end); 
    for (i = 0; i < count; i++) { 
        struct early_res *r = &early_res[i]; 
        printk(KERN_INFO "  #%d [%010llx - %010llx] %16s", i, 
            r->start, r->end, r->name); 
        /* Clip the reservation to the requested range. */
        final_start = max(start, r->start); 
        final_end = min(end, r->end); 
        if (final_start >= final_end) { 
            printk(KERN_CONT "\n"); 
            continue; 
        } 
        printk(KERN_CONT " ==> [%010llx - %010llx]\n", 
            final_start, final_end); 
        /* Reserve the clipped range. */ 
        reserve_bootmem_generic(final_start, final_end - final_start, 
                BOOTMEM_DEFAULT); 
    } 
}

上面的保留指定區間reserve_bootmem_generic()函數最終調用如下函數
 
/**
 * reserve_bootmem - mark a page range as reserved
 * @addr: starting address of the range
 * @size: size of the range in bytes
 * @flags: reservation flags (see linux/bootmem.h)
 *
 * Partial pages will be reserved.
 *
 * The range must be contiguous but may span node boundaries.
 *
 * Returns the result of mark_bootmem(): 0 on success or its error code.
 */ 
int __init reserve_bootmem(unsigned long addr, unsigned long size, 
                int flags) 
{
    unsigned long start, end; 
 
    /* Round outwards so partially covered pages are reserved too. */
    start = PFN_DOWN(addr); 
    end = PFN_UP(addr + size); 
 
    return mark_bootmem(start, end, 1, flags); 
}

 
/*
 * mark_bootmem - reserve or free a PFN range that may span several nodes.
 * @start: first PFN of the range
 * @end: end PFN of the range (exclusive)
 * @reserve: non-zero to reserve, zero to free
 * @flags: reservation flags passed down to mark_bootmem_node()
 *
 * Walks bdata_list and marks the part of the range that lies on each
 * node. Returns 0 once the whole range is handled. If a reservation
 * fails, the part already reserved is freed again and the error is
 * returned. Reaching the end of the list without covering the range
 * triggers BUG().
 */ 
static int __init mark_bootmem(unsigned long start, unsigned long end, 
                int reserve, int flags) 
{
    unsigned long pos; 
    bootmem_data_t *bdata; 
 
    pos = start; 
    /* Find the bdata entries that cover the range. */ 
    list_for_each_entry(bdata, &bdata_list, list) { 
        int err; 
        unsigned long max; 
 
        if (pos < bdata->node_min_pfn || 
            pos >= bdata->node_low_pfn) { 
            /* Skipping a node is only valid before marking has begun. */
            BUG_ON(pos != start); 
            continue; 
        } 
 
        max = min(bdata->node_low_pfn, end); 
        /* Mark the part of the range that lies on this node. */ 
        err = mark_bootmem_node(bdata, pos, max, reserve, flags); 
        if (reserve && err) { 
            /* Undo what was reserved on earlier nodes: [start, pos). */
            mark_bootmem(start, pos, 0, 0); 
            return err; 
        } 
 
        if (max == end) 
            return 0; 
        pos = bdata->node_low_pfn; 
    } 
    BUG(); 
}

三、內存的分配和釋放
介紹了上面的初始化流程,對於分配和釋放就簡單了,分配就是將分配器映射位圖中對應的位置1,釋放過程相反。
 
 
/*
 * alloc_bootmem_core - allocate @size bytes from one node's bootmem bitmap.
 * @bdata: bootmem state of the node to allocate from
 * @size: number of bytes to allocate; must be non-zero
 * @align: required alignment in bytes; must be a power of two
 * @goal: preferred starting address, or 0 for none
 * @limit: upper address limit of the allocation, or 0 for no limit
 *
 * Searches the bitmap for a run of free pages, starting at the hint left
 * by the previous allocation and falling back to the goal/node start if
 * that fails. A new allocation may begin in the unused tail of the page
 * in which the previous one ended.
 *
 * Returns the zeroed region, or NULL if the node has no bitmap or no
 * suitable free range exists.
 */ 
static void * __init alloc_bootmem_core(struct bootmem_data *bdata, 
                    unsigned long size, unsigned long align, 
                    unsigned long goal, unsigned long limit) 
{
    unsigned long fallback = 0; 
    unsigned long min, max, start, sidx, midx, step; 
 
    bdebug("nid=%td size=%lx [%lu pages] align=%lx goal=%lx limit=%lx\n", 
        bdata - bootmem_node_data, size, PAGE_ALIGN(size) >> PAGE_SHIFT, 
        align, goal, limit); 
 
    BUG_ON(!size); 
    BUG_ON(align & (align - 1)); 
    BUG_ON(limit && goal + size > limit); 
    /* Without a bitmap nothing can be allocated from this node. */ 
    if (!bdata->node_bootmem_map) 
        return NULL; 
 
    min = bdata->node_min_pfn; 
    max = bdata->node_low_pfn; 
 
    /* From here on goal and limit are page frame numbers. */
    goal >>= PAGE_SHIFT; 
    limit >>= PAGE_SHIFT; 
 
    if (limit && max > limit) 
        max = limit; 
    if (max <= min) 
        return NULL; 
    /* Alignment expressed in pages, at least one page. */ 
    step = max(align >> PAGE_SHIFT, 1UL); 
    /* Starting page: the goal if it lies inside the node, else the node start. */ 
    if (goal && min < goal && goal < max) 
        start = ALIGN(goal, step); 
    else 
        start = ALIGN(min, step); 
    /* Bitmap index range to search. */ 
    sidx = start - bdata->node_min_pfn; 
    midx = max - bdata->node_min_pfn; 
    /*
     * If the previous allocation ended beyond sidx, search from the hint
     * first and remember sidx so the search can be retried from there
     * when nothing is found after the hint.
     */ 
    if (bdata->hint_idx > sidx) { 
        /*
         * Handle the valid case of sidx being zero and still 
         * catch the fallback below. 
         */ 
        fallback = sidx + 1; 
        /* Start at the previous allocation's end, aligned to step. */ 
        sidx = align_idx(bdata, bdata->hint_idx, step); 
    } 
 
    while (1) { 
        int merge; 
        void *region; 
        unsigned long eidx, i, start_off, end_off; 
find_block: 
        /* Find the next clear (free) bit. */ 
        sidx = find_next_zero_bit(bdata->node_bootmem_map, midx, sidx); 
        sidx = align_idx(bdata, sidx, step); 
        eidx = sidx + PFN_UP(size); 
 
        /* Ran past the end of the searchable range. */
        if (sidx >= midx || eidx > midx) 
            break; 
 
        /* Check that the whole run [sidx, eidx) is free. */
        for (i = sidx; i < eidx; i++) 
            if (test_bit(i, bdata->node_bootmem_map)) { 
                /* Not free: restart the search beyond the busy page. */
                sidx = align_idx(bdata, i, step); 
                if (sidx == i) 
                    sidx += step; 
                goto find_block; 
            } 
 
        /*
         * If the previous allocation ended partway through the page
         * directly before sidx, start in the unused tail of that page;
         * otherwise start at the beginning of page sidx.
         */
        if (bdata->last_end_off & (PAGE_SIZE - 1) && 
                PFN_DOWN(bdata->last_end_off) + 1 == sidx) 
            start_off = align_off(bdata, bdata->last_end_off, align); 
        else 
            start_off = PFN_PHYS(sidx); 
 
        /*
         * merge == 1 means the allocation starts in the page before
         * sidx, i.e. it shares the previous allocation's last page.
         */ 
        merge = PFN_DOWN(start_off) < sidx; 
        end_off = start_off + size; 
        /* Record where this allocation ends for the next call. */ 
        bdata->last_end_off = end_off; 
        bdata->hint_idx = PFN_UP(end_off); 
 
        /*
         * Reserve the area now:
         */ 
        /* Skip the shared first page when merging: its bit is already
           set, and the exclusive reserve would fail on it. */ 
        if (__reserve(bdata, PFN_DOWN(start_off) + merge, 
                PFN_UP(end_off), BOOTMEM_EXCLUSIVE)) 
            BUG(); 
        /* Convert the offset within the node to a virtual address. */ 
        region = phys_to_virt(PFN_PHYS(bdata->node_min_pfn) + 
                start_off); 
        memset(region, 0, size); 
        /*
         * The min_count is set to 0 so that bootmem allocated blocks
         * are never reported as leaks.
         */ 
        kmemleak_alloc(region, size, 0, 0); 
        return region; 
    } 
 
    /* Nothing found after the hint: retry once from the original start. */
    if (fallback) { 
        sidx = align_idx(bdata, fallback - 1, step); 
        fallback = 0; 
        goto find_block; 
    } 
 
    return NULL; 
}   

摘自 bullbat的專欄
Copyright © Linux教程網 All Rights Reserved