歡迎來到Linux教程網
Linux教程網
Linux教程網
Linux教程網
您现在的位置: Linux教程網 >> UnixLinux >  >> Linux基礎 >> 關於Linux

linux內存管理之伙伴系統(建立)

內核使用伙伴系統來解決內存分配引起的外部碎片問題。
一、數據結構描述
 
結構zone中的free_area數組用來描述伙伴系統,該數組的每個元素為一個free_area結構。
 
www.2cto.com
/*
 * Excerpt of struct zone showing only the member the buddy system uses;
 * all other members are elided.
 */
struct zone {
    /* ... other members elided ... */
    /* Buddy-system free areas; the array holds MAX_ORDER entries. */
    struct free_area    free_area[MAX_ORDER];
    /* ... other members elided ... */
};
www.2cto.com
/*
 * Element type of the free_area[] array embedded in struct zone.
 */
struct free_area {
    /*
     * One list head per migrate type. The article counts five list
     * types and notes that this per-type classification is a newer
     * addition.
     */
    struct list_head    free_list[MIGRATE_TYPES];
    /* Counter reset to 0 when the zone's free lists are initialised;
     * presumably the number of free blocks tracked here — confirm. */
    unsigned long       nr_free;
};
下圖為伙伴系統在管理區中的表示。
 
 
 \
 
 
 
二、伙伴系統的初始化
 
伙伴系統是在初始化物理內存管理區(zone)的時候一並初始化的,具體實現位於下面調用鏈末端的函數中:
 
start_kernel()->setup_arch()->paging_init()->zone_sizes_init()->free_area_init_nodes()->free_area_init_node()->free_area_init_core()->init_currently_empty_zone()->zone_init_free_lists()
 
www.2cto.com
/*
 * Initialise the free lists of @zone: for every (order, migrate type)
 * pair visited by for_each_migratetype_order(), make the list head empty
 * and reset that order's nr_free counter to 0.
 */
static void __meminit zone_init_free_lists(struct zone *zone)
{
    int order, t;

    for_each_migratetype_order(order, t) {
        INIT_LIST_HEAD(&zone->free_area[order].free_list[t]);
        zone->free_area[order].nr_free = 0;
    }
}
三、伙伴系統中數據初始化
 
將bootmem分配器中的數據回收到伙伴系統中
 
start_kernel()->mm_init()->mem_init()
 
www.2cto.com
/*
 * Hand the memory managed by the bootmem allocator (and the high-memory
 * zones) over to the buddy system, print a summary of the resulting
 * memory layout, sanity-check the virtual address-space boundaries and
 * finally remove the low-memory mappings.
 */
void __init mem_init(void)
{
    int codesize, reservedpages, datasize, initsize;
    int tmp;

    /* Hardware-specific setup. */
    pci_iommu_alloc();

#ifdef CONFIG_FLATMEM
    BUG_ON(!mem_map);
#endif
    /* this will put all low memory onto the freelists */
    /*
     * Release the bootmem memory into the buddy system, including the
     * pages occupied by the bootmem bitmap itself. The return value is
     * the total number of pages released.
     */
    totalram_pages += free_all_bootmem();

    reservedpages = 0;
    for (tmp = 0; tmp < max_low_pfn; tmp++)
        /*
         * Only count reserved RAM pages:
         */
        if (page_is_ram(tmp) && PageReserved(pfn_to_page(tmp)))
            reservedpages++;

    /* Initialise the high-memory zones and put their pages into the
     * buddy system. */
    set_highmem_pages_init();

    /* Sizes of the kernel code, data and init sections. */
    codesize =  (unsigned long) &_etext - (unsigned long) &_text;
    datasize =  (unsigned long) &_edata - (unsigned long) &_etext;
    initsize =  (unsigned long) &__init_end - (unsigned long) &__init_begin;

    /* Print the memory figures now that initialisation is done. */
    printk(KERN_INFO "Memory: %luk/%luk available (%dk kernel code, "
            "%dk reserved, %dk data, %dk init, %ldk highmem)\n",
        nr_free_pages() << (PAGE_SHIFT-10),
        num_physpages << (PAGE_SHIFT-10),
        codesize >> 10,
        reservedpages << (PAGE_SHIFT-10),
        datasize >> 10,
        initsize >> 10,
        (unsigned long) (totalhigh_pages << (PAGE_SHIFT-10))
           );

    printk(KERN_INFO "virtual kernel memory layout:\n"
        "    fixmap  : 0x%08lx - 0x%08lx   (%4ld kB)\n"
#ifdef CONFIG_HIGHMEM
        "    pkmap   : 0x%08lx - 0x%08lx   (%4ld kB)\n"
#endif
        "    vmalloc : 0x%08lx - 0x%08lx   (%4ld MB)\n"
        "    lowmem  : 0x%08lx - 0x%08lx   (%4ld MB)\n"
        "      .init : 0x%08lx - 0x%08lx   (%4ld kB)\n"
        "      .data : 0x%08lx - 0x%08lx   (%4ld kB)\n"
        "      .text : 0x%08lx - 0x%08lx   (%4ld kB)\n",
        FIXADDR_START, FIXADDR_TOP,
        (FIXADDR_TOP - FIXADDR_START) >> 10,

#ifdef CONFIG_HIGHMEM
        PKMAP_BASE, PKMAP_BASE+LAST_PKMAP*PAGE_SIZE,
        (LAST_PKMAP*PAGE_SIZE) >> 10,
#endif

        VMALLOC_START, VMALLOC_END,
        (VMALLOC_END - VMALLOC_START) >> 20,

        (unsigned long)__va(0), (unsigned long)high_memory,
        ((unsigned long)high_memory - (unsigned long)__va(0)) >> 20,

        (unsigned long)&__init_begin, (unsigned long)&__init_end,
        ((unsigned long)&__init_end -
         (unsigned long)&__init_begin) >> 10,

        (unsigned long)&_etext, (unsigned long)&_edata,
        ((unsigned long)&_edata - (unsigned long)&_etext) >> 10,

        (unsigned long)&_text, (unsigned long)&_etext,
        ((unsigned long)&_etext - (unsigned long)&_text) >> 10);

    /*
     * Check boundaries twice: Some fundamental inconsistencies can
     * be detected at build time already.
     */
    /* __FIXADDR_TOP and high_memory are temporarily defined as constants
     * for the BUILD_BUG_ON() checks and undefined again right after. */
#define __FIXADDR_TOP (-PAGE_SIZE)
#ifdef CONFIG_HIGHMEM
    BUILD_BUG_ON(PKMAP_BASE + LAST_PKMAP*PAGE_SIZE  > FIXADDR_START);
    BUILD_BUG_ON(VMALLOC_END            > PKMAP_BASE);
#endif
#define high_memory (-128UL << 20)
    BUILD_BUG_ON(VMALLOC_START          >= VMALLOC_END);
#undef high_memory
#undef __FIXADDR_TOP

    /* The same boundary checks again, this time at run time. */
#ifdef CONFIG_HIGHMEM
    BUG_ON(PKMAP_BASE + LAST_PKMAP*PAGE_SIZE    > FIXADDR_START);
    BUG_ON(VMALLOC_END              > PKMAP_BASE);
#endif
    BUG_ON(VMALLOC_START                >= VMALLOC_END);
    BUG_ON((unsigned long)high_memory       > VMALLOC_START);

    if (boot_cpu_data.wp_works_ok < 0)
        test_wp_bit();

    save_pg_dir();
    /*
     * zap_low_mappings() clears the low-memory mappings: kernel threads
     * only access kernel space and must not reach user space. Per the
     * article, the mappings in question are the identity mappings set up
     * for the first 8MB, which filled entries 0 and 1 of the temporary
     * kernel page global directory; this call clears the PGD entries that
     * cover user space (below 3G).
     */
    zap_low_mappings(true);
}
www.2cto.com
/**
 * free_all_bootmem - release free pages to the buddy allocator
 *
 * Delegates to free_all_bootmem_core() using the bootmem data of node 0.
 *
 * Returns the number of pages actually released.
 */
unsigned long __init free_all_bootmem(void)
{
    return free_all_bootmem_core(NODE_DATA(0)->bdata);
}
www.2cto.com
/*
 * Release every page that the bootmem bitmap of @bdata marks as free
 * into the buddy system, then release the pages that hold the bitmap
 * itself.
 *
 * Returns the total number of pages released (0 if the node has no
 * bootmem bitmap).
 */
static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata)
{
    int aligned;
    struct page *page;
    unsigned long start, end, pages, count = 0;

    if (!bdata->node_bootmem_map)
        return 0;

    /* First and last page frame numbers covered by this node. */
    start = bdata->node_min_pfn;
    end = bdata->node_low_pfn;

    /*
     * If the start is aligned to the machines wordsize, we might
     * be able to free pages in bulks of that order.
     */
    aligned = !(start & (BITS_PER_LONG - 1));

    bdebug("nid=%td start=%lx end=%lx aligned=%d\n",
        bdata - bootmem_node_data, start, end, aligned);

    /* Walk the whole range covered by bootmem, one bitmap word
     * (BITS_PER_LONG pages) per iteration. */
    while (start < end) {
        unsigned long *map, idx, vec;

        map = bdata->node_bootmem_map;
        /* Offset of this word's first page from the start of the node. */
        idx = start - bdata->node_min_pfn;
        /* Inverted bitmap word: a set bit in vec means the page is free. */
        vec = ~map[idx / BITS_PER_LONG];

        /*
         * Fast path: the range is word-aligned, all BITS_PER_LONG pages
         * of this word are free, and the pages above the starting point
         * stay below end. Free them as one block of order
         * ilog2(BITS_PER_LONG).
         */
        if (aligned && vec == ~0UL && start + BITS_PER_LONG < end) {
            /* 5 on a 32-bit build, i.e. 1 << 5 = 32 pages. */
            int order = ilog2(BITS_PER_LONG);

            /* Release the whole block into the buddy system. */
            __free_pages_bootmem(pfn_to_page(start), order);
            count += BITS_PER_LONG;
        } else {
            unsigned long off = 0;

            /*
             * Slow path: vec != 0 means some page in this word is still
             * free; off is the bit index within the word, from 0.
             */
            while (vec && off < BITS_PER_LONG) {
                if (vec & 1) {
                    /* Free page: convert the offset to its struct page
                     * and release it on its own (order 0). */
                    page = pfn_to_page(start + off);
                    __free_pages_bootmem(page, 0);
                    count++;
                }
                vec >>= 1;  /* move on to the next page's bit */
                off++;
            }
        }
        start += BITS_PER_LONG;
    }

    /*
     * Now release the memory occupied by the bitmap itself: convert the
     * bitmap's virtual address to its first struct page, compute how
     * many pages the bitmap spans, and free them one at a time.
     */
    page = virt_to_page(bdata->node_bootmem_map);
    pages = bdata->node_low_pfn - bdata->node_min_pfn;
    pages = bootmem_bootmap_pages(pages);
    count += pages;
    while (pages--)
        __free_pages_bootmem(page++, 0);

    bdebug("nid=%td released=%lx\n", bdata - bootmem_node_data, count);

    return count;
}
www.2cto.com
/*
 * permit the bootmem allocator to evade page validation on high-order frees
 *
 * Clears the reserved flag of the page(s), sets up the reference count
 * and hands the block starting at @page to the buddy system.
 *
 * NOTE: for a non-zero @order the loop below always touches
 * BITS_PER_LONG pages, so it relies on the caller passing
 * order == ilog2(BITS_PER_LONG), as free_all_bootmem_core() does.
 */
void __meminit __free_pages_bootmem(struct page *page, unsigned int order)
{
    if (order == 0) {
        __ClearPageReserved(page);
        set_page_count(page, 0);    /* reference count = 0 */
        set_page_refcounted(page);  /* reference count becomes 1 */
        __free_page(page);          /* release the single page */
    } else {
        int loop;

        /* NOTE(review): the article's author was unsure about this call;
         * presumably a prefetch-for-write hint for the struct page that
         * is about to be modified — confirm. */
        prefetchw(page);
        for (loop = 0; loop < BITS_PER_LONG; loop++) {
            struct page *p = &page[loop];

            if (loop + 1 < BITS_PER_LONG)
                prefetchw(p + 1);
            __ClearPageReserved(p);
            set_page_count(p, 0);
        }

        /* Only the first page of the block gets a reference count of 1. */
        set_page_refcounted(page);
        /*
         * Which migrate-type list the block ends up on depends on
         * information attached to the page itself, which the free path
         * can look up. This frees the whole 2^order-page block, not
         * "order" pages.
         */
        __free_pages(page, order);
    }
}
www.2cto.com
/*
 * Walk all zones and, for each high-memory zone, hand the pages in its
 * pfn range to the buddy system. Afterwards the number of high-memory
 * pages is added to totalram_pages.
 */
void __init set_highmem_pages_init(void)
{
    struct zone *zone;
    int nid;

    for_each_zone(zone) {
        unsigned long zone_start_pfn, zone_end_pfn;

        /* Skip every zone that is not a high-memory zone. */
        if (!is_highmem(zone))
            continue;

        zone_start_pfn = zone->zone_start_pfn;
        zone_end_pfn = zone_start_pfn + zone->spanned_pages;

        /* Id of the node this zone belongs to. */
        nid = zone_to_nid(zone);
        printk(KERN_INFO "Initializing %s for node %d (%08lx:%08lx)\n",
                zone->name, nid, zone_start_pfn, zone_end_pfn);

        /* Put the memory of this pfn range into the buddy system. */
        add_highpages_with_active_regions(nid, zone_start_pfn,
                 zone_end_pfn);
    }
    totalram_pages += totalhigh_pages;
}
www.2cto.com
/*
 * Release the pages of node @nid that fall inside [@start_pfn, @end_pfn)
 * into the buddy system by running add_highpages_work_fn() over the
 * node's active regions.
 */
void __init add_highpages_with_active_regions(int nid, unsigned long start_pfn,
                          unsigned long end_pfn)
{
    struct add_highpages_data data;

    /* The range is passed to the callback through this descriptor. */
    data.start_pfn = start_pfn;
    data.end_pfn = end_pfn;

    /* Reclaim the pages of every region of the node into the buddy
     * system. */
    work_with_active_regions(nid, add_highpages_work_fn, &data);
}
www.2cto.com
/*
 * Apply @work_fn to each active range of node @nid, passing the range's
 * start/end pfn from early_node_map[] together with @data. Iteration
 * stops as soon as @work_fn returns non-zero. Used during high-memory
 * initialisation.
 */
void __init work_with_active_regions(int nid, work_fn_t work_fn, void *data)
{
    int i;
    int ret;

    for_each_active_range_index_in_nid(i, nid) {
        ret = work_fn(early_node_map[i].start_pfn,
                  early_node_map[i].end_pfn, data);
        if (ret)
            break;
    }
}
www.2cto.com
/*
 * Callback for one active range [@start_pfn, @end_pfn): intersect it
 * with the range described by @datax (a struct add_highpages_data) and
 * release every valid page in the intersection to the buddy system.
 *
 * Always returns 0, so the caller's iteration is never cut short.
 */
static int __init add_highpages_work_fn(unsigned long start_pfn,
                     unsigned long end_pfn, void *datax)
{
    int node_pfn;
    struct page *page;
    unsigned long final_start_pfn, final_end_pfn;
    struct add_highpages_data *data;

    data = (struct add_highpages_data *)datax;

    /* Intersection of the active range and the requested range. */
    final_start_pfn = max(start_pfn, data->start_pfn);
    final_end_pfn = min(end_pfn, data->end_pfn);
    if (final_start_pfn >= final_end_pfn)
        return 0;   /* empty intersection: nothing to do */

    for (node_pfn = final_start_pfn; node_pfn < final_end_pfn;
         node_pfn++) {
        /* Skip page frame numbers that are not valid. */
        if (!pfn_valid(node_pfn))
            continue;
        /* Convert the pfn to its struct page. */
        page = pfn_to_page(node_pfn);
        /* Initialise the page's count and release it to the buddy
         * system. */
        add_one_highpage_init(page, node_pfn);
    }

    return 0;
}
www.2cto.com
/*
 * Release one high-memory page to the buddy system and account for it
 * in totalhigh_pages. @pfn is not used in the body.
 */
static void __init add_one_highpage_init(struct page *page, int pfn)
{
    /* Clear the reserved flag in the page's flags, marking the page as
     * dynamic memory. */
    ClearPageReserved(page);
    init_page_count(page);  /* set the page's count to 1 */
    __free_page(page);      /* release the page to the buddy system */
    totalhigh_pages++;      /* one more high-memory page */
}
www.2cto.com
/*
 * Remove the initial low-memory mappings from swapper_pg_dir and flush
 * the TLB.
 *
 * @early: when true only __flush_tlb() is called; otherwise
 *         flush_tlb_all() is used.
 */
void zap_low_mappings(bool early)
{
    int i;

    /*
     * Zap initial low-memory mappings.
     *
     * Note that "pgd_clear()" doesn't do it for
     * us, because pgd_clear() is a no-op on i386.
     */
    /*
     * This resets the leading entries of the page global directory that
     * were set up earlier in arch/x86/kernel/head_32.S. How many? By the
     * article's calculation, 0xc0000000 >> 22 & 1023 = 768: these page
     * global directory entries cover the first 3G of virtual addresses,
     * i.e. the user region, and all of them are cleared here.
     *
     * With CONFIG_X86_PAE the entry is not zeroed but set to
     * 1 + __pa(empty_zero_page) instead.
     */
    for (i = 0; i < KERNEL_PGD_BOUNDARY; i++) {
#ifdef CONFIG_X86_PAE
        set_pgd(swapper_pg_dir+i, __pgd(1 + __pa(empty_zero_page)));
#else
        set_pgd(swapper_pg_dir+i, __pgd(0));
#endif
    }

    if (early)
        __flush_tlb();
    else
        flush_tlb_all();
}
到此,伙伴系統已經建立,並且裡面存放了應有的內存數據。要從伙伴系統中分配內存,還必須要有分配和釋放機制,後面再總結具體的分配和釋放工作。
Copyright © Linux教程網 All Rights Reserved