一、內核啟動早期初始化
start_kernel()->mm_init()->kmem_cache_init()
執行流程:
1,初始化靜態initkmem_list3三鏈;
2,初始化cache_cache的nodelists字段為1中的三鏈;
3,根據內存情況初始化每個slab占用的頁面數變量slab_break_gfp_order;
4,將cache_cache加入cache_chain鏈表中,初始化cache_cache;
5,創建kmalloc所用的general cache:
1)cache的名稱和大小存放在兩個數據結構對應的數組中,對應大小的cache可以從size數組中找到;
2)先創建INDEX_AC和INDEX_L3下標的cache;
3)循環創建size數組中各個大小的cache;
6,替換靜態本地cache全局變量:
1) 替換cache_cache中的array_cache,本來指向靜態變量initarray_cache.cache;
2) 替換malloc_sizes[INDEX_AC].cs_cachep的local cache,原本指向靜態變量initarray_generic.cache;
7,替換靜態三鏈
1)替換cache_cache三鏈,原本指向靜態變量initkmem_list3;
2)替換malloc_sizes[INDEX_AC].cs_cachep三鏈,原本指向靜態變量initkmem_list3;
8,更新初始化進度
www.2cto.com
/*
 * Initialisation. Called after the page allocator have been initialised and
 * before smp_init().
 */
void __init kmem_cache_init(void)
{
	size_t left_over;
	struct cache_sizes *sizes;
	struct cache_names *names;
	int i;
	int order;
	int node;

	/*
	 * Until the slab allocator is up, kmalloc() cannot supply the objects
	 * needed during initialisation, so static globals are used instead;
	 * late in this function those statics are swapped for kmalloc'ed
	 * replacements.
	 *
	 * initkmem_list3 is the bootstrap array of slab three-lists
	 * (full/partial/free).  Per memory node it provides three groups:
	 * one for the struct kmem_cache cache, one for the
	 * struct arraycache_init cache and one for the struct kmem_list3
	 * cache.  All of them are initialised for every node below.
	 */
	/* A single-node system can hold no remote-node ("alien") objects. */
	if (num_possible_nodes() == 1)
		use_alien_caches = 0;

	/* Initialise the three list heads of every bootstrap three-list. */
	for (i = 0; i < NUM_INIT_LISTS; i++) {
		kmem_list3_init(&initkmem_list3[i]);
		/*
		 * cache_cache holds the struct kmem_cache of every cache
		 * except itself (it is statically allocated).  Start all
		 * of its per-node list pointers empty.
		 */
		if (i < MAX_NUMNODES)
			cache_cache.nodelists[i] = NULL;
	}
	/*
	 * Point cache_cache's per-node lists at one group of bootstrap
	 * three-lists.  CACHE_CACHE is this cache's index in the kernel
	 * cache chain; the struct kmem_cache cache is the first cache the
	 * kernel creates, so CACHE_CACHE is 0.
	 */
	set_up_list3s(&cache_cache, CACHE_CACHE);

	/*
	 * Fragmentation resistance on low memory - only use bigger
	 * page orders on machines with more than 32MB of memory.
	 */
	/*
	 * slab_break_gfp_order caps how many pages a slab may span, to limit
	 * fragmentation.  E.g. for a 3360-byte object a one-page slab wastes
	 * 736 bytes and a two-page slab doubles that waste.  The cap may only
	 * be exceeded when an object is so large that not even one fits in a
	 * slab.  With more than 32MB of RAM the cap becomes
	 * BREAK_GFP_ORDER_HI (1, i.e. at most 2 pages per slab, exceeded
	 * only for objects larger than 8192 bytes); otherwise it stays at
	 * BREAK_GFP_ORDER_LO (0).
	 */
	if (totalram_pages > (32 << 20) >> PAGE_SHIFT)
		slab_break_gfp_order = BREAK_GFP_ORDER_HI;

	/* Bootstrap is tricky, because several objects are allocated
	 * from caches that do not exist yet:
	 * 1) initialize the cache_cache cache: it contains the struct
	 *    kmem_cache structures of all caches, except cache_cache itself:
	 *    cache_cache is statically allocated.
	 *    Initially an __init data area is used for the head array and the
	 *    kmem_list3 structures, it's replaced with a kmalloc allocated
	 *    array at the end of the bootstrap.
	 * 2) Create the first kmalloc cache.
	 *    The struct kmem_cache for the new cache is allocated normally.
	 *    An __init data area is used for the head array.
	 * 3) Create the remaining kmalloc caches, with minimally sized
	 *    head arrays.
	 * 4) Replace the __init data head arrays for cache_cache and the first
	 *    kmalloc cache with kmalloc allocated arrays.
	 * 5) Replace the __init data for kmem_list3 for cache_cache and
	 *    the other cache's with kmalloc allocated memory.
	 * 6) Resize the head arrays of the kmalloc caches to their final sizes.
	 */
	node = numa_node_id();

	/* 1) create the cache_cache */
	/*
	 * Step 1: set up the cache that holds struct kmem_cache objects,
	 * pointed at by the global cache_cache.  Only the data structures
	 * are initialised here; slabs are created lazily on first use.
	 */
	/* cache_chain heads the kernel's global list of slab caches. */
	INIT_LIST_HEAD(&cache_chain);
	/* Put cache_cache on that list. */
	list_add(&cache_cache.next, &cache_chain);
	/* Colouring offset unit is the L1 cache line size. */
	cache_cache.colour_off = cache_line_size();
	/*
	 * The per-cpu local cache cannot be kmalloc'ed yet either; borrow
	 * the static initarray_cache.
	 */
	cache_cache.array[smp_processor_id()] = &initarray_cache.cache;
	/* The per-node slab lists also come from the static bootstrap array. */
	cache_cache.nodelists[node] = &initkmem_list3[CACHE_CACHE + node];

	/*
	 * struct kmem_cache size depends on nr_node_ids, which
	 * can be less than MAX_NUMNODES.
	 */
	/*
	 * buffer_size is the object size of a cache; here it is the size of
	 * struct kmem_cache itself.  nodelists is the last member and
	 * nr_node_ids is the number of memory nodes (1 on UMA), so the size
	 * is the offset of nodelists plus one list pointer per node.
	 */
	cache_cache.buffer_size = offsetof(struct kmem_cache, nodelists) +
				 nr_node_ids * sizeof(struct kmem_list3 *);
#if DEBUG
	cache_cache.obj_size = cache_cache.buffer_size;
#endif
	/* Round the object size up to a whole number of cache lines. */
	cache_cache.buffer_size = ALIGN(cache_cache.buffer_size,
					cache_line_size());
	/* Reciprocal of the object size, used to derive object indices. */
	cache_cache.reciprocal_buffer_size =
		reciprocal_value(cache_cache.buffer_size);

	for (order = 0; order < MAX_ORDER; order++) {
		/* How many objects fit in a 2^order-page slab? */
		cache_estimate(order, cache_cache.buffer_size,
			cache_line_size(), 0, &left_over, &cache_cache.num);
		/* A non-zero count means this order works; stop searching. */
		if (cache_cache.num)
			break;
	}
	BUG_ON(!cache_cache.num);
	/* Each slab of this cache spans 2^gfporder pages. */
	cache_cache.gfporder = order;
	/* Number of distinct colour offsets, in units of colour_off. */
	cache_cache.colour = left_over / cache_cache.colour_off;
	/* Size of on-slab management data: struct slab + bufctl array. */
	cache_cache.slab_size = ALIGN(cache_cache.num * sizeof(kmem_bufctl_t) +
				      sizeof(struct slab), cache_line_size());

	/* 2+3) create the kmalloc caches */
	/*
	 * Step 2: create the general caches backing kmalloc().  Objects are
	 * binned by size: malloc_sizes holds the sizes, cache_names the
	 * matching cache names.
	 */
	sizes = malloc_sizes;
	names = cache_names;

	/*
	 * Initialize the caches that provide memory for the array cache and the
	 * kmem_list3 structures first. Without this, further allocations will
	 * bug.
	 */
	/*
	 * INDEX_AC is the kmalloc size-class index that fits a
	 * struct arraycache_init (used for local caches); create that
	 * general cache first since later bootstrap steps depend on it.
	 */
	sizes[INDEX_AC].cs_cachep = kmem_cache_create(names[INDEX_AC].name,
					sizes[INDEX_AC].cs_size,
					ARCH_KMALLOC_MINALIGN,
					ARCH_KMALLOC_FLAGS|SLAB_PANIC,
					NULL);

	/*
	 * If struct kmem_list3 falls into a different size class than
	 * struct arraycache_init, create its cache too; otherwise the two
	 * share one cache.
	 */
	if (INDEX_AC != INDEX_L3) {
		sizes[INDEX_L3].cs_cachep =
			kmem_cache_create(names[INDEX_L3].name,
				sizes[INDEX_L3].cs_size,
				ARCH_KMALLOC_MINALIGN,
				ARCH_KMALLOC_FLAGS|SLAB_PANIC,
				NULL);
	}

	/*
	 * With those two caches in place the slab early-init phase ends;
	 * before this point off-slab management structures were forbidden.
	 */
	slab_early_init = 0;

	/* Create the remaining kmalloc size classes. */
	while (sizes->cs_size != ULONG_MAX) {
		/*
		 * For performance, all the general caches are L1 aligned.
		 * This should be particularly beneficial on SMP boxes, as it
		 * eliminates "false sharing".
		 * Note for systems short on memory removing the alignment will
		 * allow tighter packing of the smaller caches.
		 */
		/*
		 * Skip the classes already created above (those serving
		 * struct kmem_list3 and struct arraycache_init).
		 */
		if (!sizes->cs_cachep) {
			sizes->cs_cachep = kmem_cache_create(names->name,
					sizes->cs_size,
					ARCH_KMALLOC_MINALIGN,
					ARCH_KMALLOC_FLAGS|SLAB_PANIC,
					NULL);
		}
#ifdef CONFIG_ZONE_DMA
		sizes->cs_dmacachep = kmem_cache_create(
					names->name_dma,
					sizes->cs_size,
					ARCH_KMALLOC_MINALIGN,
					ARCH_KMALLOC_FLAGS|SLAB_CACHE_DMA|
						SLAB_PANIC,
					NULL);
#endif
		sizes++;
		names++;
	}
	/* The kmalloc general caches are now fully usable. */

	/* 4) Replace the bootstrap head arrays */
	/*
	 * Step 4: replace the static local caches with kmalloc'ed ones.
	 * Two statics are in use so far: cache_cache's local cache points at
	 * initarray_cache.cache and malloc_sizes[INDEX_AC].cs_cachep's
	 * points at initarray_generic.cache -- see setup_cpu_cache().
	 */
	{
		struct array_cache *ptr;

		/* New local cache for cache_cache. */
		ptr = kmalloc(sizeof(struct arraycache_init), GFP_NOWAIT);

		BUG_ON(cpu_cache_get(&cache_cache) != &initarray_cache.cache);
		/* Copy the contents of the static initarray_cache over. */
		memcpy(ptr, cpu_cache_get(&cache_cache),
		       sizeof(struct arraycache_init));
		/*
		 * Do not assume that spinlocks can be initialized via memcpy:
		 */
		spin_lock_init(&ptr->lock);

		/* Switch cache_cache to the dynamic copy. */
		cache_cache.array[smp_processor_id()] = ptr;

		/* New local cache for malloc_sizes[INDEX_AC].cs_cachep. */
		ptr = kmalloc(sizeof(struct arraycache_init), GFP_NOWAIT);

		BUG_ON(cpu_cache_get(malloc_sizes[INDEX_AC].cs_cachep)
		       != &initarray_generic.cache);
		/* Copy the old local cache; its size is fixed at this stage. */
		memcpy(ptr, cpu_cache_get(malloc_sizes[INDEX_AC].cs_cachep),
		       sizeof(struct arraycache_init));
		/*
		 * Do not assume that spinlocks can be initialized via memcpy:
		 */
		spin_lock_init(&ptr->lock);

		malloc_sizes[INDEX_AC].cs_cachep->array[smp_processor_id()] =
		    ptr;
	}
	/* 5) Replace the bootstrap kmem_list3's */
	/* Step 5: likewise replace the static bootstrap three-lists. */
	{
		int nid;

		/* UMA systems have exactly one online node. */
		for_each_online_node(nid) {
			/* Migrate cache_cache's three-lists. */
			init_list(&cache_cache, &initkmem_list3[CACHE_CACHE + nid], nid);

			/* Migrate the arraycache_init cache's three-lists. */
			init_list(malloc_sizes[INDEX_AC].cs_cachep,
				  &initkmem_list3[SIZE_AC + nid], nid);

			/* Migrate the kmem_list3 cache's three-lists. */
			if (INDEX_AC != INDEX_L3) {
				init_list(malloc_sizes[INDEX_L3].cs_cachep,
					  &initkmem_list3[SIZE_L3 + nid], nid);
			}
		}
	}

	/* Record bootstrap progress: early init is complete. */
	g_cpucache_up = EARLY;
}
輔助操作
1,slab三鏈初始化
www.2cto.com
/*
 * Reset a per-node slab list trio (full/partial/free) to a pristine,
 * empty state: no slabs, no free objects, no shared or alien caches.
 */
static void kmem_list3_init(struct kmem_list3 *parent)
{
	spin_lock_init(&parent->list_lock);
	INIT_LIST_HEAD(&parent->slabs_full);
	INIT_LIST_HEAD(&parent->slabs_partial);
	INIT_LIST_HEAD(&parent->slabs_free);
	parent->free_objects = 0;
	parent->free_touched = 0;
	parent->colour_next = 0;
	parent->shared = NULL;
	parent->alien = NULL;
}
2,slab三鏈靜態數據初始化
www.2cto.com
/* Point a cache's per-node slab three-lists at the static bootstrap array. */
static void __init set_up_list3s(struct kmem_cache *cachep, int index)
{
	int node;

	/* UMA systems have exactly one online node. */
	for_each_online_node(node) {
		/* initkmem_list3 holds the bootstrap-only three-lists. */
		cachep->nodelists[node] = &initkmem_list3[index + node];
		/* Stagger reap deadlines so caches don't all expire at once. */
		cachep->nodelists[node]->next_reap = jiffies +
		    REAPTIMEOUT_LIST3 +
		    ((unsigned long)cachep) % REAPTIMEOUT_LIST3;
	}
}
3,計算每個slab中對象的數目
www.2cto.com
/*
 * Calculate the number of objects and left-over bytes for a given buffer size.
 */
/*
 * @gfporder: the slab spans 2^gfporder pages.
 * @buffer_size: size of one object.
 * @align: object alignment.
 * @flags: selects on-slab vs off-slab management (CFLGS_OFF_SLAB).
 * @left_over: out: wasted (colouring) bytes in the slab.
 * @num: out: number of objects per slab.
 */
static void cache_estimate(unsigned long gfporder, size_t buffer_size,
			   size_t align, int flags, size_t *left_over,
			   unsigned int *num)
{
	int nr_objs;
	size_t mgmt_size;
	/* Total slab size: 1 << gfporder pages. */
	size_t slab_size = PAGE_SIZE << gfporder;

	/*
	 * The slab management structure can be either off the slab or
	 * on it. For the latter case, the memory allocated for a
	 * slab is used for:
	 *
	 * - The struct slab
	 * - One kmem_bufctl_t for each object
	 * - Padding to respect alignment of @align
	 * - @buffer_size bytes for each object
	 *
	 * If the slab management structure is off the slab, then the
	 * alignment will already be calculated into the size. Because
	 * the slabs are all pages aligned, the objects will be at the
	 * correct alignment when allocated.
	 */
	if (flags & CFLGS_OFF_SLAB) {
		/* Off-slab: management lives elsewhere, so every byte of
		 * the slab pages stores objects. */
		mgmt_size = 0;
		nr_objs = slab_size / buffer_size;

		/* Clamp to the allocator-wide per-slab object limit. */
		if (nr_objs > SLAB_LIMIT)
			nr_objs = SLAB_LIMIT;
	} else {
		/*
		 * Ignore padding for the initial guess. The padding
		 * is at most @align-1 bytes, and @buffer_size is at
		 * least @align. In the worst case, this result will
		 * be one greater than the number of objects that fit
		 * into the memory allocation when taking the padding
		 * into account.
		 */
		/*
		 * On-slab: the pages hold one struct slab, a kmem_bufctl_t
		 * array (one entry per object) and then the objects.
		 */
		nr_objs = (slab_size - sizeof(struct slab)) /
			  (buffer_size + sizeof(kmem_bufctl_t));

		/*
		 * This calculated number will be either the right
		 * amount, or one greater than what we want.
		 */
		/* If the aligned management area overflows the slab, drop one
		 * object from the estimate. */
		if (slab_mgmt_size(nr_objs, align) + nr_objs*buffer_size
		       > slab_size)
			nr_objs--;

		if (nr_objs > SLAB_LIMIT)
			nr_objs = SLAB_LIMIT;

		/* Management area size, rounded up to @align. */
		mgmt_size = slab_mgmt_size(nr_objs, align);
	}
	*num = nr_objs;	/* objects per slab */
	/* Bytes left over, usable for cache colouring. */
	*left_over = slab_size - nr_objs*buffer_size - mgmt_size;
}
輔助數據結構與變量
Linux內核中將所有的通用cache以不同的大小存放在數組中,以方便查找。其中malloc_sizes[]數組為cache_sizes類型的數組,存放各個cache的大小;cache_names[]數組為cache_names結構類型數組,存放各個cache大小的名稱;malloc_sizes[]數組和cache_names[]數組下標對應,也就是說cache_names[i]名稱的cache對應的大小為malloc_sizes[i]。
www.2cto.com
/* Size description struct for general caches. */
struct cache_sizes {
	size_t cs_size;			/* object size of this kmalloc class */
	struct kmem_cache *cs_cachep;	/* cache serving normal allocations */
#ifdef CONFIG_ZONE_DMA
	struct kmem_cache *cs_dmacachep; /* cache serving GFP_DMA allocations */
#endif
};
/*
 * These are the default caches for kmalloc. Custom caches can have other sizes.
 */
/* One entry per size listed in <linux/kmalloc_sizes.h>, ULONG_MAX-terminated. */
struct cache_sizes malloc_sizes[] = {
#define CACHE(x) { .cs_size = (x) },
#include <linux/kmalloc_sizes.h>
	CACHE(ULONG_MAX)
#undef CACHE
};
www.2cto.com
/* Must match cache_sizes above. Out of line to keep cache footprint low. */
struct cache_names {
char *name;
char *name_dma;
};
/* Names generated from the same size list; indices match malloc_sizes[]. */
static struct cache_names __initdata cache_names[] = {
#define CACHE(x) { .name = "size-" #x, .name_dma = "size-" #x "(DMA)" },
#include <linux/kmalloc_sizes.h>
	{NULL,}
#undef CACHE
};
二、內核啟動末期初始化
1,根據對象大小計算local cache中對象數目上限;
2,借助數據結構ccupdate_struct操作cpu本地cache。為每個在線cpu分配cpu本地cache;
3,用新分配的cpu本地cache替換原有的cache;
4,更新slab三鏈以及cpu本地共享cache。
第二階段代碼分析
Start_kernel()->kmem_cache_init_late()
www.2cto.com
/*
 * Second phase of slab initialisation: the basics were set up in
 * kmem_cache_init(); the remaining configuration waits until the rest
 * of the kernel is far enough along.
 */
void __init kmem_cache_init_late(void)
{
	struct kmem_cache *cachep;

	/* 6) resize the head arrays to their final sizes */
	/* Bootstrap local caches had a fixed size; recompute each cache's
	 * local-cache size from its object size. */
	mutex_lock(&cache_chain_mutex);
	list_for_each_entry(cachep, &cache_chain, next)
		if (enable_cpucache(cachep, GFP_NOWAIT))
			BUG();
	mutex_unlock(&cache_chain_mutex);

	/* Done! */
	/* All general caches are now fully operational. */
	g_cpucache_up = FULL;

	/* Annotate slab for lockdep -- annotate the malloc caches */
	init_lock_keys();

	/*
	 * Register a cpu startup notifier callback that initializes
	 * cpu_cache_get for all new cpus
	 */
	/* Newly onlined cpus get their local caches configured here. */
	register_cpu_notifier(&cpucache_notifier);

	/*
	 * The reap timers are started later, with a module init call: That part
	 * of the kernel is not yet operational.
	 */
}
www.2cto.com
/* Called with cache_chain_mutex held always */
/* Size and install the per-cpu local caches of @cachep. */
static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp)
{
	int err;
	int limit, shared;

	/*
	 * The head array serves three purposes:
	 * - create a LIFO ordering, i.e. return objects that are cache-warm
	 * - reduce the number of spinlock operations.
	 * - reduce the number of linked list operations on the slab and
	 *   bufctl chains: array operations are cheaper.
	 * The numbers are guessed, we should auto-tune as described by
	 * Bonwick.
	 */
	/* Pick the local-cache capacity from the object size. */
	if (cachep->buffer_size > 131072)
		limit = 1;
	else if (cachep->buffer_size > PAGE_SIZE)
		limit = 8;
	else if (cachep->buffer_size > 1024)
		limit = 24;
	else if (cachep->buffer_size > 256)
		limit = 54;
	else
		limit = 120;

	/*
	 * CPU bound tasks (e.g. network routing) can exhibit cpu bound
	 * allocation behaviour: Most allocs on one cpu, most free operations
	 * on another cpu. For these cases, an efficient object passing between
	 * cpus is necessary. This is provided by a shared array. The array
	 * replaces Bonwick's magazine layer.
	 * On uniprocessor, it's functionally equivalent (but less efficient)
	 * to a larger limit. Thus disabled by default.
	 */
	shared = 0;
	/* On SMP with small objects, enable the shared local cache. */
	if (cachep->buffer_size <= PAGE_SIZE && num_possible_cpus() > 1)
		shared = 8;

#if DEBUG
	/*
	 * With debugging enabled, large batchcount lead to excessively long
	 * periods with disabled local interrupts. Limit the batchcount
	 */
	if (limit > 32)
		limit = 32;
#endif
	/* Apply the tuning; batchcount is half the limit, rounded up. */
	err = do_tune_cpucache(cachep, limit, (limit + 1) / 2, shared, gfp);
	if (err)
		printk(KERN_ERR "enable_cpucache failed for %s, error %d.\n",
		       cachep->name, -err);
	return err;
}
www.2cto.com
/* Always called with the cache_chain_mutex held */
/* Install new per-cpu local caches, then refresh the shared caches and
 * slab three-lists of @cachep. */
static int do_tune_cpucache(struct kmem_cache *cachep, int limit,
				int batchcount, int shared, gfp_t gfp)
{
	struct ccupdate_struct *new;
	int i;

	new = kzalloc(sizeof(*new), gfp);
	if (!new)
		return -ENOMEM;

	/* Allocate a fresh struct array_cache for every online cpu. */
	for_each_online_cpu(i) {
		new->new[i] = alloc_arraycache(cpu_to_node(i), limit,
						batchcount, gfp);
		if (!new->new[i]) {
			/* Unwind the cpus already provisioned. */
			for (i--; i >= 0; i--)
				kfree(new->new[i]);
			kfree(new);
			return -ENOMEM;
		}
	}
	new->cachep = cachep;

	/*
	 * Swap old for new local caches on every cpu.  On cpu-hotplug
	 * systems an offline cpu may still hold a stale local cache (it is
	 * not freed on cache destruction while the cpu is down, see
	 * __kmem_cache_destroy), and the cpu-up reconfiguration cannot fix
	 * caches whose kmem_cache object was meanwhile freed.  Scenario:
	 * cpus A and B exist; B goes down; cache X is destroyed, leaking
	 * B's local cache; B comes back up and refreshes every cache on
	 * cache_chain -- but X's object is already back in cache_cache, so
	 * its cpu-B slot stays stale.  When that object is later reused for
	 * a new cache, the stale cpu-B array must be replaced, which this
	 * swap accomplishes.
	 */
	on_each_cpu(do_ccupdate_local, (void *)new, 1);

	check_irq_on();
	cachep->batchcount = batchcount;
	cachep->limit = limit;
	cachep->shared = shared;

	/* Retire the old local caches. */
	for_each_online_cpu(i) {
		struct array_cache *ccold = new->new[i];
		if (!ccold)
			continue;
		spin_lock_irq(&cachep->nodelists[cpu_to_node(i)]->list_lock);
		/* Return the objects still cached there to the slab lists. */
		free_block(cachep, ccold->entry, ccold->avail, cpu_to_node(i));
		spin_unlock_irq(&cachep->nodelists[cpu_to_node(i)]->list_lock);
		/* Then free the old array_cache structure itself. */
		kfree(ccold);
	}
	kfree(new);
	/* Finally (re)build the shared local caches and slab three-lists. */
	return alloc_kmemlist(cachep, gfp);
}
更新本地cache
www.2cto.com
/* Runs on each cpu (irqs off): swap in that cpu's new array_cache. */
static void do_ccupdate_local(void *info)
{
	struct ccupdate_struct *new = info;
	struct array_cache *old;

	check_irq_off();
	old = cpu_cache_get(new->cachep);

	/* Point this cpu's slot at its new array_cache... */
	new->cachep->array[smp_processor_id()] = new->new[smp_processor_id()];
	/* ...and hand the old one back to the caller for freeing. */
	new->new[smp_processor_id()] = old;
}
www.2cto.com
/*
 * Set up (or refresh) the shared local cache, alien caches and slab
 * three-lists for every online node of @cachep.  Freshly created lists
 * contain no slabs.
 */
static int alloc_kmemlist(struct kmem_cache *cachep, gfp_t gfp)
{
	int node;
	struct kmem_list3 *l3;
	struct array_cache *new_shared;
	struct array_cache **new_alien = NULL;

	for_each_online_node(node) {
		/* NUMA only: caches holding objects of remote nodes. */
		if (use_alien_caches) {
			new_alien = alloc_alien_cache(node, cachep->limit, gfp);
			if (!new_alien)
				goto fail;
		}

		new_shared = NULL;
		if (cachep->shared) {
			/* Allocate the per-node shared local cache. */
			new_shared = alloc_arraycache(node,
				cachep->shared*cachep->batchcount,
					0xbaadf00d, gfp);
			if (!new_shared) {
				free_alien_cache(new_alien);
				goto fail;
			}
		}

		/* Grab the node's existing three-list set, if any. */
		l3 = cachep->nodelists[node];
		if (l3) {
			/* Old lists exist: release old resources first. */
			struct array_cache *shared = l3->shared;

			spin_lock_irq(&l3->list_lock);

			/* Drain the objects of the old shared local cache. */
			if (shared)
				free_block(cachep, shared->entry,
						shared->avail, node);

			/* Install the new shared local cache. */
			l3->shared = new_shared;
			if (!l3->alien) {
				l3->alien = new_alien;
				new_alien = NULL;
			}
			/* Upper bound on free objects kept on this node. */
			l3->free_limit = (1 + nr_cpus_node(node)) *
					cachep->batchcount + cachep->num;
			spin_unlock_irq(&l3->list_lock);
			/* Free the old shared array_cache structure. */
			kfree(shared);
			free_alien_cache(new_alien);
			continue;	/* next node */
		}
		/* No existing lists: allocate a fresh three-list set. */
		l3 = kmalloc_node(sizeof(struct kmem_list3), gfp, node);
		if (!l3) {
			free_alien_cache(new_alien);
			kfree(new_shared);
			goto fail;
		}

		/* Initialise the new three-lists. */
		kmem_list3_init(l3);
		l3->next_reap = jiffies + REAPTIMEOUT_LIST3 +
				((unsigned long)cachep) % REAPTIMEOUT_LIST3;
		l3->shared = new_shared;
		l3->alien = new_alien;
		l3->free_limit = (1 + nr_cpus_node(node)) *
					cachep->batchcount + cachep->num;
		cachep->nodelists[node] = l3;
	}
	return 0;

fail:
	if (!cachep->next.next) {
		/* Cache is not active yet. Roll back what we did */
		node--;
		while (node >= 0) {
			if (cachep->nodelists[node]) {
				l3 = cachep->nodelists[node];

				kfree(l3->shared);
				free_alien_cache(l3->alien);
				kfree(l3);
				cachep->nodelists[node] = NULL;
			}
			node--;
		}
	}
	return -ENOMEM;
}
看一個輔助函數
/* Allocate and initialise a struct array_cache (local/shared cache). */
static struct array_cache *alloc_arraycache(int node, int entries,
					    int batchcount, gfp_t gfp)
{
	/* The entry[] pointer array directly follows the struct; allocate
	 * both in one block. */
	int memsize = sizeof(void *) * entries + sizeof(struct array_cache);
	struct array_cache *nc = NULL;

	/* kmalloc_node draws from the general caches on the given node. */
	nc = kmalloc_node(memsize, gfp, node);
	/*
	 * The array_cache structures contain pointers to free object.
	 * However, when such objects are allocated or transfered to another
	 * cache the pointers are not cleared and they could be counted as
	 * valid references during a kmemleak scan. Therefore, kmemleak must
	 * not scan such objects.
	 */
	kmemleak_no_scan(nc);
	/* Start the cache empty with the requested tuning parameters. */
	if (nc) {
		nc->avail = 0;
		nc->limit = entries;
		nc->batchcount = batchcount;
		nc->touched = 0;
		spin_lock_init(&nc->lock);
	}
	return nc;
}
源代碼中涉及了slab的分配、釋放等操作在後面分析中陸續總結。slab相關數據結構、工作機制以及整體框架在分析完了slab的創建、釋放工作後再做總結,這樣可能會對slab機制有更好的了解。當然,從代碼中看運行機制會更有說服了,也是一種習慣
摘自 bullbat的專欄