Linux內核中創建slab主要由函數cache_grow()實現,從slab的創建中我們可以完整地看到slab與對象、頁面的組織方式。
www.2cto.com
/*
 * Grow (by 1) the number of slabs within a cache. This is called by
 * kmem_cache_alloc() when there are no active objs left in a cache.
 */
/*
 * Build one empty slab out of one or more pages and append it to the
 * free-slab list of node 'nodeid'.
 *
 * cachep: cache that receives the new slab.
 * flags:  caller's allocation flags; only the constraint and reclaim bits
 *         are kept in local_flags.
 * nodeid: index into cachep->nodelists[] selecting the per-node lists.
 * objp:   virtual address of the pages to use. NULL means no pages have
 *         been allocated yet (they are obtained via kmem_getpages());
 *         non-NULL means pages already exist and are used directly.
 *
 * Returns 1 on success, 0 if either the pages or the slab management
 * object could not be obtained.
 *
 * Interrupts are re-enabled around the allocation work when __GFP_WAIT is
 * set, and disabled again before returning on every path.
 * NOTE(review): check_irq_off() presumably asserts that the caller entered
 * with interrupts disabled - confirm against its definition.
 */
static int cache_grow(struct kmem_cache *cachep,
gfp_t flags, int nodeid, void *objp)
{
struct slab *slabp;
size_t offset;
gfp_t local_flags;
struct kmem_list3 *l3;
/*
 * Be lazy and only check for valid flags here, keeping it out of the
 * critical path in kmem_cache_alloc().
 */
BUG_ON(flags & GFP_SLAB_BUG_MASK);
local_flags = flags & (GFP_CONSTRAINT_MASK|GFP_RECLAIM_MASK);
/* Take the l3 list lock to change the colour_next on this node */
check_irq_off();
/* Fetch the three slab lists (kmem_list3) of this memory node. */
l3 = cachep->nodelists[nodeid];
spin_lock(&l3->list_lock);
/* Get colour for the slab, and cal the next value. */
/* Colour index (in colouring units) used by this slab. */
offset = l3->colour_next;
/* Advance the colour so that consecutive slabs get different offsets. */
l3->colour_next++;
/*
 * The colour index must stay below cachep->colour; wrap to 0 once the
 * limit is reached. When a slab wastes little space the number of
 * colours is small, so the cycle repeats quickly.
 */
if (l3->colour_next >= cachep->colour)
l3->colour_next = 0;
spin_unlock(&l3->list_lock);
/* Convert the colour index into a byte offset. */
offset *= cachep->colour_off;
if (local_flags & __GFP_WAIT)
local_irq_enable();
/*
 * The test for missing atomic flag is performed here, rather than
 * the more obvious place, simply to reduce the critical path length
 * in kmem_cache_alloc(). If a caller is seriously mis-behaving they
 * will eventually be caught here (where it matters).
 */
kmem_flagcheck(cachep, flags);
/*
 * Get mem for the objs. Attempt to allocate a physical page from
 * 'nodeid'.
 */
if (!objp)/* No pages yet: allocate 1 << cachep->gfporder pages from this
node; objp becomes the virtual address of the slab's first page. */
objp = kmem_getpages(cachep, local_flags, nodeid);
if (!objp)
goto failed;
/* Get slab management. */
/* Allocate and initialise the slab management object. */
slabp = alloc_slabmgmt(cachep, objp, offset,
local_flags & ~GFP_CONSTRAINT_MASK, nodeid);
if (!slabp)
goto opps1;
/* Record the page -> cache and page -> slab mappings. */
slab_map_pages(cachep, slabp, objp);
/* Initialise every object in the slab. */
cache_init_objs(cachep, slabp);
if (local_flags & __GFP_WAIT)
local_irq_disable();
check_irq_off();
spin_lock(&l3->list_lock);
/* Make slab active. */
list_add_tail(&slabp->list, &(l3->slabs_free));
/* Bump this cache's "grown" statistic. */
STATS_INC_GROWN(cachep);
/* Account for the new slab's objects in the node's free-object count. */
l3->free_objects += cachep->num;
spin_unlock(&l3->list_lock);
return 1;
opps1:
/* Slab management allocation failed: hand the pages back. */
kmem_freepages(cachep, objp);
failed:
/* Restore the interrupts-off state if it was lifted above. */
if (local_flags & __GFP_WAIT)
local_irq_disable();
return 0;
}
執行流程:
1,從cache結構中獲得並計算著色區偏移量;
2,從伙伴系統中獲得1<<cachep->gfporder個頁面用於slab;
3,初始化slab中相關變量,如果是外置式slab需要重新申請slab管理區的空間,由函數alloc_slabmgmt()實現。
www.2cto.com
/*
 * Allocate and initialise the slab management object (struct slab) for a
 * slab whose pages start at 'objp'.
 *
 * cachep:      cache the slab belongs to.
 * objp:        virtual address of the slab's first page.
 * colour_off:  colouring offset in bytes for this slab.
 * local_flags: allocation flags used if the management object has to be
 *              allocated from cachep->slabp_cache.
 * nodeid:      memory node id, stored in the slab and used for the
 *              off-slab allocation.
 *
 * Returns the initialised slab descriptor, or NULL when the off-slab
 * management object cannot be allocated. The on-slab path cannot fail.
 */
static struct slab *alloc_slabmgmt(struct kmem_cache *cachep, void *objp,
				   int colour_off, gfp_t local_flags,
				   int nodeid)
{
	struct slab *slabp;

	if (OFF_SLAB(cachep)) {
		/*
		 * Slab management obj is off-slab: take it from the general
		 * cache that cachep->slabp_cache points to. During slab
		 * bootstrap that general cache may not exist yet
		 * (slabp_cache is NULL), which is why slabs created that
		 * early keep their management data on-slab.
		 */
		slabp = kmem_cache_alloc_node(cachep->slabp_cache,
					      local_flags, nodeid);
		/*
		 * Bail out before the pointer is handed to anything else;
		 * a failed allocation must not be registered with kmemleak.
		 */
		if (!slabp)
			return NULL;
		/*
		 * If the first object in the slab is leaked (it's allocated
		 * but no one has a reference to it), we want to make sure
		 * kmemleak does not treat the ->s_mem pointer as a reference
		 * to the object. Otherwise we will not report the leak.
		 * Only the list member is therefore registered for scanning.
		 */
		kmemleak_scan_area(slabp, offsetof(struct slab, list),
				   sizeof(struct list_head), local_flags);
	} else {
		/*
		 * On-slab: the management object sits in the slab's own
		 * pages, right after the colouring area.
		 */
		slabp = objp + colour_off;
		/*
		 * The first object follows the management area; slab_size
		 * covers struct slab plus the kmem_bufctl_t array.
		 */
		colour_off += cachep->slab_size;
	}

	/* No object has been handed out yet. */
	slabp->inuse = 0;
	/*
	 * Offset of the first object from the start of the slab. For an
	 * on-slab layout this includes both the colouring area and the
	 * management area; for off-slab it is the colouring area only.
	 */
	slabp->colouroff = colour_off;
	/* Virtual address of the first object. */
	slabp->s_mem = objp + colour_off;
	/* Memory node id. */
	slabp->nodeid = nodeid;
	/* First free object is index 0, i.e. element 0 of the bufctl array. */
	slabp->free = 0;
	return slabp;
}
通過初始化,我們畫出下面圖像。
4,設置slab中頁面(1<<cachep->gfporder個)到slab、cache的映射。這樣,可以通過page的lru鏈表找到page所屬的slab和cache。slab_map_pages()實現
www.2cto.com
/*
 * Tag every page of a slab with its owning cache and slab so that both can
 * be looked up again from a struct page.
 *
 * cache: cache that owns the slab.
 * slab:  slab management object for these pages.
 * addr:  virtual address of the slab's first page.
 */
static void slab_map_pages(struct kmem_cache *cache, struct slab *slab,
			   void *addr)
{
	/* Start at the struct page describing the slab's first page. */
	struct page *pg = virt_to_page(addr);
	/*
	 * A compound page is handled as a single entry; otherwise the slab
	 * spans 1 << gfporder individual pages.
	 */
	int remaining = likely(!PageCompound(pg)) ? 1 << cache->gfporder : 1;

	do {
		/*
		 * The lru member of struct page is reused while the page
		 * belongs to a slab: per the original commentary, lru.next
		 * records the cache and lru.prev records the slab.
		 */
		page_set_cache(pg, cache);
		page_set_slab(pg, slab);
		pg++;
		remaining--;
	} while (remaining != 0);
}
代碼實現結果如下圖
5,初始化slab中kmem_bufctl_t[]數組,其中kmem_bufctl_t[]數組為一個靜態鏈表,指定了slab對象(obj)的訪問順序。即kmem_bufctl_t[]中存放的是下一個訪問的obj的索引。在後面分析中slab_get_obj()函數從slab中提取一個空閒對象,它通過index_to_obj()函數由空閒對象的索引(slabp->free)得到該對象的地址,然後通過slab_bufctl(slabp)[slabp->free]獲得下一個空閒對象的索引並用它更新靜態鏈表。
www.2cto.com
/*
 * Initialise every object of a freshly created slab and chain all of them
 * together as free through the slab's kmem_bufctl_t array.
 *
 * cachep: cache the slab belongs to; supplies the object count (num), the
 *         flags and the optional constructor.
 * slabp:  slab whose objects and bufctl array are initialised.
 *
 * NOTE(review): the final store uses index i - 1, so this relies on
 * cachep->num being at least 1 - confirm that callers guarantee it.
 */
static void cache_init_objs(struct kmem_cache *cachep,
struct slab *slabp)
{
int i;
/* Walk the objects of the slab one by one. */
for (i = 0; i < cachep->num; i++) {
/* Pointer to the i-th object of the slab. */
void *objp = index_to_obj(cachep, slabp, i);
#if DEBUG
/* need to poison the objs? */
if (cachep->flags & SLAB_POISON)
poison_obj(cachep, objp, POISON_FREE);
if (cachep->flags & SLAB_STORE_USER)
*dbg_userword(cachep, objp) = NULL;
/* Arm both red zones before the constructor runs. */
if (cachep->flags & SLAB_RED_ZONE) {
*dbg_redzone1(cachep, objp) = RED_INACTIVE;
*dbg_redzone2(cachep, objp) = RED_INACTIVE;
}
/*
 * Constructors are not allowed to allocate memory from the same
 * cache which they are a constructor for. Otherwise, deadlock.
 * They must also be threaded.
 */
if (cachep->ctor && !(cachep->flags & SLAB_POISON))
cachep->ctor(objp + obj_offset(cachep));
/* Verify the constructor did not write outside the object. */
if (cachep->flags & SLAB_RED_ZONE) {
if (*dbg_redzone2(cachep, objp) != RED_INACTIVE)
slab_error(cachep, "constructor overwrote the"
" end of an object");
if (*dbg_redzone1(cachep, objp) != RED_INACTIVE)
slab_error(cachep, "constructor overwrote the"
" start of an object");
}
/*
 * NOTE(review): for page-multiple, off-slab, poisoned objects the pages
 * are passed to kernel_map_pages() with a last argument of 0 -
 * presumably unmapping them so stray accesses fault; confirm.
 */
if ((cachep->buffer_size % PAGE_SIZE) == 0 &&
OFF_SLAB(cachep) && cachep->flags & SLAB_POISON)
kernel_map_pages(virt_to_page(objp),
cachep->buffer_size / PAGE_SIZE, 0);
#else
/* Run the object's constructor, if the cache has one. */
if (cachep->ctor)
cachep->ctor(objp);
#endif /* All objects start out free, so chain them in array order. */
/* Entry i holds the index of the next free object. */
slab_bufctl(slabp)[i] = i + 1;
}
/* The last entry terminates the chain with BUFCTL_END. */
slab_bufctl(slabp)[i - 1] = BUFCTL_END;
}