1,mbuf就是存儲要發送數據的memory buffer,類似於Linux的sk_buff,不過結構比較簡單。
/* header at beginning of each mbuf: */
這個結構用來描述mbuf跟具體的內容無關
/*
 * Fixed header found at the start of every mbuf.  It describes the mbuf
 * itself (chain links, data length and location, type, flags) and says
 * nothing about what the payload contains.
 */
struct m_hdr {
	/* next buffer in the same chain */
	struct mbuf *mh_next;
	/* next chain in the queue/record */
	struct mbuf *mh_nextpkt;
	/* number of data bytes held by this mbuf */
	int mh_len;
	/* where this mbuf's data starts */
	caddr_t mh_data;
	/* kind of data stored in this mbuf */
	short mh_type;
	/* flag bits for this mbuf */
	short mh_flags;
};
/* record/packet header in first mbuf of chain; valid if M_PKTHDR set */
對mbuf中數據的描述,len和接收接口
/*
 * Packet (record) header stored in the first mbuf of a chain.  It is only
 * meaningful when that mbuf has M_PKTHDR set.
 */
struct pkthdr {
	/* total length of the packet */
	int len;
	/* interface the packet was received on */
	struct ifnet *rcvif;
};
/* description of external storage mapped into mbuf, valid if M_EXT set */
/*
 * Descriptor for external storage mapped into an mbuf.  Only valid when
 * the mbuf has M_EXT set.
 */
struct m_ext {
	/* start of the external buffer */
	caddr_t ext_buf;
	/* routine used to free the buffer when the usual one does not apply */
	void (*ext_free)();
	/* size of the buffer, for use by ext_free */
	u_int ext_size;
};
這個就是mbuf的描述,設計的比較巧妙
/*
 * The mbuf itself: the fixed m_hdr followed by a data area (M_dat) whose
 * layout depends on whether M_PKTHDR and/or M_EXT are set.
 */
struct mbuf {
	struct m_hdr m_hdr;
	union {
		/* layout used when M_PKTHDR is set */
		struct {
			struct pkthdr MH_pkthdr;
			union {
				/* M_EXT set: descriptor of the external storage */
				struct m_ext MH_ext;
				/* otherwise: data kept inline after the packet header */
				char MH_databuf[MHLEN];
			} MH_dat;
		} MH;
		/* neither M_PKTHDR nor M_EXT: the whole area is inline data */
		char M_databuf[MLEN];
	} M_dat;
};
2,mbuf中m_hdr.mh_flags:
/* mbuf flags */
一個mbuf的大小是128字節,猜測是一個cacheline的大小。如果數據比較多,就需要多個mbuf連起來或者用一個叫cluster的東西來存儲數據。M_EXT就是這個標志
#define M_EXT		0x0001	/* has associated external storage */
/* Marks the first mbuf of a packet; its data area starts with a pkthdr. */
#define M_PKTHDR	0x0002	/* start of record */
/* Marks the end of a record; TCP is a byte stream and does not set it. */
#define M_EOR		0x0004	/* end of record */

/* mbuf pkthdr flags, also in m_flags */
#define M_BCAST		0x0100	/* send/received as link-level broadcast */
#define M_MCAST		0x0200	/* send/received as link-level multicast */

/*
 * flags copied when copying m_pkthdr
 * (Study note: what this mask is used for in practice was still unclear
 * to the note's author.)
 */
#define M_COPYFLAGS	(M_PKTHDR|M_EOR|M_BCAST|M_MCAST)
3,mbuf的種類
/* mbuf types */
/* Note: value 9 is not assigned in this list. */
#define MT_FREE		0	/* should be on free list */
#define MT_DATA		1	/* dynamic (data) allocation; ordinary data uses this type */
#define MT_HEADER	2	/* packet header */
#define MT_SOCKET	3	/* socket structure */
#define MT_PCB		4	/* protocol control block */
#define MT_RTABLE	5	/* routing tables */
#define MT_HTABLE	6	/* IMP host tables */
#define MT_ATABLE	7	/* address resolution tables */
#define MT_SONAME	8	/* socket name */
#define MT_SOOPTS	10	/* socket options */
#define MT_FTABLE	11	/* fragment reassembly header */
#define MT_RIGHTS	12	/* access rights */
#define MT_IFADDR	13	/* interface address */
#define MT_CONTROL	14	/* extra-data protocol message */
#define MT_OOBDATA	15	/* expedited data */
4,mbuf相關函數
4.1mbuf的分配
/*
* mbuf allocation/deallocation macros:
*
* MGET(struct mbuf *m, int how, int type)
* allocates an mbuf and initializes it to contain internal data.
*
* MGETHDR(struct mbuf *m, int how, int type)
* allocates an mbuf and initializes it to contain a packet header
* and internal data.
*/
/*
 * MGET(m, how, type): allocate one mbuf into m and initialise it to hold
 * internal data.
 *
 * Annotations carried over from the study notes:
 *  - mbtypes[type] translates the mbuf type into the type MALLOC expects
 *    (M_MBUF, M_SOCKET, ...).
 *  - MBUFLOCK runs the statistics update at raised processor priority so
 *    that network interrupt processing cannot interleave with it; it
 *    protects the shared counters.
 *  - m_dat is shorthand ("#define m_dat M_dat.M_databuf"): the inline data
 *    buffer, which shares the M_dat union with the pkthdr/m_ext layout.
 *  - If MALLOC fails, m_retry() is asked to attempt the allocation again
 *    (where that memory can come from is covered with m_retry()).
 *
 * MGET has to stay a macro: m_retry() re-expands it after temporarily
 * redefining m_retry.
 */
#define MGET(m, how, type) { \
	MALLOC((m), struct mbuf *, MSIZE, mbtypes[type], (how)); \
	if (m) { \
		(m)->m_type = (type); \
		MBUFLOCK(mbstat.m_mtypes[type]++;) \
		(m)->m_next = (struct mbuf *)NULL; \
		(m)->m_nextpkt = (struct mbuf *)NULL; \
		(m)->m_data = (m)->m_dat; \
		(m)->m_flags = 0; \
	} else \
		(m) = m_retry((how), (type)); \
}
/*
* When MGET fails, ask protocols to free space when short of memory,
* then re-attempt to allocate an mbuf.
*/
struct mbuf *
m_retry(i, t)
	int i, t;	/* the "how" and "type" arguments MGET was invoked with */
{
	register struct mbuf *m;

	/* Have the protocols release memory through their registered routines. */
	m_reclaim();
	/*
	 * MGET is a macro, not a function, so the m_retry it mentions is
	 * resolved by the preprocessor at the point of expansion.  With
	 * m_retry temporarily defined as a null pointer, a second allocation
	 * failure in the MGET below yields a null pointer instead of calling
	 * this function again.  Expansions of MGET elsewhere are unaffected
	 * and still call the real m_retry().
	 */
#define m_retry(i, t) (struct mbuf *)0
	MGET(m, i, t);
#undef m_retry
	return (m);
}
這個函數循環調用各協議的drain函數來釋放內存
/*
 * m_reclaim: walk every protocol of every domain and call its drain
 * routine, when it has one, so the protocol can free space.  Counts each
 * invocation in mbstat.m_drain.
 *
 * No return type is written (K&R implicit int) and no value is returned.
 */
m_reclaim()
{
	register struct domain *dp;
	register struct protosw *pr;
	/* Raise the processor priority so network processing cannot interrupt. */
	int s = splimp();

	for (dp = domains; dp; dp = dp->dom_next)
		for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++)
			if (pr->pr_drain)
				(*pr->pr_drain)();
	/* Restore the processor priority saved in s. */
	splx(s);
	mbstat.m_drain++;
}
分配一個分組頭部的mbuf,對m_data和m_flags進行初始化
/*
 * MGETHDR(m, how, type): allocate an mbuf that begins a packet.
 * On success the mbuf is typed, counted in mbstat, unlinked (m_next and
 * m_nextpkt null), its m_data is pointed at m_pktdat and its m_flags is
 * set to M_PKTHDR.  A failed MALLOC is handed to m_retryhdr().
 */
#define MGETHDR(m, how, type) { \
	MALLOC((m), struct mbuf *, MSIZE, mbtypes[type], (how)); \
	if ((m) == (struct mbuf *)NULL) \
		(m) = m_retryhdr((how), (type)); \
	else { \
		(m)->m_type = (type); \
		MBUFLOCK(mbstat.m_mtypes[type]++;) \
		(m)->m_next = (struct mbuf *)NULL; \
		(m)->m_nextpkt = (struct mbuf *)NULL; \
		(m)->m_data = (m)->m_pktdat; \
		(m)->m_flags = M_PKTHDR; \
	} \
}
587 /*
588 * Routine to copy from device local memory into mbufs.
589 */
590 struct mbuf *
591 m_devget(buf, totlen, off0, ifp, copy)
592 char *buf;
593 int totlen, off0;
594 struct ifnet *ifp;
595 void (*copy)();
這個函數是對MGET和MGETHDR的封裝,一般由設備驅動程序調用,分配mbuf空間。
1,如果數據長度 <= 84,則在數據(IP數據包)的前面保留16個字節,為輸出時添加14字節的MAC包頭做准備。(一個包含pkthdr的mbuf最多放100字節的數據)
2,如果數據長度 >= 85 且 <= 100,則不額外保留這16字節的空間
3,如果數據長度 > 100,則分配一個cluster進行數據的存放。
可見m_devget根據數據的長度,分配合適的mbuf
4.2mbuf到mbuf中data的轉換
定義了兩個宏
/*
 * mtod(m,t) - convert mbuf pointer to data pointer of correct type
 * dtom(x)   - convert data pointer within mbuf to mbuf pointer (XXX)
 *
 * dtom() simply clears the low bits of the address, so it is only correct
 * when every mbuf is aligned on an MSIZE boundary (MSIZE == 128 in these
 * notes) and x points into the mbuf itself.  The address is converted
 * through unsigned long rather than int so that it is not truncated on
 * machines whose pointers are wider than int.
 */
#define mtod(m,t)	((t)((m)->m_data))
#define dtom(x)		((struct mbuf *)((unsigned long)(x) & ~(unsigned long)(MSIZE-1)))
dtom對存放在cluster中的數據有問題,不能正常轉換到mbuf,所以需要下面的函數
/*
* Rearrange an mbuf chain so that len bytes are contiguous
* and in the data area of an mbuf (so that mtod and dtom
* will work for a structure of size len). Returns the resulting
* mbuf chain on success, frees it and returns null on failure.
* If there is room, it will add up to max_protohdr-len extra bytes to the
* contiguous region in an attempt to avoid being called next time.
*/
這個函數從mbuf鏈表中取出len字節的數據放在第一個mbuf中,使dtom能正確運行
/*
 * m_pullup: make the first len bytes of chain n contiguous inside the
 * internal data area of a single leading mbuf.
 *
 * Returns the (possibly new) head of the chain on success.  On failure
 * the chain is freed, MPFail is incremented and 0 is returned.
 */
struct mbuf *
m_pullup(n, len)
	register struct mbuf *n;
	int len;
{
	register struct mbuf *dst;	/* mbuf that collects the contiguous bytes */
	register int ncopy;		/* bytes moved in the current step */
	int room;			/* free bytes left at the tail of dst */

	/*
	 * Reuse the first mbuf when it keeps its data internally (no M_EXT),
	 * len bytes starting at m_data end before the end of m_dat, and there
	 * is a following mbuf to pull from.  Otherwise a fresh mbuf is put in
	 * front of the chain.
	 */
	if ((n->m_flags & M_EXT) == 0 &&
	    n->m_data + len < &n->m_dat[MLEN] && n->m_next) {
		/* Already contiguous: nothing to do. */
		if (n->m_len >= len)
			return (n);
		dst = n;
		n = n->m_next;
		len -= dst->m_len;
	} else {
		/* More than MHLEN bytes are never pulled into a new mbuf. */
		if (len > MHLEN)
			goto bad;
		MGET(dst, M_DONTWAIT, n->m_type);
		if (dst == 0)
			goto bad;
		dst->m_len = 0;
		/* The packet header moves to the new head of the chain. */
		if (n->m_flags & M_PKTHDR) {
			M_COPY_PKTHDR(dst, n);
			n->m_flags &= ~M_PKTHDR;
		}
	}

	room = &dst->m_dat[MLEN] - (dst->m_data + dst->m_len);
	do {
		/*
		 * Take up to max(len, max_protohdr) bytes, limited by the room
		 * left in dst and by what the current source mbuf holds.
		 */
		ncopy = min(min(max(len, max_protohdr), room), n->m_len);
		bcopy(mtod(n, caddr_t), mtod(dst, caddr_t) + dst->m_len,
		    (unsigned)ncopy);
		len -= ncopy;
		dst->m_len += ncopy;
		n->m_len -= ncopy;
		room -= ncopy;
		if (n->m_len)
			n->m_data += ncopy;	/* source still has data: skip what was taken */
		else
			n = m_free(n);		/* source drained: free it, continue with what m_free returns */
	} while (len > 0 && n);

	/* Ran out of source mbufs before len bytes were gathered. */
	if (len > 0) {
		(void) m_free(dst);
		goto bad;
	}
	dst->m_next = n;
	return (dst);

bad:
	m_freem(n);
	MPFail++;
	return (0);
}
有些細節的東西還不明確,還需要進一步整理。
TCP/IP詳解2 學習筆記2---ifnet ifaddr http://www.linuxidc.com/Linux/2014-11/109289.htm