網卡驅動架構分析:
1. Linux網絡子系統


2. 重要數據結構



總結一下三個重要的數據結構:
2.1. net_device
2.2. net_device_ops
2.3. sk_buff
3. 網卡驅動架構分析
cs89x0.c //CS8900網卡芯片的驅動,CS8900是早期2410使用的網卡芯片
3.1. 網卡初始化
首先找到驅動程序的入口:
早期的驅動入口並不是module_init()函數,而是init_module,所以找到這個函數
/*
 * Module entry point (legacy init_module() form).
 *
 * Allocates an Ethernet net_device with a struct net_local private area,
 * copies irq/io (and, when enabled, the DMA settings) into it, selects the
 * media type from the "media" string and validates the I/O base address.
 *
 * irq, io, media, duplex, debug, use_dma, dma and dmasize are defined outside
 * this excerpt -- presumably module parameters; confirm against the full file.
 *
 * NOTE(review): this listing is an excerpt. The rest of the function,
 * including the "out" label targeted by the gotos below, is not shown.
 */
int __init init_module(void)
{
/* Allocate the net_device together with room for the driver-private net_local. */
struct net_device *dev = alloc_etherdev(sizeof(struct net_local));
struct net_local *lp;
int ret = 0;
#if DEBUGGING
net_debug = debug;
#else
debug = 0;
#endif
/* Allocation is checked before dev is dereferenced for the first time. */
if (!dev)
return -ENOMEM;
/* Record the interrupt line and I/O base address in the net_device. */
dev->irq = irq;
dev->base_addr = io;
/* Fetch the private area that was allocated along with the net_device. */
lp = netdev_priv(dev);
#if ALLOW_DMA
/* Copy the DMA configuration only when DMA use was requested. */
if (use_dma) {
lp->use_dma = use_dma;
lp->dma = dma;
lp->dmasize = dmasize;
}
#endif
spin_lock_init(&lp->lock);
/* boy, they'd better get these right */
/* Map the media string to adapter configuration bits; anything unrecognised
 * gets the same bits as "rj45". */
if (!strcmp(media, "rj45"))
lp->adapter_cnf = A_CNF_MEDIA_10B_T | A_CNF_10B_T;
else if (!strcmp(media, "aui"))
lp->adapter_cnf = A_CNF_MEDIA_AUI | A_CNF_AUI;
else if (!strcmp(media, "bnc"))
lp->adapter_cnf = A_CNF_MEDIA_10B_2 | A_CNF_10B_2;
else
lp->adapter_cnf = A_CNF_MEDIA_10B_T | A_CNF_10B_T;
/* duplex == -1 selects auto-negotiation. */
if (duplex==-1)
lp->auto_neg_cnf = AUTO_NEG_ENABLE;
/* An explicit I/O base is mandatory: 0 is rejected with -EPERM, and any
 * value up to 0x1ff is rejected with -ENXIO. */
if (io == 0) {
printk(KERN_ERR "cs89x0.c: Module autoprobing not allowed.\n");
printk(KERN_ERR "cs89x0.c: Append io=0xNNN\n");
ret = -EPERM;
goto out;
} else if (io <= 0x1ff) {
ret = -ENXIO;
goto out;
}

第一步:分配net_device結構,
第二步:初始化net_device結構,
dev->irq = irq;//分配中斷號
dev->base_addr = io;//設備基地址
lp = netdev_priv(dev);
第三步:
ret = cs89x0_probe1(dev, io, 1); //這一步其實也是在初始化硬件!其中還有一部分是對net_device結構進行初始化。這個函數比較長就不貼代碼了,其中一行比較重要:
dev->netdev_ops = &net_ops; //這個是對netdev_ops成員進行初始化

最後一步注冊網卡驅動!上圖中第二個紅色箭頭所指向的地方!
總結一下上圖:

3.2. 網卡數據的發送
這個結合前面的經驗,找到網卡的函數操作集結構:

可以看到這個成員函數的名字叫做:net_send_packet
/*
 * Transmit one packet: issue the transmit command and frame length to the
 * chip, then upload the frame data if the chip reports it is ready.
 *
 * Returns NETDEV_TX_OK once the frame has been written out and the skb
 * freed, or NETDEV_TX_BUSY (skb left untouched) when the chip is not ready.
 */
static netdev_tx_t net_send_packet(struct sk_buff *skb,struct net_device *dev)
{
struct net_local *lp = netdev_priv(dev);
unsigned long flags;
/* Debug trace: length plus the two bytes that follow the two ETH_ALEN-sized
 * address fields at the start of the frame. */
if (net_debug > 3) {
printk("%s: sent %d byte packet of type %x\n",
dev->name, skb->len,
(skb->data[ETH_ALEN+ETH_ALEN] << 8) | skb->data[ETH_ALEN+ETH_ALEN+1]);
}
/* keep the upload from being interrupted, since we
ask the chip to start transmitting before the
whole packet has been completely uploaded. */
spin_lock_irqsave(&lp->lock, flags);
/* Step 1: stop the transmit queue so the upper layer hands over no further
 * packets while this one is being uploaded to the chip. */
netif_stop_queue(dev);
/* initiate a transmit sequence */
/* Step 2: write the transmit command, then the frame length. The frame
 * data itself is written further down. */
writeword(dev->base_addr, TX_CMD_PORT, lp->send_cmd);
writeword(dev->base_addr, TX_LEN_PORT, skb->len);
/* Test to see if the chip has allocated memory for the packet */
if ((readreg(dev, PP_BusST) & READY_FOR_TX_NOW) == 0) {
/*
 * Gasp! It hasn't. But that shouldn't happen since
 * we're waiting for TxOk, so return NETDEV_TX_BUSY and let the
 * caller requeue this packet.
 * NOTE(review): the queue stopped above is not woken on this path.
 */
spin_unlock_irqrestore(&lp->lock, flags);
if (net_debug) printk("cs89x0: Tx buffer not free!\n");
return NETDEV_TX_BUSY;
}
/* Write the contents of the packet */
/* The count is in 16-bit words; (len + 1) >> 1 rounds an odd length up. */
writewords(dev->base_addr, TX_FRAME_PORT,skb->data,(skb->len+1) >>1);
spin_unlock_irqrestore(&lp->lock, flags);
dev->stats.tx_bytes += skb->len;
/* Step 3: the data has been handed to the chip, so release the skb. */
dev_kfree_skb (skb);
/* Per the accompanying notes, the chip raises an interrupt once the
 * transmission has finished; net_interrupt is the handler for it. */
return NETDEV_TX_OK;
}
產生一個中斷這個可以查查request_irq函數,在這個函數被調用的地方可以看到這樣的一行代碼:
ret = request_irq(dev->irq, net_interrupt, 0, dev->name, dev);

可以看到這裡把net_interrupt函數注冊為中斷處理函數,網卡的發送中斷和接收中斷都由它來處理!

一個是發送中斷,一個是接收中斷,
netif_wake_queue(dev);/* Inform upper layers. */ // 這行代碼表示在發送中斷處理過程中,通知上層協議,可以再次向網卡傳輸數據。
3.3. 網卡數據的接收
網卡數據的接收是在中斷中完成的,這一點可以從中斷處理函數net_interrupt中看到

接收中斷處理函數net_rx(dev)
/* We have a good packet(s), get it/them out of the buffers. */
/*
 * Receive path, called for a receive event on dev: reads the frame status
 * and length from the chip, copies the frame into a freshly allocated skb
 * and passes it to the network stack via netif_rx().
 *
 * A frame whose status lacks RX_OK is only accounted through
 * count_rx_errors(); an skb allocation failure bumps rx_dropped.
 */
static void
net_rx(struct net_device *dev)
{
struct sk_buff *skb;
int status, length;
int ioaddr = dev->base_addr;
/* Two consecutive reads of the same port: the first yields the receive
 * status, the second the frame length in bytes. */
status = readword(ioaddr, RX_FRAME_PORT);
length = readword(ioaddr, RX_FRAME_PORT);
if ((status & RX_OK) == 0) {
count_rx_errors(status, dev);
return;
}
/* Malloc up new buffer. */
/* The extra 2 bytes make room for the skb_reserve(skb, 2) below. */
skb = dev_alloc_skb(length + 2);
if (skb == NULL) {
#if 0 /* Again, this seems a cruel thing to do */
printk(KERN_WARNING "%s: Memory squeeze, dropping packet.\n", dev->name);
#endif
dev->stats.rx_dropped++;
return;
}
skb_reserve(skb, 2); /* longword align L3 header */
/* Copy length >> 1 whole 16-bit words from the chip into the skb. */
readwords(ioaddr, RX_FRAME_PORT, skb_put(skb, length), length >> 1);
/* Odd length: read one more word and store it into the final byte. */
if (length & 1)
skb->data[length-1] = readword(ioaddr, RX_FRAME_PORT);
if (net_debug > 3) {
printk( "%s: received %d byte packet of type %x\n",
dev->name, length,
(skb->data[ETH_ALEN+ETH_ALEN] << 8) | skb->data[ETH_ALEN+ETH_ALEN+1]);
}
skb->protocol=eth_type_trans(skb,dev);
/* Hand the skb to the protocol stack. */
netif_rx(skb);
dev->stats.rx_packets++;
dev->stats.rx_bytes += length;
}

回環網卡驅動設計:

使用ifconfig,可以看到除了eth0還有一個lo。eth0代表的是一個物理網卡,lo代表的就是回環網卡。從上面的打印信息可以看到lo的IP地址是127.0.0.1,並且ping 127.0.0.x的時候能ping通。其實lo就是網卡的tx和rx在軟件層的短接!所以才叫做回環網卡!
其實內核代碼中也可以找到回環網卡的驅動:loopback.c
這個文件中的代碼其實不是獨立的內核模塊,而是由內核的其它部分調用的!
刪掉內核代碼中原有的loopback.c,結合上面的分析和對原有源碼的分析,重新編寫loopback.c:
#include#include #include #include #include #include #include #include #include /* For the statistics structure. */ unsigned long bytes = 0; unsigned long packets = 0;//skb包 static int loopback_xmit(struct sk_buff *skb, struct net_device *dev)//數據發送 { skb->protocol = eth_type_trans(skb,dev);//表明skb包的協議 以太網協議 bytes += skb->len;//發送的數據量 packets++; //發送的數據包也要加一 netif_rx(skb);//將skb向回送, 回環網卡驅動就是這實現的,這是很關鍵的一步 return 0; } static struct net_device_stats *loopback_get_stats(struct net_device *dev)//獲取網卡狀態 { struct net_device_stats *stats = &dev->stats;//首先把state這個成員取出來 stats->rx_packets = packets;//表示網卡收到了多少個包 stats->tx_packets = packets;//表示網卡發送了多少個包 stats->rx_bytes = bytes;//表示網卡接收到了多少個字節 stats->tx_bytes = bytes; return stats;//返回狀態 } static const struct net_device_ops loopback_ops = {//定義一個net_device_ops 結構 .ndo_start_xmit= loopback_xmit,//發送指針 .ndo_get_stats = loopback_get_stats,//獲取網卡狀態的函數 }; static void loopback_setup(struct net_device *dev)//初始化設置操作 { dev->mtu = (16 * 1024) + 20 + 20 + 12;//網卡最大接收包的尺寸:16K + TCP頭 + IP頭 + 以太網頭 dev->flags = IFF_LOOPBACK;//回環網卡專有標志 這是一個宏內核代碼可查 dev->header_ops = ð_header_ops;//這個是網絡包的函數操作集,內核可以看這個成員的數據結構 dev->netdev_ops = &loopback_ops;//網卡所支持操作的集合 } static __net_init int loopback_net_init(struct net *net) { struct net_device *dev; int err; err = -ENOMEM; dev = alloc_netdev(0, "lo", loopback_setup);//分配一個net_device結構,loopback為一個初始化函數 if (!dev) goto out; err = register_netdev(dev);//注冊網卡驅動程序 if (err) goto out_free_netdev; net->loopback_dev = dev; return 0; out_free_netdev: free_netdev(dev); out: if (net == &init_net) panic("loopback: Failed to register netdevice: %d\n", err); return err; } static __net_exit void loopback_net_exit(struct net *net) { struct net_device *dev = net->loopback_dev; unregister_netdev(dev);//注銷網卡驅動程序 } /* Registered in net/core/dev.c */ struct pernet_operations __net_initdata loopback_net_ops = { .init = loopback_net_init, .exit = loopback_net_exit, };

上面的回環網卡驅動有點問題,ping不通!
這是能ping通的內核自帶的源碼:
#include#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* For the statistics structure. */ #include /* For ARPHRD_ETHER */ #include #include #include #include #include struct pcpu_lstats { u64 packets; u64 bytes; struct u64_stats_sync syncp; }; /* * The higher levels take care of making this non-reentrant (it's * called with bh's disabled). */ static netdev_tx_t loopback_xmit(struct sk_buff *skb, struct net_device *dev) { struct pcpu_lstats *lb_stats; int len; skb_orphan(skb); skb->protocol = eth_type_trans(skb, dev); /* it's OK to use per_cpu_ptr() because BHs are off */ lb_stats = this_cpu_ptr(dev->lstats); len = skb->len; if (likely(netif_rx(skb) == NET_RX_SUCCESS)) { u64_stats_update_begin(&lb_stats->syncp); lb_stats->bytes += len; lb_stats->packets++; u64_stats_update_end(&lb_stats->syncp); } return NETDEV_TX_OK; } static struct rtnl_link_stats64 *loopback_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) { u64 bytes = 0; u64 packets = 0; int i; for_each_possible_cpu(i) { const struct pcpu_lstats *lb_stats; u64 tbytes, tpackets; unsigned int start; lb_stats = per_cpu_ptr(dev->lstats, i); do { start = u64_stats_fetch_begin(&lb_stats->syncp); tbytes = lb_stats->bytes; tpackets = lb_stats->packets; } while (u64_stats_fetch_retry(&lb_stats->syncp, start)); bytes += tbytes; packets += tpackets; } stats->rx_packets = packets; stats->tx_packets = packets; stats->rx_bytes = bytes; stats->tx_bytes = bytes; return stats; } static u32 always_on(struct net_device *dev) { return 1; } static const struct ethtool_ops loopback_ethtool_ops = { .get_link = always_on, }; static int loopback_dev_init(struct net_device *dev) { dev->lstats = alloc_percpu(struct pcpu_lstats); if (!dev->lstats) return -ENOMEM; return 0; } static void loopback_dev_free(struct net_device *dev) { free_percpu(dev->lstats); 
free_netdev(dev); } static const struct net_device_ops loopback_ops = { .ndo_init = loopback_dev_init, .ndo_start_xmit= loopback_xmit, .ndo_get_stats64 = loopback_get_stats64, }; /* * The loopback device is special. There is only one instance * per network namespace. */ static void loopback_setup(struct net_device *dev) { dev->mtu = (16 * 1024) + 20 + 20 + 12; dev->hard_header_len = ETH_HLEN; /* 14 */ dev->addr_len = ETH_ALEN; /* 6 */ dev->tx_queue_len = 0; dev->type = ARPHRD_LOOPBACK; /* 0x0001*/ dev->flags = IFF_LOOPBACK; dev->priv_flags &= ~IFF_XMIT_DST_RELEASE; dev->hw_features = NETIF_F_ALL_TSO | NETIF_F_UFO; dev->features = NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_ALL_TSO | NETIF_F_UFO | NETIF_F_NO_CSUM | NETIF_F_RXCSUM | NETIF_F_HIGHDMA | NETIF_F_LLTX | NETIF_F_NETNS_LOCAL | NETIF_F_VLAN_CHALLENGED | NETIF_F_LOOPBACK; dev->ethtool_ops = &loopback_ethtool_ops; dev->header_ops = ð_header_ops; dev->netdev_ops = &loopback_ops; dev->destructor = loopback_dev_free; } /* Setup and register the loopback device. */ static __net_init int loopback_net_init(struct net *net) { struct net_device *dev; int err; err = -ENOMEM; dev = alloc_netdev(0, "lo", loopback_setup); if (!dev) goto out; dev_net_set(dev, net); err = register_netdev(dev); if (err) goto out_free_netdev; net->loopback_dev = dev; return 0; out_free_netdev: free_netdev(dev); out: if (net_eq(net, &init_net)) panic("loopback: Failed to register netdevice: %d\n", err); return err; } /* Registered in net/core/dev.c */ struct pernet_operations __net_initdata loopback_net_ops = { .init = loopback_net_init, };
自己將兩份源碼對照著看了,暫時還沒找出原因,這裡先上一張錯誤的截圖以及我認為出錯的原因
