網卡驅動架構分析:
1. Linux網絡子系統
2. 重要數據結構
總結一下三個重要的數據結構:
2.1. net_device
2.2. net_device_ops
2.3. sk_buff
3. 網卡驅動架構分析
CS8900.c //早期2410使用的網卡芯片
3.1. 網卡初始化
首先找到驅動程序的入口:
早期的驅動入口並不是module_init()函數,而是init_module,所以找到這個函數
int __init init_module(void) { struct net_device *dev = alloc_etherdev(sizeof(struct net_local)); struct net_local *lp; int ret = 0; #if DEBUGGING net_debug = debug; #else debug = 0; #endif if (!dev) return -ENOMEM; dev->irq = irq; dev->base_addr = io; lp = netdev_priv(dev); #if ALLOW_DMA if (use_dma) { lp->use_dma = use_dma; lp->dma = dma; lp->dmasize = dmasize; } #endif spin_lock_init(&lp->lock); /* boy, they'd better get these right */ if (!strcmp(media, "rj45")) lp->adapter_cnf = A_CNF_MEDIA_10B_T | A_CNF_10B_T; else if (!strcmp(media, "aui")) lp->adapter_cnf = A_CNF_MEDIA_AUI | A_CNF_AUI; else if (!strcmp(media, "bnc")) lp->adapter_cnf = A_CNF_MEDIA_10B_2 | A_CNF_10B_2; else lp->adapter_cnf = A_CNF_MEDIA_10B_T | A_CNF_10B_T; if (duplex==-1) lp->auto_neg_cnf = AUTO_NEG_ENABLE; if (io == 0) { printk(KERN_ERR "cs89x0.c: Module autoprobing not allowed.\n"); printk(KERN_ERR "cs89x0.c: Append io=0xNNN\n"); ret = -EPERM; goto out; } else if (io <= 0x1ff) { ret = -ENXIO; goto out; }
第一步:分配net_device結構,
第二步:初始化net_device結構,
dev->irq = irq;//分配中斷號
dev->base_addr = io;//設備基地址
lp = netdev_priv(dev);
第三步:
ret = cs89x0_probe1(dev, io, 1); //這一步其實也是初始化硬件的!還有一部分是對net_device結構進行一些初始化。這個函數比較長就不貼代碼了,其中一行比較重要:
dev->netdev_ops = &net_ops; //這個是對netdev_ops成員進行初始化
最後一步注冊網卡驅動!上圖中第二個紅色箭頭所指向的地方!
總結一下上圖:
3.2. 網卡數據的發送
這個結合前面的經驗,找到網卡的函數操作集結構:
可以看到這個成員函數的名字叫做:net_send_packet
/*
 * net_send_packet - transmit entry point of the cs89x0 driver.
 * @skb: packet to send; it is freed here once its data has been written
 *       to the chip.
 * @dev: network device the packet is sent on.
 *
 * Writes the transmit command and the frame length to the chip, checks
 * that the chip reports READY_FOR_TX_NOW, and then copies the frame to
 * TX_FRAME_PORT.
 *
 * Returns NETDEV_TX_OK when the frame has been written, or
 * NETDEV_TX_BUSY (with the skb left untouched) when the chip has no
 * buffer for it.
 */
static netdev_tx_t net_send_packet(struct sk_buff *skb,struct net_device *dev)
{
	struct net_local *lp = netdev_priv(dev);
	unsigned long flags;

	if (net_debug > 3) {
		/* The two bytes after both MAC addresses are the frame type. */
		printk("%s: sent %d byte packet of type %x\n",
			dev->name, skb->len,
			(skb->data[ETH_ALEN+ETH_ALEN] << 8) | skb->data[ETH_ALEN+ETH_ALEN+1]);
	}

	/* keep the upload from being interrupted, since we
	   ask the chip to start transmitting before the
	   whole packet has been completely uploaded. */
	spin_lock_irqsave(&lp->lock, flags);

	/*
	 * 1. Stop the queue so the upper layers hand over no further
	 *    packets while this one is being loaded into the chip.
	 */
	netif_stop_queue(dev);

	/* initiate a transmit sequence */
	/* 2. Write the transmit command, then the frame length. */
	writeword(dev->base_addr, TX_CMD_PORT, lp->send_cmd);
	writeword(dev->base_addr, TX_LEN_PORT, skb->len);

	/* Test to see if the chip has allocated memory for the packet */
	if ((readreg(dev, PP_BusST) & READY_FOR_TX_NOW) == 0) {
		/*
		 * Gasp! It hasn't. But that shouldn't happen since
		 * we're waiting for TxOk, so return 1 and requeue this packet.
		 */
		spin_unlock_irqrestore(&lp->lock, flags);
		if (net_debug) printk("cs89x0: Tx buffer not free!\n");
		return NETDEV_TX_BUSY;
	}

	/* Write the contents of the packet */
	/* The length is rounded up to whole 16-bit words. */
	writewords(dev->base_addr, TX_FRAME_PORT,skb->data,(skb->len+1) >>1);
	spin_unlock_irqrestore(&lp->lock, flags);
	dev->stats.tx_bytes += skb->len;

	/* 3. Release the skb. */
	dev_kfree_skb (skb);

	/*
	 * Per the surrounding article, the chip raises an interrupt once
	 * the frame has been sent; the handler is not part of this excerpt.
	 */
	return NETDEV_TX_OK;
}
產生一個中斷這個可以查查request_irq函數,在這個函數被調用的地方可以看到這樣的一行代碼:
ret = request_irq(dev->irq, net_interrupt, 0, dev->name, dev);
可以看到這裡把net_interrupt函數注冊為中斷處理函數,網卡的發送中斷和接收中斷都由它來處理!
一個是發送中斷,一個是接收中斷,
netif_wake_queue(dev);/* Inform upper layers. */ // 這行代碼表示在發送中斷處理過程中,通知上層協議,可以再次向網卡傳輸數據。
3.3. 網卡數據的接收
網卡數據的接收入口是在中斷中完成的,這一點可以從中斷處理函數net_interrupt中看到
接收中斷處理函數net_rx(dev)
/* We have a good packet(s), get it/them out of the buffers. */
/*
 * net_rx - pull one received frame out of the chip and hand it to the
 * network stack.
 * @dev: network device that received the frame.
 *
 * Reads the receive status and the frame length from RX_FRAME_PORT. A
 * frame without RX_OK is only counted as an error; otherwise the frame is
 * copied into a new skb and passed on with netif_rx().
 */
static void net_rx(struct net_device *dev)
{
	struct sk_buff *skb;
	int status, length;
	int ioaddr = dev->base_addr;

	/* Two consecutive reads of the same port: status first, then length. */
	status = readword(ioaddr, RX_FRAME_PORT);
	length = readword(ioaddr, RX_FRAME_PORT);

	if ((status & RX_OK) == 0) {
		count_rx_errors(status, dev);
		return;
	}

	/* Malloc up new buffer. */
	/* The 2 extra bytes are consumed by the skb_reserve() call below. */
	skb = dev_alloc_skb(length + 2);
	if (skb == NULL) {
#if 0		/* Again, this seems a cruel thing to do */
		printk(KERN_WARNING "%s: Memory squeeze, dropping packet.\n", dev->name);
#endif
		dev->stats.rx_dropped++;
		return;
	}
	skb_reserve(skb, 2);	/* longword align L3 header */

	/* Copy the frame into the skb, one 16-bit word at a time. */
	readwords(ioaddr, RX_FRAME_PORT, skb_put(skb, length), length >> 1);
	/* Odd length: one more word is read and stored into the last byte. */
	if (length & 1)
		skb->data[length-1] = readword(ioaddr, RX_FRAME_PORT);

	if (net_debug > 3) {
		printk(	"%s: received %d byte packet of type %x\n",
			dev->name, length,
			(skb->data[ETH_ALEN+ETH_ALEN] << 8) | skb->data[ETH_ALEN+ETH_ALEN+1]);
	}

	skb->protocol=eth_type_trans(skb,dev);
	/* Submit the skb to the protocol stack. */
	netif_rx(skb);
	dev->stats.rx_packets++;
	dev->stats.rx_bytes += length;
}
回環網卡驅動設計:
使用ifconfig,可以看到除了eth0還有一個lo, eth0代表的是一個物理網卡,lo代表的就是回環網卡,從上面的打印信息可以看到lo的IP地址是127.0.0.1,可以看到當ping 127.0.0.x的時候能ping通,其實lo就是網卡的tx和rx在軟件層的短接!所以才叫做回環網卡!
其實內核代碼中也可以找到回環網卡的驅動!loopback.c
這個文件中的代碼其實不是一個內核模塊,而是由內核的其它部分調用的!
刪掉內核代碼中原有的loopback.c,結合上面的分析和對原有源碼的分析重新編寫loopback.c,
#include#include #include #include #include #include #include #include #include /* For the statistics structure. */ unsigned long bytes = 0; unsigned long packets = 0;//skb包 static int loopback_xmit(struct sk_buff *skb, struct net_device *dev)//數據發送 { skb->protocol = eth_type_trans(skb,dev);//表明skb包的協議 以太網協議 bytes += skb->len;//發送的數據量 packets++; //發送的數據包也要加一 netif_rx(skb);//將skb向回送, 回環網卡驅動就是這實現的,這是很關鍵的一步 return 0; } static struct net_device_stats *loopback_get_stats(struct net_device *dev)//獲取網卡狀態 { struct net_device_stats *stats = &dev->stats;//首先把state這個成員取出來 stats->rx_packets = packets;//表示網卡收到了多少個包 stats->tx_packets = packets;//表示網卡發送了多少個包 stats->rx_bytes = bytes;//表示網卡接收到了多少個字節 stats->tx_bytes = bytes; return stats;//返回狀態 } static const struct net_device_ops loopback_ops = {//定義一個net_device_ops 結構 .ndo_start_xmit= loopback_xmit,//發送指針 .ndo_get_stats = loopback_get_stats,//獲取網卡狀態的函數 }; static void loopback_setup(struct net_device *dev)//初始化設置操作 { dev->mtu = (16 * 1024) + 20 + 20 + 12;//網卡最大接收包的尺寸:16K + TCP頭 + IP頭 + 以太網頭 dev->flags = IFF_LOOPBACK;//回環網卡專有標志 這是一個宏內核代碼可查 dev->header_ops = ð_header_ops;//這個是網絡包的函數操作集,內核可以看這個成員的數據結構 dev->netdev_ops = &loopback_ops;//網卡所支持操作的集合 } static __net_init int loopback_net_init(struct net *net) { struct net_device *dev; int err; err = -ENOMEM; dev = alloc_netdev(0, "lo", loopback_setup);//分配一個net_device結構,loopback為一個初始化函數 if (!dev) goto out; err = register_netdev(dev);//注冊網卡驅動程序 if (err) goto out_free_netdev; net->loopback_dev = dev; return 0; out_free_netdev: free_netdev(dev); out: if (net == &init_net) panic("loopback: Failed to register netdevice: %d\n", err); return err; } static __net_exit void loopback_net_exit(struct net *net) { struct net_device *dev = net->loopback_dev; unregister_netdev(dev);//注銷網卡驅動程序 } /* Registered in net/core/dev.c */ struct pernet_operations __net_initdata loopback_net_ops = { .init = loopback_net_init, .exit = loopback_net_exit, };
上面的回環網卡驅動有點問題,ping不通!
這是能ping通的內核自帶的源碼:
#include#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* For the statistics structure. */ #include /* For ARPHRD_ETHER */ #include #include #include #include #include struct pcpu_lstats { u64 packets; u64 bytes; struct u64_stats_sync syncp; }; /* * The higher levels take care of making this non-reentrant (it's * called with bh's disabled). */ static netdev_tx_t loopback_xmit(struct sk_buff *skb, struct net_device *dev) { struct pcpu_lstats *lb_stats; int len; skb_orphan(skb); skb->protocol = eth_type_trans(skb, dev); /* it's OK to use per_cpu_ptr() because BHs are off */ lb_stats = this_cpu_ptr(dev->lstats); len = skb->len; if (likely(netif_rx(skb) == NET_RX_SUCCESS)) { u64_stats_update_begin(&lb_stats->syncp); lb_stats->bytes += len; lb_stats->packets++; u64_stats_update_end(&lb_stats->syncp); } return NETDEV_TX_OK; } static struct rtnl_link_stats64 *loopback_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) { u64 bytes = 0; u64 packets = 0; int i; for_each_possible_cpu(i) { const struct pcpu_lstats *lb_stats; u64 tbytes, tpackets; unsigned int start; lb_stats = per_cpu_ptr(dev->lstats, i); do { start = u64_stats_fetch_begin(&lb_stats->syncp); tbytes = lb_stats->bytes; tpackets = lb_stats->packets; } while (u64_stats_fetch_retry(&lb_stats->syncp, start)); bytes += tbytes; packets += tpackets; } stats->rx_packets = packets; stats->tx_packets = packets; stats->rx_bytes = bytes; stats->tx_bytes = bytes; return stats; } static u32 always_on(struct net_device *dev) { return 1; } static const struct ethtool_ops loopback_ethtool_ops = { .get_link = always_on, }; static int loopback_dev_init(struct net_device *dev) { dev->lstats = alloc_percpu(struct pcpu_lstats); if (!dev->lstats) return -ENOMEM; return 0; } static void loopback_dev_free(struct net_device *dev) { free_percpu(dev->lstats); 
free_netdev(dev); } static const struct net_device_ops loopback_ops = { .ndo_init = loopback_dev_init, .ndo_start_xmit= loopback_xmit, .ndo_get_stats64 = loopback_get_stats64, }; /* * The loopback device is special. There is only one instance * per network namespace. */ static void loopback_setup(struct net_device *dev) { dev->mtu = (16 * 1024) + 20 + 20 + 12; dev->hard_header_len = ETH_HLEN; /* 14 */ dev->addr_len = ETH_ALEN; /* 6 */ dev->tx_queue_len = 0; dev->type = ARPHRD_LOOPBACK; /* 0x0001*/ dev->flags = IFF_LOOPBACK; dev->priv_flags &= ~IFF_XMIT_DST_RELEASE; dev->hw_features = NETIF_F_ALL_TSO | NETIF_F_UFO; dev->features = NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_ALL_TSO | NETIF_F_UFO | NETIF_F_NO_CSUM | NETIF_F_RXCSUM | NETIF_F_HIGHDMA | NETIF_F_LLTX | NETIF_F_NETNS_LOCAL | NETIF_F_VLAN_CHALLENGED | NETIF_F_LOOPBACK; dev->ethtool_ops = &loopback_ethtool_ops; dev->header_ops = ð_header_ops; dev->netdev_ops = &loopback_ops; dev->destructor = loopback_dev_free; } /* Setup and register the loopback device. */ static __net_init int loopback_net_init(struct net *net) { struct net_device *dev; int err; err = -ENOMEM; dev = alloc_netdev(0, "lo", loopback_setup); if (!dev) goto out; dev_net_set(dev, net); err = register_netdev(dev); if (err) goto out_free_netdev; net->loopback_dev = dev; return 0; out_free_netdev: free_netdev(dev); out: if (net_eq(net, &init_net)) panic("loopback: Failed to register netdevice: %d\n", err); return err; } /* Registered in net/core/dev.c */ struct pernet_operations __net_initdata loopback_net_ops = { .init = loopback_net_init, };
自己將兩份源碼對照著看了,暫時還沒找出原因,這裡先上一張錯誤的截圖以及我認為出錯的原因
在使用ifconfig命令的時候,發現RX, TX, 居然都有packets網絡包數據傳輸,當ping 127.0.0.x的時候會一直阻塞在那裡,說明問題應該在初始化參數設置的部分!這裡有數據包發送但是沒有接收到數據包!說明數據接收部分,也就是回環發送部分有問題!這裡暫時先擱一下,後邊再殺個回馬槍來深入研究一下!