歡迎來到Linux教程網
Linux教程網
Linux教程網
Linux教程網
您现在的位置: Linux教程網 >> UnixLinux >  >> Linux管理 >> Linux網絡

Linux網絡協議棧之驅動框架

網卡驅動既可以以模塊的方式加載,也可以在內核初始化的時候加載。我們選定e100系列的網卡來說明網卡驅動的一般框架。

網卡設備通用數據結構:

/*
 * Generic network device descriptor, quoted by the article as the common
 * data structure for network interface devices.  Comments marked as notes
 * are translated from the article's annotations.
 */
struct net_device
{
         /*
          * This is the first field of the "visible" part of this structure
          * (i.e. as seen by users in the "Space.c" file).  It is the name
          * the interface.
          */
          /* Network device name. */
         char                    name[IFNAMSIZ];
         /* device name hash chain */
         /* Note: links the device into the dev_name_head hash table, keyed by
          * device name, so a device can be located quickly from its name. */
         struct hlist_node       name_hlist;
         /*
          *     I/O specific fields
          *     FIXME: Merge these and struct ifmap into one
          */
          /* Start and end addresses of the device's shared memory. */
         unsigned long            mem_end;          /* shared mem end    */
         unsigned long            mem_start;        /* shared mem start   */
         /* Note: I/O base address of the interface; initialized while the
          * device is probed, and can be shown and changed with ifconfig. */
         unsigned long            base_addr;        /* device I/O address         */
         /* Note: interrupt number assigned to the device, normally set when
          * the device is initialized. */
         unsigned int               irq;             /* device IRQ number        */
         /*
          *     Some hardware also needs these fields, but they are not
          *     part of the usual set specified in Space.c.
          */
         /* Selects which port to use on a multi-port device. */
         unsigned char            if_port;     /* Selectable AUI, TP,..*/
         /* DMA channel allocated to the device. */
         unsigned char            dma;          /* DMA channel                 */
         /* Device state bits. */
         unsigned long            state;
         /* Next device in the list of network devices. */
         struct net_device      *next;
         /* Driver initialization hook. */
         /* The device initialization function. Called only once. */
         int                       (*init)(struct net_device *dev);
         /* ------- Fields preinitialized in Space.c finish here ------- */
         /* Net device features */
         /* Feature flags supported by the interface (NETIF_F_* below). */
         unsigned long            features;
#define NETIF_F_SG                  1       /* Scatter/gather IO. */
#define NETIF_F_IP_CSUM              2       /* Can checksum only TCP/UDP over IPv4. */
#define NETIF_F_NO_CSUM            4       /* Does not require checksum. F.e. loopack. */
#define NETIF_F_HW_CSUM           8       /* Can checksum all the packets. */
#define NETIF_F_HIGHDMA            32     /* Can DMA to high memory. */
#define NETIF_F_FRAGLIST   64     /* Scatter/gather IO. */
#define NETIF_F_HW_VLAN_TX   128   /* Transmit VLAN hw acceleration */
#define NETIF_F_HW_VLAN_RX   256   /* Receive VLAN hw acceleration */
#define NETIF_F_HW_VLAN_FILTER     512   /* Receive filtering on VLAN */
#define NETIF_F_VLAN_CHALLENGED 1024 /* Device cannot handle VLAN packets */
#define NETIF_F_GSO               2048 /* Enable software GSO. */
#define NETIF_F_LLTX             4096 /* LockLess TX */
         /* Segmentation offload features */
#define NETIF_F_GSO_SHIFT 16
#define NETIF_F_GSO_MASK         0xffff0000
#define NETIF_F_TSO               (SKB_GSO_TCPV4 << NETIF_F_GSO_SHIFT)
#define NETIF_F_UFO               (SKB_GSO_UDP << NETIF_F_GSO_SHIFT)
#define NETIF_F_GSO_ROBUST      (SKB_GSO_DODGY << NETIF_F_GSO_SHIFT)
#define NETIF_F_TSO_ECN              (SKB_GSO_TCP_ECN << NETIF_F_GSO_SHIFT)
#define NETIF_F_TSO6             (SKB_GSO_TCPV6 << NETIF_F_GSO_SHIFT)
         /* List of features with software fallbacks. */
#define NETIF_F_GSO_SOFTWARE        (NETIF_F_TSO | NETIF_F_TSO_ECN | NETIF_F_TSO6)
#define NETIF_F_GEN_CSUM (NETIF_F_NO_CSUM | NETIF_F_HW_CSUM)
#define NETIF_F_ALL_CSUM (NETIF_F_IP_CSUM | NETIF_F_GEN_CSUM)
         /* Note: chains together devices that have already been scheduled
          * for packet output. */
         struct net_device      *next_sched;
         /* Interface index. Unique device identifier         */
         /* Index number of the network device. */
         int                       ifindex;
         /* Note: unique identifier of the device, mainly used by virtual
          * tunnel devices. */
         int                       iflink;
         /* Hook through which interface statistics are obtained. */
         struct net_device_stats* (*get_stats)(struct net_device *dev);
         /* List of functions to handle Wireless Extensions (instead of ioctl).
          * See <net/iw_handler.h> for details. Jean II */
         /* Wireless-related fields. */
         const struct iw_handler_def *   wireless_handlers;
         /* Instance data managed by the core of Wireless Extensions. */
         struct iw_public_data *     wireless_data;
         const struct ethtool_ops *ethtool_ops;
         /*
          * This marks the end of the "visible" part of the structure. All
          * fields hereafter are internal to the system, and may change at
          * will (read: may be cleaned up at will).
          */
         unsigned int               flags;         /* interface flags (a la BSD)        */
         /* Note: records the device's current IFF_PROMISC / IFF_ALLMULTI
          * state; used together with the setting of 'flags'. */
         unsigned short          gflags;
        unsigned short          priv_flags; /* Like 'flags' but invisible to userspace. */
         unsigned short          padded;    /* How much padding added by alloc_netdev() */
         unsigned char            operstate; /* RFC2863 operstate */
         unsigned char            link_mode; /* mapping policy to operstate */
         unsigned           mtu; /* interface MTU value               */
         unsigned short          type;         /* interface hardware type          */
         unsigned short          hard_header_len;      /* hardware hdr length      */
         struct net_device      *master; /* Pointer to master device of a group,
                                                 * which this device is member of.
                                                 */
         /* Interface address info. */
         /* Note: MAC address, normally read from the hardware at
          * initialization time. */
         unsigned char            perm_addr[MAX_ADDR_LEN]; /* permanent hw address */
         unsigned char            addr_len;  /* hardware address length        */
         unsigned short          dev_id;              /* for shared network cards */
         struct dev_mc_list    *mc_list;  /* Multicast mac addresses       */
         int                       mc_count;         /* Number of installed mcasts   */
         /* Promiscuous-mode counter. */
         int                       promiscuity;
         /* Note: counter for "receive all multicast" mode.  Each enable or
          * disable request adds or subtracts 1; only when it reaches 0 does
          * the device really stop receiving all multicast packets. */
         int                       allmulti;
         /* Protocol specific pointers */
         void                            *atalk_ptr;         /* AppleTalk link      */
         void                    *ip_ptr;    /* IPv4 specific data */ 
         void                    *dn_ptr;        /* DECnet specific data */
         void                    *ip6_ptr;       /* IPv6 specific data */
         void                    *ec_ptr;    /* Econet specific data      */
         void                    *ax25_ptr;         /* AX.25 specific data */
/*
 * Cache line mostly used on receive path (including eth_type_trans())
 */
        /* Note: this structure is linked onto the poll_list member of
         * softnet_data through this field. */
         struct list_head          poll_list ____cacheline_aligned_in_smp;
                                               /* Link to poll list      */
         /* Polling-mode operation hook. */
         int                       (*poll) (struct net_device *dev, int *quota);
         /* Note: quota of packets that may be read.  It changes dynamically
          * and is initialized from netdev_budget; after each read from the
          * device the number of packets read is subtracted from it.  When
          * the quota drops to zero or below, the current poll ends and the
          * device waits for the next round, so that one device with heavy
          * input cannot starve the others.  On input the poll queue is
          * walked and packets are read from the selected device; once the
          * number read exceeds the quota, reading stops and the device is
          * moved to the tail of the poll queue to wait for the next poll. */
         int                       quota;
         /* Note: per-device packet quota used in the receive softirq. */
         int                       weight;
         unsigned long            last_rx;      /* Time of last Rx       */
         /* Interface address info used in eth_type_trans() */
         unsigned char            dev_addr[MAX_ADDR_LEN];          /* hw address, (before bcast
                                                                 because most packets are unicast) */
         unsigned char            broadcast[MAX_ADDR_LEN];         /* hw bcast add         */
/*
 * Cache line mostly used on queue transmit path (qdisc)
 */
         /* device queue lock */
         spinlock_t                   queue_lock ____cacheline_aligned_in_smp;
         /* Note: root queueing discipline currently in use; set from
          * qdisc_sleeping when the configured discipline takes effect. */
         struct Qdisc                *qdisc;
         /* Note: currently configured queueing discipline; it is installed
          * into 'qdisc' when it takes effect. */
         struct Qdisc                *qdisc_sleeping;
         /* Note: list of all queueing disciplines configured on this device. */
         struct list_head          qdisc_list;
         /* Maximum number of packets that may wait in the transmit queue. */
         unsigned long            tx_queue_len;   /* Max frames per queue allowed */
         /* Partially transmitted GSO packet. */
         struct sk_buff            *gso_skb;
         /* ingress path synchronizer */
         spinlock_t                   ingress_lock;
         /* Queueing discipline for incoming packets. */
         struct Qdisc                *qdisc_ingress;
/*
 * One part is mostly used on xmit path (device)
 */
         /* hard_start_xmit synchronizer */
         spinlock_t                   _xmit_lock ____cacheline_aligned_in_smp;
         /* cpu id of processor entered to hard_start_xmit or -1,
            if nobody entered there.
          */
         int                       xmit_lock_owner;
         void                    *priv;        /* pointer to private data   */
         /* Note: transmit entry point the driver offers to the upper layer;
          * it is always called when a packet is sent. */
         int                       (*hard_start_xmit) (struct sk_buff *skb,
                                                            struct net_device *dev);
         /* These may be needed for future network-power-down code. */
         unsigned long            trans_start;       /* Time (in jiffies) of last Tx        */
         /* Note: minimum time after which the network layer decides that a
          * transmission has timed out and calls the driver's tx_timeout. */
         int                       watchdog_timeo; /* used by dev_watchdog() */
         /* Note: timer used to detect, while the device is otherwise working
          * normally, a transmit timeout caused by the queue being stopped;
          * when that happens the driver's tx_timeout hook is called. */
         struct timer_list          watchdog_timer;
/*
 * refcnt is a very hot point, so align it on SMP
 */
         /* Number of references to this device */
         atomic_t             refcnt ____cacheline_aligned_in_smp;
         /* delayed register/unregister */
         /* Note: links the device into net_todo_list, which holds devices
          * that have been unregistered and are about to be torn down. */
         struct list_head          todo_list;
         /* device index hash chain */
         /* Note: links the device into dev_index_hlist, hashed by the
          * device index. */
         struct hlist_node       index_hlist;
         /* register/unregister state machine */
         enum { NETREG_UNINITIALIZED=0,
                NETREG_REGISTERED,        /* completed register_netdevice */
                NETREG_UNREGISTERING,          /* called unregister_netdevice */
                NETREG_UNREGISTERED,  /* completed unregister todo */
                NETREG_RELEASED,            /* called free_netdev */
         } reg_state;
         /* Called after device is detached from network. */
         void                    (*uninit)(struct net_device *dev);
         /* Called after last user reference disappears. */
         void                    (*destructor)(struct net_device *dev);
         /* Pointers to interface service routines.    */
         /* Note: open hook that brings the device up: acquires the system
          * resources it needs and turns on the hardware. */
         int                       (*open)(struct net_device *dev);
         int                       (*stop)(struct net_device *dev);
#define HAVE_NETDEV_POLL
/* Note: builds the hardware header from the previously determined source
 * and destination hardware addresses. */
         int                       (*hard_header) (struct sk_buff *skb,
                                                        struct net_device *dev,
                                                        unsigned short type,
                                                        void *daddr,
                                                        void *saddr,
                                                        unsigned len);
/* Note: rebuilds the hardware header after ARP resolution has completed and
 * before the packet is transmitted. */
         int                       (*rebuild_header)(struct sk_buff *skb);
#define HAVE_MULTICAST                     
         /* Pushes the multicast address list down to the device. */
         void                    (*set_multicast_list)(struct net_device *dev);
#define HAVE_SET_MAC_ADDR                     
         /* Changes the hardware address; the device must support this. */
         int                       (*set_mac_address)(struct net_device *dev,
                                                           void *addr);
#define HAVE_PRIVATE_IOCTL
         int                       (*do_ioctl)(struct net_device *dev,
                                                   struct ifreq *ifr, int cmd);
#define HAVE_SET_CONFIG
         int                       (*set_config)(struct net_device *dev,
                                                     struct ifmap *map);
#define HAVE_HEADER_CACHE
         /* Note: fills an hh_cache structure from the result of an ARP lookup. */
         int                       (*hard_header_cache)(struct neighbour *neigh,
                                                             struct hh_cache *hh);
         void                    (*header_cache_update)(struct hh_cache *hh,
                                                               struct net_device *dev,
                                                               unsigned char *  haddr);
#define HAVE_CHANGE_MTU
         int                       (*change_mtu)(struct net_device *dev, int new_mtu);
#define HAVE_TX_TIMEOUT
         void                    (*tx_timeout) (struct net_device *dev);
         void                    (*vlan_rx_register)(struct net_device *dev,
                                                            struct vlan_group *grp);
         void                    (*vlan_rx_add_vid)(struct net_device *dev,
                                                           unsigned short vid);
         void                    (*vlan_rx_kill_vid)(struct net_device *dev,
                                                            unsigned short vid);
         int                       (*hard_header_parse)(struct sk_buff *skb,
                                                             unsigned char *haddr);
         /* Sets parameters related to the neighbour subsystem. */
         int                       (*neigh_setup)(struct net_device *dev, struct neigh_parms *);
#ifdef CONFIG_NETPOLL
         /* netpoll information block of the device. */
         struct netpoll_info    *npinfo;
#endif
#ifdef CONFIG_NET_POLL_CONTROLLER
         /* Note: asks the driver to check the interface for events in
          * polling mode, with interrupts disabled. */
         void                    (*poll_controller)(struct net_device *dev);
#endif
         /* bridge stuff */
         struct net_bridge_port      *br_port;
         /* class/net/name entry */
         struct class_device   class_dev;
         /* space for optional statistics and wireless sysfs groups */
         struct attribute_group  *sysfs_groups[3];
};

網卡驅動的注冊是在e100_init_module中完成的:

/*
 * Module entry point: optionally prints the driver banner, then registers
 * e100_driver with the PCI core and returns that registration's result.
 */
static int __init e100_init_module(void)
{
	/* Message mask derived from the 'debug' level: the low 'debug' bits set. */
	int msg_mask = (1 << debug) - 1;
	int show_banner = (msg_mask & NETIF_MSG_DRV) != 0;

	if (show_banner) {
		printk(KERN_INFO PFX "%s, %s\n", DRV_DESCRIPTION, DRV_VERSION);
		printk(KERN_INFO PFX "%s\n", DRV_COPYRIGHT);
	}

	return pci_register_driver(&e100_driver);
}

可見,網卡驅動也就是和一般的PCI驅動編寫一樣。

/*
 * PCI driver descriptor for the e100 NIC.  This is the object passed to
 * pci_register_driver() by e100_init_module(); it binds the driver's
 * callbacks (probe, remove, power management, shutdown, error handling)
 * to the devices listed in e100_id_table.
 */
static struct pci_driver e100_driver = {
         .name =         DRV_NAME,
         /* Table of PCI IDs this driver claims. */
         .id_table =     e100_id_table,
         /* Called for a matching device; registers the network device. */
         .probe =        e100_probe,
         .remove =       __devexit_p(e100_remove),
#ifdef CONFIG_PM
         /* Power Management hooks */
         .suspend =      e100_suspend,
         .resume =       e100_resume,
#endif
         .shutdown =     e100_shutdown,
         .err_handler = &e100_err_handler,
};

       網絡設備驅動程序如果被編譯進內核,將在系統啟動時被初始化;如果被編譯為模塊,則在運行時加載模塊時被初始化。無論初始化在何時發生,由驅動程序控制的網絡設備都會被注冊。這種情形適用於所有的總線類型,無論是總線體系結構代碼還是模塊初始化代碼調用注冊函數,結果都是一樣的。PCI設備驅動程序加載後最終會執行pci_driver->probe()函數。我們看看e100網卡的驅動注冊過程:

/*
 * PCI probe callback for the e100 driver.
 *
 * Allocates an Ethernet net_device with a struct nic as its private area,
 * fills in the driver callbacks, enables and maps the PCI device, resets
 * the hardware, loads the MAC address from the EEPROM and finally
 * registers the network device.
 *
 * @pdev: PCI device being probed.
 * @ent:  matching entry of the driver's ID table; a non-zero driver_data
 *        sets the 'ich' flag on the nic.
 *
 * Returns 0 on success or a negative errno.  On failure the error labels
 * at the bottom undo the completed steps in reverse order.
 */
static int __devinit e100_probe(struct pci_dev *pdev,
         const struct pci_device_id *ent)
{
         struct net_device *netdev;
         struct nic *nic;
         int err;
         /* Allocate the net_device together with the private struct nic. */
         if(!(netdev = alloc_etherdev(sizeof(struct nic)))) {
                   if(((1 << debug) - 1) & NETIF_MSG_PROBE)
                            printk(KERN_ERR PFX "Etherdev alloc failed, abort.\n");
                   return -ENOMEM;
         }
         /* Install the driver's net_device callbacks. */
         netdev->open = e100_open;
         netdev->stop = e100_close;
         /* e100 implementation of hard_start_xmit: the hook that finally
          * hands a packet to the hardware. */
         netdev->hard_start_xmit = e100_xmit_frame;
         netdev->get_stats = e100_get_stats;
         netdev->set_multicast_list = e100_set_multicast_list;
         netdev->set_mac_address = e100_set_mac_address;
         netdev->change_mtu = e100_change_mtu;
         netdev->do_ioctl = e100_do_ioctl;
         SET_ETHTOOL_OPS(netdev, &e100_ethtool_ops);
         netdev->tx_timeout = e100_tx_timeout;
         netdev->watchdog_timeo = E100_WATCHDOG_PERIOD;
         netdev->poll = e100_poll;
         netdev->weight = E100_NAPI_WEIGHT;
#ifdef CONFIG_NET_POLL_CONTROLLER
/* Note: needed so that netpoll can receive packets; the hook simulates a
 * device interrupt and runs the interrupt handling. */
         netdev->poll_controller = e100_netpoll;
#endif
         /* Temporarily name the device after its PCI name; the name is
          * replaced by the "eth%d" template before registration below. */
         strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
         /* Fetch the private area (struct nic) that alloc_etherdev reserved. */
         nic = netdev_priv(netdev);
         /* Initialize the nic back-pointers and message mask. */
         nic->netdev = netdev;
         nic->pdev = pdev;
         nic->msg_enable = (1 << debug) - 1;
         /* Store the net_device as the PCI device's driver data. */
         pci_set_drvdata(pdev, netdev);
         /* Initialize device before it's used by a driver. Ask low-level code
          * to enable I/O and memory. Wake up the device if it was suspended.
          * Beware, this function can fail. */
         if((err = pci_enable_device(pdev))) {
                   DPRINTK(PROBE, ERR, "Cannot enable PCI device, aborting.\n");
                   goto err_out_free_dev;
         }
         /* Resource 0 must be a memory resource. */
         if(!(pci_resource_flags(pdev, 0) & IORESOURCE_MEM)) {
                   DPRINTK(PROBE, ERR, "Cannot find proper PCI device "
                            "base address, aborting.\n");
                   err = -ENODEV;
                   goto err_out_disable_pdev;
         }
         /* Reserve the device's resources (I/O and memory). */
         if((err = pci_request_regions(pdev, DRV_NAME))) {
                   DPRINTK(PROBE, ERR, "Cannot obtain PCI resources, aborting.\n");
                   goto err_out_disable_pdev;
         }
         /* Request a 32-bit DMA mask; a non-zero return means no usable
          * DMA configuration and aborts the probe. */
         if((err = pci_set_dma_mask(pdev, DMA_32BIT_MASK))) {
                   DPRINTK(PROBE, ERR, "No usable DMA configuration, aborting.\n");
                   goto err_out_free_res;
         }
         SET_MODULE_OWNER(netdev);
         SET_NETDEV_DEV(netdev, &pdev->dev);
         /* Map the control/status registers (resource 0). */
         nic->csr = ioremap(pci_resource_start(pdev, 0), sizeof(struct csr));
         if(!nic->csr) {
                   DPRINTK(PROBE, ERR, "Cannot map device registers, aborting.\n");
                   err = -ENOMEM;
                   goto err_out_free_res;
         }
         if(ent->driver_data)
                   nic->flags |= ich;
         else
                   nic->flags &= ~ich;
         /* Initialize the remaining nic fields to their defaults. */
         e100_get_defaults(nic);
         /* locks must be initialized before calling hw_reset */
         spin_lock_init(&nic->cb_lock);
         spin_lock_init(&nic->cmd_lock);
         spin_lock_init(&nic->mdio_lock);
         /* Reset the device before pci_set_master() in case device is in some
          * funky state and has an interrupt pending - hint: we don't have the
          * interrupt handler registered yet. */
          /* Note: the reset is performed by writing device registers. */
         e100_hw_reset(nic);
         /* NOTE(review): the original annotation says "enable the device";
          * pci_set_master() is the PCI call that turns on bus mastering. */
         pci_set_master(pdev);
         /* Set up the two software timers (watchdog and LED blink). */
         init_timer(&nic->watchdog);
         nic->watchdog.function = e100_watchdog;
         nic->watchdog.data = (unsigned long)nic;
         init_timer(&nic->blink_timer);
         nic->blink_timer.function = e100_blink_led;
         nic->blink_timer.data = (unsigned long)nic;
         /* Set up the work item used for transmit-timeout handling. */
         INIT_WORK(&nic->tx_timeout_task, e100_tx_timeout_task);
         /* Note: allocates the driver memory from the DMA area. */
         if((err = e100_alloc(nic))) {
                   DPRINTK(PROBE, ERR, "Cannot alloc driver memory, aborting.\n");
                   goto err_out_iounmap;
         }
         /* Read the NIC's EEPROM, which holds the MAC address. */
         if((err = e100_eeprom_load(nic)))
                   goto err_out_free;
         /* Initialize the PHY-related state of the nic. */
         e100_phy_init(nic);
         memcpy(netdev->dev_addr, nic->eeprom, ETH_ALEN);
         memcpy(netdev->perm_addr, nic->eeprom, ETH_ALEN);
         /* Reject a malformed MAC address. */
         if(!is_valid_ether_addr(netdev->perm_addr)) {
                   DPRINTK(PROBE, ERR, "Invalid MAC address from "
                            "EEPROM, aborting.\n");
                   err = -EAGAIN;
                   goto err_out_free;
         }
         /* Wol magic packet can be enabled from eeprom */
         if((nic->mac >= mac_82558_D101_A4) &&
            (nic->eeprom[eeprom_id] & eeprom_id_wol))
                   nic->flags |= wol_magic;
         /* ack any pending wake events, disable PME */
         /* Note: the second argument of pci_enable_wake() is a power state.
          * PME# is the Power Management Event signal a device may raise when
          * it wants to change its power state; whether the device may raise
          * it is switchable per state.  The third argument is that switch:
          * 1 enables, 0 disables.  A failure here is only logged. */
         err = pci_enable_wake(pdev, 0, 0);
         if (err)
                   DPRINTK(PROBE, ERR, "Error clearing wake event\n");
         /* Switch the device name to the "eth%d" template before registering. */
         strcpy(netdev->name, "eth%d");
         /* Register the network device. */
         if((err = register_netdev(netdev))) {
                   DPRINTK(PROBE, ERR, "Cannot register net device, aborting.\n");
                   goto err_out_free;
         }
         DPRINTK(PROBE, INFO, "addr 0x%llx, irq %d, "
                   "MAC addr %02X:%02X:%02X:%02X:%02X:%02X\n",
                   (unsigned long long)pci_resource_start(pdev, 0), pdev->irq,
                   netdev->dev_addr[0], netdev->dev_addr[1], netdev->dev_addr[2],
                   netdev->dev_addr[3], netdev->dev_addr[4], netdev->dev_addr[5]);
         return 0;
/* Error unwinding: each label releases what was acquired before the
 * failing step, then falls through to the earlier ones. */
err_out_free:
         e100_free(nic);
err_out_iounmap:
         iounmap(nic->csr);
err_out_free_res:
         pci_release_regions(pdev);
err_out_disable_pdev:
         pci_disable_device(pdev);
err_out_free_dev:
         pci_set_drvdata(pdev, NULL);
         free_netdev(netdev);
         return err;
}

其輔助函數:

分配網絡設備結構

/*
 * Allocate an Ethernet network device.
 *
 * @sizeof_priv: size of the driver-private area to reserve (for e100 this
 *               is the size of struct nic).
 *
 * Delegates to alloc_netdev() with the "eth%d" name template and
 * ether_setup() as the setup callback, and returns whatever it returns.
 */
struct net_device *alloc_etherdev(int sizeof_priv)
{
	struct net_device *etherdev;

	etherdev = alloc_netdev(sizeof_priv, "eth%d", ether_setup);
	return etherdev;
}

struct net_device *alloc_netdev(int sizeof_priv, const char *name,

                   void (*setup)(struct net_device *))

{

         void *p;

         struct net_device *dev;

         int alloc_size;

         BUG_ON(strlen(name) >= sizeof(dev->name));

         /* ensure 32-byte alignment of both the device and private area */

         /*計算分配的大小為設備結構大小加上nic結構大小*/

         alloc_size = (sizeof(*dev) + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST;

         alloc_size += sizeof_priv + NETDEV_ALIGN_CONST;

         /*分配空間*/

         p = kzalloc(alloc_size, GFP_KERNEL);

         if (!p) {

                   printk(KERN_ERR "alloc_netdev: Unable to allocate device.\n");

                   return NULL;

         }

         dev = (struct net_device *)

                   (((long)p + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST);

         /*計算padd大小為結構大小減去對其的數據大小*/

         dev->padded = (char *)dev - (char *)p;

         if (sizeof_priv)

                   /*私有數據為nic結構的起始地址*/

                   dev->priv = netdev_priv(dev);

         /*調用參數中的函數指針,初始化設備結構*/

         setup(dev);

         strcpy(dev->name, name);

         return dev;

}

/*分配設備結構時調用,用於初始化該設備結構*/

void ether_setup(struct net_device *dev)

{

         dev->change_mtu              = eth_change_mtu;

         dev->hard_header     = eth_header;

         dev->rebuild_header         = eth_rebuild_header;

         dev->set_mac_address    = eth_mac_addr;

         dev->hard_header_cache = eth_header_cache;

         dev->header_cache_update= eth_header_cache_update;

         dev->hard_header_parse  = eth_header_parse;

         dev->type                   = ARPHRD_ETHER;

         dev->hard_header_len     = ETH_HLEN;

         dev->mtu           = ETH_DATA_LEN;

         dev->addr_len           = ETH_ALEN;

         dev->tx_queue_len   = 1000;      /* Ethernet wants good queues */    

         dev->flags                  = IFF_BROADCAST|IFF_MULTICAST;

        

         memset(dev->broadcast, 0xFF, ETH_ALEN);

}

注冊網絡設備的實際操作由register_netdev(netdev)調用register_netdevice()完成

int register_netdevice(struct net_device *dev)

{

         struct hlist_head *head;

         struct hlist_node *p;

         int ret;

         BUG_ON(dev_boot_phase);

         ASSERT_RTNL();

         /*2.6內核支持內核搶占,該函數檢查是否需要從新調度

         如果是,則進行調度,無論此時進行執行在內核空間還是

         用戶空間*/

         might_sleep();

         /*初始化設備的各個字段*/

         /* When net_device's are persistent, this will be fatal. */

         BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);

         spin_lock_init(&dev->queue_lock);

         spin_lock_init(&dev->_xmit_lock);

         dev->xmit_lock_owner = -1;

#ifdef CONFIG_NET_CLS_ACT

         spin_lock_init(&dev->ingress_lock);

#endif

         dev->iflink = -1;

         /* Init, if this function is available */

         /*如果有init函數,調用該函數進行初始化*/

         if (dev->init) {

                   ret = dev->init(dev);

                   if (ret) {

                            if (ret > 0)

                                     ret = -EIO;

                            goto out;

                   }

         }

        /*檢測待注冊的網絡設備名是否有效*/

         if (!dev_valid_name(dev->name)) {

                   ret = -EINVAL;

                   goto out;

         }

         /*為設備分配一個唯一索引號和一個用於虛擬隧道設備

         的唯一標識。*/       

         dev->ifindex = dev_new_index();

         if (dev->iflink == -1)

                   dev->iflink = dev->ifindex;

         /* Check for existence of name */

         /*將網絡設備添加到dev_name_head散列表中,並檢測是否

         存在同名的網絡設備*/

         head = dev_name_hash(dev->name);

         hlist_for_each(p, head) {

                   struct net_device *d

                            = hlist_entry(p, struct net_device, name_hlist);

                   if (!strncmp(d->name, dev->name, IFNAMSIZ)) {

                            ret = -EEXIST;

                           goto out;

                   }

        }

         /* Fix illegal SG+CSUM combinations. */

         /*只有在網絡設備支持校驗和計算的情況下,網絡設備才能支持SG類型的聚合分散I/O

         因為SG類型的聚合分散I/O特性沒有傳輸層硬件檢驗和支持是無用的*/

         if ((dev->features & NETIF_F_SG) &&

             !(dev->features & NETIF_F_ALL_CSUM)) {

                   printk(KERN_NOTICE "%s: Dropping NETIF_F_SG since no checksum feature.\n",

                          dev->name);

                   dev->features &= ~NETIF_F_SG;

         }

         /* TSO requires that SG is present as well. */

         /*TSO需要SG類型的聚合分散性I/O的支持,因此在後者不被支持時也將被禁用*/

         if ((dev->features & NETIF_F_TSO) &&

             !(dev->features & NETIF_F_SG)) {

                   printk(KERN_NOTICE "%s: Dropping NETIF_F_TSO since no SG feature.\n",

                          dev->name);

                   dev->features &= ~NETIF_F_TSO;

         }

         /*UFO需要NETIF_F_HW_CSUM和SG類型的聚合分散I/O的支持,因此在後者不被支持的情況下

         也將被禁用*/

         if (dev->features & NETIF_F_UFO) {

                   if (!(dev->features & NETIF_F_HW_CSUM)) {

                            printk(KERN_ERR "%s: Dropping NETIF_F_UFO since no "

                                               "NETIF_F_HW_CSUM feature.\n",

                                                                 dev->name);

                            dev->features &= ~NETIF_F_UFO;

                   }

                   if (!(dev->features & NETIF_F_SG)) {

                            printk(KERN_ERR "%s: Dropping NETIF_F_UFO since no "

                                               "NETIF_F_SG feature.\n",

                                               dev->name);

                            dev->features &= ~NETIF_F_UFO;

                   }

         }

         /*

          *     nil rebuild_header routine,

          *     that should be never called and used as just bug trap.

          */

         /*初始化網絡設備用於重建硬件首部的rebuild_header接口

         */

         if (!dev->rebuild_header)

                   dev->rebuild_header = default_rebuild_header;

         /*將網絡設備的注冊信息注冊到sysfs文件系統中*/

         ret = netdev_register_sysfs(dev);

         if (ret)

                   goto out;

         /*設置網絡設備的狀態,表示注冊已經完成*/

         dev->reg_state = NETREG_REGISTERED;

         /*

          *     Default initial state at registry is that the

          *     device is present.

          */

         /*設置相應位,表示設備對系統是可用的*/

         set_bit(__LINK_STATE_PRESENT, &dev->state);

         /*下面為初始化網絡設備排隊規則,並注冊到網絡設備的

         鏈表和相關散列表中*/

         dev->next = NULL;

         dev_init_scheduler(dev);

         write_lock_bh(&dev_base_lock);

         *dev_tail = dev;

         dev_tail = &dev->next;

         hlist_add_head(&dev->name_hlist, head);

         hlist_add_head(&dev->index_hlist, dev_index_hash(dev->ifindex));

         dev_hold(dev);

         write_unlock_bh(&dev_base_lock);

         /* Notify protocols, that a new device appeared. */

         /*通知所有對設備注冊感興趣的其他內核模塊*/

         raw_notifier_call_chain(&netdev_chain, NETDEV_REGISTER, dev);

         ret = 0;

out:

         return ret;

}

PCI驅動中的其他函數意義同其名,我們看看掛起操作e100_suspend

/*
 * PCI suspend callback for the e100 driver.
 *
 * @pdev:  PCI device being suspended.
 * @state: requested power-management message (not inspected here).
 *
 * Quiesces the interface, saves PCI state, arms or disarms wake-up from
 * D3hot/D3cold, disables the device, frees its IRQ and puts it into
 * PCI_D3hot.  Always returns 0.
 */
static int e100_suspend(struct pci_dev *pdev, pm_message_t state)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct nic *nic = netdev_priv(netdev);
	int wake_enable;

	/* If the interface is up, disable polling on it first. */
	if (netif_running(netdev))
		netif_poll_disable(nic->netdev);

	/* Stop the watchdog timer that monitors the device. */
	del_timer_sync(&nic->watchdog);

	/* Mark the carrier off and detach the device. */
	netif_carrier_off(nic->netdev);
	netif_device_detach(netdev);

	pci_save_state(pdev);

	/*
	 * The second argument of pci_enable_wake() is the power state and
	 * the third is the on/off switch (1 enables, 0 disables).  Wake-up
	 * from both D3hot and D3cold is enabled when magic-packet WoL is
	 * flagged or e100_asf() reports non-zero, and disabled otherwise.
	 * The bitwise '|' is kept so that e100_asf() is always evaluated.
	 */
	wake_enable = ((nic->flags & wol_magic) | e100_asf(nic)) ? 1 : 0;
	pci_enable_wake(pdev, PCI_D3hot, wake_enable);
	pci_enable_wake(pdev, PCI_D3cold, wake_enable);

	/* Disable the device. */
	pci_disable_device(pdev);

	/*
	 * Release the interrupt.
	 * NOTE(review): this runs even when the interface is not running;
	 * confirm the IRQ is always requested at this point.
	 */
	free_irq(pdev->irq, netdev);

	/* Put the PCI device into the D3hot power state. */
	pci_set_power_state(pdev, PCI_D3hot);

	return 0;
}

       這樣,網絡設備的驅動框架就搭建起來了:驅動程序在模塊初始化函數中注冊網卡的PCI驅動,在probe函數中注冊網卡設備,並初始化相關數據結構和函數指針。對於特定的網卡需要特定的數據結構來保存信息,硬件相關的操作需要按照對應網卡的約定來實現。對於e100系列網卡,數據結構nic保存了該網卡的所有信息。另外net_device中提供的函數指針在e100_probe中做了初始化,如e100_open,依據它們的名字我們可以猜到它們的意思和用途(e100_open做網卡的打開、啟動、中斷的注冊等操作)。這裡就不再深入了,如果對它們的實現細節感興趣,需要參看相應網卡的硬件手冊。

後面我們在分析上層代碼中會遇到一些操作特定網卡的函數指針,在這裡就能找到其實現。

Copyright © Linux教程網 All Rights Reserved