
网卡收到一个数据包的时候,是如何传给应用层的
发布日期:2021-05-09 16:03:17
浏览次数:22
分类:精选文章
本文共 20028 字,大约阅读时间需要 66 分钟。
这里以3c501网卡为例,每个设备对应一个device的结构体,下面代码即对3c501网卡的数据结构进行初始化,包括发送函数,注册中断回调,mac头长度等。
/* The actual probe. */ static intel1_probe1(struct device *dev, int ioaddr){ #ifndef MODULE char *mname; /* Vendor name */ unsigned char station_addr[6]; int autoirq = 0; int i; /* Read the station address PROM data from the special port. */ for (i = 0; i < 6; i++) { outw(i, ioaddr + EL1_DATAPTR); station_addr[i] = inb(ioaddr + EL1_SAPROM); } /* Check the first three octets of the S.A. for 3Com's prefix, or for the Sager NP943 prefix. */ if (station_addr[0] == 0x02 && station_addr[1] == 0x60 && station_addr[2] == 0x8c) { mname = "3c501"; } else if (station_addr[0] == 0x00 && station_addr[1] == 0x80 && station_addr[2] == 0xC8) { mname = "NP943"; } else return ENODEV; /* Grab the region so we can find the another board if autoIRQ fails. */ request_region(ioaddr, EL1_IO_EXTENT,"3c501"); /* We auto-IRQ by shutting off the interrupt line and letting it float high. */ if (dev->irq < 2) { autoirq_setup(2); inb(RX_STATUS); /* Clear pending interrupts. */ inb(TX_STATUS); outb(AX_LOOP + 1, AX_CMD); outb(0x00, AX_CMD); autoirq = autoirq_report(1); if (autoirq == 0) { printk("%s probe at %#x failed to detect IRQ line.\n", mname, ioaddr); return EAGAIN; } } outb(AX_RESET+AX_LOOP, AX_CMD); /* Loopback mode. */ dev->base_addr = ioaddr; memcpy(dev->dev_addr, station_addr, ETH_ALEN); if (dev->mem_start & 0xf) el_debug = dev->mem_start & 0x7; if (autoirq) dev->irq = autoirq; printk("%s: %s EtherLink at %#lx, using %sIRQ %d.\n", dev->name, mname, dev->base_addr, autoirq ? "auto":"assigned ", dev->irq); #ifdef CONFIG_IP_MULTICAST printk("WARNING: Use of the 3c501 in a multicast kernel is NOT recommended.\n");#endif if (el_debug) printk("%s", version); /* Initialize the device structure. */ if (dev->priv == NULL) dev->priv = kmalloc(sizeof(struct net_local), GFP_KERNEL); memset(dev->priv, 0, sizeof(struct net_local)); /* The EL1-specific entries in the device structure. 
*/ dev->open = &el_open; // 发送函数 dev->hard_start_xmit = &el_start_xmit; dev->stop = &el1_close; dev->get_stats = &el1_get_stats; dev->set_multicast_list = &set_multicast_list; /* Setup the generic properties */ ether_setup(dev);#endif /* !MODULE */ return 0;}void ether_setup(struct device *dev){ int i; /* Fill in the fields of the device structure with ethernet-generic values. This should be in a common file instead of per-driver. */ for (i = 0; i < DEV_NUMBUFFS; i++) skb_queue_head_init(&dev->buffs[i]); /* register boot-defined "eth" devices */ if (dev->name && (strncmp(dev->name, "eth", 3) == 0)) { i = simple_strtoul(dev->name + 3, NULL, 0); if (ethdev_index[i] == NULL) { ethdev_index[i] = dev; } else if (dev != ethdev_index[i]) { /* Really shouldn't happen! */ printk("ether_setup: Ouch! Someone else took %s\n", dev->name); } } dev->hard_header = eth_header; dev->rebuild_header = eth_rebuild_header; dev->type_trans = eth_type_trans; dev->type = ARPHRD_ETHER; dev->hard_header_len = ETH_HLEN; dev->mtu = 1500; /* eth_mtu */ dev->addr_len = ETH_ALEN; for (i = 0; i < ETH_ALEN; i++) { dev->broadcast[i]=0xff; } /* New-style flags. */ dev->flags = IFF_BROADCAST|IFF_MULTICAST; dev->family = AF_INET; dev->pa_addr = 0; dev->pa_brdaddr = 0; dev->pa_mask = 0; dev->pa_alen = sizeof(unsigned long);}/* Open/initialize the board. */static int el_open(struct device *dev){ int ioaddr = dev->base_addr; if (el_debug > 2) printk("%s: Doing el_open()...", dev->name); // 设置中断的回调是el_interrupt函数,网络收到数据包后会触发系统中断,系统会执行该函数 if (request_irq(dev->irq, &el_interrupt, 0, "3c501")) { return -EAGAIN; } irq2dev_map[dev->irq] = dev; el_reset(dev); dev->start = 1; outb(AX_RX, AX_CMD); /* Aux control, irq and receive enabled */ MOD_INC_USE_COUNT; return 0;}
设置完网卡对应的数据结构后,如果有数据包到达,由驱动程序中的这两个函数处理。
/* The typical workload of the driver: Handle the ether interface interrupts. */static voidel_interrupt(int irq, struct pt_regs *regs){ struct device *dev = (struct device *)(irq2dev_map[irq]); struct net_local *lp; int ioaddr; int axsr; /* Aux. status reg. */ if (dev == NULL || dev->irq != irq) { printk ("3c501 driver: irq %d for unknown device.\n", irq); return; } ioaddr = dev->base_addr; lp = (struct net_local *)dev->priv; axsr = inb(AX_STATUS); if (el_debug > 3) printk("%s: el_interrupt() aux=%#02x", dev->name, axsr); if (dev->interrupt) printk("%s: Reentering the interrupt driver!\n", dev->name); dev->interrupt = 1; if (dev->tbusy) { /* * Board in transmit mode. */ int txsr = inb(TX_STATUS); if (el_debug > 6) printk(" txsr=%02x gp=%04x rp=%04x", txsr, inw(GP_LOW), inw(RX_LOW)); if ((axsr & 0x80) && (txsr & TX_READY) == 0) { /* * FIXME: is there a logic to whether to keep on trying or * reset immediately ? */ printk("%s: Unusual interrupt during Tx, txsr=%02x axsr=%02x" " gp=%03x rp=%03x.\n", dev->name, txsr, axsr, inw(ioaddr + EL1_DATAPTR), inw(ioaddr + EL1_RXPTR)); dev->tbusy = 0; mark_bh(NET_BH); } else if (txsr & TX_16COLLISIONS) { /* * Timed out */ if (el_debug) printk("%s: Transmit failed 16 times, ethernet jammed?\n", dev->name); outb(AX_SYS, AX_CMD); lp->stats.tx_aborted_errors++; } else if (txsr & TX_COLLISION) { /* Retrigger xmit. */ if (el_debug > 6) printk(" retransmitting after a collision.\n"); /* * Poor little chip can't reset its own start pointer */ outb(AX_SYS, AX_CMD); outw(lp->tx_pkt_start, GP_LOW); outb(AX_XMIT, AX_CMD); lp->stats.collisions++; dev->interrupt = 0; return; } else { /* * It worked.. we will now fall through and receive */ lp->stats.tx_packets++; if (el_debug > 6) printk(" Tx succeeded %s\n", (txsr & TX_RDY) ? "." : "but tx is busy!"); /* * This is safe the interrupt is atomic WRT itself. */ dev->tbusy = 0; mark_bh(NET_BH); /* In case more to transmit */ } } else { /* * In receive mode. 
*/ int rxsr = inb(RX_STATUS); if (el_debug > 5) printk(" rxsr=%02x txsr=%02x rp=%04x", rxsr, inb(TX_STATUS), inw(RX_LOW)); /* * Just reading rx_status fixes most errors. */ if (rxsr & RX_MISSED) lp->stats.rx_missed_errors++; if (rxsr & RX_RUNT) { /* Handled to avoid board lock-up. */ lp->stats.rx_length_errors++; if (el_debug > 5) printk(" runt.\n"); } else if (rxsr & RX_GOOD) { /* * Receive worked. */ // 成功收到数据包后执行到这 el_receive(dev); } else { /* Nothing? Something is broken! */ if (el_debug > 2) printk("%s: No packet seen, rxsr=%02x **resetting 3c501***\n", dev->name, rxsr); el_reset(dev); } if (el_debug > 3) printk(".\n"); } /* * Move into receive mode */ outb(AX_RX, AX_CMD); outw(0x00, RX_BUF_CLR); inb(RX_STATUS); /* Be certain that interrupts are cleared. */ inb(TX_STATUS); dev->interrupt = 0; return;}/* We have a good packet. Well, not really "good", just mostly not broken. We must check everything to see if it is good. */static voidel_receive(struct device *dev){ struct net_local *lp = (struct net_local *)dev->priv; int ioaddr = dev->base_addr; int pkt_len; struct sk_buff *skb; // 包长度 pkt_len = inw(RX_LOW); if (el_debug > 4) printk(" el_receive %d.\n", pkt_len); // 包太大或太小 if ((pkt_len < 60) || (pkt_len > 1536)) { if (el_debug) printk("%s: bogus packet, length=%d\n", dev->name, pkt_len); lp->stats.rx_over_errors++; return; } /* * Command mode so we can empty the buffer */ outb(AX_SYS, AX_CMD); // 分配一个承载数据的skb skb = alloc_skb(pkt_len, GFP_ATOMIC); /* * Start of frame */ outw(0x00, GP_LOW); if (skb == NULL) { printk("%s: Memory squeeze, dropping packet.\n", dev->name); lp->stats.rx_dropped++; return; } else { // 记录数据包长度和收到该包的设备 skb->len = pkt_len; skb->dev = dev; /* * The read increments through the bytes. The interrupt * handler will fix the pointer when it returns to * receive mode. */ // 读取数据到skb中 insb(DATAPORT, skb->data, pkt_len); // 传给mac层 netif_rx(skb); lp->stats.rx_packets++; } return;}
驱动层处理生成一个skb结构体,然后通过netif_rx函数传给链路层。netif_rx直接把skb挂载到backlog队列中,然后结束中断处理,等下半部分再进行数据包的具体处理。由sock_init函数的代码我们知道,下半部分的处理函数是net_bh。
/* * Receive a packet from a device driver and queue it for the upper * (protocol) levels. It always succeeds. This is the recommended * interface to use. */void netif_rx(struct sk_buff *skb){ static int dropping = 0; /* * Any received buffers are un-owned and should be discarded * when freed. These will be updated later as the frames get * owners. */ skb->sk = NULL; skb->free = 1; if(skb->stamp.tv_sec==0) skb->stamp = xtime; /* * Check that we aren't overdoing things. */ // 是否过载 if (!backlog_size) dropping = 0; else if (backlog_size > 300) dropping = 1; // 过载则丢弃 if (dropping) { kfree_skb(skb, FREE_READ); return; } /* * Add it to the "backlog" queue. */#ifdef CONFIG_SKB_CHECK IS_SKB(skb);#endif // 加到backlog队列 skb_queue_tail(&backlog,skb); backlog_size++; /* * If any packet arrived, mark it for processing after the * hardware interrupt returns. */ // 激活下半部分,处理数据包 mark_bh(NET_BH); return;}
/* * When we are called the queue is ready to grab, the interrupts are * on and hardware can interrupt and queue to the receive queue a we * run with no problems. * This is run as a bottom half after an interrupt handler that does * mark_bh(NET_BH); */ void net_bh(void *tmp){ struct sk_buff *skb; struct packet_type *ptype; struct packet_type *pt_prev; unsigned short type; /* * Atomically check and mark our BUSY state. */ // 防止重入 if (set_bit(1, (void*)&in_bh)) return; /* * Can we send anything now? We want to clear the * decks for any more sends that get done as we * process the input. */ // 发送缓存的数据包 dev_transmit(); /* * Any data left to process. This may occur because a * mark_bh() is done after we empty the queue including * that from the device which does a mark_bh() just after */ cli(); /* * While the queue is not empty */ // backlog队列的数据包来源于网卡收到的数据包 while((skb=skb_dequeue(&backlog))!=NULL) { /* * We have a packet. Therefore the queue has shrunk */ backlog_size--; sti(); /* * Bump the pointer to the next structure. * This assumes that the basic 'skb' pointer points to * the MAC header, if any (as indicated by its "length" * field). Take care now! */ // 指向ip头 skb->h.raw = skb->data + skb->dev->hard_header_len; // 减去mac头长度 skb->len -= skb->dev->hard_header_len; /* * Fetch the packet protocol ID. This is also quite ugly, as * it depends on the protocol driver (the interface itself) to * know what the type is, or where to get it from. The Ethernet * interfaces fetch the ID from the two bytes in the Ethernet MAC * header (the h_proto field in struct ethhdr), but other drivers * may either use the ethernet ID's or extra ones that do not * clash (eg ETH_P_AX25). We could set this before we queue the * frame. In fact I may change this when I have time. */ // 判断上层协议 type = skb->dev->type_trans(skb, skb->dev); /* * We got a packet ID. 
Now loop over the "known protocols" * table (which is actually a linked list, but this will * change soon if I get my way- FvK), and forward the packet * to anyone who wants it. * * [FvK didn't get his way but he is right this ought to be * hashed so we typically get a single hit. The speed cost * here is minimal but no doubt adds up at the 4,000+ pkts/second * rate we can hit flat out] */ pt_prev = NULL; for (ptype = ptype_base; ptype != NULL; ptype = ptype->next) { if ((ptype->type == type || ptype->type == htons(ETH_P_ALL)) && (!ptype->dev || ptype->dev==skb->dev)) { /* * We already have a match queued. Deliver * to it and then remember the new match */ // 如果有匹配的项则要单独复制一份skb if(pt_prev) { struct sk_buff *skb2; skb2=skb_clone(skb, GFP_ATOMIC); /* * Kick the protocol handler. This should be fast * and efficient code. */ if(skb2) pt_prev->func(skb2, skb->dev, pt_prev); } /* Remember the current last to do */ // 记录最近匹配的项 pt_prev=ptype; } } /* End of protocol list loop */ /* * Is there a last item to send to ? */ // 把数据包交给上层协议处理,大于一个匹配项,则把skb复制给最后一项,否则销毁skb if(pt_prev) pt_prev->func(skb, skb->dev, pt_prev); /* * Has an unknown packet has been received ? */ else kfree_skb(skb, FREE_WRITE); /* * Again, see if we can transmit anything now. * [Ought to take this out judging by tests it slows * us down not speeds us up] */ dev_transmit(); cli(); } /* End of queue loop */ /* * We have emptied the queue */ // 处理完毕 in_bh = 0; sti(); /* * One last output flush. */ dev_transmit();}
这里假设上层协议是ip,ip层处理函数是ip_rcv,代码如下
/* * This function receives all incoming IP datagrams. */int ip_rcv(struct sk_buff *skb, struct device *dev, struct packet_type *pt){ struct iphdr *iph = skb->h.iph; struct sock *raw_sk=NULL; unsigned char hash; unsigned char flag = 0; unsigned char opts_p = 0; /* Set iff the packet has options. */ struct inet_protocol *ipprot; static struct options opt; /* since we don't use these yet, and they take up stack space. */ int brd=IS_MYADDR; int is_frag=0;#ifdef CONFIG_IP_FIREWALL int err;#endif ip_statistics.IpInReceives++; /* * Tag the ip header of this packet so we can find it */ skb->ip_hdr = iph; /* * Is the datagram acceptable? * * 1. Length at least the size of an ip header * 2. Version of 4 * 3. Checksums correctly. [Speed optimisation for later, skip loopback checksums] * (4. We ought to check for IP multicast addresses and undefined types.. does this matter ?) */ // 参数检查 if (skb->lenihl<5 || iph->version != 4 || skb->len tot_len) || ip_fast_csum((unsigned char *)iph, iph->ihl) !=0) { ip_statistics.IpInHdrErrors++; kfree_skb(skb, FREE_WRITE); return(0); } /* * See if the firewall wants to dispose of the packet. */// 配置了防火墙,则先检查是否符合防火墙的过滤规则,否则则丢掉#ifdef CONFIG_IP_FIREWALL if ((err=ip_fw_chk(iph,dev,ip_fw_blk_chain,ip_fw_blk_policy, 0))!=1) { if(err==-1) icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0, dev); kfree_skb(skb, FREE_WRITE); return 0; }#endif /* * Our transport medium may have padded the buffer out. Now we know it * is IP we can trim to the true length of the frame. */ skb->len=ntohs(iph->tot_len); /* * Next analyse the packet for options. Studies show under one packet in * a thousand have options.... */ // ip头超过20字节,说明有选项 if (iph->ihl != 5) { /* Fast path for the typical optionless IP packet. */ memset((char *) &opt, 0, sizeof(opt)); if (do_options(iph, &opt) != 0) return 0; opts_p = 1; } /* * Remember if the frame is fragmented. 
*/ // 非0则说明是分片 if(iph->frag_off) { // 是否禁止分片,是的话is_frag等于1 if (iph->frag_off & 0x0020) is_frag|=1; /* * Last fragment ? */ // 非0说明有偏移,即不是第一个块分片 if (ntohs(iph->frag_off) & 0x1fff) is_frag|=2; } /* * Do any IP forwarding required. chk_addr() is expensive -- avoid it someday. * * This is inefficient. While finding out if it is for us we could also compute * the routing table entry. This is where the great unified cache theory comes * in as and when someone implements it * * For most hosts over 99% of packets match the first conditional * and don't go via ip_chk_addr. Note: brd is set to IS_MYADDR at * function entry. */ if ( iph->daddr != skb->dev->pa_addr && (brd = ip_chk_addr(iph->daddr)) == 0) { /* * Don't forward multicast or broadcast frames. */ if(skb->pkt_type!=PACKET_HOST || brd==IS_BROADCAST) { kfree_skb(skb,FREE_WRITE); return 0; } /* * The packet is for another target. Forward the frame */#ifdef CONFIG_IP_FORWARD ip_forward(skb, dev, is_frag);#else/* printk("Machine %lx tried to use us as a forwarder to %lx but we have forwarding disabled!\n", iph->saddr,iph->daddr);*/ ip_statistics.IpInAddrErrors++;#endif /* * The forwarder is inefficient and copies the packet. We * free the original now. */ kfree_skb(skb, FREE_WRITE); return(0); } #ifdef CONFIG_IP_MULTICAST if(brd==IS_MULTICAST && iph->daddr!=IGMP_ALL_HOSTS && !(dev->flags&IFF_LOOPBACK)) { /* * Check it is for one of our groups */ struct ip_mc_list *ip_mc=dev->ip_mc_list; do { if(ip_mc==NULL) { kfree_skb(skb, FREE_WRITE); return 0; } if(ip_mc->multiaddr==iph->daddr) break; ip_mc=ip_mc->next; } while(1); }#endif /* * Account for the packet */ #ifdef CONFIG_IP_ACCT ip_acct_cnt(iph,dev, ip_acct_chain);#endif /* * Reassemble IP fragments. */ // 分片重组 if(is_frag) { /* Defragment. Obtain the complete packet if there is one */ skb=ip_defrag(iph,skb,dev); if(skb==NULL) return 0; skb->dev = dev; iph=skb->h.iph; } /* * Point into the IP datagram, just past the header. 
*/ skb->ip_hdr = iph; // 往上层传之前先指向上层的头 skb->h.raw += iph->ihl*4; /* * Deliver to raw sockets. This is fun as to avoid copies we want to make no surplus copies. */ hash = iph->protocol & (SOCK_ARRAY_SIZE-1); /* If there maybe a raw socket we must check - if not we don't care less */ if((raw_sk=raw_prot.sock_array[hash])!=NULL) { struct sock *sknext=NULL; struct sk_buff *skb1; // 找对应的socket raw_sk=get_sock_raw(raw_sk, hash, iph->saddr, iph->daddr); if(raw_sk) /* Any raw sockets */ { do { /* Find the next */ // 从队列中raw_sk的下一个节点开始找满足条件的socket,因为之前的的肯定不满足条件了 sknext=get_sock_raw(raw_sk->next, hash, iph->saddr, iph->daddr); // 复制一份skb给符合条件的socket if(sknext) skb1=skb_clone(skb, GFP_ATOMIC); else break; /* One pending raw socket left */ if(skb1) raw_rcv(raw_sk, skb1, dev, iph->saddr,iph->daddr); // 记录最近符合条件的socket raw_sk=sknext; } while(raw_sk!=NULL); /* Here either raw_sk is the last raw socket, or NULL if none */ /* We deliver to the last raw socket AFTER the protocol checks as it avoids a surplus copy */ } } /* * skb->h.raw now points at the protocol beyond the IP header. */ // 传给ip层的上传协议 hash = iph->protocol & (MAX_INET_PROTOS -1); // 获取哈希链表中的一个队列,遍历 for (ipprot = (struct inet_protocol *)inet_protos[hash];ipprot != NULL;ipprot=(struct inet_protocol *)ipprot->next) { struct sk_buff *skb2; if (ipprot->protocol != iph->protocol) continue; /* * See if we need to make a copy of it. This will * only be set if more than one protocol wants it. * and then not for the last one. If there is a pending * raw delivery wait for that */ /* 是否需要复制一份skb,copy字段这个版本中都是0,有多个一样的协议才需要复制一份, 否则一份就够,因为只有一个协议需要使用,raw_sk的值是上面代码决定的 */ if (ipprot->copy || raw_sk) { skb2 = skb_clone(skb, GFP_ATOMIC); if(skb2==NULL) continue; } else { skb2 = skb; } // 找到了处理该数据包的上层协议 flag = 1; /* * Pass on the datagram to each protocol that wants it, * based on the datagram protocol. We should really * check the protocol handler's return values here... */ ipprot->handler(skb2, dev, opts_p ? 
&opt : 0, iph->daddr, (ntohs(iph->tot_len) - (iph->ihl * 4)), iph->saddr, 0, ipprot); } /* * All protocols checked. * If this packet was a broadcast, we may *not* reply to it, since that * causes (proven, grin) ARP storms and a leakage of memory (i.e. all * ICMP reply messages get queued up for transmission...) */ if(raw_sk!=NULL) /* Shift to last raw user */ raw_rcv(raw_sk, skb, dev, iph->saddr, iph->daddr); // 没找到处理该数据包的上层协议,报告错误 else if (!flag) /* Free and report errors */ { // 不是广播不是多播,发送目的地不可达的icmp包 if (brd != IS_BROADCAST && brd!=IS_MULTICAST) icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PROT_UNREACH, 0, dev); kfree_skb(skb, FREE_WRITE); } return(0);}
ip层遍历inet_protos数组,找到和ip头中指定的协议相等的协议,把数据包交给该协议的处理函数。比如tcp协议对应的处理函数是tcp_rcv,该函数把skb挂载到socket的接收队列等待读取,或者建立一个连接等。应用层使用read函数进行读取的时候,就从接收队列摘下一个skb。至此,一个数据包从网卡到应用层的过程就结束了。
发表评论
最新留言
感谢大佬
[***.8.128.20]2025年03月30日 16时21分28秒
关于作者

喝酒易醉,品茶养心,人生如梦,品茶悟道,何以解忧?唯有杜康!
-- 愿君每日到此一游!
推荐文章
vue 导出Excel乱码问题解决方案
2021-05-10
eggjs validate no function 解决方案
2021-05-10
Permission denied 解决方案
2021-05-10
iOS_图片添加水印_文本倾斜
2021-05-10
iOS_Runtime3_动态添加方法
2021-05-10
iOS_Runtime4_动态添加属性
2021-05-10
Docker配置文件
2021-05-10
JWT的介绍、代码实现与解决方案
2021-05-10
PNFT邮票数字资产化,科技、美学与价值的完美融合
2021-05-10
5G赋能新文旅 巡天遥看一千河
2021-05-10
Dijkstra算法之matlab实现
2021-05-10
嵌入式中使用命令行调试程序
2021-05-10
Unsupported SFP+ Module
2021-05-10
linux 中改变运行程序的 stdout
2021-05-10
dpdk 程序创建 kni 虚拟网络接口失败的问题
2021-05-10
uio 设备文件创建以及 iommu 导致 x710 网卡初始化失败问题
2021-05-10
patchelf 的功能以及使用 patchelf 修改 rpath 以解决动态库问题
2021-05-10
关调度与关中断
2021-05-10
rt-thread 使用心得
2021-05-10