/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		The Internet Protocol (IP) output module.
 *
 * Version:	@(#)ip.c	1.0.16b	9/1/93
 *
 * Authors:	Ross Biro, <bir7@leland.Stanford.Edu>
 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *		Donald Becker, <becker@super.org>
 *		Alan Cox, <Alan.Cox@linux.org>
 *		Richard Underwood
 *		Stefan Becker, <stefanb@yello.ping.de>
 *		Jorge Cwik, <jorge@laser.satlink.net>
 *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
 *
 *	See ip_input.c for original log
 *
 *	Fixes:
 *		Alan Cox	:	Missing nonblock feature in ip_build_xmit.
 */

#include <asm/segment.h>
#include <asm/system.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/config.h>

#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/proc_fs.h>
#include <linux/stat.h>

#include <net/snmp.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <net/route.h>
#include <net/tcp.h>
#include <net/udp.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <net/arp.h>
#include <net/icmp.h>
#include <net/raw.h>
#include <net/checksum.h>
#include <linux/igmp.h>
#include <linux/ip_fw.h>
#include <linux/firewall.h>
#include <linux/mroute.h>
#include <net/netlink.h>

/*
 *	Loop a packet back to the sender.
 */

static void ip_loopback(struct device *old_dev, struct sk_buff *skb)
{
	struct device *dev=&loopback_dev;
	int len=ntohs(skb->ip_hdr->tot_len);
	struct sk_buff *newskb=dev_alloc_skb(len+dev->hard_header_len+15);

	if(newskb==NULL)
		return;

	newskb->link3=NULL;
	newskb->sk=NULL;
	newskb->dev=dev;
	newskb->saddr=skb->saddr;
	newskb->daddr=skb->daddr;
	newskb->raddr=skb->raddr;
	newskb->free=1;
	newskb->lock=0;
	newskb->users=0;
	newskb->pkt_type=skb->pkt_type;

	/*
	 *	Put a MAC header on the packet
	 */
	ip_send(NULL,newskb, skb->ip_hdr->daddr, len, dev, skb->ip_hdr->saddr);
	/*
	 *	Add the rest of the data space.
	 */
	newskb->ip_hdr=(struct iphdr *)skb_put(newskb, len);
	memcpy(newskb->proto_priv, skb->proto_priv, sizeof(skb->proto_priv));

	/*
	 *	Copy the data
	 */
	memcpy(newskb->ip_hdr,skb->ip_hdr,len);

	/* Recurse. The device check against IFF_LOOPBACK will stop infinite recursion */

	/*printk("Loopback output queued [%lX to %lX].\n", newskb->ip_hdr->saddr,newskb->ip_hdr->daddr);*/
	ip_queue_xmit(NULL, dev, newskb, 1);
}



/*
 *	Take an skb, and fill in the MAC header.
 */

int ip_send(struct rtable * rt, struct sk_buff *skb, __u32 daddr, int len, struct device *dev, __u32 saddr)
{
	int mac = 0;

	skb->dev = dev;
	skb->arp = 1;
	skb->protocol = htons(ETH_P_IP);
	if (dev->hard_header)
	{
		/*
		 *	Build a hardware header. Source address is our mac, destination unknown
		 *	(rebuild header will sort this out)
		 */
		skb_reserve(skb,(dev->hard_header_len+15)&~15);	/* 16 byte aligned IP headers are good */
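		/*
		 * A worked example of the round-up above (assuming plain
		 * Ethernet): hard_header_len is 14, so (14+15)&~15 reserves
		 * 16 bytes, and after the later skb_push() of 14 bytes the
		 * IP header that follows starts on a 16 byte boundary.
		 */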
		if (rt && dev == rt->rt_dev && rt->rt_hh)
		{
			memcpy(skb_push(skb,dev->hard_header_len),rt->rt_hh->hh_data,dev->hard_header_len);
			if (rt->rt_hh->hh_uptodate)
				return dev->hard_header_len;
#if RT_CACHE_DEBUG >= 2
			printk("ip_send: hh miss %08x via %08x\n", daddr, rt->rt_gateway);
#endif
			skb->arp = 0;
			skb->raddr = daddr;
			return -dev->hard_header_len;
		}
		mac = dev->hard_header(skb, dev, ETH_P_IP, NULL, NULL, len);
		if (mac < 0)
		{
			mac = -mac;
			skb->arp = 0;
			skb->raddr = daddr;	/* next routing address */
		}
	}
	return mac;
}

static int ip_send_room(struct rtable * rt, struct sk_buff *skb, __u32 daddr, int len, struct device *dev, __u32 saddr)
{
	int mac = 0;

	skb->dev = dev;
	skb->arp = 1;
	skb->protocol = htons(ETH_P_IP);
	if (dev->hard_header)
	{
		skb_reserve(skb,MAX_HEADER);
		if (rt && dev == rt->rt_dev && rt->rt_hh)
		{
			memcpy(skb_push(skb,dev->hard_header_len),rt->rt_hh->hh_data,dev->hard_header_len);
			if (rt->rt_hh->hh_uptodate)
				return dev->hard_header_len;
#if RT_CACHE_DEBUG >= 2
			printk("ip_send_room: hh miss %08x via %08x\n", daddr, rt->rt_gateway);
#endif
			skb->arp = 0;
			skb->raddr = daddr;
			return -dev->hard_header_len;
		}
		mac = dev->hard_header(skb, dev, ETH_P_IP, NULL, NULL, len);
		if (mac < 0)
		{
			mac = -mac;
			skb->arp = 0;
			skb->raddr = daddr;	/* next routing address */
		}
	}
	return mac;
}

int ip_id_count = 0;

/*
 *	This routine builds the appropriate hardware/IP headers for
 *	the outgoing packet. It assumes that if *dev != NULL then the
 *	protocol knows what it's doing, otherwise it uses the
 *	routing/ARP tables to select a device struct.
 */
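/*
 * Illustrative call pattern only (a sketch, not code from this file):
 * a transport protocol would use it roughly like this, with dev left
 * NULL so the routing tables pick the interface.
 */
#if 0
	struct device *dev = NULL;
	int tmp = ip_build_header(skb, sk->saddr, sk->daddr, &dev,
				  IPPROTO_UDP, sk->opt, length,
				  sk->ip_tos, sk->ip_ttl, &sk->ip_route_cache);
	if (tmp < 0)
		return tmp;		/* e.g. -ENETUNREACH: no route */
	/* skb now carries MAC+IP headers; tmp is their combined length */
#endif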
int ip_build_header(struct sk_buff *skb, __u32 saddr, __u32 daddr,
		struct device **dev, int type, struct options *opt,
		int len, int tos, int ttl, struct rtable ** rp)
{
	struct rtable *rt;
	__u32 raddr;
	int tmp;
	struct iphdr *iph;
	__u32 final_daddr = daddr;


	if (opt && opt->srr)
		daddr = opt->faddr;

	/*
	 *	See if we need to look up the device.
	 */

#ifdef CONFIG_IP_MULTICAST
	if(MULTICAST(daddr) && *dev==NULL && skb->sk && *skb->sk->ip_mc_name)
		*dev=dev_get(skb->sk->ip_mc_name);
#endif
	if (rp)
	{
		rt = ip_check_route(rp, daddr, skb->localroute);
		/*
		 *	If rp != NULL, the ip_rt_put() further down must not
		 *	release the route, so take an extra reference here.
		 */
		if (rt)
			ATOMIC_INCR(&rt->rt_refcnt);
	}
	else
		rt = ip_rt_route(daddr, skb->localroute);


	if (*dev == NULL)
	{
		if (rt == NULL)
		{
			ip_statistics.IpOutNoRoutes++;
			return(-ENETUNREACH);
		}

		*dev = rt->rt_dev;
	}

	if ((LOOPBACK(saddr) && !LOOPBACK(daddr)) || !saddr)
		saddr = rt ? rt->rt_src : (*dev)->pa_addr;

	raddr = rt ? rt->rt_gateway : 0;

	if (opt && opt->is_strictroute && rt && (rt->rt_flags & RTF_GATEWAY))
	{
		ip_rt_put(rt);
		ip_statistics.IpOutNoRoutes++;
		return -ENETUNREACH;
	}

	/*
	 *	No gateway so aim at the real destination
	 */

	if (raddr == 0)
		raddr = daddr;

	/*
	 *	Now build the MAC header.
	 */

	if (type==IPPROTO_TCP)
		tmp = ip_send_room(rt, skb, raddr, len, *dev, saddr);
	else
		tmp = ip_send(rt, skb, raddr, len, *dev, saddr);

	ip_rt_put(rt);

	/*
	 *	Book keeping
	 */

	skb->dev = *dev;
	skb->saddr = saddr;

	/*
	 *	Now build the IP header.
	 */

	/*
	 *	If we are using IPPROTO_RAW, then we don't need an IP header, since
	 *	one is being supplied to us by the user
	 */

	if(type == IPPROTO_RAW)
		return (tmp);

	/*
	 *	Build the IP addresses
	 */

	if (opt)
		iph=(struct iphdr *)skb_put(skb,sizeof(struct iphdr) + opt->optlen);
	else
		iph=(struct iphdr *)skb_put(skb,sizeof(struct iphdr));

	iph->version  = 4;
	iph->ihl      = 5;
	iph->tos      = tos;
	iph->frag_off = 0;
	iph->ttl      = ttl;
	iph->daddr    = daddr;
	iph->saddr    = saddr;
	iph->protocol = type;
	skb->ip_hdr   = iph;

	if (!opt || !opt->optlen)
		return sizeof(struct iphdr) + tmp;
	iph->ihl += opt->optlen>>2;
	ip_options_build(skb, opt, final_daddr, (*dev)->pa_addr, 0);
	return iph->ihl*4 + tmp;
}


/*
 *	Generate a checksum for an outgoing IP datagram.
 */

void ip_send_check(struct iphdr *iph)
{
	iph->check = 0;
	iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
}
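/*
 * For illustration only: a portable equivalent of what ip_fast_csum()
 * computes, assuming the checksum field has been zeroed first. It is
 * the RFC 1071 one's complement sum over the header; the real routine
 * is hand-optimised assembler on each architecture.
 */
#if 0
static unsigned short example_ip_csum(unsigned char *iph, unsigned int ihl)
{
	unsigned long sum = 0;
	unsigned short *p = (unsigned short *)iph;
	unsigned int i;

	for (i = 0; i < ihl*2; i++)	/* ihl is 32-bit words: ihl*2 shorts */
		sum += *p++;
	while (sum >> 16)		/* fold the carries back in */
		sum = (sum & 0xFFFF) + (sum >> 16);
	return (unsigned short)~sum;
}
#endif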

/*
 *	Queues a packet to be sent, and starts the transmitter if
 *	necessary. If free == 1 we free the block after transmit,
 *	otherwise we don't. If free == 2 we not only free the block
 *	but also don't assign a new IP sequence number.
 *	This routine also needs to put in the total length
 *	and compute the checksum.
 */
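/*
 * For example (elsewhere in this stack, if memory serves): TCP passes
 * free == 0 so its buffers stay on sk->send_head for retransmission,
 * while ip_fragment() requeues fragments with free == 2 so they keep
 * the id they were built with.
 */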

void ip_queue_xmit(struct sock *sk, struct device *dev,
	    struct sk_buff *skb, int free)
{
	struct iphdr *iph;
/*	unsigned char *ptr;*/

	/* Sanity check */
	if (dev == NULL)
	{
		NETDEBUG(printk("IP: ip_queue_xmit dev = NULL\n"));
		return;
	}

	IS_SKB(skb);

	/*
	 *	Do some book-keeping in the packet for later
	 */


	skb->dev = dev;
	skb->when = jiffies;

	/*
	 *	Find the IP header and set the length. This is bad, but once
	 *	we get the skb data handling code in, the hardware layer will
	 *	push its header sensibly and we will set skb->ip_hdr to avoid
	 *	this mess and the fixed header length problem.
	 */

	iph = skb->ip_hdr;
	iph->tot_len = htons(skb->len-(((unsigned char *)iph)-skb->data));

#ifdef CONFIG_FIREWALL
	if(call_out_firewall(PF_INET, skb, iph) < FW_ACCEPT)
		/* just don't send this packet */
		return;
#endif

	/*
	 *	No reassigning numbers to fragments...
	 */

	if(free!=2)
		iph->id = htons(ip_id_count++);
	else
		free=1;

	/* All buffers without an owner socket get freed */
	if (sk == NULL)
		free = 1;

	skb->free = free;

	/*
	 *	Do we need to fragment? Again this is inefficient. We
	 *	need to somehow lock the original buffer and use bits of it.
	 */

	if(ntohs(iph->tot_len)> dev->mtu)
	{
		ip_fragment(sk,skb,dev,0);
		IS_SKB(skb);
		kfree_skb(skb,FREE_WRITE);
		return;
	}
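	/*
	 * (E.g. a 4000 byte datagram headed out over a 1500 byte Ethernet
	 * mtu takes the branch above: ip_fragment() builds and queues the
	 * pieces and the original oversized buffer is then freed.)
	 */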

	/*
	 *	Add an IP checksum
	 */

	ip_send_check(iph);

	/*
	 *	Print the frame when debugging
	 */

	/*
	 *	More debugging: you cannot queue a packet that is already
	 *	on a list. Spot this and moan loudly.
	 */
	if (skb->next != NULL)
	{
		NETDEBUG(printk("ip_queue_xmit: next != NULL\n"));
		skb_unlink(skb);
	}

	/*
	 *	If a sender wishes the packet to remain unfreed we add it to
	 *	his send queue. This arguably belongs in the TCP level since
	 *	nobody else uses it. BUT remember IPng might change all the rules.
	 */

	if (!free)
	{
		unsigned long flags;
		/* The socket now has more outstanding blocks */

		sk->packets_out++;

		/* Protect the list for a moment */
		save_flags(flags);
		cli();

		if (skb->link3 != NULL)
		{
			NETDEBUG(printk("ip.c: link3 != NULL\n"));
			skb->link3 = NULL;
		}
		if (sk->send_head == NULL)
		{
			sk->send_tail = skb;
			sk->send_head = skb;
		}
		else
		{
			sk->send_tail->link3 = skb;
			sk->send_tail = skb;
		}
		/* skb->link3 is NULL */

		/* Interrupt restore */
		restore_flags(flags);
	}
	else
		/* Remember who owns the buffer */
		skb->sk = sk;

	/*
	 *	If the indicated interface is up and running, send the packet.
	 */

	ip_statistics.IpOutRequests++;
#ifdef CONFIG_IP_ACCT
	ip_fw_chk(iph,dev,ip_acct_chain,IP_FW_F_ACCEPT,1);
#endif

#ifdef CONFIG_IP_MULTICAST

	/*
	 *	Multicasts are looped back for other local users
	 */

	if (MULTICAST(iph->daddr) && !(dev->flags&IFF_LOOPBACK))
	{
		if(sk==NULL || sk->ip_mc_loop)
		{
			if(iph->daddr==IGMP_ALL_HOSTS || (dev->flags&IFF_ALLMULTI))
			{
				ip_loopback(dev,skb);
			}
			else
			{
				struct ip_mc_list *imc=dev->ip_mc_list;
				while(imc!=NULL)
				{
					if(imc->multiaddr==iph->daddr)
					{
						ip_loopback(dev,skb);
						break;
					}
					imc=imc->next;
				}
			}
		}
		/* Multicasts with ttl 0 must not go beyond the host */

		if(skb->ip_hdr->ttl==0)
		{
			kfree_skb(skb, FREE_READ);
			return;
		}
	}
#endif
	if((dev->flags&IFF_BROADCAST) && (iph->daddr==dev->pa_brdaddr||iph->daddr==0xFFFFFFFF) && !(dev->flags&IFF_LOOPBACK))
		ip_loopback(dev,skb);

	if (dev->flags & IFF_UP)
	{
		/*
		 *	If we have an owner use its priority setting,
		 *	otherwise use NORMAL
		 */

		if (sk != NULL)
		{
			dev_queue_xmit(skb, dev, sk->priority);
		}
		else
		{
			dev_queue_xmit(skb, dev, SOPRI_NORMAL);
		}
	}
	else
	{
		if(sk)
			sk->err = ENETDOWN;
		ip_statistics.IpOutDiscards++;
		if (free)
			kfree_skb(skb, FREE_WRITE);
	}
}


/*
 *	Build and send a packet, with as little as one copy
 *
 *	Doesn't care much about ip options... option length can be
 *	different for the fragment at offset 0 and the later fragments.
 *
 *	Note that the fragment at the highest offset is sent first,
 *	so the getfrag routine can fill in the TCP/UDP checksum header
 *	field in the last fragment it sends... actually it also helps
 *	the reassemblers, they can put most packets in at the head of
 *	the fragment queue, and they know the total size in advance. This
 *	last feature will measurably improve the Linux fragment handler.
 *
 *	The callback has five args: an arbitrary pointer (a copy of frag),
 *	the source IP address (which may depend on the routing table), the
 *	destination buffer (char *), the offset to copy from, and the
 *	length to be copied.
 */

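/*
 * A hypothetical getfrag callback, for illustration only (the real
 * ones live with UDP and raw IP): it just copies from a flat buffer
 * that 'frag' points at. A real UDP version would also fold the
 * copied bytes into a running checksum as it goes.
 */
#if 0
static void example_getfrag(const void *p, __u32 saddr, char *to,
		unsigned int offset, unsigned int fraglen)
{
	memcpy(to, ((const char *)p) + offset, fraglen);
}
#endif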
int ip_build_xmit(struct sock *sk,
		  void getfrag (const void *,
				__u32,
				char *,
				unsigned int,
				unsigned int),
		  const void *frag,
		  unsigned short int length,
		  __u32 daddr,
		  __u32 user_saddr,
		  struct options * opt,
		  int flags,
		  int type,
		  int noblock)
{
	struct rtable *rt;
	unsigned int fraglen, maxfraglen, fragheaderlen;
	int offset, mf;
	__u32 saddr;
	unsigned short id;
	struct iphdr *iph;
	__u32 raddr;
	struct device *dev = NULL;
	struct hh_cache * hh=NULL;
	int nfrags=0;
	__u32 true_daddr = daddr;

	if (opt && opt->srr && !sk->ip_hdrincl)
		daddr = opt->faddr;

	ip_statistics.IpOutRequests++;

#ifdef CONFIG_IP_MULTICAST
	if(sk && MULTICAST(daddr) && *sk->ip_mc_name)
	{
		dev=dev_get(sk->ip_mc_name);
		if(!dev)
			return -ENODEV;
		rt=NULL;
		if (sk->saddr && (!LOOPBACK(sk->saddr) || LOOPBACK(daddr)))
			saddr = sk->saddr;
		else
			saddr = dev->pa_addr;
	}
	else
	{
#endif
		rt = ip_check_route(&sk->ip_route_cache, daddr,
				    sk->localroute || (flags&MSG_DONTROUTE) ||
				    (opt && opt->is_strictroute));
		if (rt == NULL)
		{
			ip_statistics.IpOutNoRoutes++;
			return(-ENETUNREACH);
		}
		saddr = rt->rt_src;

		hh = rt->rt_hh;

		if (sk->saddr && (!LOOPBACK(sk->saddr) || LOOPBACK(daddr)))
			saddr = sk->saddr;

		dev=rt->rt_dev;
#ifdef CONFIG_IP_MULTICAST
	}
	if (rt && !dev)
		dev = rt->rt_dev;
#endif
	if (user_saddr)
		saddr = user_saddr;

	raddr = rt ? rt->rt_gateway : daddr;
	/*
	 *	Now compute the buffer space we require
	 */

	/*
	 *	Try the simple case first. This leaves broadcast, multicast,
	 *	fragmented frames, and by choice RAW frames within 20 bytes of
	 *	maximum size (rare) to the long path.
	 */

	length += sizeof(struct iphdr);
	if (!sk->ip_hdrincl && opt)
		length += opt->optlen;

	if(length <= dev->mtu && !MULTICAST(daddr) && daddr!=0xFFFFFFFF && daddr!=dev->pa_brdaddr)
	{
		int error;
		struct sk_buff *skb=sock_alloc_send_skb(sk, length+15+dev->hard_header_len,0, noblock, &error);
		if(skb==NULL)
		{
			ip_statistics.IpOutDiscards++;
			return error;
		}
		skb->dev=dev;
		skb->free=1;
		skb->when=jiffies;
		skb->sk=sk;
		skb->arp=0;
		skb->saddr=saddr;
		skb->raddr = raddr;
		skb_reserve(skb,(dev->hard_header_len+15)&~15);
		if (hh)
		{
			skb->arp=1;
			memcpy(skb_push(skb,dev->hard_header_len),hh->hh_data,dev->hard_header_len);
			if (!hh->hh_uptodate)
			{
				skb->arp = 0;
#if RT_CACHE_DEBUG >= 2
				printk("ip_build_xmit: hh miss %08x via %08x\n", rt->rt_dst, rt->rt_gateway);
#endif
			}
		}
		else if(dev->hard_header)
		{
			if(dev->hard_header(skb,dev,ETH_P_IP,NULL,NULL,0)>0)
				skb->arp=1;
		}
		else
			skb->arp=1;
		skb->ip_hdr=iph=(struct iphdr *)skb_put(skb,length);
		dev_lock_list();
		if(!sk->ip_hdrincl)
		{
			iph->version=4;
			iph->ihl=5;
			iph->tos=sk->ip_tos;
			iph->tot_len = htons(length);
			iph->id=htons(ip_id_count++);
			iph->frag_off = 0;
			iph->ttl=sk->ip_ttl;
			iph->protocol=type;
			iph->saddr=saddr;
			iph->daddr=daddr;
			if (opt)
			{
				iph->ihl += opt->optlen>>2;
				ip_options_build(skb, opt,
						 true_daddr, dev->pa_addr, 0);
			}
			iph->check=0;
			iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
			getfrag(frag,saddr,((char *)iph)+iph->ihl*4,0, length-iph->ihl*4);
		}
		else
			getfrag(frag,saddr,(void *)iph,0,length-20);
		dev_unlock_list();
#ifdef CONFIG_FIREWALL
		if(call_out_firewall(PF_INET, skb, iph)< FW_ACCEPT)
		{
			kfree_skb(skb, FREE_WRITE);
			return -EPERM;
		}
#endif
#ifdef CONFIG_IP_ACCT
		ip_fw_chk((void *)skb->data,dev,ip_acct_chain, IP_FW_F_ACCEPT,1);
#endif
		if(dev->flags&IFF_UP)
			dev_queue_xmit(skb,dev,sk->priority);
		else
		{
			ip_statistics.IpOutDiscards++;
			kfree_skb(skb, FREE_WRITE);
		}
		return 0;
	}
	length -= sizeof(struct iphdr);
	if (sk && !sk->ip_hdrincl && opt)
	{
		length -= opt->optlen;
		fragheaderlen = dev->hard_header_len + sizeof(struct iphdr) + opt->optlen;
		maxfraglen = ((dev->mtu-sizeof(struct iphdr)-opt->optlen) & ~7) + fragheaderlen;
	}
	else
	{
		fragheaderlen = dev->hard_header_len;
		if(!sk->ip_hdrincl)
			fragheaderlen += 20;

		/*
		 *	Fragheaderlen is the size of 'overhead' on each buffer.
		 *	Now work out the size of the frames to send.
		 */

		maxfraglen = ((dev->mtu-20) & ~7) + fragheaderlen;
	}
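	/*
	 * Worked example (assuming plain Ethernet, no options): mtu = 1500
	 * and hard_header_len = 14, so fragheaderlen = 14+20 = 34 and
	 * maxfraglen = ((1500-20) & ~7) + 34 = 1480+34 = 1514. Each full
	 * fragment thus carries 1480 data bytes, a multiple of 8 as the
	 * fragment offset field requires.
	 */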

	/*
	 *	Start at the end of the frame by handling the remainder.
	 */

	offset = length - (length % (maxfraglen - fragheaderlen));

	/*
	 *	Amount of memory to allocate for final fragment.
	 */

	fraglen = length - offset + fragheaderlen;

	if(length-offset==0)
	{
		fraglen = maxfraglen;
		offset -= maxfraglen-fragheaderlen;
	}
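	/*
	 * Continuing the Ethernet example: for length = 4000 the payload
	 * per full fragment is 1480, so offset = 4000 - (4000 % 1480) =
	 * 2960 and the tail fragment carries 1040 bytes (fraglen =
	 * 1040+34). Had length been an exact multiple of 1480 the
	 * remainder would be empty, so offset steps back one whole
	 * fragment instead.
	 */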


	/*
	 *	The last fragment will not have MF (more fragments) set.
	 */

	mf = 0;

	/*
	 *	Can't fragment raw packets
	 */

	if (sk->ip_hdrincl && offset > 0)
		return(-EMSGSIZE);

	/*
	 *	Lock the device lists.
	 */

	dev_lock_list();

	/*
	 *	Get an identifier
	 */

	id = htons(ip_id_count++);
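	/*
	 * All the fragments built below share this one id; it is what
	 * lets the receiver gather them back into a single datagram.
	 */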

	/*
	 *	Begin outputting the bytes.
	 */

	do
	{
		struct sk_buff * skb;
		int error;
		char *data;

		/*
		 *	Get the memory we require with some space left for alignment.
		 */

		skb = sock_alloc_send_skb(sk, fraglen+15, 0, noblock, &error);
		if (skb == NULL)
		{
			ip_statistics.IpOutDiscards++;
			if(nfrags>1)
				ip_statistics.IpFragCreates++;
			dev_unlock_list();
			return(error);
		}

		/*
		 *	Fill in the control structures
		 */

		skb->next = skb->prev = NULL;
		skb->dev = dev;
		skb->when = jiffies;
		skb->free = 1; /* dubious, this one */
		skb->sk = sk;
		skb->arp = 0;
		skb->saddr = saddr;
		skb->raddr = raddr;
		skb_reserve(skb,(dev->hard_header_len+15)&~15);
		data = skb_put(skb, fraglen-dev->hard_header_len);

		/*
		 *	Saves us ARP and stuff. In the optimal case we do no
		 *	route lookup (route cache ok), no ARP lookup (ARP cache
		 *	ok) and output. The cache checks are still too slow but
		 *	this can be fixed later. For gateway routes we ought to
		 *	have a rt->.. header cache pointer to speed header cache
		 *	builds for identical targets.
		 */

		if (hh)
		{
			skb->arp=1;
			memcpy(skb_push(skb,dev->hard_header_len),hh->hh_data,dev->hard_header_len);
			if (!hh->hh_uptodate)
			{
				skb->arp = 0;
#if RT_CACHE_DEBUG >= 2
				printk("ip_build_xmit: hh miss %08x via %08x\n", rt->rt_dst, rt->rt_gateway);
#endif
			}
		}
		else if (dev->hard_header)
		{
			if(dev->hard_header(skb, dev, ETH_P_IP,
					    NULL, NULL, 0)>0)
				skb->arp=1;
		}

		/*
		 *	Find where to start putting bytes.
		 */

		skb->ip_hdr = iph = (struct iphdr *)data;

		/*
		 *	Only write IP header onto non-raw packets
		 */

		if(!sk->ip_hdrincl)
		{

			iph->version = 4;
			iph->ihl = 5; /* ugh */
			if (opt) {
				iph->ihl += opt->optlen>>2;
				ip_options_build(skb, opt,
						 true_daddr, dev->pa_addr, offset);
			}
			iph->tos = sk->ip_tos;
			iph->tot_len = htons(fraglen - fragheaderlen + iph->ihl*4);
			iph->id = id;
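			/*
			 * The fragment offset field below counts 8 byte
			 * units, hence the shift; mf (IP_MF, already in
			 * network order) marks every fragment but the last.
			 */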
			iph->frag_off = htons(offset>>3);
			iph->frag_off |= mf;
#ifdef CONFIG_IP_MULTICAST
			if (MULTICAST(daddr))
				iph->ttl = sk->ip_mc_ttl;
			else
#endif
				iph->ttl = sk->ip_ttl;
			iph->protocol = type;
			iph->check = 0;
			iph->saddr = saddr;
			iph->daddr = daddr;
			iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
			data += iph->ihl*4;

			/*
			 *	Any further fragments will have MF set.
			 */

			mf = htons(IP_MF);
		}

		/*
		 *	User data callback
		 */

		getfrag(frag, saddr, data, offset, fraglen-fragheaderlen);

		/*
		 *	Account for the fragment.
		 */

#ifdef CONFIG_FIREWALL
		if(!offset && call_out_firewall(PF_INET, skb, iph) < FW_ACCEPT)
		{
			kfree_skb(skb, FREE_WRITE);
			dev_unlock_list();
			return -EPERM;
		}
#endif
#ifdef CONFIG_IP_ACCT
		if(!offset)
			ip_fw_chk(iph, dev, ip_acct_chain, IP_FW_F_ACCEPT, 1);
#endif
		offset -= (maxfraglen-fragheaderlen);
		fraglen = maxfraglen;

#ifdef CONFIG_IP_MULTICAST

		/*
		 *	Multicasts are looped back for other local users
		 */

		if (MULTICAST(daddr) && !(dev->flags&IFF_LOOPBACK))
		{
			/*
			 *	Loop back any frames. The check for IGMP_ALL_HOSTS is because
			 *	you are always magically a member of this group.
			 *
			 *	Always loop back all host messages when running as a multicast router.
			 */

			if(sk==NULL || sk->ip_mc_loop)
			{
				if(daddr==IGMP_ALL_HOSTS || (dev->flags&IFF_ALLMULTI))
					ip_loopback(dev,skb);
				else
				{
					struct ip_mc_list *imc=dev->ip_mc_list;
					while(imc!=NULL)
					{
						if(imc->multiaddr==daddr)
						{
							ip_loopback(dev,skb);
							break;
						}
						imc=imc->next;
					}
				}
			}

			/*
			 *	Multicasts with ttl 0 must not go beyond the host.
			 *	Fixme: avoid the extra clone.
			 */

			if(skb->ip_hdr->ttl==0)
				kfree_skb(skb, FREE_READ);
		}
#endif

		nfrags++;

		/*
		 *	BSD loops broadcasts
		 */

		if((dev->flags&IFF_BROADCAST) && (daddr==0xFFFFFFFF || daddr==dev->pa_brdaddr) && !(dev->flags&IFF_LOOPBACK))
			ip_loopback(dev,skb);

		/*
		 *	Now queue the bytes into the device.
		 */

		if (dev->flags & IFF_UP)
		{
			dev_queue_xmit(skb, dev, sk->priority);
		}
		else
		{
			/*
			 *	Whoops...
			 */

			ip_statistics.IpOutDiscards++;
			if(nfrags>1)
				ip_statistics.IpFragCreates+=nfrags;
			kfree_skb(skb, FREE_WRITE);
			dev_unlock_list();
			/*
			 *	BSD behaviour.
			 */
			if(sk!=NULL)
				sk->err=ENETDOWN;
			return(0);	/* lose rest of fragments */
		}
	}
	while (offset >= 0);
	if(nfrags>1)
		ip_statistics.IpFragCreates+=nfrags;
	dev_unlock_list();
	return(0);
}


/*
 *	IP protocol layer initialiser
 */

static struct packet_type ip_packet_type =
{
	0,		/* MUTTER ntohs(ETH_P_IP),*/
	NULL,		/* All devices */
	ip_rcv,
	NULL,
	NULL,
};
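/*
 * (Reading guide, assuming the usual struct packet_type layout of this
 * era: the positional initialisers above are type, device (NULL = all
 * devices), receive handler, private data and next. The type itself is
 * filled in at run time by ip_init() below.)
 */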

#ifdef CONFIG_RTNETLINK

/*
 *	Netlink hooks for IP
 */

void ip_netlink_msg(unsigned long msg, __u32 daddr, __u32 gw, __u32 mask, short flags, short metric, char *name)
{
	struct sk_buff *skb=alloc_skb(sizeof(struct netlink_rtinfo), GFP_ATOMIC);
	struct netlink_rtinfo *nrt;
	struct sockaddr_in *s;
	if(skb==NULL)
		return;
	nrt=(struct netlink_rtinfo *)skb_put(skb, sizeof(struct netlink_rtinfo));
	nrt->rtmsg_type=msg;
	s=(struct sockaddr_in *)&nrt->rtmsg_dst;
	s->sin_family=AF_INET;
	s->sin_addr.s_addr=daddr;
	s=(struct sockaddr_in *)&nrt->rtmsg_gateway;
	s->sin_family=AF_INET;
	s->sin_addr.s_addr=gw;
	s=(struct sockaddr_in *)&nrt->rtmsg_genmask;
	s->sin_family=AF_INET;
	s->sin_addr.s_addr=mask;
	nrt->rtmsg_flags=flags;
	nrt->rtmsg_metric=metric;
	strcpy(nrt->rtmsg_device,name);
	netlink_post(NETLINK_ROUTE, skb);
}

#endif

/*
 *	Device notifier
 */

static int ip_rt_event(struct notifier_block *this, unsigned long event, void *ptr)
{
	struct device *dev=ptr;
	if(event==NETDEV_DOWN)
	{
		ip_netlink_msg(RTMSG_DELDEVICE, 0,0,0,0,0,dev->name);
		ip_rt_flush(dev);
	}
	/*
	 *	Join the initial group if multicast.
	 */
	if(event==NETDEV_UP)
	{
#ifdef CONFIG_IP_MULTICAST
		ip_mc_allhost(dev);
#endif
		ip_netlink_msg(RTMSG_NEWDEVICE, 0,0,0,0,0,dev->name);
	}
	return NOTIFY_DONE;
}

struct notifier_block ip_rt_notifier={
	ip_rt_event,
	NULL,
	0
};

/*
 *	IP registers the packet type and then calls the subprotocol initialisers
 */

void ip_init(void)
{
	ip_packet_type.type=htons(ETH_P_IP);
	dev_add_pack(&ip_packet_type);

	/* So we flush routes when a device is downed */
	register_netdevice_notifier(&ip_rt_notifier);

/*	ip_raw_init();
	ip_packet_init();
	ip_tcp_init();
	ip_udp_init();*/

#ifdef CONFIG_IP_MULTICAST
	proc_net_register(&(struct proc_dir_entry) {
		PROC_NET_IGMP, 4, "igmp",
		S_IFREG | S_IRUGO, 1, 0, 0,
		0, &proc_net_inode_operations,
		ip_mc_procinfo
	});
#endif
}