/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		The Internet Protocol (IP) output module.
 *
 * Version:	@(#)ip.c	1.0.16b	9/1/93
 *
 * Authors:	Ross Biro, <bir7@leland.Stanford.Edu>
 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *		Donald Becker, <becker@super.org>
 *		Alan Cox, <Alan.Cox@linux.org>
 *		Richard Underwood
 *		Stefan Becker, <stefanb@yello.ping.de>
 *		Jorge Cwik, <jorge@laser.satlink.net>
 *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
 *
 *	See ip_input.c for original log
 *
 *	Fixes:
 *		Alan Cox	:	Missing nonblock feature in ip_build_xmit.
 *		Mike Kilburn	:	htons() missing in ip_build_xmit.
 */

#include <asm/segment.h>
#include <asm/system.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/config.h>

#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/proc_fs.h>
#include <linux/stat.h>

#include <net/snmp.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <net/route.h>
#include <net/tcp.h>
#include <net/udp.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <net/arp.h>
#include <net/icmp.h>
#include <net/raw.h>
#include <net/checksum.h>
#include <linux/igmp.h>
#include <linux/ip_fw.h>
#include <linux/firewall.h>
#include <linux/mroute.h>
#include <net/netlink.h>

/*
 *	Loop a packet back to the sender.
 */

static void ip_loopback(struct device *old_dev, struct sk_buff *skb)
{
	struct device *dev=&loopback_dev;
	int len=ntohs(skb->ip_hdr->tot_len);
	struct sk_buff *newskb=dev_alloc_skb(len+dev->hard_header_len+15);

	if(newskb==NULL)
		return;

	newskb->link3=NULL;
	newskb->sk=NULL;
	newskb->dev=dev;
	newskb->saddr=skb->saddr;
	newskb->daddr=skb->daddr;
	newskb->raddr=skb->raddr;
	newskb->free=1;
	newskb->lock=0;
	newskb->users=0;
	newskb->pkt_type=skb->pkt_type;

	/*
	 *	Put a MAC header on the packet
	 */
	ip_send(NULL,newskb, skb->ip_hdr->daddr, len, dev, skb->ip_hdr->saddr);
	/*
	 *	Add the rest of the data space.
	 */
	newskb->ip_hdr=(struct iphdr *)skb_put(newskb, len);
	memcpy(newskb->proto_priv, skb->proto_priv, sizeof(skb->proto_priv));

	/*
	 *	Copy the data
	 */
	memcpy(newskb->ip_hdr,skb->ip_hdr,len);

	/* Recurse. The device check against IFF_LOOPBACK will stop infinite recursion */

	/*printk("Loopback output queued [%lX to %lX].\n", newskb->ip_hdr->saddr,newskb->ip_hdr->daddr);*/
	ip_queue_xmit(NULL, dev, newskb, 1);
}


/*
 *	Take an skb, and fill in the MAC header.
 */

int ip_send(struct rtable * rt, struct sk_buff *skb, __u32 daddr, int len, struct device *dev, __u32 saddr)
{
	int mac = 0;

	skb->dev = dev;
	skb->arp = 1;
	skb->protocol = htons(ETH_P_IP);
	if (dev->hard_header)
	{
		/*
		 *	Build a hardware header. Source address is our mac, destination unknown
		 *	(rebuild header will sort this out)
		 */
		skb_reserve(skb,(dev->hard_header_len+15)&~15);	/* 16 byte aligned IP headers are good */
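		/*
		 *	For example, on Ethernet (hard_header_len 14) this
		 *	reserves 16 bytes, so after the 14 byte MAC header is
		 *	pushed back the IP header begins on a 16 byte boundary
		 *	(assuming the buffer head itself is suitably aligned).
		 */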
		if (rt && dev == rt->rt_dev && rt->rt_hh)
		{
			memcpy(skb_push(skb,dev->hard_header_len),rt->rt_hh->hh_data,dev->hard_header_len);
			if (rt->rt_hh->hh_uptodate)
				return dev->hard_header_len;
#if RT_CACHE_DEBUG >= 2
			printk("ip_send: hh miss %08x via %08x\n", daddr, rt->rt_gateway);
#endif
			skb->arp = 0;
			skb->raddr = daddr;
			return -dev->hard_header_len;
		}
		mac = dev->hard_header(skb, dev, ETH_P_IP, NULL, NULL, len);
		if (mac < 0)
		{
			mac = -mac;
			skb->arp = 0;
			skb->raddr = daddr;	/* next routing address */
		}
	}
	return mac;
}
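
/*
 *	A note for readers: when the cached hardware header is up to date,
 *	the functions above and below return with skb->arp set, i.e. the
 *	frame can go out as is. When link-level resolution is still pending
 *	they clear skb->arp and record the next hop in skb->raddr, so the
 *	ARP layer can rebuild the header before the frame is actually
 *	transmitted. ip_send_room() below differs from ip_send() only in
 *	reserving MAX_HEADER bytes, leaving room for headers to grow (TCP).
 */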

static int ip_send_room(struct rtable * rt, struct sk_buff *skb, __u32 daddr, int len, struct device *dev, __u32 saddr)
{
	int mac = 0;

	skb->dev = dev;
	skb->arp = 1;
	skb->protocol = htons(ETH_P_IP);
	if (dev->hard_header)
	{
		skb_reserve(skb,MAX_HEADER);
		if (rt && dev == rt->rt_dev && rt->rt_hh)
		{
			memcpy(skb_push(skb,dev->hard_header_len),rt->rt_hh->hh_data,dev->hard_header_len);
			if (rt->rt_hh->hh_uptodate)
				return dev->hard_header_len;
#if RT_CACHE_DEBUG >= 2
			printk("ip_send_room: hh miss %08x via %08x\n", daddr, rt->rt_gateway);
#endif
			skb->arp = 0;
			skb->raddr = daddr;
			return -dev->hard_header_len;
		}
		mac = dev->hard_header(skb, dev, ETH_P_IP, NULL, NULL, len);
		if (mac < 0)
		{
			mac = -mac;
			skb->arp = 0;
			skb->raddr = daddr;	/* next routing address */
		}
	}
	return mac;
}

int ip_id_count = 0;

/*
 *	This routine builds the appropriate hardware/IP headers for
 *	an outgoing packet. It assumes that if *dev != NULL then the
 *	protocol knows what it's doing, otherwise it uses the
 *	routing/ARP tables to select a device struct.
 */
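/*
 *	Usage sketch (illustrative only, there are no callers in this
 *	file): a transport protocol with no device in hand might do
 *
 *		struct device *dev = NULL;
 *		int tmp = ip_build_header(skb, 0, daddr, &dev, IPPROTO_UDP,
 *					  opt, len, sk->ip_tos, sk->ip_ttl,
 *					  &sk->ip_route_cache);
 *		if (tmp < 0)
 *			return tmp;		(error, e.g. no route)
 *
 *	On success tmp is the number of header bytes already built.
 */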
int ip_build_header(struct sk_buff *skb, __u32 saddr, __u32 daddr,
		    struct device **dev, int type, struct options *opt,
		    int len, int tos, int ttl, struct rtable ** rp)
{
	struct rtable *rt;
	__u32 raddr;
	int tmp;
	struct iphdr *iph;
	__u32 final_daddr = daddr;


	if (opt && opt->srr)
		daddr = opt->faddr;

	/*
	 *	See if we need to look up the device.
	 */

#ifdef CONFIG_IP_MULTICAST
	if(MULTICAST(daddr) && *dev==NULL && skb->sk && *skb->sk->ip_mc_name)
		*dev=dev_get(skb->sk->ip_mc_name);
#endif
	if (rp)
	{
		rt = ip_check_route(rp, daddr, skb->localroute);
		/*
		 *	If rp != NULL, the ip_rt_put() below must not release
		 *	the caller's cached route, so take an extra reference.
		 */
		if (rt)
			ATOMIC_INCR(&rt->rt_refcnt);
	}
	else
		rt = ip_rt_route(daddr, skb->localroute);


	if (*dev == NULL)
	{
		if (rt == NULL)
		{
			ip_statistics.IpOutNoRoutes++;
			return(-ENETUNREACH);
		}

		*dev = rt->rt_dev;
	}

	if ((LOOPBACK(saddr) && !LOOPBACK(daddr)) || !saddr)
		saddr = rt ? rt->rt_src : (*dev)->pa_addr;

	raddr = rt ? rt->rt_gateway : 0;

	if (opt && opt->is_strictroute && rt && (rt->rt_flags & RTF_GATEWAY))
	{
		ip_rt_put(rt);
		ip_statistics.IpOutNoRoutes++;
		return -ENETUNREACH;
	}

	/*
	 *	No gateway, so aim at the real destination
	 */

	if (raddr == 0)
		raddr = daddr;

	/*
	 *	Now build the MAC header.
	 */

	if (type==IPPROTO_TCP)
		tmp = ip_send_room(rt, skb, raddr, len, *dev, saddr);
	else
		tmp = ip_send(rt, skb, raddr, len, *dev, saddr);

	ip_rt_put(rt);

	/*
	 *	Book keeping
	 */

	skb->dev = *dev;
	skb->saddr = saddr;

	/*
	 *	Now build the IP header.
	 */

	/*
	 *	If we are using IPPROTO_RAW, then we don't need an IP header, since
	 *	one is being supplied to us by the user
	 */

	if(type == IPPROTO_RAW)
		return (tmp);

	/*
	 *	Build the IP addresses
	 */

	if (opt)
		iph=(struct iphdr *)skb_put(skb,sizeof(struct iphdr) + opt->optlen);
	else
		iph=(struct iphdr *)skb_put(skb,sizeof(struct iphdr));

	iph->version = 4;
	iph->ihl = 5;
	iph->tos = tos;
	iph->frag_off = 0;
	iph->ttl = ttl;
	iph->daddr = daddr;
	iph->saddr = saddr;
	iph->protocol = type;
	skb->ip_hdr = iph;

	if (!opt || !opt->optlen)
		return sizeof(struct iphdr) + tmp;
	iph->ihl += opt->optlen>>2;
	ip_options_build(skb, opt, final_daddr, (*dev)->pa_addr, 0);
	return iph->ihl*4 + tmp;
}


/*
 *	Generate a checksum for an outgoing IP datagram.
 */

void ip_send_check(struct iphdr *iph)
{
	iph->check = 0;
	iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
}
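
/*
 *	For reference: this is the standard Internet checksum (RFC 1071),
 *	the one's complement of the one's complement sum of the header
 *	taken as 16-bit words. A portable but slow sketch of what the
 *	assembler ip_fast_csum() computes (ihl counts 32-bit words):
 *
 *		__u32 sum = 0;
 *		__u16 *p = (__u16 *)iph;
 *		int words = iph->ihl * 2;
 *		while (words--)
 *			sum += *p++;
 *		while (sum >> 16)
 *			sum = (sum & 0xFFFF) + (sum >> 16);
 *		iph->check = (__u16)~sum;
 */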

/*
 *	Queues a packet to be sent, and starts the transmitter if
 *	necessary. If free == 1 we free the block after transmit,
 *	otherwise we don't. If free == 2 we not only free the block
 *	but also refrain from assigning a new IP id number.
 *	This routine also needs to fill in the total length and
 *	compute the checksum.
 */
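
/*
 *	In short (a reader's summary of the above, not new semantics):
 *
 *		free == 0	the caller keeps the buffer; it is chained
 *				on sk->send_head for retransmission (TCP).
 *		free == 1	the buffer is freed once transmitted.
 *		free == 2	as 1, but the existing iph->id is kept;
 *				used for fragments, which must share the
 *				original datagram's id.
 */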

void ip_queue_xmit(struct sock *sk, struct device *dev,
		   struct sk_buff *skb, int free)
{
	struct iphdr *iph;
/*	unsigned char *ptr;*/

	/* Sanity check */
	if (dev == NULL)
	{
		NETDEBUG(printk("IP: ip_queue_xmit dev = NULL\n"));
		return;
	}

	IS_SKB(skb);

	/*
	 *	Do some book-keeping in the packet for later
	 */


	skb->dev = dev;
	skb->when = jiffies;

	/*
	 *	Find the IP header and set the length. This is bad
	 *	but once we get the skb data handling code in, the
	 *	hardware will push its header sensibly and we will
	 *	set skb->ip_hdr to avoid this mess and the fixed
	 *	header length problem.
	 */

	iph = skb->ip_hdr;
	iph->tot_len = htons(skb->len-(((unsigned char *)iph)-skb->data));

#ifdef CONFIG_FIREWALL
	if(call_out_firewall(PF_INET, skb, iph) < FW_ACCEPT)
		/* just don't send this packet */
		return;
#endif

	/*
	 *	No reassigning numbers to fragments...
	 */

	if(free!=2)
		iph->id = htons(ip_id_count++);
	else
		free=1;

	/* All buffers without an owner socket get freed */
	if (sk == NULL)
		free = 1;

	skb->free = free;

	/*
	 *	Do we need to fragment? Again this is inefficient. We
	 *	need to somehow lock the original buffer and use bits of it.
	 */

	if(ntohs(iph->tot_len)> dev->mtu)
	{
		ip_fragment(sk,skb,dev,0);
		IS_SKB(skb);
		kfree_skb(skb,FREE_WRITE);
		return;
	}

	/*
	 *	Add an IP checksum
	 */

	ip_send_check(iph);

	/*
	 *	Print the frame when debugging
	 */

	/*
	 *	More debugging. You cannot queue a packet already on a list.
	 *	Spot this and moan loudly.
	 */
	if (skb->next != NULL)
	{
		NETDEBUG(printk("ip_queue_xmit: next != NULL\n"));
		skb_unlink(skb);
	}

	/*
	 *	If a sender wishes the packet to remain unfreed we add it to
	 *	his send queue. This arguably belongs in the TCP level since
	 *	nobody else uses it. BUT remember IPng might change all the rules.
	 */

	if (!free)
	{
		unsigned long flags;
		/* The socket now has more outstanding blocks */

		sk->packets_out++;

		/* Protect the list for a moment */
		save_flags(flags);
		cli();

		if (skb->link3 != NULL)
		{
			NETDEBUG(printk("ip.c: link3 != NULL\n"));
			skb->link3 = NULL;
		}
		if (sk->send_head == NULL)
		{
			sk->send_tail = skb;
			sk->send_head = skb;
		}
		else
		{
			sk->send_tail->link3 = skb;
			sk->send_tail = skb;
		}
		/* skb->link3 is NULL */

		/* Interrupt restore */
		restore_flags(flags);
	}
	else
		/* Remember who owns the buffer */
		skb->sk = sk;

	/*
	 *	If the indicated interface is up and running, send the packet.
	 */

	ip_statistics.IpOutRequests++;
#ifdef CONFIG_IP_ACCT
	ip_fw_chk(iph,dev,ip_acct_chain,IP_FW_F_ACCEPT,1);
#endif

#ifdef CONFIG_IP_MULTICAST

	/*
	 *	Multicasts are looped back for other local users
	 */
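
	/*
	 *	(Every host is implicitly a member of the all-hosts group
	 *	224.0.0.1, which is what IGMP_ALL_HOSTS denotes below.)
	 */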

	if (MULTICAST(iph->daddr) && !(dev->flags&IFF_LOOPBACK))
	{
		if(sk==NULL || sk->ip_mc_loop)
		{
			if(iph->daddr==IGMP_ALL_HOSTS || (dev->flags&IFF_ALLMULTI))
			{
				ip_loopback(dev,skb);
			}
			else
			{
				struct ip_mc_list *imc=dev->ip_mc_list;
				while(imc!=NULL)
				{
					if(imc->multiaddr==iph->daddr)
					{
						ip_loopback(dev,skb);
						break;
					}
					imc=imc->next;
				}
			}
		}
		/* Multicasts with ttl 0 must not go beyond the host */

		if(skb->ip_hdr->ttl==0)
		{
			kfree_skb(skb, FREE_READ);
			return;
		}
	}
#endif
	if((dev->flags&IFF_BROADCAST) && (iph->daddr==dev->pa_brdaddr||iph->daddr==0xFFFFFFFF) && !(dev->flags&IFF_LOOPBACK))
		ip_loopback(dev,skb);

	if (dev->flags & IFF_UP)
	{
		/*
		 *	If we have an owner use its priority setting,
		 *	otherwise use NORMAL
		 */

		if (sk != NULL)
		{
			dev_queue_xmit(skb, dev, sk->priority);
		}
		else
		{
			dev_queue_xmit(skb, dev, SOPRI_NORMAL);
		}
	}
	else
	{
		if(sk)
			sk->err = ENETDOWN;
		ip_statistics.IpOutDiscards++;
		if (free)
			kfree_skb(skb, FREE_WRITE);
	}
}


/*
 *	Build and send a packet, with as little as one copy.
 *
 *	Doesn't care much about IP options... option length can be
 *	different for the fragment at offset 0 and for the other fragments.
 *
 *	Note that the fragment at the highest offset is sent first,
 *	so the getfrag routine can fill in the TCP/UDP checksum header
 *	field in the last fragment it sends... actually it also helps
 *	the reassemblers, they can put most packets in at the head of
 *	the fragment queue, and they know the total size in advance. This
 *	last feature will measurably improve the Linux fragment handler.
 *
 *	The callback has five args: an arbitrary pointer (a copy of frag),
 *	the source IP address (which may depend on the routing table), the
 *	destination address (char *), the offset to copy from, and the
 *	length to be copied.
 */
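
/*
 *	A minimal getfrag callback (an illustrative sketch only, not one
 *	that exists in this file), copying from a flat kernel buffer
 *	passed in as 'frag':
 *
 *		static void copy_getfrag(const void *p, __u32 saddr, char *to,
 *					 unsigned int offset, unsigned int len)
 *		{
 *			memcpy(to, ((const char *)p)+offset, len);
 *		}
 *
 *	UDP and raw sockets pass callbacks of this shape so user data (and,
 *	for UDP, the checksum) can be copied straight into each fragment.
 */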

int ip_build_xmit(struct sock *sk,
		  void getfrag (const void *,
				__u32,
				char *,
				unsigned int,
				unsigned int),
		  const void *frag,
		  unsigned short int length,
		  __u32 daddr,
		  __u32 user_saddr,
		  struct options * opt,
		  int flags,
		  int type,
		  int noblock)
{
	struct rtable *rt;
	unsigned int fraglen, maxfraglen, fragheaderlen;
	int offset, mf;
	__u32 saddr;
	unsigned short id;
	struct iphdr *iph;
	__u32 raddr;
	struct device *dev = NULL;
	struct hh_cache * hh=NULL;
	int nfrags=0;
	__u32 true_daddr = daddr;

	if (opt && opt->srr && !sk->ip_hdrincl)
		daddr = opt->faddr;

	ip_statistics.IpOutRequests++;

#ifdef CONFIG_IP_MULTICAST
	if(sk && MULTICAST(daddr) && *sk->ip_mc_name)
	{
		dev=dev_get(sk->ip_mc_name);
		if(!dev)
			return -ENODEV;
		rt=NULL;
		if (sk->saddr && (!LOOPBACK(sk->saddr) || LOOPBACK(daddr)))
			saddr = sk->saddr;
		else
			saddr = dev->pa_addr;
	}
	else
	{
#endif
		rt = ip_check_route(&sk->ip_route_cache, daddr,
				    sk->localroute || (flags&MSG_DONTROUTE) ||
				    (opt && opt->is_strictroute));
		if (rt == NULL)
		{
			ip_statistics.IpOutNoRoutes++;
			return(-ENETUNREACH);
		}
		saddr = rt->rt_src;

		hh = rt->rt_hh;

		if (sk->saddr && (!LOOPBACK(sk->saddr) || LOOPBACK(daddr)))
			saddr = sk->saddr;

		dev=rt->rt_dev;
#ifdef CONFIG_IP_MULTICAST
	}
	if (rt && !dev)
		dev = rt->rt_dev;
#endif
	if (user_saddr)
		saddr = user_saddr;

	raddr = rt ? rt->rt_gateway : daddr;

	/*
	 *	Now compute the buffer space we require
	 */

	/*
	 *	Try the simple case first. This leaves broadcast, multicast,
	 *	fragmented frames, and by choice RAW frames within 20 bytes of
	 *	maximum size (rare) to the long path.
	 */

	length += sizeof(struct iphdr);
	if (!sk->ip_hdrincl && opt)
		length += opt->optlen;

	if(length <= dev->mtu && !MULTICAST(daddr) && daddr!=0xFFFFFFFF && daddr!=dev->pa_brdaddr)
	{
		int error;
		struct sk_buff *skb=sock_alloc_send_skb(sk, length+15+dev->hard_header_len,0, noblock, &error);
		if(skb==NULL)
		{
			ip_statistics.IpOutDiscards++;
			return error;
		}
		skb->dev=dev;
		skb->protocol = htons(ETH_P_IP);
		skb->free=1;
		skb->when=jiffies;
		skb->sk=sk;
		skb->arp=0;
		skb->saddr=saddr;
		skb->raddr = raddr;
		skb_reserve(skb,(dev->hard_header_len+15)&~15);
		if (hh)
		{
			skb->arp=1;
			memcpy(skb_push(skb,dev->hard_header_len),hh->hh_data,dev->hard_header_len);
			if (!hh->hh_uptodate)
			{
				skb->arp = 0;
#if RT_CACHE_DEBUG >= 2
				printk("ip_build_xmit: hh miss %08x via %08x\n", rt->rt_dst, rt->rt_gateway);
#endif
			}
		}
		else if(dev->hard_header)
		{
			if(dev->hard_header(skb,dev,ETH_P_IP,NULL,NULL,0)>0)
				skb->arp=1;
		}
		else
			skb->arp=1;
		skb->ip_hdr=iph=(struct iphdr *)skb_put(skb,length);
		dev_lock_list();
		if(!sk->ip_hdrincl)
		{
			iph->version=4;
			iph->ihl=5;
			iph->tos=sk->ip_tos;
			iph->tot_len = htons(length);
			iph->id=htons(ip_id_count++);
			iph->frag_off = 0;
			iph->ttl=sk->ip_ttl;
			iph->protocol=type;
			iph->saddr=saddr;
			iph->daddr=daddr;
			if (opt)
			{
				iph->ihl += opt->optlen>>2;
				ip_options_build(skb, opt,
						 true_daddr, dev->pa_addr, 0);
			}
			iph->check=0;
			iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
			getfrag(frag,saddr,((char *)iph)+iph->ihl*4,0, length-iph->ihl*4);
		}
		else
			getfrag(frag,saddr,(void *)iph,0,length-20);
		dev_unlock_list();
#ifdef CONFIG_FIREWALL
		if(call_out_firewall(PF_INET, skb, iph)< FW_ACCEPT)
		{
			kfree_skb(skb, FREE_WRITE);
			return -EPERM;
		}
#endif
#ifdef CONFIG_IP_ACCT
		ip_fw_chk(iph,dev,ip_acct_chain, IP_FW_F_ACCEPT,1);
#endif
		if(dev->flags&IFF_UP)
			dev_queue_xmit(skb,dev,sk->priority);
		else
		{
			ip_statistics.IpOutDiscards++;
			kfree_skb(skb, FREE_WRITE);
		}
		return 0;
	}
	length -= sizeof(struct iphdr);
	if (sk && !sk->ip_hdrincl && opt)
	{
		length -= opt->optlen;
		fragheaderlen = dev->hard_header_len + sizeof(struct iphdr) + opt->optlen;
		maxfraglen = ((dev->mtu-sizeof(struct iphdr)-opt->optlen) & ~7) + fragheaderlen;
	}
	else
	{
		fragheaderlen = dev->hard_header_len;
		if(!sk->ip_hdrincl)
			fragheaderlen += 20;

		/*
		 *	Fragheaderlen is the size of 'overhead' on each buffer.
		 *	Now work out the size of the frames to send.
		 */

		maxfraglen = ((dev->mtu-20) & ~7) + fragheaderlen;
	}

	/*
	 *	Start at the end of the frame by handling the remainder.
	 */

	offset = length - (length % (maxfraglen - fragheaderlen));

	/*
	 *	Amount of memory to allocate for final fragment.
	 */

	fraglen = length - offset + fragheaderlen;

	if(length-offset==0)
	{
		fraglen = maxfraglen;
		offset -= maxfraglen-fragheaderlen;
	}
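
	/*
	 *	A worked example (illustrative only): on Ethernet
	 *	(hard_header_len 14, MTU 1500) with no IP options,
	 *	fragheaderlen = 14+20 = 34 and maxfraglen =
	 *	((1500-20)&~7)+34 = 1514, i.e. 1480 data bytes per fragment.
	 *	For length = 4000, offset starts at 4000-(4000%1480) = 2960
	 *	and the tail fragment carries the remaining 1040 bytes; the
	 *	loop below then steps offset down by 1480 to 1480 and
	 *	finally 0.
	 */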


	/*
	 *	The last fragment will not have MF (more fragments) set.
	 */

	mf = 0;

	/*
	 *	Can't fragment raw packets
	 */

	if (sk->ip_hdrincl && offset > 0)
		return(-EMSGSIZE);

	/*
	 *	Lock the device lists.
	 */

	dev_lock_list();

	/*
	 *	Get an identifier
	 */

	id = htons(ip_id_count++);

	/*
	 *	Begin outputting the bytes.
	 */

	do
	{
		struct sk_buff * skb;
		int error;
		char *data;

		/*
		 *	Get the memory we require with some space left for alignment.
		 */

		skb = sock_alloc_send_skb(sk, fraglen+15, 0, noblock, &error);
		if (skb == NULL)
		{
			ip_statistics.IpOutDiscards++;
			if(nfrags>1)
				ip_statistics.IpFragCreates++;
			dev_unlock_list();
			return(error);
		}

		/*
		 *	Fill in the control structures
		 */

		skb->dev = dev;
		skb->protocol = htons(ETH_P_IP);
		skb->when = jiffies;
		skb->free = 1;	/* dubious, this one */
		skb->sk = sk;
		skb->arp = 0;
		skb->saddr = saddr;
		skb->raddr = raddr;
		skb_reserve(skb,(dev->hard_header_len+15)&~15);
		data = skb_put(skb, fraglen-dev->hard_header_len);

		/*
		 *	Save us ARP and stuff. In the optimal case we do no route
		 *	lookup (route cache ok), no ARP lookup (arp cache ok), and
		 *	output. The cache checks are still too slow but this can be
		 *	fixed later. For gateway routes we ought to have a rt->..
		 *	header cache pointer to speed header cache builds for
		 *	identical targets.
		 */

		if (hh)
		{
			skb->arp=1;
			memcpy(skb_push(skb,dev->hard_header_len),hh->hh_data,dev->hard_header_len);
			if (!hh->hh_uptodate)
			{
				skb->arp = 0;
#if RT_CACHE_DEBUG >= 2
				printk("ip_build_xmit: hh miss %08x via %08x\n", rt->rt_dst, rt->rt_gateway);
#endif
			}
		}
		else if (dev->hard_header)
		{
			if(dev->hard_header(skb, dev, ETH_P_IP,
					    NULL, NULL, 0)>0)
				skb->arp=1;
		}

		/*
		 *	Find where to start putting bytes.
		 */

		skb->ip_hdr = iph = (struct iphdr *)data;

		/*
		 *	Only write IP header onto non-raw packets
		 */

		if(!sk->ip_hdrincl)
		{

			iph->version = 4;
			iph->ihl = 5;	/* ugh */
			if (opt) {
				iph->ihl += opt->optlen>>2;
				ip_options_build(skb, opt,
						 true_daddr, dev->pa_addr, offset);
			}
			iph->tos = sk->ip_tos;
			iph->tot_len = htons(fraglen - fragheaderlen + iph->ihl*4);
			iph->id = id;
			iph->frag_off = htons(offset>>3);
			iph->frag_off |= mf;
#ifdef CONFIG_IP_MULTICAST
			if (MULTICAST(daddr))
				iph->ttl = sk->ip_mc_ttl;
			else
#endif
				iph->ttl = sk->ip_ttl;
			iph->protocol = type;
			iph->check = 0;
			iph->saddr = saddr;
			iph->daddr = daddr;
			iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
			data += iph->ihl*4;

			/*
			 *	Any further fragments will have MF set.
			 */

			mf = htons(IP_MF);
		}

		/*
		 *	User data callback
		 */

		getfrag(frag, saddr, data, offset, fraglen-fragheaderlen);

		/*
		 *	Account for the fragment.
		 */

#ifdef CONFIG_FIREWALL
		if(!offset && call_out_firewall(PF_INET, skb, iph) < FW_ACCEPT)
		{
			kfree_skb(skb, FREE_WRITE);
			dev_unlock_list();
			return -EPERM;
		}
#endif
#ifdef CONFIG_IP_ACCT
		if(!offset)
			ip_fw_chk(iph, dev, ip_acct_chain, IP_FW_F_ACCEPT, 1);
#endif
		offset -= (maxfraglen-fragheaderlen);
		fraglen = maxfraglen;

#ifdef CONFIG_IP_MULTICAST

		/*
		 *	Multicasts are looped back for other local users
		 */

		if (MULTICAST(daddr) && !(dev->flags&IFF_LOOPBACK))
		{
			/*
			 *	Loop back any frames. The check for IGMP_ALL_HOSTS
			 *	is because you are always magically a member of this
			 *	group.
			 *
			 *	Always loop back all host messages when running as a
			 *	multicast router.
			 */

			if(sk==NULL || sk->ip_mc_loop)
			{
				if(daddr==IGMP_ALL_HOSTS || (dev->flags&IFF_ALLMULTI))
					ip_loopback(dev,skb);
				else
				{
					struct ip_mc_list *imc=dev->ip_mc_list;
					while(imc!=NULL)
					{
						if(imc->multiaddr==daddr)
						{
							ip_loopback(dev,skb);
							break;
						}
						imc=imc->next;
					}
				}
			}

			/*
			 *	Multicasts with ttl 0 must not go beyond the host.
			 *	Fixme: avoid the extra clone.
			 */

			if(skb->ip_hdr->ttl==0)
				kfree_skb(skb, FREE_READ);
		}
#endif

		nfrags++;

		/*
		 *	BSD loops broadcasts
		 */

		if((dev->flags&IFF_BROADCAST) && (daddr==0xFFFFFFFF || daddr==dev->pa_brdaddr) && !(dev->flags&IFF_LOOPBACK))
			ip_loopback(dev,skb);

		/*
		 *	Now queue the bytes into the device.
		 */

		if (dev->flags & IFF_UP)
		{
			dev_queue_xmit(skb, dev, sk->priority);
		}
		else
		{
			/*
			 *	Whoops...
			 */

			ip_statistics.IpOutDiscards++;
			if(nfrags>1)
				ip_statistics.IpFragCreates+=nfrags;
			kfree_skb(skb, FREE_WRITE);
			dev_unlock_list();
			/*
			 *	BSD behaviour.
			 */
			if(sk!=NULL)
				sk->err=ENETDOWN;
			return(0);	/* lose rest of fragments */
		}
	}
	while (offset >= 0);
	if(nfrags>1)
		ip_statistics.IpFragCreates+=nfrags;
	dev_unlock_list();
	return(0);
}


/*
 *	IP protocol layer initialiser
 */

static struct packet_type ip_packet_type =
{
	0,		/* MUTTER ntohs(ETH_P_IP),*/
	NULL,		/* All devices */
	ip_rcv,
	NULL,
	NULL,
};
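
/*
 *	The type field above is filled in at run time by ip_init() below;
 *	presumably htons()/ntohs() was not usable as a compile-time
 *	constant in a static initialiser here, hence the 'MUTTER'.
 */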

#ifdef CONFIG_RTNETLINK

/*
 *	Netlink hooks for IP
 */

void ip_netlink_msg(unsigned long msg, __u32 daddr, __u32 gw, __u32 mask, short flags, short metric, char *name)
{
	struct sk_buff *skb=alloc_skb(sizeof(struct netlink_rtinfo), GFP_ATOMIC);
	struct netlink_rtinfo *nrt;
	struct sockaddr_in *s;
	if(skb==NULL)
		return;
	skb->free=1;
	nrt=(struct netlink_rtinfo *)skb_put(skb, sizeof(struct netlink_rtinfo));
	nrt->rtmsg_type=msg;
	s=(struct sockaddr_in *)&nrt->rtmsg_dst;
	s->sin_family=AF_INET;
	s->sin_addr.s_addr=daddr;
	s=(struct sockaddr_in *)&nrt->rtmsg_gateway;
	s->sin_family=AF_INET;
	s->sin_addr.s_addr=gw;
	s=(struct sockaddr_in *)&nrt->rtmsg_genmask;
	s->sin_family=AF_INET;
	s->sin_addr.s_addr=mask;
	nrt->rtmsg_flags=flags;
	nrt->rtmsg_metric=metric;
	strcpy(nrt->rtmsg_device,name);
	netlink_post(NETLINK_ROUTE, skb);
}
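
/*
 *	For example (see ip_rt_event() below), bringing an interface up or
 *	down posts an RTMSG_NEWDEVICE or RTMSG_DELDEVICE message to
 *	NETLINK_ROUTE, which a user-level routing daemon can read.
 */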

#endif

/*
 *	Device notifier
 */

static int ip_rt_event(struct notifier_block *this, unsigned long event, void *ptr)
{
	struct device *dev=ptr;
	if(event==NETDEV_DOWN)
	{
		ip_netlink_msg(RTMSG_DELDEVICE, 0,0,0,0,0,dev->name);
		ip_rt_flush(dev);
	}

	/*
	 *	Join the initial group if multicast.
	 */

	if(event==NETDEV_UP)
	{
#ifdef CONFIG_IP_MULTICAST
		ip_mc_allhost(dev);
#endif
		ip_netlink_msg(RTMSG_NEWDEVICE, 0,0,0,0,0,dev->name);
	}
	return NOTIFY_DONE;
}

struct notifier_block ip_rt_notifier={
	ip_rt_event,
	NULL,
	0
};

/*
 *	IP registers the packet type and then calls the subprotocol initialisers
 */

void ip_init(void)
{
	ip_packet_type.type=htons(ETH_P_IP);
	dev_add_pack(&ip_packet_type);

	/* So we flush routes when a device is downed */
	register_netdevice_notifier(&ip_rt_notifier);

/*	ip_raw_init();
	ip_packet_init();
	ip_tcp_init();
	ip_udp_init();*/

#ifdef CONFIG_IP_MULTICAST
	proc_net_register(&(struct proc_dir_entry) {
		PROC_NET_IGMP, 4, "igmp",
		S_IFREG | S_IRUGO, 1, 0, 0,
		0, &proc_net_inode_operations,
		ip_mc_procinfo
	});
#endif
}