1 /* 2 * INET An implementation of the TCP/IP protocol suite for the LINUX 3 * operating system. INET is implemented using the BSD Socket 4 * interface as the means of communication with the user level. 5 * 6 * The Internet Protocol (IP) module. 7 * 8 * Version: @(#)ip.c 1.0.16b 9/1/93 9 * 10 * Authors: Ross Biro, <bir7@leland.Stanford.Edu> 11 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> 12 * Donald Becker, <becker@super.org> 13 * Alan Cox, <gw4pts@gw4pts.ampr.org> 14 * Richard Underwood 15 * Stefan Becker, <stefanb@yello.ping.de> 16 * Jorge Cwik, <jorge@laser.satlink.net> 17 * Arnt Gulbrandsen, <agulbra@nvg.unit.no> 18 * 19 * 20 * Fixes: 21 * Alan Cox : Commented a couple of minor bits of surplus code 22 * Alan Cox : Undefining IP_FORWARD doesn't include the code 23 * (just stops a compiler warning). 24 * Alan Cox : Frames with >=MAX_ROUTE record routes, strict routes or loose routes 25 * are junked rather than corrupting things. 26 * Alan Cox : Frames to bad broadcast subnets are dumped 27 * We used to process them non broadcast and 28 * boy could that cause havoc. 29 * Alan Cox : ip_forward sets the free flag on the 30 * new frame it queues. Still crap because 31 * it copies the frame but at least it 32 * doesn't eat memory too. 33 * Alan Cox : Generic queue code and memory fixes. 34 * Fred Van Kempen : IP fragment support (borrowed from NET2E) 35 * Gerhard Koerting: Forward fragmented frames correctly. 36 * Gerhard Koerting: Fixes to my fix of the above 8-). 37 * Gerhard Koerting: IP interface addressing fix. 38 * Linus Torvalds : More robustness checks 39 * Alan Cox : Even more checks: Still not as robust as it ought to be 40 * Alan Cox : Save IP header pointer for later 41 * Alan Cox : ip option setting 42 * Alan Cox : Use ip_tos/ip_ttl settings 43 * Alan Cox : Fragmentation bogosity removed 44 * (Thanks to Mark.Bush@prg.ox.ac.uk) 45 * Dmitry Gorodchanin : Send of a raw packet crash fix. 
46 * Alan Cox : Silly ip bug when an overlength 47 * fragment turns up. Now frees the 48 * queue. 49 * Linus Torvalds/ : Memory leakage on fragmentation 50 * Alan Cox : handling. 51 * Gerhard Koerting: Forwarding uses IP priority hints 52 * Teemu Rantanen : Fragment problems. 53 * Alan Cox : General cleanup, comments and reformat 54 * Alan Cox : SNMP statistics 55 * Alan Cox : BSD address rule semantics. Also see 56 * UDP as there is a nasty checksum issue 57 * if you do things the wrong way. 58 * Alan Cox : Always defrag, moved IP_FORWARD to the config.in file 59 * Alan Cox : IP options adjust sk->priority. 60 * Pedro Roque : Fix mtu/length error in ip_forward. 61 * Alan Cox : Avoid ip_chk_addr when possible. 62 * Richard Underwood : IP multicasting. 63 * Alan Cox : Cleaned up multicast handlers. 64 * Alan Cox : RAW sockets demultiplex in the BSD style. 65 * Gunther Mayer : Fix the SNMP reporting typo 66 * Alan Cox : Always in group 224.0.0.1 67 * Pauline Middelink : Fast ip_checksum update when forwarding 68 * Masquerading support. 69 * Alan Cox : Multicast loopback error for 224.0.0.1 70 * Alan Cox : IP_MULTICAST_LOOP option. 71 * Alan Cox : Use notifiers. 72 * Bjorn Ekwall : Removed ip_csum (from slhc.c too) 73 * Bjorn Ekwall : Moved ip_fast_csum to ip.h (inline!) 74 * Stefan Becker : Send out ICMP HOST REDIRECT 75 * Arnt Gulbrandsen : ip_build_xmit 76 * Alan Cox : Per socket routing cache 77 * Alan Cox : Fixed routing cache, added header cache. 78 * Alan Cox : Loopback didnt work right in original ip_build_xmit - fixed it. 79 * Alan Cox : Only send ICMP_REDIRECT if src/dest are the same net. 80 * Alan Cox : Incoming IP option handling. 81 * Alan Cox : Set saddr on raw output frames as per BSD. 82 * Alan Cox : Stopped broadcast source route explosions. 83 * Alan Cox : Can disable source routing 84 * 85 * 86 * 87 * To Fix: 88 * IP option processing is mostly not needed. ip_forward needs to know about routing rules 89 * and time stamp but that's about all. 
Use the route mtu field here too 90 * IP fragmentation wants rewriting cleanly. The RFC815 algorithm is much more efficient 91 * and could be made very efficient with the addition of some virtual memory hacks to permit 92 * the allocation of a buffer that can then be 'grown' by twiddling page tables. 93 * Output fragmentation wants updating along with the buffer management to use a single 94 * interleaved copy algorithm so that fragmenting has a one copy overhead. Actual packet 95 * output should probably do its own fragmentation at the UDP/RAW layer. TCP shouldn't cause 96 * fragmentation anyway. 97 * 98 * FIXME: copy frag 0 iph to qp->iph 99 * 100 * This program is free software; you can redistribute it and/or 101 * modify it under the terms of the GNU General Public License 102 * as published by the Free Software Foundation; either version 103 * 2 of the License, or (at your option) any later version. 104 */ 105
106 #include <asm/segment.h>
107 #include <asm/system.h>
108 #include <linux/types.h>
109 #include <linux/kernel.h>
110 #include <linux/sched.h>
111 #include <linux/mm.h>
112 #include <linux/string.h>
113 #include <linux/errno.h>
114 #include <linux/config.h>
115
116 #include <linux/socket.h>
117 #include <linux/sockios.h>
118 #include <linux/in.h>
119 #include <linux/inet.h>
120 #include <linux/netdevice.h>
121 #include <linux/etherdevice.h>
122
123 #include <net/snmp.h>
124 #include <net/ip.h>
125 #include <net/protocol.h>
126 #include <net/route.h>
127 #include <net/tcp.h>
128 #include <net/udp.h>
129 #include <linux/skbuff.h>
130 #include <net/sock.h>
131 #include <net/arp.h>
132 #include <net/icmp.h>
133 #include <net/raw.h>
134 #include <net/checksum.h>
135 #include <linux/igmp.h>
136 #include <linux/ip_fw.h>
137
/*
 *	Local tunables and small helpers shared by the routines below.
 */

#define CONFIG_IP_DEFRAG

extern int last_retran;
extern void sort_send(struct sock *sk);

/* Classic two-argument minimum. Beware: evaluates its winner twice. */
#define min(a,b)	((a)<(b)?(a):(b))

/* True when x (an address in network byte order) lies in 127.0.0.0/8. */
#define LOOPBACK(x)	(((x) & htonl(0xff000000)) == htonl(0x7f000000))
/*
 *	SNMP management statistics.
 *
 *	The initializer sets the first two ip_mib members: the SNMP
 *	ipForwarding flag (1 = act as a router, 0 = host only) and the
 *	default TTL stamped on outgoing datagrams.
 */

#ifdef CONFIG_IP_FORWARD
struct ip_mib ip_statistics = {1, 64,};	/* Forwarding=Yes, Default TTL=64 */
#else
struct ip_mib ip_statistics = {0, 64,};	/* Forwarding=No, Default TTL=64 */
#endif
/*
 *	Handle the issuing of an ioctl() request for the ip device.
 *	This is scheduled to disappear.
 *
 *	No IP-level ioctl commands are implemented, so every request
 *	is rejected with -EINVAL.
 */

int ip_ioctl(struct sock *sk, int cmd, unsigned long arg)
{
	return -EINVAL;
}
171
172 /* 173 * Take an skb, and fill in the MAC header. 174 */ 175
176 staticintip_send(structsk_buff *skb, unsignedlongdaddr, intlen, structdevice *dev, unsignedlongsaddr)
/* */ 177 { 178 intmac = 0;
179
180 skb->dev = dev;
181 skb->arp = 1;
182 if (dev->hard_header)
183 { 184 /* 185 * Build a hardware header. Source address is our mac, destination unknown 186 * (rebuild header will sort this out) 187 */ 188 skb_reserve(skb,(dev->hard_header_len+15)&~15); /* 16 byte aligned IP headers are good */ 189 mac = dev->hard_header(skb, dev, ETH_P_IP, NULL, NULL, len);
190 if (mac < 0)
191 { 192 mac = -mac;
193 skb->arp = 0;
194 skb->raddr = daddr; /* next routing address */ 195 } 196 } 197 returnmac;
198 } 199
/* Running counter used to stamp the identification field of outgoing IP datagrams. */
int ip_id_count = 0;
201
202 /* 203 * This routine builds the appropriate hardware/IP headers for 204 * the routine. It assumes that if *dev != NULL then the 205 * protocol knows what it's doing, otherwise it uses the 206 * routing/ARP tables to select a device struct. 207 */ 208 intip_build_header(structsk_buff *skb, unsignedlongsaddr, unsignedlongdaddr,
/* */ 209 structdevice **dev, inttype, structoptions *opt, intlen, inttos, intttl)
210 { 211 structrtable *rt;
212 unsignedlongraddr;
213 inttmp;
214 unsignedlongsrc;
215 structiphdr *iph;
216
217 /* 218 * See if we need to look up the device. 219 */ 220
221 #ifdefCONFIG_INET_MULTICAST 222 if(MULTICAST(daddr) && *dev==NULL && skb->sk && *skb->sk->ip_mc_name)
223 *dev=dev_get(skb->sk->ip_mc_name);
224 #endif 225 if (*dev == NULL)
226 { 227 if(skb->localroute)
228 rt = ip_rt_local(daddr, NULL, &src);
229 else 230 rt = ip_rt_route(daddr, NULL, &src);
231 if (rt == NULL)
232 { 233 ip_statistics.IpOutNoRoutes++;
234 return(-ENETUNREACH);
235 } 236
237 *dev = rt->rt_dev;
238 /* 239 * If the frame is from us and going off machine it MUST MUST MUST 240 * have the output device ip address and never the loopback 241 */ 242 if (LOOPBACK(saddr) && !LOOPBACK(daddr))
243 saddr = src;/*rt->rt_dev->pa_addr;*/ 244 raddr = rt->rt_gateway;
245
246 } 247 else 248 { 249 /* 250 * We still need the address of the first hop. 251 */ 252 if(skb->localroute)
253 rt = ip_rt_local(daddr, NULL, &src);
254 else 255 rt = ip_rt_route(daddr, NULL, &src);
256 /* 257 * If the frame is from us and going off machine it MUST MUST MUST 258 * have the output device ip address and never the loopback 259 */ 260 if (LOOPBACK(saddr) && !LOOPBACK(daddr))
261 saddr = src;/*rt->rt_dev->pa_addr;*/ 262
263 raddr = (rt == NULL) ? 0 : rt->rt_gateway;
264 } 265
266 /* 267 * No source addr so make it our addr 268 */ 269 if (saddr == 0)
270 saddr = src;
271
272 /* 273 * No gateway so aim at the real destination 274 */ 275 if (raddr == 0)
276 raddr = daddr;
277
278 /* 279 * Now build the MAC header. 280 */ 281
282 tmp = ip_send(skb, raddr, len, *dev, saddr);
283
284 /* 285 * Book keeping 286 */ 287
288 skb->dev = *dev;
289 skb->saddr = saddr;
290 if (skb->sk)
291 skb->sk->saddr = saddr;
292
293 /* 294 * Now build the IP header. 295 */ 296
297 /* 298 * If we are using IPPROTO_RAW, then we don't need an IP header, since 299 * one is being supplied to us by the user 300 */ 301
302 if(type == IPPROTO_RAW)
303 return (tmp);
304
305 /* 306 * Build the IP addresses 307 */ 308
309 iph=(structiphdr *)skb_put(skb,sizeof(structiphdr));
310
311 iph->version = 4;
312 iph->ihl = 5;
313 iph->tos = tos;
314 iph->frag_off = 0;
315 iph->ttl = ttl;
316 iph->daddr = daddr;
317 iph->saddr = saddr;
318 iph->protocol = type;
319 skb->ip_hdr = iph;
320
321 return(20 + tmp); /* IP header plus MAC header size */ 322 } 323
324
/*
 *	Generate a checksum for an outgoing IP datagram.
 */

void ip_send_check(struct iphdr *iph)
{
	/* The checksum field must be zero while the header sum is computed. */
	iph->check = 0;
	iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
}
335 /************************ Fragment Handlers From NET2E **********************************/ 336
337
/*
 *	This fragment handler is a bit of a heap. On the other hand it works quite
 *	happily and handles things quite well.
 */

/*
 * Head of the doubly-linked list of partially reassembled datagrams.
 * Accessed from both process and interrupt context; all walkers bracket
 * their access with cli()/sti().
 */
static struct ipq *ipqueue = NULL;		/* IP fragment queue	*/
345 /* 346 * Create a new fragment entry. 347 */ 348
349 staticstructipfrag *ip_frag_create(intoffset, intend, structsk_buff *skb, unsignedchar *ptr)
/* */ 350 { 351 structipfrag *fp;
352
353 fp = (structipfrag *) kmalloc(sizeof(structipfrag), GFP_ATOMIC);
354 if (fp == NULL)
355 { 356 NETDEBUG(printk("IP: frag_create: no memory left !\n"));
357 return(NULL);
358 } 359 memset(fp, 0, sizeof(structipfrag));
360
361 /* Fill in the structure. */ 362 fp->offset = offset;
363 fp->end = end;
364 fp->len = end - offset;
365 fp->skb = skb;
366 fp->ptr = ptr;
367
368 return(fp);
369 } 370
371
372 /* 373 * Find the correct entry in the "incomplete datagrams" queue for 374 * this IP datagram, and return the queue entry address if found. 375 */ 376
377 staticstructipq *ip_find(structiphdr *iph)
/* */ 378 { 379 structipq *qp;
380 structipq *qplast;
381
382 cli();
383 qplast = NULL;
384 for(qp = ipqueue; qp != NULL; qplast = qp, qp = qp->next)
385 { 386 if (iph->id== qp->iph->id && iph->saddr == qp->iph->saddr &&
387 iph->daddr == qp->iph->daddr && iph->protocol == qp->iph->protocol)
388 { 389 del_timer(&qp->timer); /* So it doesn't vanish on us. The timer will be reset anyway */ 390 sti();
391 return(qp);
392 } 393 } 394 sti();
395 return(NULL);
396 } 397
398
/*
 *	Remove an entry from the "incomplete datagrams" queue, either
 *	because we completed, reassembled and processed it, or because
 *	it timed out.
 *
 *	Frees every queued fragment skb, the saved header copy and the
 *	descriptor itself; qp must not be touched after this returns.
 */

static void ip_free(struct ipq *qp)
{
	struct ipfrag *fp;
	struct ipfrag *xp;

	/*
	 * Stop the timer for this entry.  Must happen before the unlink
	 * so ip_expire cannot fire against a half-removed entry.
	 */

	del_timer(&qp->timer);

	/* Remove this entry from the "incomplete datagrams" queue. */
	cli();		/* the list is also walked from interrupt context */
	if (qp->prev == NULL)
	{
		/* We were the list head. */
		ipqueue = qp->next;
		if (ipqueue != NULL)
			ipqueue->prev = NULL;
	}
	else
	{
		qp->prev->next = qp->next;
		if (qp->next != NULL)
			qp->next->prev = qp->prev;
	}

	/* Release all fragment data. */

	fp = qp->fragments;
	while (fp != NULL)
	{
		xp = fp->next;	/* save the link before fp is freed */
		IS_SKB(fp->skb);
		kfree_skb(fp->skb,FREE_READ);
		kfree_s(fp, sizeof(struct ipfrag));
		fp = xp;
	}

	/* Release the IP header. (64 + 8 matches the kmalloc in ip_create.) */
	kfree_s(qp->iph, 64 + 8);

	/* Finally, release the queue descriptor itself. */
	kfree_s(qp, sizeof(struct ipq));
	sti();
}
451
452 /* 453 * Oops- a fragment queue timed out. Kill it and send an ICMP reply. 454 */ 455
456 staticvoidip_expire(unsignedlongarg)
/* */ 457 { 458 structipq *qp;
459
460 qp = (structipq *)arg;
461
462 /* 463 * Send an ICMP "Fragment Reassembly Timeout" message. 464 */ 465
466 ip_statistics.IpReasmTimeout++;
467 ip_statistics.IpReasmFails++;
468 /* This if is always true... shrug */ 469 if(qp->fragments!=NULL)
470 icmp_send(qp->fragments->skb,ICMP_TIME_EXCEEDED,
471 ICMP_EXC_FRAGTIME, 0, qp->dev);
472
473 /* 474 * Nuke the fragment queue. 475 */ 476 ip_free(qp);
477 } 478
479
480 /* 481 * Add an entry to the 'ipq' queue for a newly received IP datagram. 482 * We will (hopefully :-) receive all other fragments of this datagram 483 * in time, so we just create a queue for this datagram, in which we 484 * will insert the received fragments at their respective positions. 485 */ 486
487 staticstructipq *ip_create(structsk_buff *skb, structiphdr *iph, structdevice *dev)
/* */ 488 { 489 structipq *qp;
490 intihlen;
491
492 qp = (structipq *) kmalloc(sizeof(structipq), GFP_ATOMIC);
493 if (qp == NULL)
494 { 495 NETDEBUG(printk("IP: create: no memory left !\n"));
496 return(NULL);
497 skb->dev = qp->dev;
498 } 499 memset(qp, 0, sizeof(structipq));
500
501 /* 502 * Allocate memory for the IP header (plus 8 octets for ICMP). 503 */ 504
505 ihlen = iph->ihl * 4;
506 qp->iph = (structiphdr *) kmalloc(64 + 8, GFP_ATOMIC);
507 if (qp->iph == NULL)
508 { 509 NETDEBUG(printk("IP: create: no memory left !\n"));
510 kfree_s(qp, sizeof(structipq));
511 return(NULL);
512 } 513
514 memcpy(qp->iph, iph, ihlen + 8);
515 qp->len = 0;
516 qp->ihlen = ihlen;
517 qp->fragments = NULL;
518 qp->dev = dev;
519
520 /* Start a timer for this entry. */ 521 qp->timer.expires = IP_FRAG_TIME; /* about 30 seconds */ 522 qp->timer.data = (unsignedlong) qp; /* pointer to queue */ 523 qp->timer.function = ip_expire; /* expire function */ 524 add_timer(&qp->timer);
525
526 /* Add this entry to the queue. */ 527 qp->prev = NULL;
528 cli();
529 qp->next = ipqueue;
530 if (qp->next != NULL)
531 qp->next->prev = qp;
532 ipqueue = qp;
533 sti();
534 return(qp);
535 } 536
537
538 /* 539 * See if a fragment queue is complete. 540 */ 541
542 staticintip_done(structipq *qp)
/* */ 543 { 544 structipfrag *fp;
545 intoffset;
546
547 /* Only possible if we received the final fragment. */ 548 if (qp->len == 0)
549 return(0);
550
551 /* Check all fragment offsets to see if they connect. */ 552 fp = qp->fragments;
553 offset = 0;
554 while (fp != NULL)
555 { 556 if (fp->offset > offset)
557 return(0); /* fragment(s) missing */ 558 offset = fp->end;
559 fp = fp->next;
560 } 561
562 /* All fragments are present. */ 563 return(1);
564 } 565
566
/*
 *	Build a new IP datagram from all its fragments.
 *
 *	Consumes the queue entry (ip_free) on both success and failure.
 *	Returns the reassembled skb, or NULL on allocation failure or a
 *	corrupt fragment list.
 *
 *	FIXME: We copy here because we lack an effective way of handling lists
 *	of bits on input. Until the new skb data handling is in I'm not going
 *	to touch this with a bargepole.
 *
 *	NOTE(review): per the file-header FIXME, the header used here is the
 *	one saved when the queue was created (first fragment to *arrive*),
 *	which is presumably not always fragment 0's header — confirm.
 */

static struct sk_buff *ip_glue(struct ipq *qp)
{
	struct sk_buff *skb;
	struct iphdr *iph;
	struct ipfrag *fp;
	unsigned char *ptr;
	int count, len;

	/*
	 * Allocate a new buffer for the datagram.
	 * len = saved header length plus total data length.
	 */
	len = qp->ihlen + qp->len;

	if ((skb = dev_alloc_skb(len)) == NULL)
	{
		ip_statistics.IpReasmFails++;
		NETDEBUG(printk("IP: queue_glue: no memory for gluing queue %p\n", qp));
		ip_free(qp);
		return(NULL);
	}

	/* Fill in the basic details. */
	skb_put(skb,len);
	skb->h.raw = skb->data;
	skb->free = 1;

	/* Copy the original IP headers into the new buffer. */
	ptr = (unsigned char *) skb->h.raw;
	memcpy(ptr, ((unsigned char *) qp->iph), qp->ihlen);
	ptr += qp->ihlen;	/* data is written after the header */

	count = 0;		/* running total of data bytes copied */

	/* Copy the data portions of all fragments into the new buffer. */
	fp = qp->fragments;
	while(fp != NULL)
	{
		/* Sanity: a fragment must not write past the allocated buffer. */
		if(count+fp->len > skb->len)
		{
			NETDEBUG(printk("Invalid fragment list: Fragment over size.\n"));
			ip_free(qp);
			kfree_skb(skb,FREE_WRITE);
			ip_statistics.IpReasmFails++;
			return NULL;
		}
		memcpy((ptr + fp->offset), fp->ptr, fp->len);
		count += fp->len;
		fp = fp->next;
	}

	/* We glued together all fragments, so remove the queue entry. */
	ip_free(qp);

	/* Done with all fragments. Fixup the new IP header. */
	iph = skb->h.iph;
	iph->frag_off = 0;				/* no longer a fragment */
	iph->tot_len = htons((iph->ihl * 4) + count);	/* true reassembled length */
	skb->ip_hdr = iph;

	ip_statistics.IpReasmOKs++;
	return(skb);
}
638
639 /* 640 * Process an incoming IP datagram fragment. 641 */ 642
643 staticstructsk_buff *ip_defrag(structiphdr *iph, structsk_buff *skb, structdevice *dev)
/* */ 644 { 645 structipfrag *prev, *next, *tmp;
646 structipfrag *tfp;
647 structipq *qp;
648 structsk_buff *skb2;
649 unsignedchar *ptr;
650 intflags, offset;
651 inti, ihl, end;
652
653 ip_statistics.IpReasmReqds++;
654
655 /* Find the entry of this IP datagram in the "incomplete datagrams" queue. */ 656 qp = ip_find(iph);
657
658 /* Is this a non-fragmented datagram? */ 659 offset = ntohs(iph->frag_off);
660 flags = offset & ~IP_OFFSET;
661 offset &= IP_OFFSET;
662 if (((flags & IP_MF) == 0) && (offset == 0))
663 { 664 if (qp != NULL)
665 ip_free(qp); /* Huh? How could this exist?? */ 666 return(skb);
667 } 668
669 offset <<= 3; /* offset is in 8-byte chunks */ 670
671 /* 672 * If the queue already existed, keep restarting its timer as long 673 * as we still are receiving fragments. Otherwise, create a fresh 674 * queue entry. 675 */ 676
677 if (qp != NULL)
678 { 679 del_timer(&qp->timer);
680 qp->timer.expires = IP_FRAG_TIME; /* about 30 seconds */ 681 qp->timer.data = (unsignedlong) qp; /* pointer to queue */ 682 qp->timer.function = ip_expire; /* expire function */ 683 add_timer(&qp->timer);
684 } 685 else 686 { 687 /* 688 * If we failed to create it, then discard the frame 689 */ 690 if ((qp = ip_create(skb, iph, dev)) == NULL)
691 { 692 skb->sk = NULL;
693 kfree_skb(skb, FREE_READ);
694 ip_statistics.IpReasmFails++;
695 returnNULL;
696 } 697 } 698
699 /* 700 * Determine the position of this fragment. 701 */ 702
703 ihl = iph->ihl * 4;
704 end = offset + ntohs(iph->tot_len) - ihl;
705
706 /* 707 * Point into the IP datagram 'data' part. 708 */ 709
710 ptr = skb->data + ihl;
711
712 /* 713 * Is this the final fragment? 714 */ 715
716 if ((flags & IP_MF) == 0)
717 qp->len = end;
718
719 /* 720 * Find out which fragments are in front and at the back of us 721 * in the chain of fragments so far. We must know where to put 722 * this fragment, right? 723 */ 724
725 prev = NULL;
726 for(next = qp->fragments; next != NULL; next = next->next)
727 { 728 if (next->offset > offset)
729 break; /* bingo! */ 730 prev = next;
731 } 732
733 /* 734 * We found where to put this one. 735 * Check for overlap with preceding fragment, and, if needed, 736 * align things so that any overlaps are eliminated. 737 */ 738 if (prev != NULL && offset < prev->end)
739 { 740 i = prev->end - offset;
741 offset += i; /* ptr into datagram */ 742 ptr += i; /* ptr into fragment data */ 743 } 744
745 /* 746 * Look for overlap with succeeding segments. 747 * If we can merge fragments, do it. 748 */ 749
750 for(tmp=next; tmp != NULL; tmp = tfp)
751 { 752 tfp = tmp->next;
753 if (tmp->offset >= end)
754 break; /* no overlaps at all */ 755
756 i = end - next->offset; /* overlap is 'i' bytes */ 757 tmp->len -= i; /* so reduce size of */ 758 tmp->offset += i; /* next fragment */ 759 tmp->ptr += i;
760 /* 761 * If we get a frag size of <= 0, remove it and the packet 762 * that it goes with. 763 */ 764 if (tmp->len <= 0)
765 { 766 if (tmp->prev != NULL)
767 tmp->prev->next = tmp->next;
768 else 769 qp->fragments = tmp->next;
770
771 if (tfp->next != NULL)
772 tmp->next->prev = tmp->prev;
773
774 next=tfp; /* We have killed the original next frame */ 775
776 kfree_skb(tmp->skb,FREE_READ);
777 kfree_s(tmp, sizeof(structipfrag));
778 } 779 } 780
781 /* 782 * Insert this fragment in the chain of fragments. 783 */ 784
785 tfp = NULL;
786 tfp = ip_frag_create(offset, end, skb, ptr);
787
788 /* 789 * No memory to save the fragment - so throw the lot 790 */ 791
792 if (!tfp)
793 { 794 skb->sk = NULL;
795 kfree_skb(skb, FREE_READ);
796 returnNULL;
797 } 798 tfp->prev = prev;
799 tfp->next = next;
800 if (prev != NULL)
801 prev->next = tfp;
802 else 803 qp->fragments = tfp;
804
805 if (next != NULL)
806 next->prev = tfp;
807
808 /* 809 * OK, so we inserted this new fragment into the chain. 810 * Check if we now have a full IP datagram which we can 811 * bump up to the IP layer... 812 */ 813
814 if (ip_done(qp))
815 { 816 skb2 = ip_glue(qp); /* glue together the fragments */ 817 return(skb2);
818 } 819 return(NULL);
820 } 821
822
/*
 *	This IP datagram is too large to be sent in one piece. Break it up into
 *	smaller pieces (each of size equal to the MAC header plus IP header plus
 *	a block of the data of the original IP data part) that will yet fit in a
 *	single device frame, and queue such a frame for sending by calling the
 *	ip_queue_xmit(). Note that this is recursion, and bad things will happen
 *	if this function causes a loop...
 *
 *	Yes this is inefficient, feel free to submit a quicker one.
 *
 *	**Protocol Violation**
 *	We copy all the options to each fragment. !FIXME!
 *
 *	NOTE(review): each fragment's tot_len and checksum are not rewritten
 *	here — presumably ip_queue_xmit() fixes them before transmission;
 *	confirm.  skb->data is assumed to start at the MAC header.
 */

void ip_fragment(struct sock *sk, struct sk_buff *skb, struct device *dev, int is_frag)
{
	struct iphdr *iph;
	unsigned char *raw;
	unsigned char *ptr;
	struct sk_buff *skb2;
	int left, mtu, hlen, len;
	int offset;
	unsigned long flags;

	/*
	 * Point into the IP datagram header.
	 */

	raw = skb->data;
	iph = (struct iphdr *) (raw + dev->hard_header_len);

	skb->ip_hdr = iph;

	/*
	 * Setup starting values.
	 */

	hlen = iph->ihl * 4;
	left = ntohs(iph->tot_len) - hlen;	/* Space per frame */
	hlen += dev->hard_header_len;		/* Total header size */
	mtu = (dev->mtu - hlen);		/* Size of data space */
	ptr = (raw + hlen);			/* Where to start from */

	/*
	 * Check for any "DF" flag. [DF means do not fragment]
	 */

	if (ntohs(iph->frag_off) & IP_DF)
	{
		/*
		 * Reply giving the MTU of the failed hop.
		 */
		ip_statistics.IpFragFails++;
		icmp_send(skb,ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, dev->mtu, dev);
		return;
	}

	/*
	 * The protocol doesn't seem to say what to do in the case that the
	 * frame + options doesn't fit the mtu. As it used to fall down dead
	 * in this case we were fortunate it didn't happen
	 * (8 is the minimum payload: fragment offsets are 8-byte units).
	 */

	if(mtu<8)
	{
		/* It's wrong but it's better than nothing */
		icmp_send(skb,ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED,dev->mtu, dev);
		ip_statistics.IpFragFails++;
		return;
	}

	/*
	 * Fragment the datagram.
	 *
	 * The initial offset is 0 for a complete frame. When
	 * fragmenting fragments it's wherever this one starts.
	 */

	if (is_frag & 2)
		offset = (ntohs(iph->frag_off) & IP_OFFSET) << 3;
	else
		offset = 0;

	/*
	 * Keep copying data until we run out.
	 */

	while(left > 0)
	{
		len = left;
		/* IF: it doesn't fit, use 'mtu' - the data space left */
		if (len > mtu)
			len = mtu;
		/* IF: we are not sending upto and including the packet end
		   then align the next start on an eight byte boundary */
		if (len < left)
		{
			len/=8;
			len*=8;
		}

		/*
		 * Allocate buffer.
		 */

		if ((skb2 = alloc_skb(len + hlen+15,GFP_ATOMIC)) == NULL)
		{
			NETDEBUG(printk("IP: frag: no memory for new fragment!\n"));
			ip_statistics.IpFragFails++;
			return;
		}

		/*
		 * Set up data on packet
		 */

		skb2->arp = skb->arp;
		if(skb->free==0)
			printk("IP fragmenter: BUG free!=1 in fragmenter\n");
		skb2->free = 1;
		skb_put(skb2,len + hlen);
		skb2->h.raw=(char *) skb2->data;

		/*
		 * Charge the memory for the fragment to any owner
		 * it might possess
		 */

		save_flags(flags);
		if (sk)
		{
			cli();
			sk->wmem_alloc += skb2->truesize;
			skb2->sk=sk;
		}
		restore_flags(flags);
		skb2->raddr = skb->raddr;	/* For rebuild_header - must be here */

		/*
		 * Copy the packet header into the new buffer.
		 */

		memcpy(skb2->h.raw, raw, hlen);

		/*
		 * Copy a block of the IP datagram.
		 */
		memcpy(skb2->h.raw + hlen, ptr, len);
		left -= len;

		/* h.raw now points at the fragment's IP header, not its MAC header */
		skb2->h.raw+=dev->hard_header_len;

		/*
		 * Fill in the new header fields.
		 */
		iph = (struct iphdr *)(skb2->h.raw/*+dev->hard_header_len*/);
		iph->frag_off = htons((offset >> 3));	/* offset field counts 8-byte units */
		/*
		 * Added AC : If we are fragmenting a fragment thats not the
		 * last fragment then keep MF on each bit
		 */
		if (left > 0 || (is_frag & 1))
			iph->frag_off |= htons(IP_MF);
		ptr += len;
		offset += len;

		/*
		 * Put this fragment into the sending queue.
		 */

		ip_statistics.IpFragCreates++;

		ip_queue_xmit(sk, dev, skb2, 2);
	}
	ip_statistics.IpFragOKs++;
}
1001
1002
1003 #ifdefCONFIG_IP_FORWARD1004
/*
 *	Forward an IP datagram to its next destination.
 *
 *	The skb is NOT consumed on most paths — the caller appears to own
 *	and free it (only the strict-source-route failure frees it here);
 *	a copy is made for transmission.
 *
 *	NOTE(review): iph->ttl is unsigned, so 'ttl <= 0' only catches an
 *	incoming TTL of 1; a TTL of 0 would wrap to 255 at the decrement.
 *	Presumably ip_rcv never hands a TTL-0 packet to us — confirm.
 */

void ip_forward(struct sk_buff *skb, struct device *dev, int is_frag, unsigned long target_addr, int target_strict)
{
	struct device *dev2;	/* Output device */
	struct iphdr *iph;	/* Our header */
	struct sk_buff *skb2;	/* Output packet */
	struct rtable *rt;	/* Route we use */
	unsigned char *ptr;	/* Data pointer */
	unsigned long raddr;	/* Router IP address */
#ifdef CONFIG_IP_FIREWALL
	int fw_res = 0;		/* Forwarding result */

	/*
	 * See if we are allowed to forward this.
	 * Note: demasqueraded fragments are always 'back'warded.
	 */

	if(!(is_frag&4) && (fw_res=ip_fw_chk(skb->h.iph, dev, ip_fw_fwd_chain, ip_fw_fwd_policy, 0))!=1)
	{
		if(fw_res==-1)
			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0, dev);
		return;
	}
#endif
	/*
	 * According to the RFC, we must first decrease the TTL field. If
	 * that reaches zero, we must reply an ICMP control message telling
	 * that the packet's lifetime expired.
	 *
	 * Exception:
	 * We may not generate an ICMP for an ICMP. icmp_send does the
	 * enforcement of this so we can forget it here. It is however
	 * sometimes VERY important.
	 */

	iph = skb->h.iph;
	iph->ttl--;

	/*
	 * Re-compute the IP header checksum incrementally: decrementing the
	 * TTL adds 0x0100 to the one's-complement sum (with end-around carry).
	 * This is inefficient. We know what has happened to the header
	 * and could thus adjust the checksum as Phil Karn does in KA9Q
	 */

	iph->check = ntohs(iph->check) + 0x0100;
	if ((iph->check & 0xFF00) == 0)
		iph->check++;		/* carry overflow */
	iph->check = htons(iph->check);

	if (iph->ttl <= 0)
	{
		/* Tell the sender its packet died... */
		icmp_send(skb, ICMP_TIME_EXCEEDED, ICMP_EXC_TTL, 0, dev);
		return;
	}

	/*
	 * OK, the packet is still valid. Fetch its destination address,
	 * and give it to the IP sender for further processing.
	 */

	rt = ip_rt_route(target_addr, NULL, NULL);
	if (rt == NULL)
	{
		/*
		 * Tell the sender its packet cannot be delivered. Again
		 * ICMP is screened later.
		 */
		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_NET_UNREACH, 0, dev);
		return;
	}

	/*
	 * Gosh. Not only is the packet valid; we even know how to
	 * forward it onto its final destination. Can we say this
	 * is being plain lucky?
	 * If the router told us that there is no GW, use the dest.
	 * IP address itself- we seem to be connected directly...
	 */

	raddr = rt->rt_gateway;

	if (raddr != 0)
	{
		/*
		 * Strict routing permits no gatewaying
		 */

		if(target_strict)
		{
			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_SR_FAILED, 0, dev);
			kfree_skb(skb, FREE_READ);
			return;
		}

		/*
		 * There is a gateway so find the correct route for it.
		 * Gateways cannot in turn be gatewayed.
		 */

		rt = ip_rt_route(raddr, NULL, NULL);
		if (rt == NULL)
		{
			/*
			 * Tell the sender its packet cannot be delivered...
			 */
			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0, dev);
			return;
		}
		if (rt->rt_gateway != 0)
			raddr = rt->rt_gateway;
	}
	else
		raddr = target_addr;

	/*
	 * Having picked a route we can now send the frame out.
	 */

	dev2 = rt->rt_dev;

	/*
	 * In IP you never have to forward a frame on the interface that it
	 * arrived upon. We now generate an ICMP HOST REDIRECT giving the route
	 * we calculated.
	 */
#ifndef CONFIG_IP_NO_ICMP_REDIRECT
	/* Redirect only when src and dest share the input interface's subnet. */
	if (dev == dev2 && !((iph->saddr^iph->daddr)&dev->pa_mask) && rt->rt_flags&RTF_MODIFIED)
		icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, raddr, dev);
#endif

	/*
	 * We now allocate a new buffer, and copy the datagram into it.
	 * If the indicated interface is up and running, kick it.
	 */

	if (dev2->flags & IFF_UP)
	{
#ifdef CONFIG_IP_MASQUERADE
		/*
		 * If this fragment needs masquerading, make it so...
		 * (Dont masquerade de-masqueraded fragments)
		 * NOTE(review): ip_fw_masquerade may replace skb; iph still
		 * points into the old buffer below — confirm intended.
		 */
		if (!(is_frag&4) && fw_res==2)
			ip_fw_masquerade(&skb, dev2);
#endif

		/*
		 * Current design decrees we copy the packet. For identical header
		 * lengths we could avoid it. The new skb code will let us push
		 * data so the problem goes away then.
		 */

		skb2 = alloc_skb(dev2->hard_header_len + skb->len + 15, GFP_ATOMIC);

		/*
		 * This is rare and since IP is tolerant of network failures
		 * quite harmless.
		 */

		if (skb2 == NULL)
		{
			NETDEBUG(printk("\nIP: No memory available for IP forward\n"));
			return;
		}

		/* Now build the MAC header. */
		(void) ip_send(skb2, raddr, skb->len, dev2, dev2->pa_addr);

		ptr = skb_put(skb2,skb->len);
		skb2->free = 1;
		skb2->h.raw = ptr;

		/*
		 * Copy the packet data into the new buffer.
		 */
		memcpy(ptr, skb->h.raw, skb->len);

		ip_statistics.IpForwDatagrams++;

		/*
		 * See if it needs fragmenting. Note in ip_rcv we tagged
		 * the fragment type. This must be right so that
		 * the fragmenter does the right thing.
		 */

		if(skb2->len > dev2->mtu + dev2->hard_header_len)
		{
			ip_fragment(NULL,skb2,dev2, is_frag);
			kfree_skb(skb2,FREE_WRITE);	/* fragments were copied again; free the intermediate copy */
		}
		else
		{
#ifdef CONFIG_IP_ACCT
			/*
			 * Count mapping we shortcut
			 */

			ip_fw_chk(iph,dev,ip_acct_chain,IP_FW_F_ACCEPT,1);
#endif

			/*
			 * Map service types to priority. We lie about
			 * throughput being low priority, but it's a good
			 * choice to help improve general usage.
			 */
			if(iph->tos & IPTOS_LOWDELAY)
				dev_queue_xmit(skb2, dev2, SOPRI_INTERACTIVE);
			else if(iph->tos & IPTOS_THROUGHPUT)
				dev_queue_xmit(skb2, dev2, SOPRI_BACKGROUND);
			else
				dev_queue_xmit(skb2, dev2, SOPRI_NORMAL);
		}
	}
}
1228
1229 #endif1230
1231 /*1232 * This function receives all incoming IP datagrams.1233 *1234 * On entry skb->data points to the start of the IP header and1235 * the MAC header has been removed.1236 */1237
/*
 * Entry point for every received IP datagram.  On entry skb->data points
 * at the IP header (MAC header already stripped).  Validates the header,
 * runs the firewall, parses options, reassembles fragments, then either
 * delivers locally (raw sockets + one transport protocol handler) or
 * hands the frame to ip_forward().  Always returns 0 except on bad
 * options, and always consumes (frees or forwards) the skb.
 */
int ip_rcv(struct sk_buff *skb, struct device *dev, struct packet_type *pt)
{
	struct iphdr *iph = skb->h.iph;
	struct sock *raw_sk = NULL;
	unsigned char hash;
	unsigned char flag = 0;		/* set once any protocol handler takes the frame */
	struct inet_protocol *ipprot;
	int brd = IS_MYADDR;		/* address class of daddr; IS_MYADDR unless ip_chk_addr says otherwise */
	unsigned long target_addr;	/* where to forward to (may be rewritten by source routing) */
	int target_strict = 0;		/* 1 if SSRR demanded strict routing */
	int is_frag = 0;		/* bit 0 = more fragments, bit 1 = not first fragment */
#ifdef CONFIG_IP_FIREWALL
	int err;
#endif

	ip_statistics.IpInReceives++;

	/*
	 * Tag the ip header of this packet so we can find it
	 */
	skb->ip_hdr = iph;

	/*
	 * RFC1122: 3.1.2.2 MUST silently discard any IP frame that fails the checksum.
	 *
	 * Is the datagram acceptable?
	 *	1. Length at least the size of an ip header
	 *	2. Version of 4
	 *	3. Checksums correctly
	 *	4. Doesn't have a bogus length (buffer shorter than claimed total)
	 */
	if (skb->len<sizeof(struct iphdr) || iph->ihl<5 || iph->version != 4 || ip_fast_csum((unsigned char *)iph, iph->ihl) !=0
		|| skb->len < ntohs(iph->tot_len))
	{
		ip_statistics.IpInHdrErrors++;
		kfree_skb(skb, FREE_WRITE);
		return(0);
	}

	/*
	 * Our transport medium may have padded the buffer out.  Now we know it
	 * is IP we can trim to the true length of the frame.
	 */
	skb_trim(skb,ntohs(iph->tot_len));

	/*
	 * See if the firewall wants to dispose of the packet.
	 * err == -1 means "reject" (send an ICMP error); other <1 means silent drop.
	 */
#ifdef CONFIG_IP_FIREWALL
	if ((err=ip_fw_chk(iph,dev,ip_fw_blk_chain,ip_fw_blk_policy, 0))<1)
	{
		if(err==-1)
			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0, dev);
		kfree_skb(skb, FREE_WRITE);
		return 0;
	}
#endif

	/*
	 * Next analyse the packet for options.  Studies show under one packet in
	 * a thousand have options....
	 */
	target_addr = iph->daddr;

	if (iph->ihl != 5)
	{
		/*
		 * Walk the option bytes following the 20-byte fixed header.
		 * RFC 1122: 3.2.1.8 MUST NOT crash on a zero length option.
		 */
		int opt_space=4*(iph->ihl-5);
		int opt_size;
		unsigned char *opt_ptr=skb->h.raw+sizeof(struct iphdr);

		skb->ip_summed=0;	/* Our free checksum is bogus for this case */

		while(opt_space>0)
		{
			if(*opt_ptr==IPOPT_NOOP)
			{
				opt_ptr++;
				opt_space--;
				continue;
			}
			if(*opt_ptr==IPOPT_END)
				break;	/* Done */
			/* Malformed length field: too short or overruns the option area */
			if(opt_space<2 || (opt_size=opt_ptr[1])<2 || opt_ptr[1]>opt_space)
			{
				/*
				 * RFC 1122: 3.2.2.5 SHOULD send parameter problem reports.
				 * NOTE(review): freed with FREE_READ here but FREE_WRITE
				 * elsewhere in this function — confirm intended.
				 */
				icmp_send(skb, ICMP_PARAMETERPROB, 0, 0, skb->dev);
				kfree_skb(skb, FREE_READ);
				return -EINVAL;
			}
			switch(opt_ptr[0])
			{
				case IPOPT_SEC:
					/* Should we drop this ?? */
					break;
				case IPOPT_SSRR:	/* These work almost the same way */
					target_strict=1;
					/* Fall through */
				case IPOPT_LSRR:
#ifdef CONFIG_IP_NOSR
					/* Source routing disabled at build time: junk the frame */
					kfree_skb(skb, FREE_READ);
					return -EINVAL;
#endif
					/* falls through to shared route-record handling */
				case IPOPT_RR:
					/*
					 * RFC 1122: 3.2.1.8 Support for RR is OPTIONAL.
					 * Only process if the frame is addressed to us.
					 */
					if (iph->daddr!=skb->dev->pa_addr && (brd = ip_chk_addr(iph->daddr)) == 0)
						break;
					if((opt_size<3) || ( opt_ptr[0]==IPOPT_RR && opt_ptr[2] > opt_size-4 ))
					{
						if(ip_chk_addr(iph->daddr))
							icmp_send(skb, ICMP_PARAMETERPROB, 0, 0, skb->dev);
						kfree_skb(skb, FREE_READ);
						return -EINVAL;
					}
					if(opt_ptr[2] > opt_size-4 )
						break;	/* route list full: nothing more to record */
					/* Bytes are [IPOPT_xxRR][Length][EntryPointer][Entry0][Entry1].... */
					/* This isn't going to be too portable - FIXME */
					if(opt_ptr[0]!=IPOPT_RR)
					{
						int t;
						/* Source route: next hop becomes the forwarding target */
						target_addr=*(u32 *)(&opt_ptr[opt_ptr[2]]);	/* Get hop */
						t=ip_chk_addr(target_addr);
						/* Refuse broadcast/multicast hops (source-route explosions) */
						if(t==IS_MULTICAST||t==IS_BROADCAST)
						{
							if(ip_chk_addr(iph->daddr))
								icmp_send(skb, ICMP_PARAMETERPROB, 0, 0, skb->dev);
							kfree_skb(skb,FREE_READ);
							return -EINVAL;
						}
					}
					*(u32 *)(&opt_ptr[opt_ptr[2]])=skb->dev->pa_addr;	/* Record hop */
					break;
				case IPOPT_TIMESTAMP:
					/*
					 * RFC 1122: 3.2.1.8 The timestamp option is OPTIONAL but if
					 * implemented MUST meet various rules.  Not implemented yet.
					 */
					NETDEBUG(printk("ICMP: Someone finish the timestamp routine ;)\n"));
					break;
				default:
					break;
			}
			opt_ptr+=opt_size;
			opt_space-=opt_size;
		}
	}

	/*
	 * Remember if the frame is fragmented: bit 0 = more fragments follow,
	 * bit 1 = nonzero offset (not the first fragment).
	 */
	if(iph->frag_off)
	{
		if (iph->frag_off & htons(IP_MF))
			is_frag|=1;
		if (iph->frag_off & htons(IP_OFFSET))
			is_frag|=2;
	}

	/*
	 * Is it for us?  For most hosts over 99% of packets match the first
	 * (cheap) comparison and skip the expensive ip_chk_addr() call.
	 */
	if ( iph->daddr == skb->dev->pa_addr || (brd = ip_chk_addr(iph->daddr)) != 0)
	{
#ifdef CONFIG_IP_MULTICAST
		/* Multicast frames are only accepted for groups we have joined
		   (224.0.0.1 is implicitly everyone's group) */
		if(brd==IS_MULTICAST && iph->daddr!=IGMP_ALL_HOSTS && !(dev->flags&IFF_LOOPBACK))
		{
			struct ip_mc_list *ip_mc=dev->ip_mc_list;
			do
			{
				if(ip_mc==NULL)
				{
					kfree_skb(skb, FREE_WRITE);
					return 0;
				}
				if(ip_mc->multiaddr==iph->daddr)
					break;
				ip_mc=ip_mc->next;
			}
			while(1);
		}
#endif
#ifdef CONFIG_IP_MASQUERADE
		/*
		 * Do we need to de-masquerade this fragment?  If so the real
		 * destination was rewritten by ip_fw_demasquerade; re-forward
		 * with flag bit 2 set so it is not re-masqueraded.
		 */
		if (ip_fw_demasquerade(skb))
		{
			struct iphdr *iph=skb->h.iph;
			ip_forward(skb, dev, is_frag|4, iph->daddr, 0);
			kfree_skb(skb, FREE_WRITE);
			return(0);
		}
#endif
		/* Account for the packet */
#ifdef CONFIG_IP_ACCT
		ip_fw_chk(iph,dev,ip_acct_chain,IP_FW_F_ACCEPT,1);
#endif

		/*
		 * Reassemble IP fragments.  ip_defrag returns NULL until the
		 * datagram is complete, in which case it owns the skb.
		 */
		if(is_frag)
		{
			skb=ip_defrag(iph,skb,dev);
			if(skb==NULL)
				return 0;
			skb->dev = dev;
			iph=skb->h.iph;
		}

		/*
		 * Point into the IP datagram, just past the header.
		 */
		skb->ip_hdr = iph;
		skb->h.raw += iph->ihl*4;

		/*
		 * Deliver to raw sockets.  To avoid surplus copies, the LAST
		 * matching raw socket gets the original skb after the protocol
		 * handlers have run; earlier ones get clones now.
		 */
		hash = iph->protocol & (SOCK_ARRAY_SIZE-1);

		if((raw_sk=raw_prot.sock_array[hash])!=NULL)
		{
			struct sock *sknext=NULL;
			struct sk_buff *skb1;
			raw_sk=get_sock_raw(raw_sk, hash, iph->saddr, iph->daddr);
			if(raw_sk)	/* Any raw sockets */
			{
				do
				{
					/* Find the next */
					sknext=get_sock_raw(raw_sk->next, hash, iph->saddr, iph->daddr);
					if(sknext)
						skb1=skb_clone(skb, GFP_ATOMIC);
					else
						break;	/* One pending raw socket left */
					if(skb1)
						raw_rcv(raw_sk, skb1, dev, iph->saddr,iph->daddr);
					raw_sk=sknext;
				}
				while(raw_sk!=NULL);
				/* raw_sk is now the last raw socket, delivered to below */
			}
		}

		/*
		 * skb->h.raw now points at the protocol beyond the IP header.
		 * Walk the registered transport protocols; clone if more than
		 * one consumer (or a raw delivery) still needs the data.
		 */
		hash = iph->protocol & (MAX_INET_PROTOS -1);
		for (ipprot = (struct inet_protocol *)inet_protos[hash];ipprot != NULL;ipprot=(struct inet_protocol *)ipprot->next)
		{
			struct sk_buff *skb2;

			if (ipprot->protocol != iph->protocol)
				continue;
			if (ipprot->copy || raw_sk)
			{
				skb2 = skb_clone(skb, GFP_ATOMIC);
				if(skb2==NULL)
					continue;
			}
			else
			{
				skb2 = skb;
			}
			flag = 1;

			/*
			 * Pass on the datagram to each protocol that wants it,
			 * based on the datagram protocol.  We should really
			 * check the protocol handler's return values here...
			 */
			ipprot->handler(skb2, dev, NULL, iph->daddr,
				(ntohs(iph->tot_len) - (iph->ihl * 4)),
				iph->saddr, 0, ipprot);
		}

		/*
		 * All protocols checked.
		 * If this packet was a broadcast, we may *not* reply to it, since that
		 * causes (proven, grin) ARP storms and a leakage of memory (i.e. all
		 * ICMP reply messages get queued up for transmission...)
		 */
		if(raw_sk!=NULL)	/* Shift to last raw user */
			raw_rcv(raw_sk, skb, dev, iph->saddr, iph->daddr);
		else if (!flag)		/* Free and report errors */
		{
			if (brd != IS_BROADCAST && brd!=IS_MULTICAST)
				icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PROT_UNREACH, 0, dev);
			kfree_skb(skb, FREE_WRITE);
		}

		return(0);
	}

	/*
	 * Not for us.  Don't forward multicast or broadcast frames.
	 */
	if(skb->pkt_type!=PACKET_HOST || brd==IS_BROADCAST)
	{
		kfree_skb(skb,FREE_WRITE);
		return 0;
	}

	/*
	 * The packet is for another target.  Forward the frame.
	 */
#ifdef CONFIG_IP_FORWARD
	ip_forward(skb, dev, is_frag, target_addr, target_strict);
#else
/*	printk("Machine %lx tried to use us as a forwarder to %lx but we have forwarding disabled!\n",
			iph->saddr,iph->daddr);*/
	ip_statistics.IpInAddrErrors++;
#endif
	/*
	 * The forwarder is inefficient and copies the packet.  We
	 * free the original now.
	 */
	kfree_skb(skb, FREE_WRITE);
	return(0);
}
1653
1654 /*1655 * Loop a packet back to the sender.1656 */1657
1658 staticvoidip_loopback(structdevice *old_dev, structsk_buff *skb)
/* */1659 {1660 externstructdeviceloopback_dev;
1661 structdevice *dev=&loopback_dev;
1662 intlen=skb->len-old_dev->hard_header_len;
1663 structsk_buff *newskb=dev_alloc_skb(len+dev->hard_header_len+15);
1664
1665 if(newskb==NULL)
1666 return;
1667
1668 newskb->link3=NULL;
1669 newskb->sk=NULL;
1670 newskb->dev=dev;
1671 newskb->saddr=skb->saddr;
1672 newskb->daddr=skb->daddr;
1673 newskb->raddr=skb->raddr;
1674 newskb->free=1;
1675 newskb->lock=0;
1676 newskb->users=0;
1677 newskb->pkt_type=skb->pkt_type;
1678
1679 /*1680 * Put a MAC header on the packet1681 */1682 ip_send(newskb, skb->ip_hdr->daddr, len, dev, skb->ip_hdr->saddr);
1683 /*1684 * Add the rest of the data space. 1685 */1686 newskb->ip_hdr=(structiphdr *)skb_put(skb, len);
1687 /*1688 * Copy the data1689 */1690 memcpy(newskb->ip_hdr,skb->ip_hdr,len);
1691
1692 /* Recurse. The device check against IFF_LOOPBACK will stop infinite recursion */1693
1694 /*printk("Loopback output queued [%lX to %lX].\n", newskb->ip_hdr->saddr,newskb->ip_hdr->daddr);*/1695 ip_queue_xmit(NULL, dev, newskb, 1);
1696 }1697
1698
1699 /*1700 * Queues a packet to be sent, and starts the transmitter1701 * if necessary. if free = 1 then we free the block after1702 * transmit, otherwise we don't. If free==2 we not only1703 * free the block but also don't assign a new ip seq number.1704 * This routine also needs to put in the total length,1705 * and compute the checksum1706 */1707
/*
 * Queue a fully-built IP packet (MAC header space already present in
 * skb->data) for transmission on 'dev'.  free==1: free after transmit;
 * free==0: keep on sk's send queue (TCP retransmission); free==2: like 1
 * but do not assign a new IP id (refragmented frames keep theirs).
 * Fills in tot_len/id/checksum, fragments if needed, and loops back
 * multicast/broadcast copies for local listeners.
 */
void ip_queue_xmit(struct sock *sk, struct device *dev,
	      struct sk_buff *skb, int free)
{
	struct iphdr *iph;
	unsigned char *ptr;

	/* Sanity check */
	if (dev == NULL)
	{
		NETDEBUG(printk("IP: ip_queue_xmit dev = NULL\n"));
		return;
	}

	IS_SKB(skb);

	/*
	 * Do some book-keeping in the packet for later
	 */
	skb->dev = dev;
	skb->when = jiffies;

	/*
	 * Find the IP header (it sits just past the MAC header) and set the
	 * total length.  ntohs here is equivalent to htons — byte-swap only.
	 */
	ptr = skb->data;
	ptr += dev->hard_header_len;
	iph = (struct iphdr *)ptr;
	skb->ip_hdr = iph;
	iph->tot_len = ntohs(skb->len-dev->hard_header_len);

#ifdef CONFIG_IP_FIREWALL
	if(ip_fw_chk(iph, dev, ip_fw_blk_chain, ip_fw_blk_policy, 0) != 1)
		/* just don't send this packet
		   NOTE(review): the skb is neither freed nor queued here —
		   looks like a leak for free!=0 callers; confirm ownership. */
		return;
#endif

	/*
	 * No reassigning numbers to fragments...
	 */
	if(free!=2)
		iph->id = htons(ip_id_count++);
	else
		free=1;

	/* All buffers without an owner socket get freed */
	if (sk == NULL)
		free = 1;

	skb->free = free;

	/*
	 * Do we need to fragment?  The fragmenter copies; free the original.
	 */
	if(skb->len > dev->mtu + dev->hard_header_len)
	{
		ip_fragment(sk,skb,dev,0);
		IS_SKB(skb);
		kfree_skb(skb,FREE_WRITE);
		return;
	}

	/*
	 * Add an IP checksum
	 */
	ip_send_check(iph);

	/*
	 * You cannot queue a packet already on a list.
	 * Spot this and moan loudly.
	 */
	if (skb->next != NULL)
	{
		NETDEBUG(printk("ip_queue_xmit: next != NULL\n"));
		skb_unlink(skb);
	}

	/*
	 * If a sender wishes the packet to remain unfreed
	 * we add it to his send queue.  This arguably belongs
	 * in the TCP level since nobody else uses it.  BUT
	 * remember IPng might change all the rules.
	 */
	if (!free)
	{
		unsigned long flags;
		/* The socket now has more outstanding blocks */
		sk->packets_out++;

		/* Protect the list for a moment (append under interrupts off) */
		save_flags(flags);
		cli();

		if (skb->link3 != NULL)
		{
			NETDEBUG(printk("ip.c: link3 != NULL\n"));
			skb->link3 = NULL;
		}
		if (sk->send_head == NULL)
		{
			sk->send_tail = skb;
			sk->send_head = skb;
		}
		else
		{
			sk->send_tail->link3 = skb;
			sk->send_tail = skb;
		}
		/* skb->link3 is NULL */

		/* Interrupt restore */
		restore_flags(flags);
	}
	else
		/* Remember who owns the buffer */
		skb->sk = sk;

	ip_statistics.IpOutRequests++;
#ifdef CONFIG_IP_ACCT
	ip_fw_chk(iph,dev,ip_acct_chain,IP_FW_F_ACCEPT,1);
#endif

#ifdef CONFIG_IP_MULTICAST

	/*
	 * Multicasts are looped back for other local users
	 * (unless the socket opted out via IP_MULTICAST_LOOP).
	 */
	if (MULTICAST(iph->daddr) && !(dev->flags&IFF_LOOPBACK))
	{
		if(sk==NULL || sk->ip_mc_loop)
		{
			if(iph->daddr==IGMP_ALL_HOSTS)
				ip_loopback(dev,skb);
			else
			{
				/* Only loop back if we are a member of the group */
				struct ip_mc_list *imc=dev->ip_mc_list;
				while(imc!=NULL)
				{
					if(imc->multiaddr==iph->daddr)
					{
						ip_loopback(dev,skb);
						break;
					}
					imc=imc->next;
				}
			}
		}
		/* Multicasts with ttl 0 must not go beyond the host */

		if(skb->ip_hdr->ttl==0)
		{
			kfree_skb(skb, FREE_READ);
			return;
		}
	}
#endif
	/* Broadcasts also get a local copy */
	if((dev->flags&IFF_BROADCAST) && iph->daddr==dev->pa_brdaddr && !(dev->flags&IFF_LOOPBACK))
		ip_loopback(dev,skb);

	if (dev->flags & IFF_UP)
	{
		/*
		 * If we have an owner use its priority setting,
		 * otherwise use NORMAL
		 */
		if (sk != NULL)
		{
			dev_queue_xmit(skb, dev, sk->priority);
		}
		else
		{
			dev_queue_xmit(skb, dev, SOPRI_NORMAL);
		}
	}
	else
	{
		/* Interface down: drop (only if we own the buffer) */
		ip_statistics.IpOutDiscards++;
		if (free)
			kfree_skb(skb, FREE_WRITE);
	}
}
1913
1914
1915 #ifdefCONFIG_IP_MULTICAST1916
1917 /*1918 * Write an multicast group list table for the IGMP daemon to1919 * read.1920 */1921
1922 intip_mc_procinfo(char *buffer, char **start, off_toffset, intlength)
/* */1923 {1924 off_tpos=0, begin=0;
1925 structip_mc_list *im;
1926 unsignedlongflags;
1927 intlen=0;
1928 structdevice *dev;
1929
1930 len=sprintf(buffer,"Device : Count\tGroup Users Timer\n");
1931 save_flags(flags);
1932 cli();
1933
1934 for(dev = dev_base; dev; dev = dev->next)
1935 {1936 if((dev->flags&IFF_UP)&&(dev->flags&IFF_MULTICAST))
1937 {1938 len+=sprintf(buffer+len,"%-10s: %5d\n",
1939 dev->name, dev->mc_count);
1940 for(im = dev->ip_mc_list; im; im = im->next)
1941 {1942 len+=sprintf(buffer+len,
1943 "\t\t\t%08lX %5d %d:%08lX\n",
1944 im->multiaddr, im->users,
1945 im->tm_running, im->timer.expires);
1946 pos=begin+len;
1947 if(pos<offset)
1948 {1949 len=0;
1950 begin=pos;
1951 }1952 if(pos>offset+length)
1953 break;
1954 }1955 }1956 }1957 restore_flags(flags);
1958 *start=buffer+(offset-begin);
1959 len-=(offset-begin);
1960 if(len>length)
1961 len=length;
1962 returnlen;
1963 }1964
1965
1966 #endif1967 /*1968 * Socket option code for IP. This is the end of the line after any TCP,UDP etc options on1969 * an IP socket.1970 *1971 * We implement IP_TOS (type of service), IP_TTL (time to live).1972 *1973 * Next release we will sort out IP_OPTIONS since for some people are kind of important.1974 */1975
1976 staticstructdevice *ip_mc_find_devfor(unsignedlongaddr)
/* */1977 {1978 structdevice *dev;
1979 for(dev = dev_base; dev; dev = dev->next)
1980 {1981 if((dev->flags&IFF_UP)&&(dev->flags&IFF_MULTICAST)&&
1982 (dev->pa_addr==addr))
1983 returndev;
1984 }1985
1986 returnNULL;
1987 }1988
/*
 * Socket option code for IP — the end of the line after any TCP/UDP etc.
 * options on an IP socket.  Implements IP_TOS, IP_TTL, the multicast
 * options, and (when configured) the firewall/accounting control calls.
 * 'optval' is a userspace pointer; every read goes through verify_area +
 * get_user/memcpy_fromfs.  Returns 0 or a negative errno.
 */
int ip_setsockopt(struct sock *sk, int level, int optname, char *optval, int optlen)
{
	int val,err;
	unsigned char ucval;
#if defined(CONFIG_IP_FIREWALL) || defined(CONFIG_IP_ACCT)
	struct ip_fw tmp_fw;
#endif
	if (optval == NULL)
		return(-EINVAL);

	err=verify_area(VERIFY_READ, optval, sizeof(int));
	if(err)
		return err;

	/* Fetch the argument both as an int and as a byte; each case below
	   uses whichever width it expects */
	val = get_user((int *) optval);
	ucval=get_user((unsigned char *) optval);

	if(level!=SOL_IP)
		return -EOPNOTSUPP;

	switch(optname)
	{
		case IP_TOS:
			if(val<0||val>255)
				return -EINVAL;
			sk->ip_tos=val;
			/* TOS also adjusts the queueing priority */
			if(val==IPTOS_LOWDELAY)
				sk->priority=SOPRI_INTERACTIVE;
			if(val==IPTOS_THROUGHPUT)
				sk->priority=SOPRI_BACKGROUND;
			return 0;
		case IP_TTL:
			if(val<1||val>255)
				return -EINVAL;
			sk->ip_ttl=val;
			return 0;
#ifdef CONFIG_IP_MULTICAST
		case IP_MULTICAST_TTL:
		{
			sk->ip_mc_ttl=(int)ucval;
			return 0;
		}
		case IP_MULTICAST_LOOP:
		{
			if(ucval!=0 && ucval!=1)
				return -EINVAL;
			sk->ip_mc_loop=(int)ucval;
			return 0;
		}
		case IP_MULTICAST_IF:
		{
			struct in_addr addr;
			struct device *dev=NULL;

			/*
			 * Check the arguments are allowable
			 */
			err=verify_area(VERIFY_READ, optval, sizeof(addr));
			if(err)
				return err;

			memcpy_fromfs(&addr,optval,sizeof(addr));

			/*
			 * What address has been requested
			 */
			if(addr.s_addr==INADDR_ANY)	/* Default */
			{
				sk->ip_mc_name[0]=0;
				return 0;
			}

			/*
			 * Find the device
			 */
			dev=ip_mc_find_devfor(addr.s_addr);

			/*
			 * Did we find one
			 */
			if(dev)
			{
				strcpy(sk->ip_mc_name,dev->name);
				return 0;
			}
			return -EADDRNOTAVAIL;
		}

		case IP_ADD_MEMBERSHIP:
		{
			/*
			 * FIXME: Add/Del membership should have a semaphore protecting them from re-entry
			 */
			struct ip_mreq mreq;
			unsigned long route_src;
			struct rtable *rt;
			struct device *dev=NULL;

			/*
			 * Check the arguments.
			 */
			err=verify_area(VERIFY_READ, optval, sizeof(mreq));
			if(err)
				return err;

			memcpy_fromfs(&mreq,optval,sizeof(mreq));

			/*
			 * Get device for use later
			 */
			if(mreq.imr_interface.s_addr==INADDR_ANY)
			{
				/*
				 * Not set so scan: route toward the group and use
				 * whatever device the routing table picks.
				 */
				if((rt=ip_rt_route(mreq.imr_multiaddr.s_addr,NULL, &route_src))!=NULL)
				{
					dev=rt->rt_dev;
					rt->rt_use--;	/* drop the reference ip_rt_route took */
				}
			}
			else
			{
				/*
				 * Find a suitable device.
				 */
				dev=ip_mc_find_devfor(mreq.imr_interface.s_addr);
			}

			/*
			 * No device, no cookies.
			 */
			if(!dev)
				return -ENODEV;

			/*
			 * Join group.
			 */
			return ip_mc_join_group(sk,dev,mreq.imr_multiaddr.s_addr);
		}

		case IP_DROP_MEMBERSHIP:
		{
			struct ip_mreq mreq;
			struct rtable *rt;
			unsigned long route_src;
			struct device *dev=NULL;

			/*
			 * Check the arguments
			 */
			err=verify_area(VERIFY_READ, optval, sizeof(mreq));
			if(err)
				return err;

			memcpy_fromfs(&mreq,optval,sizeof(mreq));

			/*
			 * Get device for use later (same resolution as ADD above)
			 */
			if(mreq.imr_interface.s_addr==INADDR_ANY)
			{
				if((rt=ip_rt_route(mreq.imr_multiaddr.s_addr,NULL, &route_src))!=NULL)
				{
					dev=rt->rt_dev;
					rt->rt_use--;
				}
			}
			else
			{
				dev=ip_mc_find_devfor(mreq.imr_interface.s_addr);
			}

			/*
			 * Did we find a suitable device.
			 */
			if(!dev)
				return -ENODEV;

			/*
			 * Leave group
			 */
			return ip_mc_leave_group(sk,dev,mreq.imr_multiaddr.s_addr);
		}
#endif
#ifdef CONFIG_IP_FIREWALL
		case IP_FW_ADD_BLK:
		case IP_FW_DEL_BLK:
		case IP_FW_ADD_FWD:
		case IP_FW_DEL_FWD:
		case IP_FW_CHK_BLK:
		case IP_FW_CHK_FWD:
		case IP_FW_FLUSH_BLK:
		case IP_FW_FLUSH_FWD:
		case IP_FW_ZERO_BLK:
		case IP_FW_ZERO_FWD:
		case IP_FW_POLICY_BLK:
		case IP_FW_POLICY_FWD:
			/* Firewall control: root only, bounded copy of the rule */
			if(!suser())
				return -EPERM;
			if(optlen>sizeof(tmp_fw) || optlen<1)
				return -EINVAL;
			err=verify_area(VERIFY_READ,optval,optlen);
			if(err)
				return err;
			memcpy_fromfs(&tmp_fw,optval,optlen);
			err=ip_fw_ctl(optname, &tmp_fw,optlen);
			return -err;	/* -0 is 0 after all */

#endif
#ifdef CONFIG_IP_ACCT
		case IP_ACCT_DEL:
		case IP_ACCT_ADD:
		case IP_ACCT_FLUSH:
		case IP_ACCT_ZERO:
			/* Accounting control: same privilege and copy rules */
			if(!suser())
				return -EPERM;
			if(optlen>sizeof(tmp_fw) || optlen<1)
				return -EINVAL;
			err=verify_area(VERIFY_READ,optval,optlen);
			if(err)
				return err;
			memcpy_fromfs(&tmp_fw, optval,optlen);
			err=ip_acct_ctl(optname, &tmp_fw,optlen);
			return -err;	/* -0 is 0 after all */
#endif
		/* IP_OPTIONS and friends go here eventually */
		default:
			return(-ENOPROTOOPT);
	}
}
2237 /*2238 * Get the options. Note for future reference. The GET of IP options gets the2239 * _received_ ones. The set sets the _sent_ ones.2240 */2241
/*
 * Get the options.  Note for future reference.  The GET of IP options gets
 * the _received_ ones.  The set sets the _sent_ ones.
 * Most options return an int via the shared tail below; IP_MULTICAST_IF
 * returns the interface name string and its length directly.
 */
int ip_getsockopt(struct sock *sk, int level, int optname, char *optval, int *optlen)
{
	int val,err;
#ifdef CONFIG_IP_MULTICAST
	int len;
#endif

	if(level!=SOL_IP)
		return -EOPNOTSUPP;

	switch(optname)
	{
		case IP_TOS:
			val=sk->ip_tos;
			break;
		case IP_TTL:
			val=sk->ip_ttl;
			break;
#ifdef CONFIG_IP_MULTICAST
		case IP_MULTICAST_TTL:
			val=sk->ip_mc_ttl;
			break;
		case IP_MULTICAST_LOOP:
			val=sk->ip_mc_loop;
			break;
		case IP_MULTICAST_IF:
			/* Copies the bound device name out; NOTE(review): the
			   copy is strlen bytes with no NUL terminator — the
			   caller must use the returned length.  Confirm this is
			   the intended ABI. */
			err=verify_area(VERIFY_WRITE, optlen, sizeof(int));
			if(err)
				return err;
			len=strlen(sk->ip_mc_name);
			err=verify_area(VERIFY_WRITE, optval, len);
			if(err)
				return err;
			put_user(len,(int *) optlen);
			memcpy_tofs((void *)optval,sk->ip_mc_name, len);
			return 0;
#endif
		default:
			return(-ENOPROTOOPT);
	}
	/* Common tail: write back a single int result */
	err=verify_area(VERIFY_WRITE, optlen, sizeof(int));
	if(err)
		return err;
	put_user(sizeof(int),(int *) optlen);

	err=verify_area(VERIFY_WRITE, optval, sizeof(int));
	if(err)
		return err;
	put_user(val,(int *) optval);

	return(0);
}
2295 /*2296 * Build and send a packet, with as little as one copy2297 *2298 * Doesn't care much about ip options... option length can be2299 * different for fragment at 0 and other fragments.2300 *2301 * Note that the fragment at the highest offset is sent first,2302 * so the getfrag routine can fill in the TCP/UDP checksum header2303 * field in the last fragment it sends... actually it also helps2304 * the reassemblers, they can put most packets in at the head of2305 * the fragment queue, and they know the total size in advance. This2306 * last feature will measurable improve the Linux fragment handler.2307 *2308 * The callback has five args, an arbitrary pointer (copy of frag),2309 * the source IP address (may depend on the routing table), the 2310 * destination adddress (char *), the offset to copy from, and the2311 * length to be copied.2312 * 2313 */2314
2315 intip_build_xmit(structsock *sk,
/* */2316 voidgetfrag (void *,
2317 int,
2318 char *,
2319 unsignedint,
2320 unsignedint),
2321 void *frag,
2322 unsignedshortintlength,
2323 intdaddr,
2324 intflags,
2325 inttype)
2326 {2327 structrtable *rt;
2328 unsignedintfraglen, maxfraglen, fragheaderlen;
2329 intoffset, mf;
2330 unsignedlongsaddr;
2331 unsignedshortid;
2332 structiphdr *iph;
2333 intlocal=0;
2334 structdevice *dev;
2335
2336
2337 #ifdefCONFIG_INET_MULTICAST2338 if(sk && MULTICAST(daddr) && *sk->ip_mc_name)
2339 {2340 dev=dev_get(skb->ip_mc_name);
2341 if(!dev)
2342 return -ENODEV;
2343 rt=NULL;
2344 }2345 else2346 {2347 #endif2348 /*2349 * Perform the IP routing decisions2350 */2351
2352 if(sk->localroute || flags&MSG_DONTROUTE)
2353 local=1;
2354
2355 rt = sk->ip_route_cache;
2356
2357 /*2358 * See if the routing cache is outdated. We need to clean this up once we are happy it is reliable2359 * by doing the invalidation actively in the route change and header change.2360 */2361
2362 saddr=sk->ip_route_saddr;
2363 if(!rt || sk->ip_route_stamp != rt_stamp || daddr!=sk->ip_route_daddr || sk->ip_route_local!=local || sk->saddr!=sk->ip_route_saddr)
2364 {2365 if(local)
2366 rt = ip_rt_local(daddr, NULL, &saddr);
2367 else2368 rt = ip_rt_route(daddr, NULL, &saddr);
2369 sk->ip_route_local=local;
2370 sk->ip_route_daddr=daddr;
2371 sk->ip_route_saddr=saddr;
2372 sk->ip_route_stamp=rt_stamp;
2373 sk->ip_route_cache=rt;
2374 sk->ip_hcache_ver=NULL;
2375 sk->ip_hcache_state= 0;
2376 }2377 elseif(rt)
2378 {2379 /*2380 * Attempt header caches only if the cached route is being reused. Header cache2381 * is not ultra cheap to set up. This means we only set it up on the second packet,2382 * so one shot communications are not slowed. We assume (seems reasonable) that 2 is2383 * probably going to be a stream of data.2384 */2385 if(rt->rt_dev->header_cache && sk->ip_hcache_state!= -1)
2386 {2387 if(sk->ip_hcache_ver==NULL || sk->ip_hcache_stamp!=*sk->ip_hcache_ver)
2388 rt->rt_dev->header_cache(rt->rt_dev,sk,saddr,daddr);
2389 else2390 /* Can't cache. Remember this */2391 sk->ip_hcache_state= -1;
2392 }2393 }2394
2395 if (rt == NULL)
2396 {2397 ip_statistics.IpOutNoRoutes++;
2398 return(-ENETUNREACH);
2399 }2400
2401 if (sk->saddr && (!LOOPBACK(sk->saddr) || LOOPBACK(daddr)))
2402 saddr = sk->saddr;
2403
2404 dev=rt->rt_dev;
2405 #ifdefCONFIG_INET_MULTICAST2406 }2407 #endif2408
2409 /*2410 * Now compute the buffer space we require2411 */2412
2413 fragheaderlen = dev->hard_header_len;
2414 if(type != IPPROTO_RAW)
2415 fragheaderlen += 20;
2416
2417 /*2418 * Fragheaderlen is the size of 'overhead' on each buffer. Now work2419 * out the size of the frames to send.2420 */2421
2422 maxfraglen = ((dev->mtu-20) & ~7) + fragheaderlen;
2423
2424 /*2425 * Start at the end of the frame by handling the remainder.2426 */2427
2428 offset = length - (length % (maxfraglen - fragheaderlen));
2429
2430 /*2431 * Amount of memory to allocate for final fragment.2432 */2433
2434 fraglen = length - offset + fragheaderlen;
2435
2436 if(fraglen==0)
2437 {2438 fraglen = maxfraglen;
2439 offset -= maxfraglen-fragheaderlen;
2440 }2441
2442
2443 /*2444 * The last fragment will not have MF (more fragments) set.2445 */2446
2447 mf = 0;
2448
2449 /*2450 * Can't fragment raw packets 2451 */2452
2453 if (type == IPPROTO_RAW && offset > 0)
2454 return(-EMSGSIZE);
2455
2456 /*2457 * Get an identifier2458 */2459
2460 id = htons(ip_id_count++);
2461
2462 /*2463 * Being outputting the bytes.2464 */2465
2466 do2467 {2468 structsk_buff * skb;
2469 interror;
2470 char *data;
2471
2472 /*2473 * Get the memory we require with some space left for alignment.2474 */2475
2476 skb = sock_alloc_send_skb(sk, fraglen+15, 0, &error);
2477 if (skb == NULL)
2478 return(error);
2479
2480 /*2481 * Fill in the control structures2482 */2483
2484 skb->next = skb->prev = NULL;
2485 skb->dev = dev;
2486 skb->when = jiffies;
2487 skb->free = 1; /* dubious, this one */2488 skb->sk = sk;
2489 skb->arp = 0;
2490 skb->saddr = saddr;
2491 skb->raddr = (rt&&rt->rt_gateway) ? rt->rt_gateway : daddr;
2492 skb_reserve(skb,(dev->hard_header_len+15)&~15);
2493 data = skb_put(skb, fraglen-dev->hard_header_len);
2494
2495 /*2496 * Save us ARP and stuff. In the optimal case we do no route lookup (route cache ok)2497 * no ARP lookup (arp cache ok) and output. The cache checks are still too slow but2498 * this can be fixed later. For gateway routes we ought to have a rt->.. header cache2499 * pointer to speed header cache builds for identical targets.2500 */2501
2502 if(sk->ip_hcache_state>0)
2503 {2504 memcpy(skb->data,sk->ip_hcache_data, dev->hard_header_len);
2505 skb->arp=1;
2506 }2507 elseif (dev->hard_header)
2508 {2509 if(dev->hard_header(skb, dev, ETH_P_IP,
2510 NULL, NULL, 0)>0)
2511 skb->arp=1;
2512 }2513
2514 /*2515 * Find where to start putting bytes.2516 */2517
2518 iph = (structiphdr *)data;
2519
2520 /*2521 * Only write IP header onto non-raw packets 2522 */2523
2524 if(type != IPPROTO_RAW)
2525 {2526
2527 iph->version = 4;
2528 iph->ihl = 5; /* ugh */2529 iph->tos = sk->ip_tos;
2530 iph->tot_len = htons(fraglen - fragheaderlen + iph->ihl*4);
2531 iph->id = id;
2532 iph->frag_off = htons(offset>>3);
2533 iph->frag_off |= mf;
2534 #ifdefCONFIG_IP_MULTICAST2535 if (MULTICAST(daddr))
2536 iph->ttl = sk->ip_mc_ttl;
2537 else2538 #endif2539 iph->ttl = sk->ip_ttl;
2540 iph->protocol = type;
2541 iph->check = 0;
2542 iph->saddr = saddr;
2543 iph->daddr = daddr;
2544 iph->check = ip_fast_csum((unsignedchar *)iph, iph->ihl);
2545 data += iph->ihl*4;
2546
2547 /*2548 * Any further fragments will have MF set.2549 */2550
2551 mf = htons(IP_MF);
2552 }2553
2554 /*2555 * User data callback2556 */2557
2558 getfrag(frag, saddr, data, offset, fraglen-fragheaderlen);
2559
2560 /*2561 * Account for the fragment.2562 */2563
2564 #ifdefCONFIG_IP_ACCT2565 if(!offset)
2566 ip_fw_chk(iph, dev, ip_acct_chain, IP_FW_F_ACCEPT, 1);
2567 #endif2568 offset -= (maxfraglen-fragheaderlen);
2569 fraglen = maxfraglen;
2570
2571 #ifdefCONFIG_IP_MULTICAST2572
2573 /*2574 * Multicasts are looped back for other local users2575 */2576
2577 if (MULTICAST(daddr) && !(dev->flags&IFF_LOOPBACK))
2578 {2579 /*2580 * Loop back any frames. The check for IGMP_ALL_HOSTS is because2581 * you are always magically a member of this group.2582 */2583
2584 if(sk==NULL || sk->ip_mc_loop)
2585 {2586 if(skb->daddr==IGMP_ALL_HOSTS)
2587 ip_loopback(rt->rt_dev,skb);
2588 else2589 {2590 structip_mc_list *imc=rt->rt_dev->ip_mc_list;
2591 while(imc!=NULL)
2592 {2593 if(imc->multiaddr==daddr)
2594 {2595 ip_loopback(rt->rt_dev,skb);
2596 break;
2597 }2598 imc=imc->next;
2599 }2600 }2601 }2602
2603 /*2604 * Multicasts with ttl 0 must not go beyond the host. Fixme: avoid the2605 * extra clone.2606 */2607
2608 if(skb->ip_hdr->ttl==0)
2609 kfree_skb(skb, FREE_READ);
2610 }2611 #endif2612 /*2613 * Now queue the bytes into the device.2614 */2615
2616 if (dev->flags & IFF_UP)
2617 {2618 dev_queue_xmit(skb, dev, sk->priority);
2619 }2620 else2621 {2622 /*2623 * Whoops... 2624 *2625 * FIXME: There is a small nasty here. During the ip_build_xmit we could2626 * page fault between the route lookup and device send, the device might be2627 * removed and unloaded.... We need to add device locks on this.2628 */2629
2630 ip_statistics.IpOutDiscards++;
2631 kfree_skb(skb, FREE_WRITE);
2632 return(0); /* lose rest of fragments */2633 }2634 }2635 while (offset >= 0);
2636
2637 return(0);
2638 }2639
2640
2641 /*2642 * IP protocol layer initialiser2643 */2644
/*
 * Link-layer hook for IP: registered with dev_add_pack() so that every
 * received frame of this type is handed to ip_rcv().  The type field is
 * left as 0 here and filled in with htons(ETH_P_IP) at run time by
 * ip_init(), since htons() is not a constant expression on all targets.
 */
static struct packet_type ip_packet_type =
{
	0,		/* type: set to htons(ETH_P_IP) in ip_init() */
	NULL,		/* dev: NULL => match frames from all devices */
	ip_rcv,		/* func: IP receive handler */
	NULL,		/* data: no private data for this hook */
	NULL,		/* next: filled in by the packet-type list code */
};
2653
2654 /*2655 * Device notifier2656 */2657
2658 staticintip_rt_event(unsignedlongevent, void *ptr)
/* */2659 {2660 if(event==NETDEV_DOWN)
2661 ip_rt_flush(ptr);
2662 returnNOTIFY_DONE;
2663 }2664
/*
 * Notifier block hooked into the network-device notifier chain by
 * ip_init(), so ip_rt_event() is called on device state changes.
 */
struct notifier_block ip_rt_notifier={
	ip_rt_event,	/* notifier_call: handles NETDEV_DOWN */
	NULL,		/* next: chained in by register_netdevice_notifier() */
	0		/* priority */
};
2670
2671 /*2672 * IP registers the packet type and then calls the subprotocol initialisers2673 */2674
2675 voidip_init(void)
/* */2676 {2677 ip_packet_type.type=htons(ETH_P_IP);
2678 dev_add_pack(&ip_packet_type);
2679
2680 /* So we flush routes when a device is downed */2681 register_netdevice_notifier(&ip_rt_notifier);
2682 /* ip_raw_init();2683 ip_packet_init();2684 ip_tcp_init();2685 ip_udp_init();*/2686 }2687