1 /* 2 * INET An implementation of the TCP/IP protocol suite for the LINUX 3 * operating system. INET is implemented using the BSD Socket 4 * interface as the means of communication with the user level. 5 * 6 * The Internet Protocol (IP) module. 7 * 8 * Version: @(#)ip.c 1.0.16b 9/1/93 9 * 10 * Authors: Ross Biro, <bir7@leland.Stanford.Edu> 11 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> 12 * Donald Becker, <becker@super.org> 13 * Alan Cox, <gw4pts@gw4pts.ampr.org> 14 * Richard Underwood 15 * 16 * Fixes: 17 * Alan Cox : Commented a couple of minor bits of surplus code 18 * Alan Cox : Undefining IP_FORWARD doesn't include the code 19 * (just stops a compiler warning). 20 * Alan Cox : Frames with >=MAX_ROUTE record routes, strict routes or loose routes 21 * are junked rather than corrupting things. 22 * Alan Cox : Frames to bad broadcast subnets are dumped 23 * We used to process them non broadcast and 24 * boy could that cause havoc. 25 * Alan Cox : ip_forward sets the free flag on the 26 * new frame it queues. Still crap because 27 * it copies the frame but at least it 28 * doesn't eat memory too. 29 * Alan Cox : Generic queue code and memory fixes. 30 * Fred Van Kempen : IP fragment support (borrowed from NET2E) 31 * Gerhard Koerting: Forward fragmented frames correctly. 32 * Gerhard Koerting: Fixes to my fix of the above 8-). 33 * Gerhard Koerting: IP interface addressing fix. 34 * Linus Torvalds : More robustness checks 35 * Alan Cox : Even more checks: Still not as robust as it ought to be 36 * Alan Cox : Save IP header pointer for later 37 * Alan Cox : ip option setting 38 * Alan Cox : Use ip_tos/ip_ttl settings 39 * Alan Cox : Fragmentation bogosity removed 40 * (Thanks to Mark.Bush@prg.ox.ac.uk) 41 * Dmitry Gorodchanin : Send of a raw packet crash fix. 42 * Alan Cox : Silly ip bug when an overlength 43 * fragment turns up. Now frees the 44 * queue. 45 * Linus Torvalds/ : Memory leakage on fragmentation 46 * Alan Cox : handling. 
47 * Gerhard Koerting: Forwarding uses IP priority hints 48 * Teemu Rantanen : Fragment problems. 49 * Alan Cox : General cleanup, comments and reformat 50 * Alan Cox : SNMP statistics 51 * Alan Cox : BSD address rule semantics. Also see 52 * UDP as there is a nasty checksum issue 53 * if you do things the wrong way. 54 * Alan Cox : Always defrag, moved IP_FORWARD to the config.in file 55 * Alan Cox : IP options adjust sk->priority. 56 * Pedro Roque : Fix mtu/length error in ip_forward. 57 * Alan Cox : Avoid ip_chk_addr when possible. 58 * Richard Underwood : IP multicasting. 59 * Alan Cox : Cleaned up multicast handlers. 60 * Alan Cox : RAW sockets demultiplex in the BSD style. 61 * 62 * To Fix: 63 * IP option processing is mostly not needed. ip_forward needs to know about routing rules 64 * and time stamp but that's about all. Use the route mtu field here too 65 * 66 * This program is free software; you can redistribute it and/or 67 * modify it under the terms of the GNU General Public License 68 * as published by the Free Software Foundation; either version 69 * 2 of the License, or (at your option) any later version. 70 */ 71
72 #include <asm/segment.h>
73 #include <asm/system.h>
74 #include <linux/types.h>
75 #include <linux/kernel.h>
76 #include <linux/sched.h>
77 #include <linux/string.h>
78 #include <linux/errno.h>
79 #include <linux/socket.h>
80 #include <linux/sockios.h>
81 #include <linux/in.h>
82 #include <linux/inet.h>
83 #include <linux/netdevice.h>
84 #include <linux/etherdevice.h>
85 #include "snmp.h"
86 #include "ip.h"
87 #include "protocol.h"
88 #include "route.h"
89 #include "tcp.h"
90 #include <linux/skbuff.h>
91 #include "sock.h"
92 #include "arp.h"
93 #include "icmp.h"
94 #include "raw.h"
95 #include "igmp.h"
96 #include <linux/ip_fw.h>
97
98 #define CONFIG_IP_DEFRAG
99
/* Owned by the TCP code elsewhere in the stack; referenced here historically. */
extern int last_retran;
extern void sort_send(struct sock *sk);

/* Smaller of two values. Classic unsafe macro: each argument may be evaluated twice. */
#define min(a,b)	((a)<(b)?(a):(b))
/* True when the address lies in the 127.0.0.0/8 loopback network (network byte order). */
#define LOOPBACK(x)	(((x) & htonl(0xff000000)) == htonl(0x7f000000))
105
106 /* 107 * SNMP management statistics 108 */ 109
110 #ifdef CONFIG_IP_FORWARDING
111 structip_mibip_statistics={1,64,}; /* Forwarding=Yes, Default TTL=64 */ 112 #else 113 structip_mibip_statistics={1,64,}; /* Forwarding=No, Default TTL=64 */ 114 #endif 115
116 #ifdefCONFIG_IP_MULTICAST 117
118 structip_mc_list *ip_mc_head=NULL;
119
120 #endif 121
/*
 * Handle the issuing of an ioctl() request for the ip device.
 * This hook is scheduled to disappear.  No commands are implemented,
 * so every request is rejected with -EINVAL regardless of cmd/arg.
 */

int ip_ioctl(struct sock *sk, int cmd, unsigned long arg)
{
	/* No IP-level ioctl commands exist; nothing to dispatch on. */
	return(-EINVAL);
}
137
/* these two routines will do routing. */

/*
 * Handle a strict source route option.  Stub: strict source routing is
 * not implemented yet; this exists so the option parser has a hook.
 */
static void
strict_route(struct iphdr *iph, struct options *opt)
{
}
145
/*
 * Handle a loose source route option.  Stub: loose source routing is
 * not implemented yet; this exists so the option parser has a hook.
 */
static void
loose_route(struct iphdr *iph, struct options *opt)
{
}
151
152
153
/*
 * This routine will check to see if we have lost a gateway.
 * Stub: no gateway-loss detection is performed yet.
 */
void
ip_route_check(unsigned long daddr)
{
}
160
#if 0
/*
 * This routine puts the options at the end of an ip header.
 * Compiled out (#if 0): no options are supported, so it would only
 * write an end-of-options byte and claim 4 octets of option space.
 */
static int
build_options(struct iphdr *iph, struct options *opt)
{
	unsigned char *ptr;
	/* currently we don't support any options. */
	ptr = (unsigned char *)(iph+1);
	*ptr = 0;	/* IPOPT_END terminator */
	return (4);	/* options occupy one 4-octet word */
}
#endif
174
175 /* 176 * Take an skb, and fill in the MAC header. 177 */ 178
179 staticintip_send(structsk_buff *skb, unsignedlongdaddr, intlen, structdevice *dev, unsignedlongsaddr)
/* */ 180 { 181 intmac = 0;
182
183 skb->dev = dev;
184 skb->arp = 1;
185 if (dev->hard_header)
186 { 187 /* 188 * Build a hardware header. Source address is our mac, destination unknown 189 * (rebuild header will sort this out) 190 */ 191 mac = dev->hard_header(skb->data, dev, ETH_P_IP, NULL, NULL, len, skb);
192 if (mac < 0)
193 { 194 mac = -mac;
195 skb->arp = 0;
196 skb->raddr = daddr; /* next routing address */ 197 } 198 } 199 returnmac;
200 } 201
/* IP datagram id counter — presumably bumped by senders elsewhere in the stack
   to give each outgoing datagram a distinct id field; not used in this chunk. */
int ip_id_count = 0;
203
/*
 * This routine builds the appropriate hardware/IP headers for
 * the routine. It assumes that if *dev != NULL then the
 * protocol knows what it's doing, otherwise it uses the
 * routing/ARP tables to select a device struct.
 *
 * Returns the total number of header octets written (MAC header, plus
 * 20 for the IP header unless type == IPPROTO_RAW), or -ENETUNREACH
 * when no route exists and no device was supplied.  Note: id, tot_len
 * and check are NOT filled in here — the caller completes the header.
 */
int ip_build_header(struct sk_buff *skb, unsigned long saddr, unsigned long daddr,
		struct device **dev, int type, struct options *opt, int len, int tos, int ttl)
{
	static struct options optmem;	/* NOTE(review): shared static scratch; assumes callers are serialised */
	struct iphdr *iph;
	struct rtable *rt;
	unsigned char *buff;
	unsigned long raddr;		/* next hop (gateway or final destination) */
	int tmp;
	/* NOTE(review): src is only set by a route lookup; it is read below when
	   saddr == 0, so if the lookup failed in the *dev != NULL branch it is
	   uninitialized — TODO confirm against ip_rt_route's contract. */
	unsigned long src;

	buff = skb->data;

	/*
	 * See if we need to look up the device.
	 */

	if (*dev == NULL)
	{
		if(skb->localroute)
			rt = ip_rt_local(daddr, &optmem, &src);
		else
			rt = ip_rt_route(daddr, &optmem, &src);
		if (rt == NULL)
		{
			ip_statistics.IpOutNoRoutes++;
			return(-ENETUNREACH);
		}

		*dev = rt->rt_dev;
		/*
		 * If the frame is from us and going off machine it MUST MUST MUST
		 * have the output device ip address and never the loopback
		 */
		if (LOOPBACK(saddr) && !LOOPBACK(daddr))
			saddr = src;/*rt->rt_dev->pa_addr;*/
		raddr = rt->rt_gateway;

		opt = &optmem;
	}
	else
	{
		/*
		 * We still need the address of the first hop.
		 */
		if(skb->localroute)
			rt = ip_rt_local(daddr, &optmem, &src);
		else
			rt = ip_rt_route(daddr, &optmem, &src);
		/*
		 * If the frame is from us and going off machine it MUST MUST MUST
		 * have the output device ip address and never the loopback
		 */
		if (LOOPBACK(saddr) && !LOOPBACK(daddr))
			saddr = src;/*rt->rt_dev->pa_addr;*/

		raddr = (rt == NULL) ? 0 : rt->rt_gateway;
	}

	/*
	 * No source addr so make it our addr
	 */
	if (saddr == 0)
		saddr = src;

	/*
	 * No gateway so aim at the real destination
	 */
	if (raddr == 0)
		raddr = daddr;

	/*
	 * Now build the MAC header.
	 */

	tmp = ip_send(skb, raddr, len, *dev, saddr);
	buff += tmp;	/* IP header goes just after the MAC header */
	len -= tmp;

	/*
	 * Book keeping
	 */

	skb->dev = *dev;
	skb->saddr = saddr;
	if (skb->sk)
		skb->sk->saddr = saddr;

	/*
	 * Now build the IP header.
	 */

	/*
	 * If we are using IPPROTO_RAW, then we don't need an IP header, since
	 * one is being supplied to us by the user
	 */

	if(type == IPPROTO_RAW)
		return (tmp);

	iph = (struct iphdr *)buff;
	iph->version = 4;
	iph->tos = tos;
	iph->frag_off = 0;
	iph->ttl = ttl;
	iph->daddr = daddr;
	iph->saddr = saddr;
	iph->protocol = type;
	iph->ihl = 5;	/* 5 words = 20 bytes: no options built here */

	/* Setup the IP options. */
#ifdef Not_Yet_Avail
	build_options(iph, opt);
#endif

	return(20 + tmp);	/* IP header plus MAC header size */
}
328
/*
 * Parse the options of an IP header into *opt.
 *
 * NOTE: the early return(0) below deliberately disables all option
 * processing — every option field in *opt is zeroed and the parser
 * that follows is dead code, retained for when option handling is
 * revived (see the "To Fix" note in the file header).
 */
static int
do_options(struct iphdr *iph, struct options *opt)
{
	unsigned char *buff;
	int done = 0;
	int i, len = sizeof(struct iphdr);

	/* Zero out the options. */
	opt->record_route.route_size = 0;
	opt->loose_route.route_size = 0;
	opt->strict_route.route_size = 0;
	opt->tstamp.ptr = 0;
	opt->security = 0;
	opt->compartment = 0;
	opt->handling = 0;
	opt->stream = 0;
	opt->tcc = 0;
	return(0);

	/* ------ everything below this point is currently unreachable ------ */

	/* Advance the pointer to start at the options. */
	buff = (unsigned char *)(iph + 1);

	/* Now start the processing. */
	while (!done && len < iph->ihl*4) switch(*buff) {
	case IPOPT_END:
		done = 1;
		break;
	case IPOPT_NOOP:
		buff++;
		len++;
		break;
	case IPOPT_SEC:
		buff++;
		/* Security option is fixed-length (11 octets). */
		if (*buff != 11) return(1);
		buff++;
		opt->security = ntohs(*(unsigned short *)buff);
		buff += 2;
		opt->compartment = ntohs(*(unsigned short *)buff);
		buff += 2;
		opt->handling = ntohs(*(unsigned short *)buff);
		buff += 2;
		/* TCC is 24 bits: high byte then a 16-bit network-order tail. */
		opt->tcc = ((*buff) << 16) + ntohs(*(unsigned short *)(buff+1));
		buff += 3;
		len += 11;
		break;
	case IPOPT_LSRR:
		buff++;
		/* Length must be 3 + 4*n (header plus whole addresses). */
		if ((*buff - 3)% 4 != 0) return(1);
		len += *buff;
		opt->loose_route.route_size = (*buff -3)/4;
		buff++;
		if (*buff % 4 != 0) return(1);
		opt->loose_route.pointer = *buff/4 - 1;
		buff++;
		buff++;
		for (i = 0; i < opt->loose_route.route_size; i++) {
			/* Overlong routes are rejected, not truncated. */
			if(i>=MAX_ROUTE)
				return(1);
			opt->loose_route.route[i] = *(unsigned long *)buff;
			buff += 4;
		}
		break;
	case IPOPT_SSRR:
		buff++;
		if ((*buff - 3)% 4 != 0) return(1);
		len += *buff;
		opt->strict_route.route_size = (*buff -3)/4;
		buff++;
		if (*buff % 4 != 0) return(1);
		opt->strict_route.pointer = *buff/4 - 1;
		buff++;
		buff++;
		for (i = 0; i < opt->strict_route.route_size; i++) {
			if(i>=MAX_ROUTE)
				return(1);
			opt->strict_route.route[i] = *(unsigned long *)buff;
			buff += 4;
		}
		break;
	case IPOPT_RR:
		buff++;
		if ((*buff - 3)% 4 != 0) return(1);
		len += *buff;
		opt->record_route.route_size = (*buff -3)/4;
		buff++;
		if (*buff % 4 != 0) return(1);
		opt->record_route.pointer = *buff/4 - 1;
		buff++;
		buff++;
		for (i = 0; i < opt->record_route.route_size; i++) {
			if(i>=MAX_ROUTE)
				return 1;
			opt->record_route.route[i] = *(unsigned long *)buff;
			buff += 4;
		}
		break;
	case IPOPT_SID:
		len += 4;
		buff +=2;
		opt->stream = *(unsigned short *)buff;
		buff += 2;
		break;
	case IPOPT_TIMESTAMP:
		buff++;
		len += *buff;
		if (*buff % 4 != 0) return(1);
		opt->tstamp.len = *buff / 4 - 1;
		buff++;
		if ((*buff - 1) % 4 != 0) return(1);
		opt->tstamp.ptr = (*buff-1)/4;
		buff++;
		/* Overflow/flags octet. */
		opt->tstamp.x.full_char = *buff;
		buff++;
		for (i = 0; i < opt->tstamp.len; i++) {
			opt->tstamp.data[i] = *(unsigned long *)buff;
			buff += 4;
		}
		break;
	default:
		/* Unknown option: reject the datagram. */
		return(1);
	}

	/* If no explicit record-route, borrow the source route for recording. */
	if (opt->record_route.route_size == 0) {
		if (opt->strict_route.route_size != 0) {
			memcpy(&(opt->record_route), &(opt->strict_route),
			       sizeof(opt->record_route));
		} else if (opt->loose_route.route_size != 0) {
			memcpy(&(opt->record_route), &(opt->loose_route),
			       sizeof(opt->record_route));
		}
	}

	/* Hand off to the (stub) source-route handlers if hops remain. */
	if (opt->strict_route.route_size != 0 &&
	    opt->strict_route.route_size != opt->strict_route.pointer) {
		strict_route(iph, opt);
		return(0);
	}

	if (opt->loose_route.route_size != 0 &&
	    opt->loose_route.route_size != opt->loose_route.pointer) {
		loose_route(iph, opt);
		return(0);
	}

	return(0);
}
/*
 * This is a version of ip_compute_csum() optimized for IP headers, which
 * always checksum on 4 octet boundaries.
 *
 * 'wlen' is the header length in 32-bit words (i.e. iph->ihl), not bytes.
 * Returns the folded 16-bit one's-complement checksum.  x86-specific:
 * sums 32-bit words with add-with-carry, then folds the high half into
 * the low 16 bits.
 */

static inline unsigned short ip_fast_csum(unsigned char * buff, int wlen)
{
	unsigned long sum = 0;

	if (wlen)
	{
		unsigned long bogus;
		__asm__("clc\n"
		"1:\t"
		"lodsl\n\t"		/* fetch next 32-bit word */
		"adcl %3, %0\n\t"	/* add with carry into sum */
		"decl %2\n\t"
		"jne 1b\n\t"
		"adcl $0, %0\n\t"	/* mop up final carry */
		"movl %0, %3\n\t"
		"shrl $16, %3\n\t"	/* fold high 16 bits ... */
		"addw %w3, %w0\n\t"	/* ... into the low 16 */
		"adcw $0, %w0"
		: "=r" (sum), "=S" (buff), "=r" (wlen), "=a" (bogus)
		: "0" (sum), "1" (buff), "2" (wlen));
	}
	return (~sum) & 0xffff;	/* one's complement, low 16 bits */
}
/*
 * This routine does all the checksum computations that don't
 * require anything special (like copying or special headers).
 *
 * Computes the 16-bit one's-complement internet checksum over 'len'
 * bytes.  x86-specific: handles the bulk in 32-bit words, then a
 * trailing 16-bit word, then a trailing byte.
 *
 * NOTE(review): the clobber lists name registers that are also outputs
 * ("bx"/"si") — accepted by the era's GCC, rejected by modern ones.
 */

unsigned short ip_compute_csum(unsigned char * buff, int len)
{
	unsigned long sum = 0;

	/* Do the first multiple of 4 bytes and convert to 16 bits. */
	if (len > 3)
	{
		__asm__("clc\n"
		"1:\t"
		"lodsl\n\t"		/* next 32-bit word */
		"adcl %%eax, %%ebx\n\t"
		"loop 1b\n\t"		/* len>>2 iterations via ecx */
		"adcl $0, %%ebx\n\t"
		"movl %%ebx, %%eax\n\t"
		"shrl $16, %%eax\n\t"	/* fold into 16 bits */
		"addw %%ax, %%bx\n\t"
		"adcw $0, %%bx"
		: "=b" (sum) , "=S" (buff)
		: "0" (sum), "c" (len >> 2) ,"1" (buff)
		: "ax", "cx", "si", "bx" );
	}
	if (len & 2)
	{
		/* One leftover 16-bit word. */
		__asm__("lodsw\n\t"
		"addw %%ax, %%bx\n\t"
		"adcw $0, %%bx"
		: "=b" (sum), "=S" (buff)
		: "0" (sum), "1" (buff)
		: "bx", "ax", "si");
	}
	if (len & 1)
	{
		/* One leftover byte, zero-extended to 16 bits. */
		__asm__("lodsb\n\t"
		"movb $0, %%ah\n\t"
		"addw %%ax, %%bx\n\t"
		"adcw $0, %%bx"
		: "=b" (sum), "=S" (buff)
		: "0" (sum), "1" (buff)
		: "bx", "ax", "si");
	}
	sum =~sum;		/* one's complement */
	return(sum & 0xffff);
}
554 /* 555 * Check the header of an incoming IP datagram. This version is still used in slhc.c. 556 */ 557
558 intip_csum(structiphdr *iph)
/* */ 559 { 560 returnip_fast_csum((unsignedchar *)iph, iph->ihl);
561 } 562
563 /* 564 * Generate a checksum for an outgoing IP datagram. 565 */ 566
567 staticvoidip_send_check(structiphdr *iph)
/* */ 568 { 569 iph->check = 0;
570 iph->check = ip_fast_csum((unsignedchar *)iph, iph->ihl);
571 } 572
/************************ Fragment Handlers From NET2E not yet with tweaks to beat 4K **********************************/


/*
 * This fragment handler is a bit of a heap. On the other hand it works quite
 * happily and handles things quite well.
 */

/* Head of the doubly linked list of partially reassembled datagrams.
   Guarded by cli()/sti() in the functions that touch it. */
static struct ipq *ipqueue = NULL;		/* IP fragment queue	*/
583 /* 584 * Create a new fragment entry. 585 */ 586
587 staticstructipfrag *ip_frag_create(intoffset, intend, structsk_buff *skb, unsignedchar *ptr)
/* */ 588 { 589 structipfrag *fp;
590
591 fp = (structipfrag *) kmalloc(sizeof(structipfrag), GFP_ATOMIC);
592 if (fp == NULL)
593 { 594 printk("IP: frag_create: no memory left !\n");
595 return(NULL);
596 } 597 memset(fp, 0, sizeof(structipfrag));
598
599 /* Fill in the structure. */ 600 fp->offset = offset;
601 fp->end = end;
602 fp->len = end - offset;
603 fp->skb = skb;
604 fp->ptr = ptr;
605
606 return(fp);
607 } 608
609
610 /* 611 * Find the correct entry in the "incomplete datagrams" queue for 612 * this IP datagram, and return the queue entry address if found. 613 */ 614
615 staticstructipq *ip_find(structiphdr *iph)
/* */ 616 { 617 structipq *qp;
618 structipq *qplast;
619
620 cli();
621 qplast = NULL;
622 for(qp = ipqueue; qp != NULL; qplast = qp, qp = qp->next)
623 { 624 if (iph->id== qp->iph->id && iph->saddr == qp->iph->saddr &&
625 iph->daddr == qp->iph->daddr && iph->protocol == qp->iph->protocol)
626 { 627 del_timer(&qp->timer); /* So it doesn't vanish on us. The timer will be reset anyway */ 628 sti();
629 return(qp);
630 } 631 } 632 sti();
633 return(NULL);
634 } 635
636
/*
 * Remove an entry from the "incomplete datagrams" queue, either
 * because we completed, reassembled and processed it, or because
 * it timed out.
 *
 * Frees every fragment (including its skb), the saved MAC and IP
 * header copies, and finally the queue descriptor itself.  The list
 * unlink and the frees run with interrupts disabled.
 */

static void ip_free(struct ipq *qp)
{
	struct ipfrag *fp;
	struct ipfrag *xp;

	/*
	 * Stop the timer for this entry so it cannot fire mid-teardown.
	 */

	del_timer(&qp->timer);

	/* Remove this entry from the "incomplete datagrams" queue. */
	cli();
	if (qp->prev == NULL)
	{
		/* Entry was at the head of the queue. */
		ipqueue = qp->next;
		if (ipqueue != NULL)
			ipqueue->prev = NULL;
	}
	else
	{
		qp->prev->next = qp->next;
		if (qp->next != NULL)
			qp->next->prev = qp->prev;
	}

	/* Release all fragment data. */

	fp = qp->fragments;
	while (fp != NULL)
	{
		xp = fp->next;		/* save link before freeing fp */
		IS_SKB(fp->skb);
		kfree_skb(fp->skb,FREE_READ);
		kfree_s(fp, sizeof(struct ipfrag));
		fp = xp;
	}

	/* Release the MAC header. */
	kfree_s(qp->mac, qp->maclen);

	/* Release the IP header (allocated with 8 extra octets for ICMP). */
	kfree_s(qp->iph, qp->ihlen + 8);

	/* Finally, release the queue descriptor itself. */
	kfree_s(qp, sizeof(struct ipq));
	sti();
}
692
693 /* 694 * Oops- a fragment queue timed out. Kill it and send an ICMP reply. 695 */ 696
697 staticvoidip_expire(unsignedlongarg)
/* */ 698 { 699 structipq *qp;
700
701 qp = (structipq *)arg;
702
703 /* 704 * Send an ICMP "Fragment Reassembly Timeout" message. 705 */ 706
707 ip_statistics.IpReasmTimeout++;
708 ip_statistics.IpReasmFails++;
709 /* This if is always true... shrug */ 710 if(qp->fragments!=NULL)
711 icmp_send(qp->fragments->skb,ICMP_TIME_EXCEEDED,
712 ICMP_EXC_FRAGTIME, qp->dev);
713
714 /* 715 * Nuke the fragment queue. 716 */ 717 ip_free(qp);
718 } 719
720
721 /* 722 * Add an entry to the 'ipq' queue for a newly received IP datagram. 723 * We will (hopefully :-) receive all other fragments of this datagram 724 * in time, so we just create a queue for this datagram, in which we 725 * will insert the received fragments at their respective positions. 726 */ 727
728 staticstructipq *ip_create(structsk_buff *skb, structiphdr *iph, structdevice *dev)
/* */ 729 { 730 structipq *qp;
731 intmaclen;
732 intihlen;
733
734 qp = (structipq *) kmalloc(sizeof(structipq), GFP_ATOMIC);
735 if (qp == NULL)
736 { 737 printk("IP: create: no memory left !\n");
738 return(NULL);
739 skb->dev = qp->dev;
740 } 741 memset(qp, 0, sizeof(structipq));
742
743 /* 744 * Allocate memory for the MAC header. 745 * 746 * FIXME: We have a maximum MAC address size limit and define 747 * elsewhere. We should use it here and avoid the 3 kmalloc() calls 748 */ 749
750 maclen = ((unsignedlong) iph) - ((unsignedlong) skb->data);
751 qp->mac = (unsignedchar *) kmalloc(maclen, GFP_ATOMIC);
752 if (qp->mac == NULL)
753 { 754 printk("IP: create: no memory left !\n");
755 kfree_s(qp, sizeof(structipq));
756 return(NULL);
757 } 758
759 /* 760 * Allocate memory for the IP header (plus 8 octets for ICMP). 761 */ 762
763 ihlen = (iph->ihl * sizeof(unsignedlong));
764 qp->iph = (structiphdr *) kmalloc(ihlen + 8, GFP_ATOMIC);
765 if (qp->iph == NULL)
766 { 767 printk("IP: create: no memory left !\n");
768 kfree_s(qp->mac, maclen);
769 kfree_s(qp, sizeof(structipq));
770 return(NULL);
771 } 772
773 /* Fill in the structure. */ 774 memcpy(qp->mac, skb->data, maclen);
775 memcpy(qp->iph, iph, ihlen + 8);
776 qp->len = 0;
777 qp->ihlen = ihlen;
778 qp->maclen = maclen;
779 qp->fragments = NULL;
780 qp->dev = dev;
781
782 /* Start a timer for this entry. */ 783 qp->timer.expires = IP_FRAG_TIME; /* about 30 seconds */ 784 qp->timer.data = (unsignedlong) qp; /* pointer to queue */ 785 qp->timer.function = ip_expire; /* expire function */ 786 add_timer(&qp->timer);
787
788 /* Add this entry to the queue. */ 789 qp->prev = NULL;
790 cli();
791 qp->next = ipqueue;
792 if (qp->next != NULL)
793 qp->next->prev = qp;
794 ipqueue = qp;
795 sti();
796 return(qp);
797 } 798
799
800 /* 801 * See if a fragment queue is complete. 802 */ 803
804 staticintip_done(structipq *qp)
/* */ 805 { 806 structipfrag *fp;
807 intoffset;
808
809 /* Only possible if we received the final fragment. */ 810 if (qp->len == 0)
811 return(0);
812
813 /* Check all fragment offsets to see if they connect. */ 814 fp = qp->fragments;
815 offset = 0;
816 while (fp != NULL)
817 { 818 if (fp->offset > offset)
819 return(0); /* fragment(s) missing */ 820 offset = fp->end;
821 fp = fp->next;
822 } 823
824 /* All fragments are present. */ 825 return(1);
826 } 827
828
/*
 * Build a new IP datagram from all its fragments.
 *
 * FIXME: We copy here because we lack an effective way of handling lists
 * of bits on input. Until the new skb data handling is in I'm not going
 * to touch this with a bargepole. This also causes a 4Kish limit on
 * packet sizes.
 *
 * Consumes the queue entry (ip_free) on every path; returns the glued
 * skb, or NULL on allocation failure or a corrupt fragment chain.
 */

static struct sk_buff *ip_glue(struct ipq *qp)
{
	struct sk_buff *skb;
	struct iphdr *iph;
	struct ipfrag *fp;
	unsigned char *ptr;
	int count, len;

	/*
	 * Allocate a new buffer for the datagram:
	 * saved MAC header + saved IP header + total payload length.
	 */

	len = qp->maclen + qp->ihlen + qp->len;

	if ((skb = alloc_skb(len,GFP_ATOMIC)) == NULL)
	{
		ip_statistics.IpReasmFails++;
		printk("IP: queue_glue: no memory for gluing queue 0x%X\n", (int) qp);
		ip_free(qp);
		return(NULL);
	}

	/* Fill in the basic details. */
	skb->len = (len - qp->maclen);
	skb->h.raw = skb->data;
	skb->free = 1;

	/* Copy the original MAC and IP headers into the new buffer. */
	ptr = (unsigned char *) skb->h.raw;
	memcpy(ptr, ((unsigned char *) qp->mac), qp->maclen);
	ptr += qp->maclen;
	memcpy(ptr, ((unsigned char *) qp->iph), qp->ihlen);
	ptr += qp->ihlen;
	/* h.raw now points at the IP header, past the MAC header. */
	skb->h.raw += qp->maclen;

	count = 0;

	/* Copy the data portions of all fragments into the new buffer. */
	fp = qp->fragments;
	while(fp != NULL)
	{
		if(count+fp->len > skb->len)
		{
			/* Corrupt chain: a fragment would overrun the buffer. */
			printk("Invalid fragment list: Fragment over size.\n");
			ip_free(qp);
			kfree_skb(skb,FREE_WRITE);
			ip_statistics.IpReasmFails++;
			return NULL;
		}
		memcpy((ptr + fp->offset), fp->ptr, fp->len);
		count += fp->len;
		fp = fp->next;
	}

	/* We glued together all fragments, so remove the queue entry. */
	ip_free(qp);

	/* Done with all fragments. Fixup the new IP header. */
	iph = skb->h.iph;
	iph->frag_off = 0;	/* no longer a fragment */
	iph->tot_len = htons((iph->ihl * sizeof(unsigned long)) + count);
	skb->ip_hdr = iph;

	ip_statistics.IpReasmOKs++;
	return(skb);
}
905
906 /* 907 * Process an incoming IP datagram fragment. 908 */ 909
910 staticstructsk_buff *ip_defrag(structiphdr *iph, structsk_buff *skb, structdevice *dev)
/* */ 911 { 912 structipfrag *prev, *next;
913 structipfrag *tfp;
914 structipq *qp;
915 structsk_buff *skb2;
916 unsignedchar *ptr;
917 intflags, offset;
918 inti, ihl, end;
919
920 ip_statistics.IpReasmReqds++;
921
922 /* Find the entry of this IP datagram in the "incomplete datagrams" queue. */ 923 qp = ip_find(iph);
924
925 /* Is this a non-fragmented datagram? */ 926 offset = ntohs(iph->frag_off);
927 flags = offset & ~IP_OFFSET;
928 offset &= IP_OFFSET;
929 if (((flags & IP_MF) == 0) && (offset == 0))
930 { 931 if (qp != NULL)
932 ip_free(qp); /* Huh? How could this exist?? */ 933 return(skb);
934 } 935
936 offset <<= 3; /* offset is in 8-byte chunks */ 937
938 /* 939 * If the queue already existed, keep restarting its timer as long 940 * as we still are receiving fragments. Otherwise, create a fresh 941 * queue entry. 942 */ 943
944 if (qp != NULL)
945 { 946 del_timer(&qp->timer);
947 qp->timer.expires = IP_FRAG_TIME; /* about 30 seconds */ 948 qp->timer.data = (unsignedlong) qp; /* pointer to queue */ 949 qp->timer.function = ip_expire; /* expire function */ 950 add_timer(&qp->timer);
951 } 952 else 953 { 954 /* 955 * If we failed to create it, then discard the frame 956 */ 957 if ((qp = ip_create(skb, iph, dev)) == NULL)
958 { 959 skb->sk = NULL;
960 kfree_skb(skb, FREE_READ);
961 ip_statistics.IpReasmFails++;
962 returnNULL;
963 } 964 } 965
966 /* 967 * Determine the position of this fragment. 968 */ 969
970 ihl = (iph->ihl * sizeof(unsignedlong));
971 end = offset + ntohs(iph->tot_len) - ihl;
972
973 /* 974 * Point into the IP datagram 'data' part. 975 */ 976
977 ptr = skb->data + dev->hard_header_len + ihl;
978
979 /* 980 * Is this the final fragment? 981 */ 982
983 if ((flags & IP_MF) == 0)
984 qp->len = end;
985
986 /* 987 * Find out which fragments are in front and at the back of us 988 * in the chain of fragments so far. We must know where to put 989 * this fragment, right? 990 */ 991
992 prev = NULL;
993 for(next = qp->fragments; next != NULL; next = next->next)
994 { 995 if (next->offset > offset)
996 break; /* bingo! */ 997 prev = next;
998 } 999
1000 /*1001 * We found where to put this one.1002 * Check for overlap with preceding fragment, and, if needed,1003 * align things so that any overlaps are eliminated.1004 */1005 if (prev != NULL && offset < prev->end)
1006 {1007 i = prev->end - offset;
1008 offset += i; /* ptr into datagram */1009 ptr += i; /* ptr into fragment data */1010 }1011
1012 /*1013 * Look for overlap with succeeding segments.1014 * If we can merge fragments, do it.1015 */1016
1017 for(; next != NULL; next = tfp)
1018 {1019 tfp = next->next;
1020 if (next->offset >= end)
1021 break; /* no overlaps at all */1022
1023 i = end - next->offset; /* overlap is 'i' bytes */1024 next->len -= i; /* so reduce size of */1025 next->offset += i; /* next fragment */1026 next->ptr += i;
1027
1028 /*1029 * If we get a frag size of <= 0, remove it and the packet1030 * that it goes with.1031 */1032 if (next->len <= 0)
1033 {1034 if (next->prev != NULL)
1035 next->prev->next = next->next;
1036 else1037 qp->fragments = next->next;
1038
1039 if (tfp->next != NULL)
1040 next->next->prev = next->prev;
1041
1042 kfree_skb(next->skb,FREE_READ);
1043 kfree_s(next, sizeof(structipfrag));
1044 }1045 }1046
1047 /*1048 * Insert this fragment in the chain of fragments.1049 */1050
1051 tfp = NULL;
1052 tfp = ip_frag_create(offset, end, skb, ptr);
1053
1054 /*1055 * No memory to save the fragment - so throw the lot1056 */1057
1058 if (!tfp)
1059 {1060 skb->sk = NULL;
1061 kfree_skb(skb, FREE_READ);
1062 returnNULL;
1063 }1064 tfp->prev = prev;
1065 tfp->next = next;
1066 if (prev != NULL)
1067 prev->next = tfp;
1068 else1069 qp->fragments = tfp;
1070
1071 if (next != NULL)
1072 next->prev = tfp;
1073
1074 /*1075 * OK, so we inserted this new fragment into the chain.1076 * Check if we now have a full IP datagram which we can1077 * bump up to the IP layer...1078 */1079
1080 if (ip_done(qp))
1081 {1082 skb2 = ip_glue(qp); /* glue together the fragments */1083 return(skb2);
1084 }1085 return(NULL);
1086 }1087
1088
/*
 * This IP datagram is too large to be sent in one piece. Break it up into
 * smaller pieces (each of size equal to the MAC header plus IP header plus
 * a block of the data of the original IP data part) that will yet fit in a
 * single device frame, and queue such a frame for sending by calling the
 * ip_queue_xmit().  Note that this is recursion, and bad things will happen
 * if this function causes a loop...
 *
 * Yes this is inefficient, feel free to submit a quicker one.
 *
 * **Protocol Violation**
 * We copy all the options to each fragment. !FIXME!
 *
 * NOTE(review): tot_len and the header checksum of each fragment are not
 * set here — presumably ip_queue_xmit fixes them up; confirm.
 */
void ip_fragment(struct sock *sk, struct sk_buff *skb, struct device *dev, int is_frag)
{
	struct iphdr *iph;
	unsigned char *raw;
	unsigned char *ptr;
	struct sk_buff *skb2;
	int left, mtu, hlen, len;
	int offset;
	unsigned long flags;

	/*
	 * Point into the IP datagram header.
	 */

	raw = skb->data;
	iph = (struct iphdr *) (raw + dev->hard_header_len);

	skb->ip_hdr = iph;

	/*
	 * Setup starting values.
	 */

	hlen = (iph->ihl * sizeof(unsigned long));
	left = ntohs(iph->tot_len) - hlen;	/* Space per frame */
	hlen += dev->hard_header_len;		/* Total header size */
	mtu = (dev->mtu - hlen);		/* Size of data space */
	ptr = (raw + hlen);			/* Where to start from */

	/*
	 * Check for any "DF" flag. [DF means do not fragment]
	 */

	if (ntohs(iph->frag_off) & IP_DF)
	{
		ip_statistics.IpFragFails++;
		icmp_send(skb,ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, dev);
		return;
	}

	/*
	 * The protocol doesn't seem to say what to do in the case that the
	 * frame + options doesn't fit the mtu. As it used to fall down dead
	 * in this case we were fortunate it didn't happen
	 */

	if(mtu<8)
	{
		/* It's wrong but its better than nothing */
		icmp_send(skb,ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED,dev);
		ip_statistics.IpFragFails++;
		return;
	}

	/*
	 * Fragment the datagram.
	 */

	/*
	 * The initial offset is 0 for a complete frame. When
	 * fragmenting fragments its wherever this one starts.
	 */

	if (is_frag & 2)
		offset = (ntohs(iph->frag_off) & 0x1fff) << 3;
	else
		offset = 0;


	/*
	 * Keep copying data until we run out.
	 */

	while(left > 0)
	{
		len = left;
		/* IF: it doesn't fit, use 'mtu' - the data space left */
		if (len > mtu)
			len = mtu;
		/* IF: we are not sending upto and including the packet end
		   then align the next start on an eight byte boundary */
		if (len < left)
		{
			len/=8;
			len*=8;
		}
		/*
		 * Allocate buffer.
		 */

		if ((skb2 = alloc_skb(len + hlen,GFP_ATOMIC)) == NULL)
		{
			/* NOTE(review): already-queued fragments stay queued;
			   the remainder of the datagram is silently dropped. */
			printk("IP: frag: no memory for new fragment!\n");
			ip_statistics.IpFragFails++;
			return;
		}

		/*
		 * Set up data on packet
		 */

		skb2->arp = skb->arp;
		if(skb->free==0)
			printk("IP fragmenter: BUG free!=1 in fragmenter\n");
		skb2->free = 1;
		skb2->len = len + hlen;
		skb2->h.raw=(char *) skb2->data;
		/*
		 * Charge the memory for the fragment to any owner
		 * it might possess
		 */

		save_flags(flags);
		if (sk)
		{
			cli();
			sk->wmem_alloc += skb2->mem_len;
			skb2->sk=sk;
		}
		restore_flags(flags);
		skb2->raddr = skb->raddr;	/* For rebuild_header - must be here */

		/*
		 * Copy the packet header into the new buffer.
		 */

		memcpy(skb2->h.raw, raw, hlen);

		/*
		 * Copy a block of the IP datagram.
		 */
		memcpy(skb2->h.raw + hlen, ptr, len);
		left -= len;

		skb2->h.raw+=dev->hard_header_len;

		/*
		 * Fill in the new header fields.
		 */
		iph = (struct iphdr *)(skb2->h.raw/*+dev->hard_header_len*/);
		iph->frag_off = htons((offset >> 3));	/* offset back to 8-byte units */
		/*
		 * Added AC : If we are fragmenting a fragment thats not the
		 * last fragment then keep MF on each bit
		 */
		if (left > 0 || (is_frag & 1))
			iph->frag_off |= htons(IP_MF);
		ptr += len;
		offset += len;

		/*
		 * Put this fragment into the sending queue.
		 */

		ip_statistics.IpFragCreates++;

		ip_queue_xmit(sk, dev, skb2, 2);
	}
	ip_statistics.IpFragOKs++;
}
1263
1264
#ifdef CONFIG_IP_FORWARD

/*
 *	Forward an IP datagram to its next destination.
 *
 *	skb	- the received frame (caller keeps ownership and frees it;
 *		  we always copy into a fresh buffer before transmitting)
 *	dev	- the interface the frame arrived on
 *	is_frag	- fragment flags as tagged by ip_rcv (bit 0 = MF set,
 *		  bit 1 = non-zero offset); passed through to ip_fragment
 */

static void ip_forward(struct sk_buff *skb, struct device *dev, int is_frag)
{
	struct device *dev2;	/* Output device */
	struct iphdr *iph;	/* Our header */
	struct sk_buff *skb2;	/* Output packet */
	struct rtable *rt;	/* Route we use */
	unsigned char *ptr;	/* Data pointer */
	unsigned long raddr;	/* Router IP address */

	/*
	 *	See if we are allowed to forward this.
	 */

#ifdef CONFIG_IP_FIREWALL
	if(!ip_fw_chk(skb->h.iph, ip_fw_fwd_chain))
	{
		return;
	}
#endif
	/*
	 *	According to the RFC, we must decrease the TTL field and drop
	 *	the packet (replying with an ICMP time-exceeded) when the TTL
	 *	would reach zero.
	 *
	 *	The check is done BEFORE the decrement: ttl is an unsigned
	 *	byte, so decrementing a packet that arrived with ttl==0 used
	 *	to wrap it to 255 and forward the dead packet.
	 *
	 *	Exception:
	 *	We may not generate an ICMP for an ICMP. icmp_send does the
	 *	enforcement of this so we can forget it here. It is however
	 *	sometimes VERY important.
	 */

	iph = skb->h.iph;
	if (iph->ttl <= 1)
	{
		/* Tell the sender its packet died... */
		icmp_send(skb, ICMP_TIME_EXCEEDED, ICMP_EXC_TTL, dev);
		return;
	}
	iph->ttl--;

	/*
	 *	Re-compute the IP header checksum.
	 *	This is inefficient. We know what has happened to the header
	 *	and could thus adjust the checksum as Phil Karn does in KA9Q
	 */

	ip_send_check(iph);

	/*
	 *	OK, the packet is still valid. Fetch its destination address,
	 *	and give it to the IP sender for further processing.
	 */

	rt = ip_rt_route(iph->daddr, NULL, NULL);
	if (rt == NULL)
	{
		/*
		 *	Tell the sender its packet cannot be delivered. Again
		 *	ICMP is screened later.
		 */
		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_NET_UNREACH, dev);
		return;
	}

	/*
	 *	If the router told us that there is no GW, use the dest.
	 *	IP address itself - we seem to be connected directly...
	 */

	raddr = rt->rt_gateway;

	if (raddr != 0)
	{
		/*
		 *	There is a gateway so find the correct route for it.
		 *	Gateways cannot in turn be gatewayed.
		 */
		rt = ip_rt_route(raddr, NULL, NULL);
		if (rt == NULL)
		{
			/*
			 *	Tell the sender its packet cannot be delivered...
			 */
			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, dev);
			return;
		}
		if (rt->rt_gateway != 0)
			raddr = rt->rt_gateway;
	}
	else
		raddr = iph->daddr;

	/*
	 *	Having picked a route we can now send the frame out.
	 */

	dev2 = rt->rt_dev;

	/*
	 *	In IP you never forward a frame on the interface that it arrived
	 *	upon. We should generate an ICMP HOST REDIRECT giving the route
	 *	we calculated.
	 *	For now just dropping the packet is an acceptable compromise.
	 */

	if (dev == dev2)
		return;

	/*
	 *	We now allocate a new buffer, and copy the datagram into it.
	 *	If the indicated interface is up and running, kick it.
	 */

	if (dev2->flags & IFF_UP)
	{
		/*
		 *	Current design decrees we copy the packet. For identical header
		 *	lengths we could avoid it. The new skb code will let us push
		 *	data so the problem goes away then.
		 */

		skb2 = alloc_skb(dev2->hard_header_len + skb->len, GFP_ATOMIC);
		/*
		 *	This is rare and since IP is tolerant of network failures
		 *	quite harmless.
		 */
		if (skb2 == NULL)
		{
			printk("\nIP: No memory available for IP forward\n");
			return;
		}
		ptr = skb2->data;
		skb2->free = 1;
		skb2->len = skb->len + dev2->hard_header_len;
		skb2->h.raw = ptr;

		/*
		 *	Copy the packet data into the new buffer.
		 */
		memcpy(ptr + dev2->hard_header_len, skb->h.raw, skb->len);

		/* Now build the MAC header. */
		(void) ip_send(skb2, raddr, skb->len, dev2, dev2->pa_addr);

		ip_statistics.IpForwDatagrams++;

		/*
		 *	See if it needs fragmenting. Note in ip_rcv we tagged
		 *	the fragment type. This must be right so that
		 *	the fragmenter does the right thing.
		 */

		if(skb2->len > dev2->mtu + dev2->hard_header_len)
		{
			ip_fragment(NULL,skb2,dev2, is_frag);
			kfree_skb(skb2,FREE_WRITE);
		}
		else
		{
#ifdef CONFIG_IP_ACCT
			/*
			 *	Count mapping we shortcut
			 */

			ip_acct_cnt(iph,ip_acct_chain,1);
#endif

			/*
			 *	Map service types to priority. We lie about
			 *	throughput being low priority, but its a good
			 *	choice to help improve general usage.
			 */
			if(iph->tos & IPTOS_LOWDELAY)
				dev_queue_xmit(skb2, dev2, SOPRI_INTERACTIVE);
			else if(iph->tos & IPTOS_THROUGHPUT)
				dev_queue_xmit(skb2, dev2, SOPRI_BACKGROUND);
			else
				dev_queue_xmit(skb2, dev2, SOPRI_NORMAL);
		}
	}
}

#endif
/*
 *	This function receives all incoming IP datagrams: validate the
 *	header, firewall it, tag fragments, forward or locally deliver,
 *	reassemble, then hand the payload to raw sockets and every
 *	registered transport protocol.
 */

int ip_rcv(struct sk_buff *skb, struct device *dev, struct packet_type *pt)
{
	struct iphdr *iph = skb->h.iph;
	struct sock *raw_sk=NULL;
	unsigned char hash;
	unsigned char flag = 0;		/* Set once any protocol accepts the packet */
	unsigned char opts_p = 0;	/* Set iff the packet has options. */
	struct inet_protocol *ipprot;
	static struct options opt;	/* since we don't use these yet, and they
					   take up stack space. */
	int brd=IS_MYADDR;		/* Address class; IS_MYADDR unless re-checked below */
	int is_frag=0;			/* bit 0: MF flag set; bit 1: non-zero offset */

	ip_statistics.IpInReceives++;

	/*
	 *	Tag the ip header of this packet so we can find it
	 */

	skb->ip_hdr = iph;

	/*
	 *	Is the datagram acceptable?
	 *
	 *	1.	Length at least the size of an ip header
	 *	2.	Version of 4
	 *	3.	Checksums correctly. [Speed optimisation for later, skip loopback checksums]
	 *	(4.	We ought to check for IP multicast addresses and undefined types.. does this matter ?)
	 */

	if (skb->len<sizeof(struct iphdr) || iph->ihl<5 || iph->version != 4 || ip_fast_csum((unsigned char *)iph, iph->ihl) !=0)
	{
		ip_statistics.IpInHdrErrors++;
		kfree_skb(skb, FREE_WRITE);
		return(0);
	}

	/*
	 *	See if the firewall wants to dispose of the packet.
	 *	Loopback destinations bypass the blocking chain.
	 */

#ifdef	CONFIG_IP_FIREWALL

	if(!LOOPBACK(iph->daddr) && !ip_fw_chk(iph,ip_fw_blk_chain))
	{
		kfree_skb(skb, FREE_WRITE);
		return 0;
	}

#endif

	/*
	 *	Our transport medium may have padded the buffer out. Now we know it
	 *	is IP we can trim to the true length of the frame.
	 */

	skb->len=ntohs(iph->tot_len);

	/*
	 *	Next analyse the packet for options. Studies show under one packet in
	 *	a thousand have options....  (ihl == 5 is the optionless fast path)
	 */

	if (iph->ihl != 5)
	{
		memset((char *) &opt, 0, sizeof(opt));
		/* Bad/over-long options: do_options already disposed of skb */
		if (do_options(iph, &opt) != 0)
			return 0;
		opts_p = 1;
	}

	/*
	 *	Remember if the frame is fragmented: bit 1 of is_frag means the
	 *	More Fragments flag was set, bit 2 means a non-zero offset.
	 */

	if(iph->frag_off)
	{
		if (iph->frag_off & 0x0020)
			is_frag|=1;

		/*
		 *	Last fragment ?
		 */

		if (ntohs(iph->frag_off) & 0x1fff)
			is_frag|=2;
	}

	/*
	 *	Do any IP forwarding required.  chk_addr() is expensive -- avoid it someday.
	 *
	 *	For most hosts over 99% of packets match the first conditional
	 *	and don't go via ip_chk_addr. Note: brd is set to IS_MYADDR at
	 *	function entry.
	 */

	if ( iph->daddr != skb->dev->pa_addr && (brd = ip_chk_addr(iph->daddr)) == 0)
	{
		/*
		 *	Don't forward multicast or broadcast frames.
		 */

		if(skb->pkt_type!=PACKET_HOST || brd==IS_BROADCAST)
		{
			kfree_skb(skb,FREE_WRITE);
			return 0;
		}

		/*
		 *	The packet is for another target. Forward the frame
		 */

#ifdef CONFIG_IP_FORWARD
		ip_forward(skb, dev, is_frag);
#else
/*	printk("Machine %lx tried to use us as a forwarder to %lx but we have forwarding disabled!\n",
			iph->saddr,iph->daddr);*/
		ip_statistics.IpInAddrErrors++;
#endif
		/*
		 *	The forwarder is inefficient and copies the packet. We
		 *	free the original now.
		 */

		kfree_skb(skb, FREE_WRITE);
		return(0);
	}

	/*
	 *	Account for the packet
	 */

#ifdef CONFIG_IP_ACCT
	ip_acct_cnt(iph,ip_acct_chain,1);
#endif

	/*
	 *	Reassemble IP fragments.
	 */

	if(is_frag)
	{
		/* Defragment. Obtain the complete packet if there is one */
		skb=ip_defrag(iph,skb,dev);
		if(skb==NULL)
			return 0;	/* Not complete yet; ip_defrag keeps the pieces */
		skb->dev = dev;
		iph=skb->h.iph;		/* Header moved with the reassembled buffer */
	}

	/*
	 *	Point into the IP datagram, just past the header.
	 */

	skb->ip_hdr = iph;
	skb->h.raw += iph->ihl*4;

	/*
	 *	Deliver to raw sockets. This is fun as to avoid copies we want to make no surplus copies.
	 */

	hash = iph->protocol & (SOCK_ARRAY_SIZE-1);

	/* If there maybe a raw socket we must check - if not we don't care less */
	if((raw_sk=raw_prot.sock_array[hash])!=NULL)
	{
		struct sock *sknext=NULL;
		struct sk_buff *skb1;
		raw_sk=get_sock_raw(raw_sk, hash,  iph->saddr, iph->daddr);
		if(raw_sk)	/* Any raw sockets */
		{
			do
			{
				/* Find the next */
				sknext=get_sock_raw(raw_sk->next, hash, iph->saddr, iph->daddr);
				if(sknext)
					skb1=skb_clone(skb, GFP_ATOMIC);
				else
					break;	/* One pending raw socket left */
				if(skb1)
					raw_rcv(raw_sk, skb1, dev, iph->saddr,iph->daddr);
				raw_sk=sknext;
			}
			while(raw_sk!=NULL);

			/*
			 *	Here either raw_sk is the last raw socket, or NULL if none.
			 *	We deliver to the last raw socket AFTER the protocol checks
			 *	as it avoids a surplus copy.
			 */
		}
	}

	/*
	 *	skb->h.raw now points at the protocol beyond the IP header.
	 */

	hash = iph->protocol & (MAX_INET_PROTOS -1);
	for (ipprot = (struct inet_protocol *)inet_protos[hash];ipprot != NULL;ipprot=(struct inet_protocol *)ipprot->next)
	{
		struct sk_buff *skb2;

		if (ipprot->protocol != iph->protocol)
			continue;

		/*
		 *	See if we need to make a copy of it.  This will
		 *	only be set if more than one protocol wants it.
		 *	and then not for the last one. If there is a pending
		 *	raw delivery wait for that
		 */

		if (ipprot->copy || raw_sk)
		{
			skb2 = skb_clone(skb, GFP_ATOMIC);
			if(skb2==NULL)
				continue;
		}
		else
		{
			skb2 = skb;
		}
		flag = 1;

		/*
		 *	Pass on the datagram to each protocol that wants it,
		 *	based on the datagram protocol.  We should really
		 *	check the protocol handler's return values here...
		 */

		ipprot->handler(skb2, dev, opts_p ? &opt : 0, iph->daddr,
				(ntohs(iph->tot_len) - (iph->ihl * 4)),
				iph->saddr, 0, ipprot);

	}

	/*
	 *	All protocols checked.
	 *	If this packet was a broadcast, we may *not* reply to it, since that
	 *	causes (proven, grin) ARP storms and a leakage of memory (i.e. all
	 *	ICMP reply messages get queued up for transmission...)
	 */

	if(raw_sk!=NULL)	/* Shift to last raw user */
		raw_rcv(raw_sk, skb, dev, iph->saddr, iph->daddr);
	else if (!flag)		/* Free and report errors */
	{
		if (brd != IS_BROADCAST && brd!=IS_MULTICAST)
			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PROT_UNREACH, dev);
		kfree_skb(skb, FREE_WRITE);
	}

	return(0);
}
1717
1718 /*1719 * Queues a packet to be sent, and starts the transmitter1720 * if necessary. if free = 1 then we free the block after1721 * transmit, otherwise we don't. If free==2 we not only1722 * free the block but also don't assign a new ip seq number.1723 * This routine also needs to put in the total length,1724 * and compute the checksum1725 */1726
1727 voidip_queue_xmit(structsock *sk, structdevice *dev,
/* */1728 structsk_buff *skb, intfree)
1729 {1730 structiphdr *iph;
1731 unsignedchar *ptr;
1732
1733 /* Sanity check */1734 if (dev == NULL)
1735 {1736 printk("IP: ip_queue_xmit dev = NULL\n");
1737 return;
1738 }1739
1740 IS_SKB(skb);
1741
1742 /*1743 * Do some book-keeping in the packet for later1744 */1745
1746
1747 skb->dev = dev;
1748 skb->when = jiffies;
1749
1750 /*1751 * Find the IP header and set the length. This is bad1752 * but once we get the skb data handling code in the1753 * hardware will push its header sensibly and we will1754 * set skb->ip_hdr to avoid this mess and the fixed1755 * header length problem1756 */1757
1758 ptr = skb->data;
1759 ptr += dev->hard_header_len;
1760 iph = (structiphdr *)ptr;
1761 skb->ip_hdr = iph;
1762 iph->tot_len = ntohs(skb->len-dev->hard_header_len);
1763
1764 /*1765 * No reassigning numbers to fragments...1766 */1767
1768 if(free!=2)
1769 iph->id = htons(ip_id_count++);
1770 else1771 free=1;
1772
1773 /* All buffers without an owner socket get freed */1774 if (sk == NULL)
1775 free = 1;
1776
1777 skb->free = free;
1778
1779 /*1780 * Do we need to fragment. Again this is inefficient.1781 * We need to somehow lock the original buffer and use1782 * bits of it.1783 */1784
1785 if(skb->len > dev->mtu + dev->hard_header_len)
1786 {1787 ip_fragment(sk,skb,dev,0);
1788 IS_SKB(skb);
1789 kfree_skb(skb,FREE_WRITE);
1790 return;
1791 }1792
1793 /*1794 * Add an IP checksum1795 */1796
1797 ip_send_check(iph);
1798
1799 /*1800 * Print the frame when debugging1801 */1802
1803 /*1804 * More debugging. You cannot queue a packet already on a list1805 * Spot this and moan loudly.1806 */1807 if (skb->next != NULL)
1808 {1809 printk("ip_queue_xmit: next != NULL\n");
1810 skb_unlink(skb);
1811 }1812
1813 /*1814 * If a sender wishes the packet to remain unfreed1815 * we add it to his send queue. This arguably belongs1816 * in the TCP level since nobody else uses it. BUT1817 * remember IPng might change all the rules.1818 */1819
1820 if (!free)
1821 {1822 unsignedlongflags;
1823 /* The socket now has more outstanding blocks */1824
1825 sk->packets_out++;
1826
1827 /* Protect the list for a moment */1828 save_flags(flags);
1829 cli();
1830
1831 if (skb->link3 != NULL)
1832 {1833 printk("ip.c: link3 != NULL\n");
1834 skb->link3 = NULL;
1835 }1836 if (sk->send_head == NULL)
1837 {1838 sk->send_tail = skb;
1839 sk->send_head = skb;
1840 }1841 else1842 {1843 sk->send_tail->link3 = skb;
1844 sk->send_tail = skb;
1845 }1846 /* skb->link3 is NULL */1847
1848 /* Interrupt restore */1849 restore_flags(flags);
1850 /* Set the IP write timeout to the round trip time for the packet.1851 If an acknowledge has not arrived by then we may wish to act */1852 reset_timer(sk, TIME_WRITE, sk->rto);
1853 }1854 else1855 /* Remember who owns the buffer */1856 skb->sk = sk;
1857
1858 /*1859 * If the indicated interface is up and running, send the packet.1860 */1861 ip_statistics.IpOutRequests++;
1862 #ifdefCONFIG_IP_ACCT1863 ip_acct_cnt(iph,ip_acct_chain,1);
1864 #endif1865
1866 if (dev->flags & IFF_UP)
1867 {1868 /*1869 * If we have an owner use its priority setting,1870 * otherwise use NORMAL1871 */1872
1873 if (sk != NULL)
1874 {1875 dev_queue_xmit(skb, dev, sk->priority);
1876 }1877 else1878 {1879 dev_queue_xmit(skb, dev, SOPRI_NORMAL);
1880 }1881 }1882 else1883 {1884 ip_statistics.IpOutDiscards++;
1885 if (free)
1886 kfree_skb(skb, FREE_WRITE);
1887 }1888 }1889
1890
1891 /*1892 * A socket has timed out on its send queue and wants to do a1893 * little retransmitting. Currently this means TCP.1894 */1895
1896 voidip_do_retransmit(structsock *sk, intall)
/* */1897 {1898 structsk_buff * skb;
1899 structproto *prot;
1900 structdevice *dev;
1901
1902 prot = sk->prot;
1903 skb = sk->send_head;
1904
1905 while (skb != NULL)
1906 {1907 dev = skb->dev;
1908 IS_SKB(skb);
1909 skb->when = jiffies;
1910
1911 /*1912 * In general it's OK just to use the old packet. However we1913 * need to use the current ack and window fields. Urg and1914 * urg_ptr could possibly stand to be updated as well, but we1915 * don't keep the necessary data. That shouldn't be a problem,1916 * if the other end is doing the right thing. Since we're1917 * changing the packet, we have to issue a new IP identifier.1918 */1919
1920 /* this check may be unnecessary - retransmit only for TCP */1921 if (sk->protocol == IPPROTO_TCP) {1922 structtcphdr *th;
1923 structiphdr *iph;
1924 intsize;
1925
1926 iph = (structiphdr *)(skb->data + dev->hard_header_len);
1927 th = (structtcphdr *)(((char *)iph) + (iph->ihl << 2));
1928 size = skb->len - (((unsignedchar *) th) - skb->data);
1929
1930 iph->id = htons(ip_id_count++);
1931 ip_send_check(iph);
1932
1933 th->ack_seq = ntohl(sk->acked_seq);
1934 th->window = ntohs(tcp_select_window(sk));
1935 tcp_send_check(th, sk->saddr, sk->daddr, size, sk);
1936 }1937
1938 /*1939 * If the interface is (still) up and running, kick it.1940 */1941
1942 if (dev->flags & IFF_UP)
1943 {1944 /*1945 * If the packet is still being sent by the device/protocol1946 * below then don't retransmit. This is both needed, and good -1947 * especially with connected mode AX.25 where it stops resends1948 * occurring of an as yet unsent anyway frame!1949 * We still add up the counts as the round trip time wants1950 * adjusting.1951 */1952 if (sk && !skb_device_locked(skb))
1953 {1954 /* Remove it from any existing driver queue first! */1955 skb_unlink(skb);
1956 /* Now queue it */1957 ip_statistics.IpOutRequests++;
1958 dev_queue_xmit(skb, dev, sk->priority);
1959 }1960 }1961
1962 /*1963 * Count retransmissions1964 */1965 sk->retransmits++;
1966 sk->prot->retransmits ++;
1967
1968 /*1969 * Only one retransmit requested.1970 */1971 if (!all)
1972 break;
1973
1974 /*1975 * This should cut it off before we send too many packets.1976 */1977 if (sk->retransmits >= sk->cong_window)
1978 break;
1979 skb = skb->link3;
1980 }1981 }1982
1983 /*1984 * This is the normal code called for timeouts. It does the retransmission1985 * and then does backoff. ip_do_retransmit is separated out because1986 * tcp_ack needs to send stuff from the retransmit queue without1987 * initiating a backoff.1988 */1989
1990 voidip_retransmit(structsock *sk, intall)
/* */1991 {1992 ip_do_retransmit(sk, all);
1993
1994 /*1995 * Increase the timeout each time we retransmit. Note that1996 * we do not increase the rtt estimate. rto is initialized1997 * from rtt, but increases here. Jacobson (SIGCOMM 88) suggests1998 * that doubling rto each time is the least we can get away with.1999 * In KA9Q, Karn uses this for the first few times, and then2000 * goes to quadratic. netBSD doubles, but only goes up to *64,2001 * and clamps at 1 to 64 sec afterwards. Note that 120 sec is2002 * defined in the protocol as the maximum possible RTT. I guess2003 * we'll have to use something other than TCP to talk to the2004 * University of Mars.2005 */2006
2007 sk->retransmits++;
2008 sk->backoff++;
2009 sk->rto = min(sk->rto << 1, 120*HZ);
2010 reset_timer(sk, TIME_WRITE, sk->rto);
2011 }2012
2013 #ifdefCONFIG_IP_MULTICAST2014
/*
 *	Write a multicast group list table for the IGMP daemon to
 *	read (standard /proc read handler: fill 'buffer', honouring the
 *	requested 'offset'/'length' window, and report via '*start').
 */

int ip_mc_procinfo(char *buffer, char **start, off_t offset, int length)
{
	off_t pos=0, begin=0;	/* pos: bytes generated so far; begin: file offset of buffer[0] */
	struct ip_mc_list *im;
	unsigned long flags;
	int len=0;

	len=sprintf(buffer,"Device    : Multicast\n");
	/* Walk ip_mc_head with interrupts off so the list cannot change under us */
	save_flags(flags);
	cli();

	im=ip_mc_head;

	while(im!=NULL)
	{
		len+=sprintf(buffer+len,"%-10s: %08lX\n", im->interface->name, im->multiaddr);
		pos=begin+len;
		if(pos<offset)
		{
			/* Still before the requested window: discard and note where we are */
			len=0;
			begin=pos;
		}
		if(pos>offset+length)
			break;	/* Past the window: the buffer holds all the caller asked for */
		im=im->next;
	}
	restore_flags(flags);
	/* Trim the result to exactly the [offset, offset+length) window */
	*start=buffer+(offset-begin);
	len-=(offset-begin);
	if(len>length)
		len=length;
	return len;
}
2055
2056 #endif2057 /*2058 * Socket option code for IP. This is the end of the line after any TCP,UDP etc options on2059 * an IP socket.2060 *2061 * We implement IP_TOS (type of service), IP_TTL (time to live).2062 *2063 * Next release we will sort out IP_OPTIONS since for some people are kind of important.2064 */2065
2066 intip_setsockopt(structsock *sk, intlevel, intoptname, char *optval, intoptlen)
/* */2067 {2068 intval,err;
2069 #ifdefined(CONFIG_IP_FIREWALL) || defined(CONFIG_IP_ACCT)
2070 structip_fwtmp_fw;
2071 #endif2072 if (optval == NULL)
2073 return(-EINVAL);
2074
2075 err=verify_area(VERIFY_READ, optval, sizeof(int));
2076 if(err)
2077 returnerr;
2078
2079 val = get_fs_long((unsignedlong *)optval);
2080
2081 if(level!=SOL_IP)
2082 return -EOPNOTSUPP;
2083
2084 switch(optname)
2085 {2086 caseIP_TOS:
2087 if(val<0||val>255)
2088 return -EINVAL;
2089 sk->ip_tos=val;
2090 if(val==IPTOS_LOWDELAY)
2091 sk->priority=SOPRI_INTERACTIVE;
2092 if(val==IPTOS_THROUGHPUT)
2093 sk->priority=SOPRI_BACKGROUND;
2094 return 0;
2095 caseIP_TTL:
2096 if(val<1||val>255)
2097 return -EINVAL;
2098 sk->ip_ttl=val;
2099 return 0;
2100 #ifdefCONFIG_IP_MULTICAST2101 caseIP_MULTICAST_TTL:
2102 {2103 unsignedcharucval;
2104
2105 ucval=get_fs_byte((unsignedchar *)optval);
2106 printk("MC TTL %d\n", ucval);
2107 if(ucval<1||ucval>255)
2108 return -EINVAL;
2109 sk->ip_mc_ttl=(int)ucval;
2110 return 0;
2111 }2112
2113 caseIP_MULTICAST_IF:
2114 {2115 /* Not fully tested */2116 structin_addraddr;
2117 structdevice *dev=NULL;
2118
2119 /*2120 * Check the arguments are allowable2121 */2122
2123 err=verify_area(VERIFY_READ, optval, sizeof(addr));
2124 if(err)
2125 returnerr;
2126
2127 memcpy_fromfs(&addr,optval,sizeof(addr));
2128
2129 printk("MC bind %s\n", in_ntoa(addr.s_addr));
2130
2131 /*2132 * What address has been requested2133 */2134
2135 if(addr.s_addr==INADDR_ANY) /* Default */2136 {2137 sk->ip_mc_name[0]=0;
2138 return 0;
2139 }2140
2141 /*2142 * Find the device2143 */2144
2145 for(dev = dev_base; dev; dev = dev->next)
2146 {2147 if((dev->flags&IFF_UP)&&(dev->flags&IFF_MULTICAST)&&
2148 (dev->pa_addr==addr.s_addr))
2149 break;
2150 }2151
2152 /*2153 * Did we find one2154 */2155
2156 if(dev)
2157 {2158 strcpy(sk->ip_mc_name,dev->name);
2159 return 0;
2160 }2161 return -EADDRNOTAVAIL;
2162 }2163
2164 caseIP_ADD_MEMBERSHIP:
2165 {2166
2167 /*2168 * FIXME: Add/Del membership should have a semaphore protecting them from re-entry2169 */2170 structip_mreqmreq;
2171 staticstructoptionsoptmem;
2172 unsignedlongroute_src;
2173 structrtable *rt;
2174 structip_mc_list *l=NULL;
2175 structdevice *dev=NULL;
2176 intct=0;
2177
2178 /*2179 * Check the arguments.2180 */2181
2182 err=verify_area(VERIFY_READ, optval, sizeof(mreq));
2183 if(err)
2184 returnerr;
2185
2186 memcpy_fromfs(&mreq,optval,sizeof(mreq));
2187
2188 /* 2189 * Get device for use later2190 */2191
2192 if(mreq.imr_interface.s_addr==INADDR_ANY)
2193 {2194 /*2195 * Not set so scan.2196 */2197 if((rt=ip_rt_route(mreq.imr_multiaddr.s_addr,&optmem, &route_src))!=NULL)
2198 {2199 dev=rt->rt_dev;
2200 rt->rt_use--;
2201 }2202 }2203 else2204 {2205 /*2206 * Find a suitable device.2207 */2208 for(dev = dev_base; dev; dev = dev->next)
2209 {2210 if((dev->flags&IFF_UP)&&(dev->flags&IFF_MULTICAST)&&
2211 (dev->pa_addr==mreq.imr_interface.s_addr))
2212 break;
2213 }2214 }2215
2216 /*2217 * No device, no cookies.2218 */2219
2220 if(!dev)
2221 return -ENODEV;
2222
2223 /*2224 * Join group.2225 */2226
2227 returnip_mc_join_group(sk,dev,mreq.imr_multiaddr.s_addr);
2228 }2229
2230 caseIP_DROP_MEMBERSHIP:
2231 {2232 structip_mreqmreq;
2233 structrtable *rt;
2234 staticstructoptionsoptmem;
2235 unsignedlongroute_src;
2236 structdevice *dev=NULL;
2237
2238 /*2239 * Check the arguments2240 */2241
2242 err=verify_area(VERIFY_READ, optval, sizeof(mreq));
2243 if(err)
2244 returnerr;
2245
2246 memcpy_fromfs(&mreq,optval,sizeof(mreq));
2247
2248 /*2249 * Get device for use later 2250 */2251
2252 if(mreq.imr_interface.s_addr==INADDR_ANY)
2253 {2254 if((rt=ip_rt_route(mreq.imr_multiaddr.s_addr,&optmem, &route_src))!=NULL)
2255 {2256 dev=rt->rt_dev;
2257 rt->rt_use--;
2258 }2259 }2260 else2261 {2262 for(dev = dev_base; dev; dev = dev->next)
2263 {2264 if((dev->flags&IFF_UP)&& (dev->flags&IFF_MULTICAST)&&
2265 (dev->pa_addr==mreq.imr_interface.s_addr))
2266 break;
2267 }2268 }2269
2270 /*2271 * Did we find a suitable device.2272 */2273
2274 if(!dev)
2275 return -ENODEV;
2276
2277 /*2278 * Leave group2279 */2280
2281 returnip_mc_leave_group(sk,dev,mreq.imr_multiaddr.s_addr);
2282 }2283 #endif2284 #ifdefCONFIG_IP_FIREWALL2285 caseIP_FW_ADD_BLK:
2286 caseIP_FW_DEL_BLK:
2287 caseIP_FW_ADD_FWD:
2288 caseIP_FW_DEL_FWD:
2289 caseIP_FW_CHK_BLK:
2290 caseIP_FW_CHK_FWD:
2291 caseIP_FW_FLUSH:
2292 caseIP_FW_POLICY:
2293 if(!suser())
2294 return -EPERM;
2295 if(optlen>sizeof(tmp_fw) || optlen<1)
2296 return -EINVAL;
2297 err=verify_area(VERIFY_READ,optval,optlen);
2298 if(err)
2299 returnerr;
2300 memcpy_fromfs(&tmp_fw,optval,optlen);
2301 err=ip_fw_ctl(optname, &tmp_fw,optlen);
2302 return -err; /* -0 is 0 after all */2303
2304 #endif2305 #ifdefCONFIG_IP_ACCT2306 caseIP_ACCT_DEL:
2307 caseIP_ACCT_ADD:
2308 caseIP_ACCT_FLUSH:
2309 caseIP_ACCT_ZERO:
2310 if(!suser())
2311 return -EPERM;
2312 if(optlen>sizeof(tmp_fw) || optlen<1)
2313 return -EINVAL;
2314 err=verify_area(VERIFY_READ,optval,optlen);
2315 if(err)
2316 returnerr;
2317 memcpy_fromfs(&tmp_fw, optval,optlen);
2318 err=ip_acct_ctl(optname, &tmp_fw,optlen);
2319 return -err; /* -0 is 0 after all */2320 #endif2321 /* IP_OPTIONS and friends go here eventually */2322 default:
2323 return(-ENOPROTOOPT);
2324 }2325 }2326
2327 /*2328 * Get the options. Note for future reference. The GET of IP options gets the2329 * _received_ ones. The set sets the _sent_ ones.2330 */2331
2332 intip_getsockopt(structsock *sk, intlevel, intoptname, char *optval, int *optlen)
/* */2333 {2334 intval,err;
2335 #ifdefCONFIG_IP_MULTICAST2336 intlen;
2337 #endif2338
2339 if(level!=SOL_IP)
2340 return -EOPNOTSUPP;
2341
2342 switch(optname)
2343 {2344 caseIP_TOS:
2345 val=sk->ip_tos;
2346 break;
2347 caseIP_TTL:
2348 val=sk->ip_ttl;
2349 break;
2350 #ifdefCONFIG_IP_MULTICAST2351 caseIP_MULTICAST_TTL:
2352 val=sk->ip_mc_ttl;
2353 break;
2354 caseIP_MULTICAST_IF:
2355 err=verify_area(VERIFY_WRITE, optlen, sizeof(int));
2356 if(err)
2357 returnerr;
2358 len=strlen(sk->ip_mc_name);
2359 err=verify_area(VERIFY_WRITE, optval, len);
2360 if(err)
2361 returnerr;
2362 put_fs_long(len,(unsignedlong *) optlen);
2363 memcpy_tofs((void *)optval,sk->ip_mc_name, len);
2364 return 0;
2365 #endif2366 default:
2367 return(-ENOPROTOOPT);
2368 }2369 err=verify_area(VERIFY_WRITE, optlen, sizeof(int));
2370 if(err)
2371 returnerr;
2372 put_fs_long(sizeof(int),(unsignedlong *) optlen);
2373
2374 err=verify_area(VERIFY_WRITE, optval, sizeof(int));
2375 if(err)
2376 returnerr;
2377 put_fs_long(val,(unsignedlong *)optval);
2378
2379 return(0);
2380 }2381
/*
 *	IP protocol layer initialiser: the packet_type entry registered
 *	with the device layer so IP frames are handed to ip_rcv().
 */

static struct packet_type ip_packet_type =
{
	0,	/* type - set to htons(ETH_P_IP) by ip_init(); can't call htons() in a static initialiser */
	0,	/* copy */
	ip_rcv,	/* receive handler */
	NULL,
	NULL,
};
2394
2395
/*
 *	IP registers the packet type and then calls the subprotocol initialisers
 */

void ip_init(void)
{
	/* Fill in the protocol id (not expressible in the static initialiser) and register */
	ip_packet_type.type=htons(ETH_P_IP);
	dev_add_pack(&ip_packet_type);
/*	ip_raw_init();
	ip_packet_init();
	ip_tcp_init();
	ip_udp_init();*/
}