1 /* 2 * INET An implementation of the TCP/IP protocol suite for the LINUX 3 * operating system. INET is implemented using the BSD Socket 4 * interface as the means of communication with the user level. 5 * 6 * The Internet Protocol (IP) module. 7 * 8 * Version: @(#)ip.c 1.0.16b 9/1/93 9 * 10 * Authors: Ross Biro, <bir7@leland.Stanford.Edu> 11 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> 12 * Donald Becker, <becker@super.org> 13 * Alan Cox, <gw4pts@gw4pts.ampr.org> 14 * 15 * Fixes: 16 * Alan Cox : Commented a couple of minor bits of surplus code 17 * Alan Cox : Undefining IP_FORWARD doesn't include the code 18 * (just stops a compiler warning). 19 * Alan Cox : Frames with >=MAX_ROUTE record routes, strict routes or loose routes 20 * are junked rather than corrupting things. 21 * Alan Cox : Frames to bad broadcast subnets are dumped 22 * We used to process them non broadcast and 23 * boy could that cause havoc. 24 * Alan Cox : ip_forward sets the free flag on the 25 * new frame it queues. Still crap because 26 * it copies the frame but at least it 27 * doesn't eat memory too. 28 * Alan Cox : Generic queue code and memory fixes. 29 * Fred Van Kempen : IP fragment support (borrowed from NET2E) 30 * Gerhard Koerting: Forward fragmented frames correctly. 31 * Gerhard Koerting: Fixes to my fix of the above 8-). 32 * Gerhard Koerting: IP interface addressing fix. 33 * Linus Torvalds : More robustness checks 34 * Alan Cox : Even more checks: Still not as robust as it ought to be 35 * Alan Cox : Save IP header pointer for later 36 * Alan Cox : ip option setting 37 * Alan Cox : Use ip_tos/ip_ttl settings 38 * Alan Cox : Fragmentation bogosity removed 39 * (Thanks to Mark.Bush@prg.ox.ac.uk) 40 * Dmitry Gorodchanin : Send of a raw packet crash fix. 41 * Alan Cox : Silly ip bug when an overlength 42 * fragment turns up. Now frees the 43 * queue. 44 * Linus Torvalds/ : Memory leakage on fragmentation 45 * Alan Cox : handling. 
46 * Gerhard Koerting: Forwarding uses IP priority hints 47 * Teemu Rantanen : Fragment problems. 48 * Alan Cox : General cleanup, comments and reformat 49 * Alan Cox : SNMP statistics 50 * Alan Cox : BSD address rule semantics. Also see 51 * UDP as there is a nasty checksum issue 52 * if you do things the wrong way. 53 * 54 * To Fix: 55 * IP option processing is mostly not needed. ip_forward needs to know about routing rules 56 * and time stamp but that's about all. 57 * 58 * This program is free software; you can redistribute it and/or 59 * modify it under the terms of the GNU General Public License 60 * as published by the Free Software Foundation; either version 61 * 2 of the License, or (at your option) any later version. 62 */ 63 #include <asm/segment.h>
64 #include <asm/system.h>
65 #include <linux/types.h>
66 #include <linux/kernel.h>
67 #include <linux/sched.h>
68 #include <linux/string.h>
69 #include <linux/errno.h>
70 #include <linux/socket.h>
71 #include <linux/sockios.h>
72 #include <linux/in.h>
73 #include <linux/inet.h>
74 #include <linux/netdevice.h>
75 #include <linux/etherdevice.h>
76 #include "snmp.h"
77 #include "ip.h"
78 #include "protocol.h"
79 #include "route.h"
80 #include "tcp.h"
81 #include <linux/skbuff.h>
82 #include "sock.h"
83 #include "arp.h"
84 #include "icmp.h"
85
/*
 * Build-time configuration: compile in both IP forwarding and
 * IP fragment reassembly support.
 */
#define CONFIG_IP_FORWARD
#define CONFIG_IP_DEFRAG

extern int last_retran;
extern void sort_send(struct sock *sk);

/* Classic two-argument minimum; both arguments may be evaluated twice. */
#define min(a,b)	((a)<(b)?(a):(b))

/*
 * SNMP management statistics
 */

struct ip_mib ip_statistics={1,64,};	/* Forwarding=Yes, Default TTL=64 */
100 /* 101 * Print an IP packet for debugging purposes. 102 * 103 * This function is exported for the IP 104 * upper layers to use also. 105 */ 106
107 voidip_print(conststructiphdr *ip)
/* */ 108 { 109 unsignedcharbuff[32];
110 unsignedchar *ptr;
111 intaddr;
112 intlen;
113 inti;
114
115 /* Are we debugging IP frames */ 116
117 if (inet_debug != DBG_IP)
118 return;
119
120 /* Dump the IP header. */ 121 printk("IP: ihl=%d, version=%d, tos=%d, tot_len=%d\n",
122 ip->ihl, ip->version, ip->tos, ntohs(ip->tot_len));
123 printk(" id=%X, ttl=%d, prot=%d, check=%X\n",
124 ip->id, ip->ttl, ip->protocol, ip->check);
125 printk(" frag_off=%d\n", ip->frag_off);
126 printk(" soucre=%s ", in_ntoa(ip->saddr));
127 printk("dest=%s\n", in_ntoa(ip->daddr));
128 printk(" ----\n");
129
130 /* Dump the data. */ 131 ptr = (unsignedchar *)(ip + 1);
132 addr = 0;
133 len = ntohs(ip->tot_len) - (4 * ip->ihl);
134
135 while (len > 0)
136 { 137 printk(" %04X: ", addr);
138 for(i = 0; i < 16; i++)
139 { 140 if (len > 0)
141 { 142 printk("%02X ", (*ptr & 0xFF));
143 buff[i] = *ptr++;
144 if (buff[i] < 32 || buff[i] > 126)
145 buff[i] = '.';
146 } 147 else 148 { 149 printk(" ");
150 buff[i] = ' ';
151 } 152 addr++;
153 len--;
154 };
155 buff[i] = '\0';
156 printk(" \"%s\"\n", buff);
157 } 158 printk(" ----\n\n");
159 } 160
161 /* 162 * Handle the issuing of an ioctl() request 163 * for the ip device. This is scheduled to 164 * disappear 165 */ 166
167 intip_ioctl(structsock *sk, intcmd, unsignedlongarg)
/* */ 168 { 169 switch(cmd)
170 { 171 caseDDIOCSDBG:
172 return(dbg_ioctl((void *) arg, DBG_IP));
173 default:
174 return(-EINVAL);
175 } 176 } 177
178
179 /* these two routines will do routing. */ 180
/* Apply strict source routing.  Not implemented yet - placeholder. */
static void
strict_route(struct iphdr *iph, struct options *opt)
{
}
186
/* Apply loose source routing.  Not implemented yet - placeholder. */
static void
loose_route(struct iphdr *iph, struct options *opt)
{
}
192
/* Debug dump of one inet_protocol table entry. */
static void
print_ipprot(struct inet_protocol *ipprot)
{
	DPRINTF((DBG_IP, "handler = %X, protocol = %d, copy=%d \n",
		ipprot->handler, ipprot->protocol, ipprot->copy));
}
200
/* This routine will check to see if we have lost a gateway. */
/* Not implemented - placeholder kept for the exported interface. */
void
ip_route_check(unsigned long daddr)
{
}
207
#if 0
/* this routine puts the options at the end of an ip header. */
/* Compiled out: kept for reference until option output is supported. */
static int
build_options(struct iphdr *iph, struct options *opt)
{
	unsigned char *ptr;
	/* currently we don't support any options. */
	ptr = (unsigned char *)(iph+1);
	*ptr = 0;	/* single end-of-options octet */
	return (4);	/* options area is padded to a 4-byte multiple */
}
#endif
221
222 /* 223 * Take an skb, and fill in the MAC header. 224 */ 225
226 staticintip_send(structsk_buff *skb, unsignedlongdaddr, intlen, structdevice *dev, unsignedlongsaddr)
/* */ 227 { 228 intmac = 0;
229
230 skb->dev = dev;
231 skb->arp = 1;
232 if (dev->hard_header)
233 { 234 /* 235 * Build a hardware header. Source address is our mac, destination unknown 236 * (rebuild header will sort this out) 237 */ 238 mac = dev->hard_header(skb->data, dev, ETH_P_IP, NULL, NULL, len, skb);
239 if (mac < 0)
240 { 241 mac = -mac;
242 skb->arp = 0;
243 skb->raddr = daddr; /* next routing address */ 244 } 245 } 246 returnmac;
247 } 248
249
/*
 * This routine builds the appropriate hardware/IP headers for
 * the routine. It assumes that if *dev != NULL then the
 * protocol knows what it's doing, otherwise it uses the
 * routing/ARP tables to select a device struct.
 *
 * Returns the total number of header bytes written (MAC + IP),
 * or -ENETUNREACH when no route to 'daddr' exists.
 */
int ip_build_header(struct sk_buff *skb, unsigned long saddr, unsigned long daddr,
		struct device **dev, int type, struct options *opt, int len, int tos, int ttl)
{
	static struct options optmem;
	struct iphdr *iph;
	struct rtable *rt;
	unsigned char *buff;
	unsigned long raddr;
	static int count = 0;	/* datagram identification counter (wraps) */
	int tmp;
	unsigned long src;

	/*
	 * If there is no 'from' address as yet, then make it our loopback
	 */
	if (saddr == 0)
		saddr = ip_my_addr();

	DPRINTF((DBG_IP, "ip_build_header (skb=%X, saddr=%X, daddr=%X, *dev=%X,\n"
		"                 type=%d, opt=%X, len = %d)\n",
		skb, saddr, daddr, *dev, type, opt, len));

	buff = skb->data;

	/*
	 * See if we need to look up the device.
	 */
	if (*dev == NULL)
	{
		if(skb->localroute)
			rt = ip_rt_local(daddr, &optmem, &src);
		else
			rt = ip_rt_route(daddr, &optmem, &src);
		if (rt == NULL)
		{
			/* No route: count it and report unreachable. */
			ip_statistics.IpOutNoRoutes++;
			return(-ENETUNREACH);
		}

		*dev = rt->rt_dev;
		/*
		 * If the frame is from us and going off machine it MUST MUST MUST
		 * have the output device ip address and never the loopback
		 */
		if (saddr == 0x0100007FL && daddr != 0x0100007FL)
			saddr = src;/*rt->rt_dev->pa_addr;*/
		raddr = rt->rt_gateway;

		DPRINTF((DBG_IP, "ip_build_header: saddr set to %s\n", in_ntoa(saddr)));
		opt = &optmem;
	}
	else
	{
		/*
		 * We still need the address of the first hop.
		 */
		if(skb->localroute)
			rt = ip_rt_local(daddr, &optmem, &src);
		else
			rt = ip_rt_route(daddr, &optmem, &src);
		/*
		 * If the frame is from us and going off machine it MUST MUST MUST
		 * have the output device ip address and never the loopback
		 */
		if (saddr == 0x0100007FL && daddr != 0x0100007FL)
			saddr = src;/*rt->rt_dev->pa_addr;*/

		raddr = (rt == NULL) ? 0 : rt->rt_gateway;
	}

	/*
	 * No gateway so aim at the real destination
	 */
	if (raddr == 0)
		raddr = daddr;

	/*
	 * Now build the MAC header.
	 */
	tmp = ip_send(skb, raddr, len, *dev, saddr);
	buff += tmp;
	len -= tmp;

	/*
	 * Book keeping
	 */
	skb->dev = *dev;
	skb->saddr = saddr;
	if (skb->sk)
		skb->sk->saddr = saddr;

	/*
	 * Now build the IP header.
	 */

	/*
	 * If we are using IPPROTO_RAW, then we don't need an IP header, since
	 * one is being supplied to us by the user
	 */
	if(type == IPPROTO_RAW)
		return (tmp);

	iph = (struct iphdr *)buff;
	iph->version = 4;
	iph->tos = tos;
	iph->frag_off = 0;
	iph->ttl = ttl;
	iph->daddr = daddr;
	iph->saddr = saddr;
	iph->protocol = type;
	iph->ihl = 5;
	iph->id = htons(count++);

	/* Setup the IP options. */
#ifdef Not_Yet_Avail
	build_options(iph, opt);
#endif

	/*
	 * NOTE(review): tot_len and check are not filled in here -
	 * presumably the caller completes them before transmit; confirm
	 * against ip_queue_xmit.
	 */
	return(20 + tmp);	/* IP header plus MAC header size */
}
382
/*
 * Parse the options of an incoming IP header into 'opt'.
 *
 * NOTE(review): the unconditional return(0) immediately after the
 * zeroing makes everything below it dead code - option processing is
 * deliberately short-circuited (see the "To Fix" note in the file
 * header).  The parser is kept intact for when it is re-enabled.
 * Returns 0 on success, 1 when an option is malformed (caller junks
 * the frame).
 */
static int
do_options(struct iphdr *iph, struct options *opt)
{
	unsigned char *buff;
	int done = 0;
	int i, len = sizeof(struct iphdr);

	/* Zero out the options. */
	opt->record_route.route_size = 0;
	opt->loose_route.route_size = 0;
	opt->strict_route.route_size = 0;
	opt->tstamp.ptr = 0;
	opt->security = 0;
	opt->compartment = 0;
	opt->handling = 0;
	opt->stream = 0;
	opt->tcc = 0;
	return(0);

	/* Advance the pointer to start at the options. */
	buff = (unsigned char *)(iph + 1);

	/* Now start the processing. */
	while (!done && len < iph->ihl*4) switch(*buff) {
		case IPOPT_END:
			done = 1;
			break;
		case IPOPT_NOOP:
			buff++;
			len++;
			break;
		case IPOPT_SEC:
			buff++;
			/* Security option has a fixed length of 11. */
			if (*buff != 11) return(1);
			buff++;
			opt->security = ntohs(*(unsigned short *)buff);
			buff += 2;
			opt->compartment = ntohs(*(unsigned short *)buff);
			buff += 2;
			opt->handling = ntohs(*(unsigned short *)buff);
			buff += 2;
			opt->tcc = ((*buff) << 16) + ntohs(*(unsigned short *)(buff+1));
			buff += 3;
			len += 11;
			break;
		case IPOPT_LSRR:
			buff++;
			if ((*buff - 3)% 4 != 0) return(1);
			len += *buff;
			opt->loose_route.route_size = (*buff -3)/4;
			buff++;
			if (*buff % 4 != 0) return(1);
			opt->loose_route.pointer = *buff/4 - 1;
			buff++;
			buff++;
			for (i = 0; i < opt->loose_route.route_size; i++) {
				/* Oversized route lists junk the frame. */
				if(i>=MAX_ROUTE)
					return(1);
				opt->loose_route.route[i] = *(unsigned long *)buff;
				buff += 4;
			}
			break;
		case IPOPT_SSRR:
			buff++;
			if ((*buff - 3)% 4 != 0) return(1);
			len += *buff;
			opt->strict_route.route_size = (*buff -3)/4;
			buff++;
			if (*buff % 4 != 0) return(1);
			opt->strict_route.pointer = *buff/4 - 1;
			buff++;
			buff++;
			for (i = 0; i < opt->strict_route.route_size; i++) {
				if(i>=MAX_ROUTE)
					return(1);
				opt->strict_route.route[i] = *(unsigned long *)buff;
				buff += 4;
			}
			break;
		case IPOPT_RR:
			buff++;
			if ((*buff - 3)% 4 != 0) return(1);
			len += *buff;
			opt->record_route.route_size = (*buff -3)/4;
			buff++;
			if (*buff % 4 != 0) return(1);
			opt->record_route.pointer = *buff/4 - 1;
			buff++;
			buff++;
			for (i = 0; i < opt->record_route.route_size; i++) {
				if(i>=MAX_ROUTE)
					return 1;
				opt->record_route.route[i] = *(unsigned long *)buff;
				buff += 4;
			}
			break;
		case IPOPT_SID:
			len += 4;
			buff +=2;
			opt->stream = *(unsigned short *)buff;
			buff += 2;
			break;
		case IPOPT_TIMESTAMP:
			buff++;
			len += *buff;
			if (*buff % 4 != 0) return(1);
			opt->tstamp.len = *buff / 4 - 1;
			buff++;
			if ((*buff - 1) % 4 != 0) return(1);
			opt->tstamp.ptr = (*buff-1)/4;
			buff++;
			opt->tstamp.x.full_char = *buff;
			buff++;
			for (i = 0; i < opt->tstamp.len; i++) {
				opt->tstamp.data[i] = *(unsigned long *)buff;
				buff += 4;
			}
			break;
		default:
			/* Unknown option: junk the frame. */
			return(1);
	}

	/* Promote a source route to the record-route slot if none present. */
	if (opt->record_route.route_size == 0) {
		if (opt->strict_route.route_size != 0) {
			memcpy(&(opt->record_route), &(opt->strict_route),
					sizeof(opt->record_route));
		} else if (opt->loose_route.route_size != 0) {
			memcpy(&(opt->record_route), &(opt->loose_route),
					sizeof(opt->record_route));
		}
	}

	if (opt->strict_route.route_size != 0 &&
		opt->strict_route.route_size != opt->strict_route.pointer) {
		strict_route(iph, opt);
		return(0);
	}

	if (opt->loose_route.route_size != 0 &&
		opt->loose_route.route_size != opt->loose_route.pointer) {
		loose_route(iph, opt);
		return(0);
	}

	return(0);
}
/*
 * This is a version of ip_compute_csum() optimized for IP headers, which
 * always checksum on 4 octet boundaries.
 *
 * 'wlen' is the header length in 32-bit words (iph->ihl).  Returns the
 * 16-bit one's-complement checksum.  i386-specific inline assembly.
 */

static inline unsigned short ip_fast_csum(unsigned char * buff, int wlen)
{
	unsigned long sum = 0;

	if (wlen)
	{
		unsigned long bogus;
		/*
		 * Sum 'wlen' 32-bit words with carry propagation, then fold
		 * the 32-bit accumulator into 16 bits (high half added into
		 * low half, plus final carry).
		 */
		__asm__("clc\n"
		    "1:\t"
		    "lodsl\n\t"
		    "adcl %3, %0\n\t"
		    "decl %2\n\t"
		    "jne 1b\n\t"
		    "adcl $0, %0\n\t"
		    "movl %0, %3\n\t"
		    "shrl $16, %3\n\t"
		    "addw %w3, %w0\n\t"
		    "adcw $0, %w0"
		    : "=r" (sum), "=S" (buff), "=r" (wlen), "=a" (bogus)
		    : "0" (sum), "1" (buff), "2" (wlen));
	}
	/* One's-complement and mask to 16 bits. */
	return (~sum) & 0xffff;
}
/*
 * This routine does all the checksum computations that don't
 * require anything special (like copying or special headers).
 *
 * Computes the 16-bit one's-complement Internet checksum over 'len'
 * bytes at 'buff'.  i386-specific inline assembly.
 */

unsigned short ip_compute_csum(unsigned char * buff, int len)
{
	unsigned long sum = 0;

	/* Do the first multiple of 4 bytes and convert to 16 bits. */
	if (len > 3)
	{
		__asm__("clc\n"
		    "1:\t"
		    "lodsl\n\t"
		    "adcl %%eax, %%ebx\n\t"
		    "loop 1b\n\t"
		    "adcl $0, %%ebx\n\t"
		    "movl %%ebx, %%eax\n\t"
		    "shrl $16, %%eax\n\t"
		    "addw %%ax, %%bx\n\t"
		    "adcw $0, %%bx"
		    : "=b" (sum) , "=S" (buff)
		    : "0" (sum), "c" (len >> 2) ,"1" (buff)
		    : "ax", "cx", "si", "bx" );
	}
	/* Fold in a trailing 16-bit word, if present. */
	if (len & 2)
	{
		__asm__("lodsw\n\t"
		    "addw %%ax, %%bx\n\t"
		    "adcw $0, %%bx"
		    : "=b" (sum), "=S" (buff)
		    : "0" (sum), "1" (buff)
		    : "bx", "ax", "si");
	}
	/* Fold in a trailing odd byte, if present. */
	if (len & 1)
	{
		__asm__("lodsb\n\t"
		    "movb $0, %%ah\n\t"
		    "addw %%ax, %%bx\n\t"
		    "adcw $0, %%bx"
		    : "=b" (sum), "=S" (buff)
		    : "0" (sum), "1" (buff)
		    : "bx", "ax", "si");
	}
	/* One's-complement the folded sum. */
	sum = ~sum;
	return(sum & 0xffff);
}
608 /* 609 * Check the header of an incoming IP datagram. This version is still used in slhc.c. 610 */ 611
612 intip_csum(structiphdr *iph)
/* */ 613 { 614 returnip_fast_csum((unsignedchar *)iph, iph->ihl);
615 } 616
/*
 * Generate a checksum for an outgoing IP datagram.
 */

static void ip_send_check(struct iphdr *iph)
{
	/* The check field must be zero while the sum is computed. */
	iph->check = 0;
	iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
}
627 /************************ Fragment Handlers From NET2E not yet with tweaks to beat 4K **********************************/ 628
629
/*
 * This fragment handler is a bit of a heap. On the other hand it works quite
 * happily and handles things quite well.
 */

/* Head of the doubly-linked list of datagrams being reassembled. */
static struct ipq *ipqueue = NULL;		/* IP fragment queue */
637 /* 638 * Create a new fragment entry. 639 */ 640
641 staticstructipfrag *ip_frag_create(intoffset, intend, structsk_buff *skb, unsignedchar *ptr)
/* */ 642 { 643 structipfrag *fp;
644
645 fp = (structipfrag *) kmalloc(sizeof(structipfrag), GFP_ATOMIC);
646 if (fp == NULL)
647 { 648 printk("IP: frag_create: no memory left !\n");
649 return(NULL);
650 } 651 memset(fp, 0, sizeof(structipfrag));
652
653 /* Fill in the structure. */ 654 fp->offset = offset;
655 fp->end = end;
656 fp->len = end - offset;
657 fp->skb = skb;
658 fp->ptr = ptr;
659
660 return(fp);
661 } 662
663
664 /* 665 * Find the correct entry in the "incomplete datagrams" queue for 666 * this IP datagram, and return the queue entry address if found. 667 */ 668
669 staticstructipq *ip_find(structiphdr *iph)
/* */ 670 { 671 structipq *qp;
672 structipq *qplast;
673
674 cli();
675 qplast = NULL;
676 for(qp = ipqueue; qp != NULL; qplast = qp, qp = qp->next)
677 { 678 if (iph->id== qp->iph->id && iph->saddr == qp->iph->saddr &&
679 iph->daddr == qp->iph->daddr && iph->protocol == qp->iph->protocol)
680 { 681 del_timer(&qp->timer); /* So it doesn't vanish on us. The timer will be reset anyway */ 682 sti();
683 return(qp);
684 } 685 } 686 sti();
687 return(NULL);
688 } 689
690
/*
 * Remove an entry from the "incomplete datagrams" queue, either
 * because we completed, reassembled and processed it, or because
 * it timed out.
 *
 * Frees every fragment skb, the saved MAC and IP headers, and the
 * queue descriptor itself.  Runs with interrupts disabled while the
 * list is manipulated.
 */

static void ip_free(struct ipq *qp)
{
	struct ipfrag *fp;
	struct ipfrag *xp;

	/*
	 * Stop the timer for this entry.
	 */
	del_timer(&qp->timer);

	/* Remove this entry from the "incomplete datagrams" queue. */
	cli();
	if (qp->prev == NULL)
	{
		/* Entry is at the head of the queue. */
		ipqueue = qp->next;
		if (ipqueue != NULL)
			ipqueue->prev = NULL;
	}
	else
	{
		qp->prev->next = qp->next;
		if (qp->next != NULL)
			qp->next->prev = qp->prev;
	}

	/* Release all fragment data. */
	fp = qp->fragments;
	while (fp != NULL)
	{
		xp = fp->next;	/* save successor before freeing fp */
		IS_SKB(fp->skb);
		kfree_skb(fp->skb,FREE_READ);
		kfree_s(fp, sizeof(struct ipfrag));
		fp = xp;
	}

	/* Release the MAC header. */
	kfree_s(qp->mac, qp->maclen);

	/* Release the IP header. */
	kfree_s(qp->iph, qp->ihlen + 8);

	/* Finally, release the queue descriptor itself. */
	kfree_s(qp, sizeof(struct ipq));
	/* printk("ip_free:done\n");*/
	sti();
}
747
748 /* 749 * Oops- a fragment queue timed out. Kill it and send an ICMP reply. 750 */ 751
752 staticvoidip_expire(unsignedlongarg)
/* */ 753 { 754 structipq *qp;
755
756 qp = (structipq *)arg;
757 DPRINTF((DBG_IP, "IP: queue_expire: fragment queue 0x%X timed out!\n", qp));
758
759 /* 760 * Send an ICMP "Fragment Reassembly Timeout" message. 761 */ 762
763 ip_statistics.IpReasmTimeout++;
764 ip_statistics.IpReasmFails++;
765 /* This if is always true... shrug */ 766 if(qp->fragments!=NULL)
767 icmp_send(qp->fragments->skb,ICMP_TIME_EXCEEDED,
768 ICMP_EXC_FRAGTIME, qp->dev);
769
770 /* 771 * Nuke the fragment queue. 772 */ 773 ip_free(qp);
774 } 775
776
777 /* 778 * Add an entry to the 'ipq' queue for a newly received IP datagram. 779 * We will (hopefully :-) receive all other fragments of this datagram 780 * in time, so we just create a queue for this datagram, in which we 781 * will insert the received fragments at their respective positions. 782 */ 783
784 staticstructipq *ip_create(structsk_buff *skb, structiphdr *iph, structdevice *dev)
/* */ 785 { 786 structipq *qp;
787 intmaclen;
788 intihlen;
789
790 qp = (structipq *) kmalloc(sizeof(structipq), GFP_ATOMIC);
791 if (qp == NULL)
792 { 793 printk("IP: create: no memory left !\n");
794 return(NULL);
795 skb->dev = qp->dev;
796 } 797 memset(qp, 0, sizeof(structipq));
798
799 /* 800 * Allocate memory for the MAC header. 801 * 802 * FIXME: We have a maximum MAC address size limit and define 803 * elsewhere. We should use it here and avoid the 3 kmalloc() calls 804 */ 805
806 maclen = ((unsignedlong) iph) - ((unsignedlong) skb->data);
807 qp->mac = (unsignedchar *) kmalloc(maclen, GFP_ATOMIC);
808 if (qp->mac == NULL)
809 { 810 printk("IP: create: no memory left !\n");
811 kfree_s(qp, sizeof(structipq));
812 return(NULL);
813 } 814
815 /* 816 * Allocate memory for the IP header (plus 8 octects for ICMP). 817 */ 818
819 ihlen = (iph->ihl * sizeof(unsignedlong));
820 qp->iph = (structiphdr *) kmalloc(ihlen + 8, GFP_ATOMIC);
821 if (qp->iph == NULL)
822 { 823 printk("IP: create: no memory left !\n");
824 kfree_s(qp->mac, maclen);
825 kfree_s(qp, sizeof(structipq));
826 return(NULL);
827 } 828
829 /* Fill in the structure. */ 830 memcpy(qp->mac, skb->data, maclen);
831 memcpy(qp->iph, iph, ihlen + 8);
832 qp->len = 0;
833 qp->ihlen = ihlen;
834 qp->maclen = maclen;
835 qp->fragments = NULL;
836 qp->dev = dev;
837
838 /* Start a timer for this entry. */ 839 qp->timer.expires = IP_FRAG_TIME; /* about 30 seconds */ 840 qp->timer.data = (unsignedlong) qp; /* pointer to queue */ 841 qp->timer.function = ip_expire; /* expire function */ 842 add_timer(&qp->timer);
843
844 /* Add this entry to the queue. */ 845 qp->prev = NULL;
846 cli();
847 qp->next = ipqueue;
848 if (qp->next != NULL)
849 qp->next->prev = qp;
850 ipqueue = qp;
851 sti();
852 return(qp);
853 } 854
855
856 /* 857 * See if a fragment queue is complete. 858 */ 859
860 staticintip_done(structipq *qp)
/* */ 861 { 862 structipfrag *fp;
863 intoffset;
864
865 /* Only possible if we received the final fragment. */ 866 if (qp->len == 0)
867 return(0);
868
869 /* Check all fragment offsets to see if they connect. */ 870 fp = qp->fragments;
871 offset = 0;
872 while (fp != NULL)
873 { 874 if (fp->offset > offset)
875 return(0); /* fragment(s) missing */ 876 offset = fp->end;
877 fp = fp->next;
878 } 879
880 /* All fragments are present. */ 881 return(1);
882 } 883
884
/*
 * Build a new IP datagram from all its fragments.
 *
 * FIXME: We copy here because we lack an effective way of handling lists
 * of bits on input. Until the new skb data handling is in I'm not going
 * to touch this with a bargepole. This also causes a 4Kish limit on
 * packet sizes.
 *
 * Consumes the queue entry (ip_free) on both success and failure.
 * Returns the reassembled skb, or NULL on allocation failure or a
 * corrupt fragment list.
 */

static struct sk_buff *ip_glue(struct ipq *qp)
{
	struct sk_buff *skb;
	struct iphdr *iph;
	struct ipfrag *fp;
	unsigned char *ptr;
	int count, len;

	/*
	 * Allocate a new buffer for the datagram.
	 */
	len = qp->maclen + qp->ihlen + qp->len;

	if ((skb = alloc_skb(len,GFP_ATOMIC)) == NULL)
	{
		ip_statistics.IpReasmFails++;
		printk("IP: queue_glue: no memory for glueing queue 0x%X\n", (int) qp);
		ip_free(qp);
		return(NULL);
	}

	/* Fill in the basic details. */
	skb->len = (len - qp->maclen);	/* length as seen by IP: header + data */
	skb->h.raw = skb->data;
	skb->free = 1;

	/* Copy the original MAC and IP headers into the new buffer. */
	ptr = (unsigned char *) skb->h.raw;
	memcpy(ptr, ((unsigned char *) qp->mac), qp->maclen);
	ptr += qp->maclen;
	memcpy(ptr, ((unsigned char *) qp->iph), qp->ihlen);
	ptr += qp->ihlen;
	skb->h.raw += qp->maclen;	/* h.raw now points at the IP header */

	count = 0;

	/* Copy the data portions of all fragments into the new buffer. */
	fp = qp->fragments;
	while(fp != NULL)
	{
		/* Guard against a corrupt list overrunning the new buffer. */
		if(count+fp->len>skb->len)
		{
			printk("Invalid fragment list: Fragment over size.\n");
			ip_free(qp);
			kfree_skb(skb,FREE_WRITE);
			ip_statistics.IpReasmFails++;
			return NULL;
		}
		memcpy((ptr + fp->offset), fp->ptr, fp->len);
		count += fp->len;
		fp = fp->next;
	}

	/* We glued together all fragments, so remove the queue entry. */
	ip_free(qp);

	/* Done with all fragments. Fixup the new IP header. */
	iph = skb->h.iph;
	iph->frag_off = 0;
	iph->tot_len = htons((iph->ihl * sizeof(unsigned long)) + count);
	skb->ip_hdr = iph;

	ip_statistics.IpReasmOKs++;
	return(skb);
}
961
962 /* 963 * Process an incoming IP datagram fragment. 964 */ 965
966 staticstructsk_buff *ip_defrag(structiphdr *iph, structsk_buff *skb, structdevice *dev)
/* */ 967 { 968 structipfrag *prev, *next;
969 structipfrag *tfp;
970 structipq *qp;
971 structsk_buff *skb2;
972 unsignedchar *ptr;
973 intflags, offset;
974 inti, ihl, end;
975
976 ip_statistics.IpReasmReqds++;
977
978 /* Find the entry of this IP datagram in the "incomplete datagrams" queue. */ 979 qp = ip_find(iph);
980
981 /* Is this a non-fragmented datagram? */ 982 offset = ntohs(iph->frag_off);
983 flags = offset & ~IP_OFFSET;
984 offset &= IP_OFFSET;
985 if (((flags & IP_MF) == 0) && (offset == 0))
986 { 987 if (qp != NULL)
988 ip_free(qp); /* Huh? How could this exist?? */ 989 return(skb);
990 } 991
992 offset <<= 3; /* offset is in 8-byte chunks */ 993
994 /* 995 * If the queue already existed, keep restarting its timer as long 996 * as we still are receiving fragments. Otherwise, create a fresh 997 * queue entry. 998 */ 999
1000 if (qp != NULL)
1001 {1002 del_timer(&qp->timer);
1003 qp->timer.expires = IP_FRAG_TIME; /* about 30 seconds */1004 qp->timer.data = (unsignedlong) qp; /* pointer to queue */1005 qp->timer.function = ip_expire; /* expire function */1006 add_timer(&qp->timer);
1007 }1008 else1009 {1010 /*1011 * If we failed to create it, then discard the frame1012 */1013 if ((qp = ip_create(skb, iph, dev)) == NULL)
1014 {1015 skb->sk = NULL;
1016 kfree_skb(skb, FREE_READ);
1017 ip_statistics.IpReasmFails++;
1018 returnNULL;
1019 }1020 }1021
1022 /*1023 * Determine the position of this fragment. 1024 */1025
1026 ihl = (iph->ihl * sizeof(unsignedlong));
1027 end = offset + ntohs(iph->tot_len) - ihl;
1028
1029 /*1030 * Point into the IP datagram 'data' part. 1031 */1032
1033 ptr = skb->data + dev->hard_header_len + ihl;
1034
1035 /* 1036 * Is this the final fragment? 1037 */1038
1039 if ((flags & IP_MF) == 0)
1040 qp->len = end;
1041
1042 /*1043 * Find out which fragments are in front and at the back of us1044 * in the chain of fragments so far. We must know where to put1045 * this fragment, right?1046 */1047
1048 prev = NULL;
1049 for(next = qp->fragments; next != NULL; next = next->next)
1050 {1051 if (next->offset > offset)
1052 break; /* bingo! */1053 prev = next;
1054 }1055
1056 /*1057 * We found where to put this one.1058 * Check for overlap with preceeding fragment, and, if needed,1059 * align things so that any overlaps are eliminated.1060 */1061 if (prev != NULL && offset < prev->end)
1062 {1063 i = prev->end - offset;
1064 offset += i; /* ptr into datagram */1065 ptr += i; /* ptr into fragment data */1066 DPRINTF((DBG_IP, "IP: defrag: fixed low overlap %d bytes\n", i));
1067 }1068
1069 /*1070 * Look for overlap with succeeding segments.1071 * If we can merge fragments, do it.1072 */1073
1074 for(; next != NULL; next = tfp)
1075 {1076 tfp = next->next;
1077 if (next->offset >= end)
1078 break; /* no overlaps at all */1079
1080 i = end - next->offset; /* overlap is 'i' bytes */1081 next->len -= i; /* so reduce size of */1082 next->offset += i; /* next fragment */1083 next->ptr += i;
1084
1085 /* 1086 * If we get a frag size of <= 0, remove it and the packet1087 * that it goes with.1088 */1089 if (next->len <= 0)
1090 {1091 DPRINTF((DBG_IP, "IP: defrag: removing frag 0x%X (len %d)\n",
1092 next, next->len));
1093 if (next->prev != NULL)
1094 next->prev->next = next->next;
1095 else1096 qp->fragments = next->next;
1097
1098 if (tfp->next != NULL)
1099 next->next->prev = next->prev;
1100
1101 kfree_skb(next->skb,FREE_READ);
1102 kfree_s(next, sizeof(structipfrag));
1103 }1104 DPRINTF((DBG_IP, "IP: defrag: fixed high overlap %d bytes\n", i));
1105 }1106
1107 /* 1108 * Insert this fragment in the chain of fragments. 1109 */1110
1111 tfp = NULL;
1112 tfp = ip_frag_create(offset, end, skb, ptr);
1113
1114 /*1115 * No memory to save the fragment - so throw the lot1116 */1117
1118 if (!tfp)
1119 {1120 skb->sk = NULL;
1121 kfree_skb(skb, FREE_READ);
1122 returnNULL;
1123 }1124 tfp->prev = prev;
1125 tfp->next = next;
1126 if (prev != NULL)
1127 prev->next = tfp;
1128 else1129 qp->fragments = tfp;
1130
1131 if (next != NULL)
1132 next->prev = tfp;
1133
1134 /*1135 * OK, so we inserted this new fragment into the chain.1136 * Check if we now have a full IP datagram which we can1137 * bump up to the IP layer...1138 */1139
1140 if (ip_done(qp))
1141 {1142 skb2 = ip_glue(qp); /* glue together the fragments */1143 return(skb2);
1144 }1145 return(NULL);
1146 }1147
1148
/*
 * This IP datagram is too large to be sent in one piece. Break it up into
 * smaller pieces (each of size equal to the MAC header plus IP header plus
 * a block of the data of the original IP data part) that will yet fit in a
 * single device frame, and queue such a frame for sending by calling the
 * ip_queue_xmit(). Note that this is recursion, and bad things will happen
 * if this function causes a loop...
 *
 * Yes this is inefficient, feel free to submit a quicker one.
 *
 * **Protocol Violation**
 * We copy all the options to each fragment. !FIXME!
 */

void ip_fragment(struct sock *sk, struct sk_buff *skb, struct device *dev, int is_frag)
{
	struct iphdr *iph;
	unsigned char *raw;
	unsigned char *ptr;
	struct sk_buff *skb2;
	int left, mtu, hlen, len;
	int offset;

	/*
	 * Point into the IP datagram header.
	 */
	raw = skb->data;
	iph = (struct iphdr *) (raw + dev->hard_header_len);

	skb->ip_hdr = iph;

	/*
	 * Setup starting values.
	 */
	hlen = (iph->ihl * sizeof(unsigned long));
	left = ntohs(iph->tot_len) - hlen;	/* Space per frame */
	hlen += dev->hard_header_len;		/* Total header size */
	mtu = (dev->mtu - hlen);		/* Size of data space */
	ptr = (raw + hlen);			/* Where to start from */

	DPRINTF((DBG_IP, "IP: Fragmentation Desired\n"));
	DPRINTF((DBG_IP, "    DEV=%s, MTU=%d, LEN=%d SRC=%s",
		dev->name, dev->mtu, left, in_ntoa(iph->saddr)));
	DPRINTF((DBG_IP, " DST=%s\n", in_ntoa(iph->daddr)));

	/*
	 * Check for any "DF" flag. [DF means do not fragment]
	 */
	if (ntohs(iph->frag_off) & IP_DF)
	{
		DPRINTF((DBG_IP, "IP: Fragmentation Desired, but DF set !\n"));
		DPRINTF((DBG_IP, "    DEV=%s, MTU=%d, LEN=%d SRC=%s",
			dev->name, dev->mtu, left, in_ntoa(iph->saddr)));
		DPRINTF((DBG_IP, " DST=%s\n", in_ntoa(iph->daddr)));

		ip_statistics.IpFragFails++;
		/* DF forbids fragmenting: tell the sender instead. */
		icmp_send(skb,ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, dev);
		return;
	}

	/*
	 * The protocol doesn't seem to say what to do in the case that the
	 * frame + options doesn't fit the mtu. As it used to fall down dead
	 * in this case we were fortunate it didn't happen
	 */
	if(mtu<8)
	{
		/* It's wrong but its better than nothing */
		icmp_send(skb,ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED,dev);
		ip_statistics.IpFragFails++;
		return;
	}

	/*
	 * Fragment the datagram.
	 */

	/*
	 * The initial offset is 0 for a complete frame. When
	 * fragmenting fragments its wherever this one starts.
	 */
	if (is_frag & 2)
		offset = (ntohs(iph->frag_off) & 0x1fff) << 3;
	else
		offset = 0;

	/*
	 * Keep copying data until we run out.
	 */
	while(left > 0)
	{
		len = left;
		/* IF: it doesn't fit, use 'mtu' - the data space left */
		if (len > mtu)
			len = mtu;
		/* IF: we are not sending upto and including the packet end
		   then align the next start on an eight byte boundary */
		if (len < left)
		{
			len/=8;
			len*=8;
		}
		DPRINTF((DBG_IP,"IP: frag: creating fragment of %d bytes (%d total)\n",
			len, len + hlen));

		/*
		 * Allocate buffer.
		 */
		if ((skb2 = alloc_skb(len + hlen,GFP_ATOMIC)) == NULL)
		{
			printk("IP: frag: no memory for new fragment!\n");
			ip_statistics.IpFragFails++;
			return;
		}

		/*
		 * Set up data on packet
		 */
		skb2->arp = skb->arp;
		skb2->free = skb->free;
		skb2->len = len + hlen;
		skb2->h.raw=(char *) skb2->data;

		/*
		 * Charge the memory for the fragment to any owner
		 * it might posess
		 */
		if (sk)
			sk->wmem_alloc += skb2->mem_len;

		/*
		 * Copy the packet header into the new buffer.
		 */
		memcpy(skb2->h.raw, raw, hlen);

		/*
		 * Copy a block of the IP datagram.
		 */
		memcpy(skb2->h.raw + hlen, ptr, len);
		left -= len;

		skb2->h.raw+=dev->hard_header_len;	/* h.raw now points at the IP header */

		/*
		 * Fill in the new header fields.
		 */
		iph = (struct iphdr *)(skb2->h.raw/*+dev->hard_header_len*/);
		iph->frag_off = htons((offset >> 3));
		/*
		 * Added AC : If we are fragmenting a fragment thats not the
		 * last fragment then keep MF on each bit
		 */
		if (left > 0 || (is_frag & 1))
			iph->frag_off |= htons(IP_MF);
		ptr += len;
		offset += len;

		/*
		 * Put this fragment into the sending queue.
		 * NOTE: recursion hazard - ip_queue_xmit may call back here.
		 */
		ip_statistics.IpFragCreates++;

		ip_queue_xmit(sk, dev, skb2, 1);
	}
	ip_statistics.IpFragOKs++;
}
1328
1329
#ifdef CONFIG_IP_FORWARD

/*
 *	Forward an IP datagram to its next destination.
 *
 *	Called from ip_rcv() when the destination address is not one of
 *	ours. The caller keeps ownership of (and frees) the original skb;
 *	we copy the frame into a fresh buffer before transmitting it.
 *
 *	skb	the received frame (IP header at skb->h.iph)
 *	dev	device the frame arrived on
 *	is_frag	fragment flags as tagged by ip_rcv (bit 0: MF set,
 *		bit 1: non-zero fragment offset)
 */

static void ip_forward(struct sk_buff *skb, struct device *dev, int is_frag)
{
	struct device *dev2;	/* Output device */
	struct iphdr *iph;	/* Our header */
	struct sk_buff *skb2;	/* Output packet */
	struct rtable *rt;	/* Route we use */
	unsigned char *ptr;	/* Data pointer */
	unsigned long raddr;	/* Router IP address */

	/*
	 *	Only forward packets that were fired at us when we are in promiscuous
	 *	mode. In standard mode we rely on the driver to filter for us.
	 */

	if(dev->flags&IFF_PROMISC)
	{
		/* NOTE(review): &skb[1] presumably addresses the MAC header
		   stored directly after the sk_buff header -- confirm against
		   this kernel's skb layout. */
		if(memcmp((char *)&skb[1],dev->dev_addr,dev->addr_len))
			return;
	}

	/*
	 *	According to the RFC, we must first decrease the TTL field. If
	 *	that reaches zero, we must reply an ICMP control message telling
	 *	that the packet's lifetime expired.
	 *
	 *	Exception:
	 *	We may not generate an ICMP for an ICMP. icmp_send does the
	 *	enforcement of this so we can forget it here. It is however
	 *	sometimes VERY important.
	 */

	iph = skb->h.iph;

	/*
	 *	BUG FIX: ttl is an unsigned char, so the old sequence
	 *	"iph->ttl--; if (iph->ttl <= 0)" let a frame that arrived
	 *	with TTL 0 wrap around to 255 and be forwarded with a fresh
	 *	lifetime. Test before decrementing instead: a frame must
	 *	have TTL >= 2 to survive this hop.
	 */
	if (iph->ttl <= 1)
	{
		DPRINTF((DBG_IP, "\nIP: *** datagram expired: TTL=0 (ignored) ***\n"));
		DPRINTF((DBG_IP, " SRC = %s ", in_ntoa(iph->saddr)));
		DPRINTF((DBG_IP, " DST = %s (ignored)\n", in_ntoa(iph->daddr)));

		/* Tell the sender its packet died... */
		icmp_send(skb, ICMP_TIME_EXCEEDED, ICMP_EXC_TTL, dev);
		return;
	}
	iph->ttl--;

	/*
	 *	Re-compute the IP header checksum.
	 *	This is inefficient. We know what has happened to the header
	 *	and could thus adjust the checksum as Phil Karn does in KA9Q
	 */

	ip_send_check(iph);

	/*
	 *	OK, the packet is still valid. Fetch its destination address,
	 *	and give it to the IP sender for further processing.
	 */

	rt = ip_rt_route(iph->daddr, NULL, NULL);
	if (rt == NULL)
	{
		DPRINTF((DBG_IP, "\nIP: *** routing (phase I) failed ***\n"));

		/*
		 *	Tell the sender its packet cannot be delivered. Again
		 *	ICMP is screened later.
		 */
		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_NET_UNREACH, dev);
		return;
	}

	/*
	 *	Gosh. Not only is the packet valid; we even know how to
	 *	forward it onto its final destination. Can we say this
	 *	is being plain lucky?
	 *	If the router told us that there is no GW, use the dest.
	 *	IP address itself- we seem to be connected directly...
	 */

	raddr = rt->rt_gateway;

	if (raddr != 0)
	{
		/*
		 *	There is a gateway so find the correct route for it.
		 *	Gateways cannot in turn be gatewayed.
		 */
		rt = ip_rt_route(raddr, NULL, NULL);
		if (rt == NULL)
		{
			DPRINTF((DBG_IP, "\nIP: *** routing (phase II) failed ***\n"));

			/*
			 *	Tell the sender its packet cannot be delivered...
			 */
			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, dev);
			return;
		}
		if (rt->rt_gateway != 0)
			raddr = rt->rt_gateway;
	}
	else
		raddr = iph->daddr;

	/*
	 *	Having picked a route we can now send the frame out.
	 */

	dev2 = rt->rt_dev;

	/*
	 *	In IP you never forward a frame on the interface that it arrived
	 *	upon. We should generate an ICMP HOST REDIRECT giving the route
	 *	we calculated.
	 *	For now just dropping the packet is an acceptable compromise.
	 */

	if (dev == dev2)
		return;

	/*
	 *	We now allocate a new buffer, and copy the datagram into it.
	 *	If the indicated interface is up and running, kick it.
	 */

	DPRINTF((DBG_IP, "\nIP: *** fwd %s -> ", in_ntoa(iph->saddr)));
	DPRINTF((DBG_IP, "%s (via %s), LEN=%d\n",
			in_ntoa(raddr), dev2->name, skb->len));

	if (dev2->flags & IFF_UP)
	{

		/*
		 *	Current design decrees we copy the packet. For identical header
		 *	lengths we could avoid it. The new skb code will let us push
		 *	data so the problem goes away then.
		 */

		skb2 = alloc_skb(dev2->hard_header_len + skb->len, GFP_ATOMIC);
		/*
		 *	This is rare and since IP is tolerant of network failures
		 *	quite harmless.
		 */
		if (skb2 == NULL)
		{
			printk("\nIP: No memory available for IP forward\n");
			return;
		}
		ptr = skb2->data;
		skb2->free = 1;
		skb2->len = skb->len + dev2->hard_header_len;
		skb2->h.raw = ptr;

		/*
		 *	Copy the packet data into the new buffer.
		 */
		memcpy(ptr + dev2->hard_header_len, skb->h.raw, skb->len);

		/* Now build the MAC header. */
		(void) ip_send(skb2, raddr, skb->len, dev2, dev2->pa_addr);

		ip_statistics.IpForwDatagrams++;

		/*
		 *	See if it needs fragmenting. Note in ip_rcv we tagged
		 *	the fragment type. This must be right so that
		 *	the fragmenter does the right thing.
		 */

		if(skb2->len > dev2->mtu)
		{
			/* ip_fragment queues copies; free our copy afterwards. */
			ip_fragment(NULL,skb2,dev2, is_frag);
			kfree_skb(skb2,FREE_WRITE);
		}
		else
		{
			/*
			 *	Map service types to priority. We lie about
			 *	throughput being low priority, but its a good
			 *	choice to help improve general usage.
			 */
			if(iph->tos & IPTOS_LOWDELAY)
				dev_queue_xmit(skb2, dev2, SOPRI_INTERACTIVE);
			else if(iph->tos & IPTOS_THROUGHPUT)
				dev_queue_xmit(skb2, dev2, SOPRI_BACKGROUND);
			else
				dev_queue_xmit(skb2, dev2, SOPRI_NORMAL);
		}
	}
}

#endif
1532 /*1533 * This function receives all incoming IP datagrams. 1534 */1535
1536 intip_rcv(structsk_buff *skb, structdevice *dev, structpacket_type *pt)
/* */1537 {1538 structiphdr *iph = skb->h.iph;
1539 unsignedcharhash;
1540 unsignedcharflag = 0;
1541 unsignedcharopts_p = 0; /* Set iff the packet has options. */1542 structinet_protocol *ipprot;
1543 staticstructoptionsopt; /* since we don't use these yet, and they1544 take up stack space. */1545 intbrd;
1546 intis_frag=0;
1547
1548
1549 ip_statistics.IpInReceives++;
1550
1551 DPRINTF((DBG_IP, "<<\n"));
1552
1553 /*1554 * Tag the ip header of this packet so we can find it1555 */1556
1557 skb->ip_hdr = iph;
1558
1559 /*1560 * Is the datagram acceptable? 1561 *1562 * 1. Length at least the size of an ip header1563 * 2. Version of 41564 * 3. Checksums correctly. [Speed optimisation for later, skip loopback checksums]1565 * (4. We ought to check for IP multicast addresses and undefined types.. does this matter ?)1566 */1567
1568 if (skb->len<sizeof(structiphdr) || iph->ihl<5 || iph->version != 4 || ip_fast_csum((unsignedchar *)iph, iph->ihl) !=0)
1569 {1570 ip_statistics.IpInHdrErrors++;
1571 DPRINTF((DBG_IP, "\nIP: *** datagram error ***\n"));
1572 DPRINTF((DBG_IP, " SRC = %s ", in_ntoa(iph->saddr)));
1573 DPRINTF((DBG_IP, " DST = %s (ignored)\n", in_ntoa(iph->daddr)));
1574 kfree_skb(skb, FREE_WRITE);
1575 return(0);
1576 }1577
1578 /*1579 * Our transport medium may have padded the buffer out. Now we know it1580 * is IP we can trim to the true length of the frame.1581 */1582
1583 skb->len=ntohs(iph->tot_len);
1584
1585 /*1586 * Next anaylse the packet for options. Studies show under one packet in1587 * a thousand have options....1588 */1589
1590 if (iph->ihl != 5)
1591 {/* Fast path for the typical optionless IP packet. */1592 ip_print(iph); /* Bogus, only for debugging. */1593 memset((char *) &opt, 0, sizeof(opt));
1594 if (do_options(iph, &opt) != 0)
1595 return 0;
1596 opts_p = 1;
1597 }1598
1599 /*1600 * Remember if the frame is fragmented.1601 */1602
1603 if (iph->frag_off & 0x0020)
1604 is_frag|=1;
1605
1606 /*1607 * Last fragment ?1608 */1609
1610 if (ntohs(iph->frag_off) & 0x1fff)
1611 is_frag|=2;
1612
1613 /* 1614 * Do any IP forwarding required. chk_addr() is expensive -- avoid it someday. 1615 *1616 * This is inefficient. While finding out if it is for us we could also compute1617 * the routing table entry. This is where the great unified cache theory comes1618 * in as and when someone impliments it1619 */1620
1621 if ((brd = ip_chk_addr(iph->daddr)) == 0)
1622 {1623
1624 /*1625 * The packet is for another target. Forward the frame1626 */1627
1628 #ifdefCONFIG_IP_FORWARD1629 ip_forward(skb, dev, is_frag);
1630 #else1631 printk("Machine %x tried to use us as a forwarder to %x but we have forwarding disabled!\n",
1632 iph->saddr,iph->daddr);
1633 ip_statistics.IpInAddrErrors++;
1634 #endif1635 /*1636 * The forwarder is inefficient and copies the packet. We 1637 * free the original now.1638 */1639
1640 kfree_skb(skb, FREE_WRITE);
1641 return(0);
1642 }1643
1644 /*1645 * Reassemble IP fragments. 1646 */1647
1648 if(is_frag)
1649 {1650 #ifdefCONFIG_IP_DEFRAG1651 /* Defragment. Obtain the complete packet if there is one */1652 skb=ip_defrag(iph,skb,dev);
1653 if(skb==NULL)
1654 return 0;
1655 iph=skb->h.iph;
1656 #else1657 printk("\nIP: *** datagram fragmentation not yet implemented ***\n");
1658 printk(" SRC = %s ", in_ntoa(iph->saddr));
1659 printk(" DST = %s (ignored)\n", in_ntoa(iph->daddr));
1660 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PROT_UNREACH, dev);
1661 kfree_skb(skb, FREE_WRITE);
1662 return(0);
1663 #endif1664 }1665
1666 /*1667 * Point into the IP datagram, just past the header. 1668 */1669
1670 skb->ip_hdr = iph;
1671 skb->h.raw += iph->ihl*4;
1672
1673 /*1674 * skb->h.raw now points at the protocol beyond the IP header.1675 */1676
1677 hash = iph->protocol & (MAX_INET_PROTOS -1);
1678 for (ipprot = (structinet_protocol *)inet_protos[hash];ipprot != NULL;ipprot=(structinet_protocol *)ipprot->next)
1679 {1680 structsk_buff *skb2;
1681
1682 if (ipprot->protocol != iph->protocol)
1683 continue;
1684 DPRINTF((DBG_IP, "Using protocol = %X:\n", ipprot));
1685 print_ipprot(ipprot);
1686
1687 /*1688 * See if we need to make a copy of it. This will1689 * only be set if more than one protocol wants it. 1690 * and then not for the last one.1691 *1692 * This is an artifact of poor upper protocol design. 1693 * Because the upper protocols damage the actual packet1694 * we must do copying. In actual fact it's even worse1695 * than this as TCP may hold on to the buffer.1696 */1697 if (ipprot->copy)
1698 {1699 #if 0
1700 skb2 = alloc_skb(skb->mem_len-sizeof(structsk_buff), GFP_ATOMIC);
1701 if (skb2 == NULL)
1702 continue;
1703 memcpy(skb2, skb, skb2->mem_len);
1704 skb2->ip_hdr = (structiphdr *)(
1705 (unsignedlong)skb2 +
1706 (unsignedlong) skb->ip_hdr -
1707 (unsignedlong)skb);
1708 skb2->h.raw = (unsignedchar *)(
1709 (unsignedlong)skb2 +
1710 (unsignedlong) skb->h.raw -
1711 (unsignedlong)skb);
1712 skb2->free=1;
1713 #else1714 skb2 = skb_clone(skb, GFP_ATOMIC);
1715 if(skb2==NULL)
1716 continue;
1717 #endif1718 }1719 else1720 {1721 skb2 = skb;
1722 }1723 flag = 1;
1724
1725 /*1726 * Pass on the datagram to each protocol that wants it,1727 * based on the datagram protocol. We should really1728 * check the protocol handler's return values here...1729 */1730 ipprot->handler(skb2, dev, opts_p ? &opt : 0, iph->daddr,
1731 (ntohs(iph->tot_len) - (iph->ihl * 4)),
1732 iph->saddr, 0, ipprot);
1733
1734 }1735
1736 /*1737 * All protocols checked.1738 * If this packet was a broadcast, we may *not* reply to it, since that1739 * causes (proven, grin) ARP storms and a leakage of memory (i.e. all1740 * ICMP reply messages get queued up for transmission...)1741 */1742
1743 if (!flag)
1744 {1745 if (brd != IS_BROADCAST)
1746 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PROT_UNREACH, dev);
1747 kfree_skb(skb, FREE_WRITE);
1748 }1749
1750 return(0);
1751 }1752
1753
/*
 *	Queues a packet to be sent, and starts the transmitter
 *	if necessary. if free = 1 then we free the block after
 *	transmit, otherwise we don't.
 *	This routine also needs to put in the total length,
 *	and compute the checksum
 *
 *	sk	owning socket, or NULL (then the buffer is always freed)
 *	dev	output device; must not be NULL
 *	skb	frame to send; IP header sits just past the hard header
 *	free	1 = free after transmit, 0 = keep on sk's send queue
 *		(TCP retransmission)
 */

void ip_queue_xmit(struct sock *sk, struct device *dev,
	      struct sk_buff *skb, int free)
{
	struct iphdr *iph;
	unsigned char *ptr;

	/* All buffers without an owner socket get freed */
	if (sk == NULL)
		free = 1;

	/* Sanity check */
	if (dev == NULL)
	{
		printk("IP: ip_queue_xmit dev = NULL\n");
		return;
	}

	IS_SKB(skb);

	/*
	 *	Do some book-keeping in the packet for later
	 */

	skb->free = free;
	skb->dev = dev;
	skb->when = jiffies;	/* Timestamp for RTT / retransmit timing */

	DPRINTF((DBG_IP, ">>\n"));

	/*
	 *	Find the IP header and set the length. This is bad
	 *	but once we get the skb data handling code in the
	 *	hardware will push its header sensibly and we will
	 *	set skb->ip_hdr to avoid this mess and the fixed
	 *	header length problem
	 */

	ptr = skb->data;
	ptr += dev->hard_header_len;
	iph = (struct iphdr *)ptr;
	skb->ip_hdr = iph;
	/* NOTE(review): htons() is the conventional direction here; ntohs()
	   is the identical byte-swap so the result is the same. */
	iph->tot_len = ntohs(skb->len-dev->hard_header_len);

	/*
	 *	Do we need to fragment. Again this is inefficient.
	 *	We need to somehow lock the original buffer and use
	 *	bits of it.
	 */

	if(skb->len > dev->mtu)
	{
		/* ip_fragment queues the pieces itself; drop the original. */
		ip_fragment(sk,skb,dev,0);
		IS_SKB(skb);
		kfree_skb(skb,FREE_WRITE);
		return;
	}

	/*
	 *	Add an IP checksum
	 */

	ip_send_check(iph);

	/*
	 *	Print the frame when debugging
	 */
	ip_print(iph);

	/*
	 *	More debugging. You cannot queue a packet already on a list
	 *	Spot this and moan loudly.
	 */
	if (skb->next != NULL)
	{
		printk("ip_queue_xmit: next != NULL\n");
		skb_unlink(skb);
	}

	/*
	 *	If a sender wishes the packet to remain unfreed
	 *	we add it to his send queue. This arguably belongs
	 *	in the TCP level since nobody elses uses it. BUT
	 *	remember IPng might change all the rules.
	 */

	if (!free)
	{
		unsigned long flags;
		/* The socket now has more outstanding blocks */

		sk->packets_out++;

		/* Protect the list for a moment: the send queue is also
		   touched from interrupt context. */
		save_flags(flags);
		cli();

		if (skb->link3 != NULL)
		{
			printk("ip.c: link3 != NULL\n");
			skb->link3 = NULL;
		}
		/* Append to the socket's singly linked send queue
		   (link3 chain, head/tail pointers). */
		if (sk->send_head == NULL)
		{
			sk->send_tail = skb;
			sk->send_head = skb;
		}
		else
		{
			sk->send_tail->link3 = skb;
			sk->send_tail = skb;
		}
		/* skb->link3 is NULL */

		/* Interrupt restore */
		restore_flags(flags);
		/* Set the IP write timeout to the round trip time for the packet.
		   If an acknowledge has not arrived by then we may wish to act */
		reset_timer(sk, TIME_WRITE, sk->rto);
	}
	else
		/* Remember who owns the buffer */
		skb->sk = sk;

	/*
	 *	If the indicated interface is up and running, send the packet.
	 */
	ip_statistics.IpOutRequests++;

	if (dev->flags & IFF_UP)
	{
		/*
		 *	If we have an owner use its priority setting,
		 *	otherwise use NORMAL
		 */

		if (sk != NULL)
		{
			dev_queue_xmit(skb, dev, sk->priority);
		}
		else
		{
			dev_queue_xmit(skb, dev, SOPRI_NORMAL);
		}
	}
	else
	{
		/* Device down: count the discard. Kept (non-free) buffers
		   stay on the socket queue for a later retransmit. */
		ip_statistics.IpOutDiscards++;
		if (free)
			kfree_skb(skb, FREE_WRITE);
	}
}
1914
/*
 *	A socket has timed out on its send queue and wants to do a
 *	little retransmitting. Currently this means TCP.
 *
 *	Walks sk's send queue (link3 chain) and re-queues each frame on
 *	its device, unless the device still holds it. Stops after the
 *	first frame when 'all' is zero, or once sk->retransmits exceeds
 *	the congestion window. Does NOT back the timer off -- see
 *	ip_retransmit() for that.
 */

void ip_do_retransmit(struct sock *sk, int all)
{
	struct sk_buff * skb;
	struct proto *prot;
	struct device *dev;
	int retransmits;

	/* NOTE(review): 'prot' and the local 'retransmits' are loaded but
	   never used/stored back -- sk->prot and sk->retransmits are read
	   directly below. Both locals look vestigial. */
	prot = sk->prot;
	skb = sk->send_head;
	retransmits = sk->retransmits;

	while (skb != NULL)
	{
		dev = skb->dev;
		IS_SKB(skb);
#if 0
		/********** THIS IS NOW DONE BY THE DEVICE LAYER **********/
		/*
		 *	The rebuild_header function sees if the ARP is done.
		 *	If not it sends a new ARP request, and if so it builds
		 *	the header. It isn't really needed here, and with the
		 *	new ARP pretty much will not happen.
		 */

		if (!skb->arp)
		{
			if (dev->rebuild_header(skb->data, dev, skb->raddr, NULL))
			{
				if (!all)
					break;
				skb = skb->link3;
				continue;
			}
		}
#endif
		/* Restamp: this send attempt is the new RTT reference. */
		skb->when = jiffies;

		/*
		 *	If the interface is (still) up and running, kick it.
		 */

		if (dev->flags & IFF_UP)
		{
			/*
			 *	If the packet is still being sent by the device/protocol
			 *	below then don't retransmit. This is both needed, and good -
			 *	especially with connected mode AX.25 where it stops resends
			 *	occuring of an as yet unsent anyway frame!
			 *	We still add up the counts as the round trip time wants
			 *	adjusting.
			 */
			if (sk && !skb_device_locked(skb))
			{
				/* Remove it from any existing driver queue first! */
				skb_unlink(skb);
				/* Now queue it */
				ip_statistics.IpOutRequests++;
				dev_queue_xmit(skb, dev, sk->priority);
			}
		}

		/*
		 *	Count retransmissions
		 */
		retransmits++;
		sk->prot->retransmits ++;

		/*
		 *	Only one retransmit requested.
		 */
		if (!all)
			break;

		/*
		 *	This should cut it off before we send too many packets.
		 */
		if (sk->retransmits > sk->cong_window)
			break;
		skb = skb->link3;
	}
}
2002 /*2003 * This is the normal code called for timeouts. It does the retransmission2004 * and then does backoff. ip_do_retransmit is separated out because2005 * tcp_ack needs to send stuff from the retransmit queue without2006 * initiating a backoff.2007 */2008
2009 voidip_retransmit(structsock *sk, intall)
/* */2010 {2011 ip_do_retransmit(sk, all);
2012
2013 /*2014 * Increase the timeout each time we retransmit. Note that2015 * we do not increase the rtt estimate. rto is initialized2016 * from rtt, but increases here. Jacobson (SIGCOMM 88) suggests2017 * that doubling rto each time is the least we can get away with.2018 * In KA9Q, Karn uses this for the first few times, and then2019 * goes to quadratic. netBSD doubles, but only goes up to *64,2020 * and clamps at 1 to 64 sec afterwards. Note that 120 sec is2021 * defined in the protocol as the maximum possible RTT. I guess2022 * we'll have to use something other than TCP to talk to the2023 * University of Mars.2024 */2025
2026 sk->retransmits++;
2027 sk->backoff++;
2028 sk->rto = min(sk->rto << 1, 120*HZ);
2029 reset_timer(sk, TIME_WRITE, sk->rto);
2030 }2031
2032 /*2033 * Socket option code for IP. This is the end of the line after any TCP,UDP etc options on2034 * an IP socket.2035 *2036 * We impliment IP_TOS (type of service), IP_TTL (time to live).2037 *2038 * Next release we will sort out IP_OPTIONS since for some people are kind of important.2039 */2040
2041 intip_setsockopt(structsock *sk, intlevel, intoptname, char *optval, intoptlen)
/* */2042 {2043 intval,err;
2044
2045 if (optval == NULL)
2046 return(-EINVAL);
2047
2048 err=verify_area(VERIFY_READ, optval, sizeof(int));
2049 if(err)
2050 returnerr;
2051
2052 val = get_fs_long((unsignedlong *)optval);
2053
2054 if(level!=SOL_IP)
2055 return -EOPNOTSUPP;
2056
2057 switch(optname)
2058 {2059 caseIP_TOS:
2060 if(val<0||val>255)
2061 return -EINVAL;
2062 sk->ip_tos=val;
2063 return 0;
2064 caseIP_TTL:
2065 if(val<1||val>255)
2066 return -EINVAL;
2067 sk->ip_ttl=val;
2068 return 0;
2069 /* IP_OPTIONS and friends go here eventually */2070 default:
2071 return(-ENOPROTOOPT);
2072 }2073 }2074
2075 /*2076 * Get the options. Note for future reference. The GET of IP options gets the2077 * _received_ ones. The set sets the _sent_ ones.2078 */2079
2080 intip_getsockopt(structsock *sk, intlevel, intoptname, char *optval, int *optlen)
/* */2081 {2082 intval,err;
2083
2084 if(level!=SOL_IP)
2085 return -EOPNOTSUPP;
2086
2087 switch(optname)
2088 {2089 caseIP_TOS:
2090 val=sk->ip_tos;
2091 break;
2092 caseIP_TTL:
2093 val=sk->ip_ttl;
2094 break;
2095 default:
2096 return(-ENOPROTOOPT);
2097 }2098 err=verify_area(VERIFY_WRITE, optlen, sizeof(int));
2099 if(err)
2100 returnerr;
2101 put_fs_long(sizeof(int),(unsignedlong *) optlen);
2102
2103 err=verify_area(VERIFY_WRITE, optval, sizeof(int));
2104 if(err)
2105 returnerr;
2106 put_fs_long(val,(unsignedlong *)optval);
2107
2108 return(0);
2109 }2110
/*
 *	IP protocol layer initialiser
 *
 *	Packet-type descriptor handed to the device layer so every
 *	incoming IP frame is routed to ip_rcv(). The 'type' field is
 *	left zero here and filled in by ip_init() at run time.
 */

static struct packet_type ip_packet_type =
{
	0,		/* MUTTER ntohs(ETH_P_IP),*/
	0,		/* copy: we take the frame, no duplicate needed */
	ip_rcv,		/* receive handler for all IP datagrams */
	NULL,		/* no private data */
	NULL,		/* next: linked in by dev_add_pack */
};
2123
2124
/*
 *	IP registers the packet type and then calls the subprotocol initialisers
 */

void ip_init(void)
{
	/* Fill in the type at run time -- presumably because htons() could
	   not be used in the static initialiser (see the "MUTTER" comment
	   on ip_packet_type). */
	ip_packet_type.type=htons(ETH_P_IP);
	dev_add_pack(&ip_packet_type);
	/* Subprotocol initialisers, not hooked up yet: */
/*	ip_raw_init();
	ip_packet_init();
	ip_tcp_init();
	ip_udp_init();*/
}