1 /* 2 * INET An implementation of the TCP/IP protocol suite for the LINUX 3 * operating system. INET is implemented using the BSD Socket 4 * interface as the means of communication with the user level. 5 * 6 * The Internet Protocol (IP) module. 7 * 8 * Version: @(#)ip.c 1.0.16b 9/1/93 9 * 10 * Authors: Ross Biro, <bir7@leland.Stanford.Edu> 11 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> 12 * Donald Becker, <becker@super.org> 13 * Alan Cox, <gw4pts@gw4pts.ampr.org> 14 * 15 * Fixes: 16 * Alan Cox : Commented a couple of minor bits of surplus code 17 * Alan Cox : Undefining IP_FORWARD doesn't include the code 18 * (just stops a compiler warning). 19 * Alan Cox : Frames with >=MAX_ROUTE record routes, strict routes or loose routes 20 * are junked rather than corrupting things. 21 * Alan Cox : Frames to bad broadcast subnets are dumped 22 * We used to process them non broadcast and 23 * boy could that cause havoc. 24 * Alan Cox : ip_forward sets the free flag on the 25 * new frame it queues. Still crap because 26 * it copies the frame but at least it 27 * doesn't eat memory too. 28 * Alan Cox : Generic queue code and memory fixes. 29 * Fred Van Kempen : IP fragment support (borrowed from NET2E) 30 * Gerhard Koerting: Forward fragmented frames correctly. 31 * Gerhard Koerting: Fixes to my fix of the above 8-). 32 * Gerhard Koerting: IP interface addressing fix. 33 * Linus Torvalds : More robustness checks 34 * Alan Cox : Even more checks: Still not as robust as it ought to be 35 * Alan Cox : Save IP header pointer for later 36 * Alan Cox : ip option setting 37 * Alan Cox : Use ip_tos/ip_ttl settings 38 * Alan Cox : Fragmentation bogosity removed 39 * (Thanks to Mark.Bush@prg.ox.ac.uk) 40 * Dmitry Gorodchanin : Send of a raw packet crash fix. 41 * Alan Cox : Silly ip bug when an overlength 42 * fragment turns up. Now frees the 43 * queue. 44 * Linus Torvalds/ : Memory leakage on fragmentation 45 * Alan Cox : handling. 
46 * Gerhard Koerting: Forwarding uses IP priority hints 47 * Teemu Rantanen : Fragment problems. 48 * Alan Cox : General cleanup, comments and reformat 49 * Alan Cox : SNMP statistics 50 * Alan Cox : BSD address rule semantics. Also see 51 * UDP as there is a nasty checksum issue 52 * if you do things the wrong way. 53 * 54 * To Fix: 55 * IP option processing is mostly not needed. ip_forward needs to know about routing rules 56 * and time stamp but that's about all. 57 * 58 * This program is free software; you can redistribute it and/or 59 * modify it under the terms of the GNU General Public License 60 * as published by the Free Software Foundation; either version 61 * 2 of the License, or (at your option) any later version. 62 */ 63 #include <asm/segment.h>
64 #include <asm/system.h>
65 #include <linux/types.h>
66 #include <linux/kernel.h>
67 #include <linux/sched.h>
68 #include <linux/string.h>
69 #include <linux/errno.h>
70 #include <linux/socket.h>
71 #include <linux/sockios.h>
72 #include <linux/in.h>
73 #include <linux/inet.h>
74 #include <linux/netdevice.h>
75 #include <linux/etherdevice.h>
76 #include "snmp.h"
77 #include "ip.h"
78 #include "protocol.h"
79 #include "route.h"
80 #include "tcp.h"
81 #include <linux/skbuff.h>
82 #include "sock.h"
83 #include "arp.h"
84 #include "icmp.h"
85
86 #defineCONFIG_IP_FORWARD 87 #defineCONFIG_IP_DEFRAG 88
89 externintlast_retran;
90 externvoidsort_send(structsock *sk);
91
92 #definemin(a,b) ((a)<(b)?(a):(b))
93
94 /* 95 * SNMP management statistics 96 */ 97
98 structip_mibip_statistics={1,64,}; /* Forwarding=Yes, Default TTL=64 */ 99
100 /* 101 * Print an IP packet for debugging purposes. 102 * 103 * This function is exported for the IP 104 * upper layers to use also. 105 */ 106
107 voidip_print(conststructiphdr *ip)
/* */ 108 { 109 unsignedcharbuff[32];
110 unsignedchar *ptr;
111 intaddr;
112 intlen;
113 inti;
114
115 /* Are we debugging IP frames */ 116
117 if (inet_debug != DBG_IP)
118 return;
119
120 /* Dump the IP header. */ 121 printk("IP: ihl=%d, version=%d, tos=%d, tot_len=%d\n",
122 ip->ihl, ip->version, ip->tos, ntohs(ip->tot_len));
123 printk(" id=%X, ttl=%d, prot=%d, check=%X\n",
124 ip->id, ip->ttl, ip->protocol, ip->check);
125 printk(" frag_off=%d\n", ip->frag_off);
126 printk(" soucre=%s ", in_ntoa(ip->saddr));
127 printk("dest=%s\n", in_ntoa(ip->daddr));
128 printk(" ----\n");
129
130 /* Dump the data. */ 131 ptr = (unsignedchar *)(ip + 1);
132 addr = 0;
133 len = ntohs(ip->tot_len) - (4 * ip->ihl);
134
135 while (len > 0)
136 { 137 printk(" %04X: ", addr);
138 for(i = 0; i < 16; i++)
139 { 140 if (len > 0)
141 { 142 printk("%02X ", (*ptr & 0xFF));
143 buff[i] = *ptr++;
144 if (buff[i] < 32 || buff[i] > 126)
145 buff[i] = '.';
146 } 147 else 148 { 149 printk(" ");
150 buff[i] = ' ';
151 } 152 addr++;
153 len--;
154 };
155 buff[i] = '\0';
156 printk(" \"%s\"\n", buff);
157 } 158 printk(" ----\n\n");
159 } 160
161 /* 162 * Handle the issuing of an ioctl() request 163 * for the ip device. This is scheduled to 164 * disappear 165 */ 166
167 intip_ioctl(structsock *sk, intcmd, unsignedlongarg)
/* */ 168 { 169 switch(cmd)
170 { 171 caseDDIOCSDBG:
172 return(dbg_ioctl((void *) arg, DBG_IP));
173 default:
174 return(-EINVAL);
175 } 176 } 177
178
179 /* these two routines will do routing. */ 180
/* Strict source routing hook. Currently an empty placeholder. */
static void strict_route(struct iphdr *iph, struct options *opt)
{
	/* nothing to do yet */
}

186
/* Loose source routing hook. Currently an empty placeholder. */
static void loose_route(struct iphdr *iph, struct options *opt)
{
	/* nothing to do yet */
}

192
193 staticvoid 194 print_ipprot(structinet_protocol *ipprot)
/* */ 195 { 196 DPRINTF((DBG_IP, "handler = %X, protocol = %d, copy=%d \n",
197 ipprot->handler, ipprot->protocol, ipprot->copy));
198 } 199
200
/*
 *	This routine will check to see if we have lost a gateway.
 *	It is an empty placeholder at present; daddr is ignored.
 */
void ip_route_check(unsigned long daddr)
{
	/* not implemented */
}

207
#if 0
/*
 *	This routine puts the options at the end of an ip header.
 *	No options are supported yet: a single zero byte is written
 *	directly after the fixed header and 4 is returned.
 */
static int build_options(struct iphdr *iph, struct options *opt)
{
	unsigned char *first_option = (unsigned char *)(iph + 1);

	first_option[0] = 0;
	return 4;
}
#endif

221
222 /* 223 * Take an skb, and fill in the MAC header. 224 */ 225
226 staticintip_send(structsk_buff *skb, unsignedlongdaddr, intlen, structdevice *dev, unsignedlongsaddr)
/* */ 227 { 228 intmac = 0;
229
230 skb->dev = dev;
231 skb->arp = 1;
232 if (dev->hard_header)
233 { 234 /* 235 * Build a hardware header. Source address is our mac, destination unknown 236 * (rebuild header will sort this out) 237 */ 238 mac = dev->hard_header(skb->data, dev, ETH_P_IP, NULL, NULL, len, skb);
239 if (mac < 0)
240 { 241 mac = -mac;
242 skb->arp = 0;
243 skb->raddr = daddr; /* next routing address */ 244 } 245 } 246 returnmac;
247 } 248
249
250 /* 251 * This routine builds the appropriate hardware/IP headers for 252 * the routine. It assumes that if *dev != NULL then the 253 * protocol knows what it's doing, otherwise it uses the 254 * routing/ARP tables to select a device struct. 255 */ 256 intip_build_header(structsk_buff *skb, unsignedlongsaddr, unsignedlongdaddr,
/* */ 257 structdevice **dev, inttype, structoptions *opt, intlen, inttos, intttl)
258 { 259 staticstructoptionsoptmem;
260 structiphdr *iph;
261 structrtable *rt;
262 unsignedchar *buff;
263 unsignedlongraddr;
264 staticintcount = 0;
265 inttmp;
266 unsignedlongsrc;
267
268 /* 269 * If there is no 'from' address as yet, then make it our loopback 270 */ 271
272 if (saddr == 0)
273 saddr = ip_my_addr();
274
275 DPRINTF((DBG_IP, "ip_build_header (skb=%X, saddr=%X, daddr=%X, *dev=%X,\n"
276 " type=%d, opt=%X, len = %d)\n",
277 skb, saddr, daddr, *dev, type, opt, len));
278
279 buff = skb->data;
280
281 /* 282 * See if we need to look up the device. 283 */ 284
285 if (*dev == NULL)
286 { 287 rt = ip_rt_route(daddr, &optmem, &src);
288 if (rt == NULL)
289 { 290 ip_statistics.IpOutNoRoutes++;
291 return(-ENETUNREACH);
292 } 293
294 *dev = rt->rt_dev;
295 /* 296 * If the frame is from us and going off machine it MUST MUST MUST 297 * have the output device ip address and never the loopback 298 */ 299 if (saddr == 0x0100007FL && daddr != 0x0100007FL)
300 saddr = src;/*rt->rt_dev->pa_addr;*/ 301 raddr = rt->rt_gateway;
302
303 DPRINTF((DBG_IP, "ip_build_header: saddr set to %s\n", in_ntoa(saddr)));
304 opt = &optmem;
305 } 306 else 307 { 308 /* 309 * We still need the address of the first hop. 310 */ 311 rt = ip_rt_route(daddr, &optmem, &src);
312 /* 313 * If the frame is from us and going off machine it MUST MUST MUST 314 * have the output device ip address and never the loopback 315 */ 316 if (saddr == 0x0100007FL && daddr != 0x0100007FL)
317 saddr = src;/*rt->rt_dev->pa_addr;*/ 318
319 raddr = (rt == NULL) ? 0 : rt->rt_gateway;
320 } 321
322 /* 323 * No gateway so aim at the real destination 324 */ 325 if (raddr == 0)
326 raddr = daddr;
327
328 /* 329 * Now build the MAC header. 330 */ 331
332 tmp = ip_send(skb, raddr, len, *dev, saddr);
333 buff += tmp;
334 len -= tmp;
335
336 /* 337 * Book keeping 338 */ 339
340 skb->dev = *dev;
341 skb->saddr = saddr;
342 if (skb->sk)
343 skb->sk->saddr = saddr;
344
345 /* 346 * Now build the IP header. 347 */ 348
349 /* 350 * If we are using IPPROTO_RAW, then we don't need an IP header, since 351 * one is being supplied to us by the user 352 */ 353
354 if(type == IPPROTO_RAW)
355 return (tmp);
356
357 iph = (structiphdr *)buff;
358 iph->version = 4;
359 iph->tos = tos;
360 iph->frag_off = 0;
361 iph->ttl = ttl;
362 iph->daddr = daddr;
363 iph->saddr = saddr;
364 iph->protocol = type;
365 iph->ihl = 5;
366 iph->id = htons(count++);
367
368 /* Setup the IP options. */ 369 #ifdef Not_Yet_Avail
370 build_options(iph, opt);
371 #endif 372
373 return(20 + tmp); /* IP header plus MAC header size */ 374 } 375
376
377 staticint 378 do_options(structiphdr *iph, structoptions *opt)
/* */ 379 { 380 unsignedchar *buff;
381 intdone = 0;
382 inti, len = sizeof(structiphdr);
383
384 /* Zero out the options. */ 385 opt->record_route.route_size = 0;
386 opt->loose_route.route_size = 0;
387 opt->strict_route.route_size = 0;
388 opt->tstamp.ptr = 0;
389 opt->security = 0;
390 opt->compartment = 0;
391 opt->handling = 0;
392 opt->stream = 0;
393 opt->tcc = 0;
394 return(0);
395
396 /* Advance the pointer to start at the options. */ 397 buff = (unsignedchar *)(iph + 1);
398
399 /* Now start the processing. */ 400 while (!done && len < iph->ihl*4) switch(*buff) { 401 caseIPOPT_END:
402 done = 1;
403 break;
404 caseIPOPT_NOOP:
405 buff++;
406 len++;
407 break;
408 caseIPOPT_SEC:
409 buff++;
410 if (*buff != 11) return(1);
411 buff++;
412 opt->security = ntohs(*(unsignedshort *)buff);
413 buff += 2;
414 opt->compartment = ntohs(*(unsignedshort *)buff);
415 buff += 2;
416 opt->handling = ntohs(*(unsignedshort *)buff);
417 buff += 2;
418 opt->tcc = ((*buff) << 16) + ntohs(*(unsignedshort *)(buff+1));
419 buff += 3;
420 len += 11;
421 break;
422 caseIPOPT_LSRR:
423 buff++;
424 if ((*buff - 3)% 4 != 0) return(1);
425 len += *buff;
426 opt->loose_route.route_size = (*buff -3)/4;
427 buff++;
428 if (*buff % 4 != 0) return(1);
429 opt->loose_route.pointer = *buff/4 - 1;
430 buff++;
431 buff++;
432 for (i = 0; i < opt->loose_route.route_size; i++) { 433 if(i>=MAX_ROUTE)
434 return(1);
435 opt->loose_route.route[i] = *(unsignedlong *)buff;
436 buff += 4;
437 } 438 break;
439 caseIPOPT_SSRR:
440 buff++;
441 if ((*buff - 3)% 4 != 0) return(1);
442 len += *buff;
443 opt->strict_route.route_size = (*buff -3)/4;
444 buff++;
445 if (*buff % 4 != 0) return(1);
446 opt->strict_route.pointer = *buff/4 - 1;
447 buff++;
448 buff++;
449 for (i = 0; i < opt->strict_route.route_size; i++) { 450 if(i>=MAX_ROUTE)
451 return(1);
452 opt->strict_route.route[i] = *(unsignedlong *)buff;
453 buff += 4;
454 } 455 break;
456 caseIPOPT_RR:
457 buff++;
458 if ((*buff - 3)% 4 != 0) return(1);
459 len += *buff;
460 opt->record_route.route_size = (*buff -3)/4;
461 buff++;
462 if (*buff % 4 != 0) return(1);
463 opt->record_route.pointer = *buff/4 - 1;
464 buff++;
465 buff++;
466 for (i = 0; i < opt->record_route.route_size; i++) { 467 if(i>=MAX_ROUTE)
468 return 1;
469 opt->record_route.route[i] = *(unsignedlong *)buff;
470 buff += 4;
471 } 472 break;
473 caseIPOPT_SID:
474 len += 4;
475 buff +=2;
476 opt->stream = *(unsignedshort *)buff;
477 buff += 2;
478 break;
479 caseIPOPT_TIMESTAMP:
480 buff++;
481 len += *buff;
482 if (*buff % 4 != 0) return(1);
483 opt->tstamp.len = *buff / 4 - 1;
484 buff++;
485 if ((*buff - 1) % 4 != 0) return(1);
486 opt->tstamp.ptr = (*buff-1)/4;
487 buff++;
488 opt->tstamp.x.full_char = *buff;
489 buff++;
490 for (i = 0; i < opt->tstamp.len; i++) { 491 opt->tstamp.data[i] = *(unsignedlong *)buff;
492 buff += 4;
493 } 494 break;
495 default:
496 return(1);
497 } 498
499 if (opt->record_route.route_size == 0) { 500 if (opt->strict_route.route_size != 0) { 501 memcpy(&(opt->record_route), &(opt->strict_route),
502 sizeof(opt->record_route));
503 }elseif (opt->loose_route.route_size != 0) { 504 memcpy(&(opt->record_route), &(opt->loose_route),
505 sizeof(opt->record_route));
506 } 507 } 508
509 if (opt->strict_route.route_size != 0 &&
510 opt->strict_route.route_size != opt->strict_route.pointer) { 511 strict_route(iph, opt);
512 return(0);
513 } 514
515 if (opt->loose_route.route_size != 0 &&
516 opt->loose_route.route_size != opt->loose_route.pointer) { 517 loose_route(iph, opt);
518 return(0);
519 } 520
521 return(0);
522 } 523
/*
 *	This is a version of ip_compute_csum() optimized for IP headers, which
 *	always checksum on 4 octet boundaries.
 *
 *	buff	start of the header
 *	wlen	header length in 32-bit words; values <= 0 sum nothing
 *
 *	Returns the 16-bit one's complement of the one's complement sum
 *	of the header, taken as 16-bit words in host byte order. Storing
 *	the result in the header's checksum field (which must be zero
 *	while summing) makes a later sum over the header come out as 0.
 *
 *	Written in plain C: the compiler can see that the header memory
 *	is read, so a preceding "check = 0" store cannot be optimised
 *	away, and the routine is not tied to one processor. memcpy() is
 *	used for the loads so that buff need not be aligned.
 */

static inline unsigned short ip_fast_csum(unsigned char * buff, int wlen)
{
	unsigned long sum = 0;
	unsigned short half[2];

	while (wlen-- > 0)
	{
		memcpy(half, buff, 4);
		sum += half[0];
		sum += half[1];
		buff += 4;
	}

	/* Fold the carries back in (end-around carry) until 16 bits remain. */
	while (sum >> 16)
		sum = (sum & 0xffff) + (sum >> 16);

	return (~sum) & 0xffff;
}

/*
 *	This routine does all the checksum computations that don't
 *	require anything special (like copying or special headers).
 *
 *	buff	start of the data
 *	len	number of bytes to sum
 *
 *	Returns the 16-bit one's complement of the one's complement sum
 *	of the data, taken as 16-bit words in host byte order. An odd
 *	trailing byte is summed as if it were followed by a zero byte.
 *
 *	Written in plain C rather than inline assembler, so it does not
 *	depend on one processor or on assembler constraint rules.
 *	memcpy() is used for the loads so that buff need not be aligned.
 */

unsigned short ip_compute_csum(unsigned char * buff, int len)
{
	unsigned long sum = 0;
	unsigned short word;
	unsigned char tail[2];
	int quads;

	/* Do the first multiple of 4 bytes. */
	quads = (len > 3) ? (len >> 2) : 0;
	while (quads-- > 0)
	{
		memcpy(&word, buff, 2);
		sum += word;
		memcpy(&word, buff + 2, 2);
		sum += word;
		buff += 4;
		/* Fold as we go so that long buffers cannot overflow sum. */
		sum = (sum & 0xffff) + (sum >> 16);
	}

	/* One remaining 16-bit word? */
	if (len & 2)
	{
		memcpy(&word, buff, 2);
		sum += word;
		buff += 2;
	}

	/* One remaining byte: pad it with a zero byte behind it. */
	if (len & 1)
	{
		tail[0] = *buff;
		tail[1] = 0;
		memcpy(&word, tail, 2);
		sum += word;
	}

	/* Final end-around carry down to 16 bits. */
	while (sum >> 16)
		sum = (sum & 0xffff) + (sum >> 16);

	sum = ~sum;
	return(sum & 0xffff);
}

602 /* 603 * Check the header of an incoming IP datagram. This version is still used in slhc.c. 604 */ 605
606 intip_csum(structiphdr *iph)
/* */ 607 { 608 returnip_fast_csum((unsignedchar *)iph, iph->ihl);
609 } 610
611 /* 612 * Generate a checksym for an outgoing IP datagram. 613 */ 614
615 staticvoidip_send_check(structiphdr *iph)
/* */ 616 { 617 iph->check = 0;
618 iph->check = ip_fast_csum((unsignedchar *)iph, iph->ihl);
619 } 620
621 /************************ Fragment Handlers From NET2E not yet with tweaks to beat 4K **********************************/ 622
623
624 /* 625 * This fragment handler is a bit of a heap. On the other hand it works quite 626 * happily and handles things quite well. 627 */ 628
629 staticstructipq *ipqueue = NULL; /* IP fragment queue */ 630
631 /* 632 * Create a new fragment entry. 633 */ 634
635 staticstructipfrag *ip_frag_create(intoffset, intend, structsk_buff *skb, unsignedchar *ptr)
/* */ 636 { 637 structipfrag *fp;
638
639 fp = (structipfrag *) kmalloc(sizeof(structipfrag), GFP_ATOMIC);
640 if (fp == NULL)
641 { 642 printk("IP: frag_create: no memory left !\n");
643 return(NULL);
644 } 645 memset(fp, 0, sizeof(structipfrag));
646
647 /* Fill in the structure. */ 648 fp->offset = offset;
649 fp->end = end;
650 fp->len = end - offset;
651 fp->skb = skb;
652 fp->ptr = ptr;
653
654 return(fp);
655 } 656
657
658 /* 659 * Find the correct entry in the "incomplete datagrams" queue for 660 * this IP datagram, and return the queue entry address if found. 661 */ 662
663 staticstructipq *ip_find(structiphdr *iph)
/* */ 664 { 665 structipq *qp;
666 structipq *qplast;
667
668 cli();
669 qplast = NULL;
670 for(qp = ipqueue; qp != NULL; qplast = qp, qp = qp->next)
671 { 672 if (iph->id== qp->iph->id && iph->saddr == qp->iph->saddr &&
673 iph->daddr == qp->iph->daddr && iph->protocol == qp->iph->protocol)
674 { 675 del_timer(&qp->timer); /* So it doesn't vanish on us. The timer will be reset anyway */ 676 sti();
677 return(qp);
678 } 679 } 680 sti();
681 return(NULL);
682 } 683
684
685 /* 686 * Remove an entry from the "incomplete datagrams" queue, either 687 * because we completed, reassembled and processed it, or because 688 * it timed out. 689 */ 690
691 staticvoidip_free(structipq *qp)
/* */ 692 { 693 structipfrag *fp;
694 structipfrag *xp;
695
696 /* 697 * Stop the timer for this entry. 698 */ 699
700 del_timer(&qp->timer);
701
702 /* Remove this entry from the "incomplete datagrams" queue. */ 703 cli();
704 if (qp->prev == NULL)
705 { 706 ipqueue = qp->next;
707 if (ipqueue != NULL)
708 ipqueue->prev = NULL;
709 } 710 else 711 { 712 qp->prev->next = qp->next;
713 if (qp->next != NULL)
714 qp->next->prev = qp->prev;
715 } 716
717 /* Release all fragment data. */ 718
719 fp = qp->fragments;
720 while (fp != NULL)
721 { 722 xp = fp->next;
723 IS_SKB(fp->skb);
724 kfree_skb(fp->skb,FREE_READ);
725 kfree_s(fp, sizeof(structipfrag));
726 fp = xp;
727 } 728
729 /* Release the MAC header. */ 730 kfree_s(qp->mac, qp->maclen);
731
732 /* Release the IP header. */ 733 kfree_s(qp->iph, qp->ihlen + 8);
734
735 /* Finally, release the queue descriptor itself. */ 736 kfree_s(qp, sizeof(structipq));
737 /* printk("ip_free:done\n");*/ 738 sti();
739 } 740
741
742 /* 743 * Oops- a fragment queue timed out. Kill it and send an ICMP reply. 744 */ 745
746 staticvoidip_expire(unsignedlongarg)
/* */ 747 { 748 structipq *qp;
749
750 qp = (structipq *)arg;
751 DPRINTF((DBG_IP, "IP: queue_expire: fragment queue 0x%X timed out!\n", qp));
752
753 /* 754 * Send an ICMP "Fragment Reassembly Timeout" message. 755 */ 756
757 ip_statistics.IpReasmTimeout++;
758 ip_statistics.IpReasmFails++;
759 /* This if is always true... shrug */ 760 if(qp->fragments!=NULL)
761 icmp_send(qp->fragments->skb,ICMP_TIME_EXCEEDED,
762 ICMP_EXC_FRAGTIME, qp->dev);
763
764 /* 765 * Nuke the fragment queue. 766 */ 767 ip_free(qp);
768 } 769
770
771 /* 772 * Add an entry to the 'ipq' queue for a newly received IP datagram. 773 * We will (hopefully :-) receive all other fragments of this datagram 774 * in time, so we just create a queue for this datagram, in which we 775 * will insert the received fragments at their respective positions. 776 */ 777
778 staticstructipq *ip_create(structsk_buff *skb, structiphdr *iph, structdevice *dev)
/* */ 779 { 780 structipq *qp;
781 intmaclen;
782 intihlen;
783
784 qp = (structipq *) kmalloc(sizeof(structipq), GFP_ATOMIC);
785 if (qp == NULL)
786 { 787 printk("IP: create: no memory left !\n");
788 return(NULL);
789 skb->dev = qp->dev;
790 } 791 memset(qp, 0, sizeof(structipq));
792
793 /* 794 * Allocate memory for the MAC header. 795 * 796 * FIXME: We have a maximum MAC address size limit and define 797 * elsewhere. We should use it here and avoid the 3 kmalloc() calls 798 */ 799
800 maclen = ((unsignedlong) iph) - ((unsignedlong) skb->data);
801 qp->mac = (unsignedchar *) kmalloc(maclen, GFP_ATOMIC);
802 if (qp->mac == NULL)
803 { 804 printk("IP: create: no memory left !\n");
805 kfree_s(qp, sizeof(structipq));
806 return(NULL);
807 } 808
809 /* 810 * Allocate memory for the IP header (plus 8 octects for ICMP). 811 */ 812
813 ihlen = (iph->ihl * sizeof(unsignedlong));
814 qp->iph = (structiphdr *) kmalloc(ihlen + 8, GFP_ATOMIC);
815 if (qp->iph == NULL)
816 { 817 printk("IP: create: no memory left !\n");
818 kfree_s(qp->mac, maclen);
819 kfree_s(qp, sizeof(structipq));
820 return(NULL);
821 } 822
823 /* Fill in the structure. */ 824 memcpy(qp->mac, skb->data, maclen);
825 memcpy(qp->iph, iph, ihlen + 8);
826 qp->len = 0;
827 qp->ihlen = ihlen;
828 qp->maclen = maclen;
829 qp->fragments = NULL;
830 qp->dev = dev;
831
832 /* Start a timer for this entry. */ 833 qp->timer.expires = IP_FRAG_TIME; /* about 30 seconds */ 834 qp->timer.data = (unsignedlong) qp; /* pointer to queue */ 835 qp->timer.function = ip_expire; /* expire function */ 836 add_timer(&qp->timer);
837
838 /* Add this entry to the queue. */ 839 qp->prev = NULL;
840 cli();
841 qp->next = ipqueue;
842 if (qp->next != NULL)
843 qp->next->prev = qp;
844 ipqueue = qp;
845 sti();
846 return(qp);
847 } 848
849
850 /* 851 * See if a fragment queue is complete. 852 */ 853
854 staticintip_done(structipq *qp)
/* */ 855 { 856 structipfrag *fp;
857 intoffset;
858
859 /* Only possible if we received the final fragment. */ 860 if (qp->len == 0)
861 return(0);
862
863 /* Check all fragment offsets to see if they connect. */ 864 fp = qp->fragments;
865 offset = 0;
866 while (fp != NULL)
867 { 868 if (fp->offset > offset)
869 return(0); /* fragment(s) missing */ 870 offset = fp->end;
871 fp = fp->next;
872 } 873
874 /* All fragments are present. */ 875 return(1);
876 } 877
878
879 /* 880 * Build a new IP datagram from all its fragments. 881 * 882 * FIXME: We copy here because we lack an effective way of handling lists 883 * of bits on input. Until the new skb data handling is in I'm not going 884 * to touch this with a bargepole. This also causes a 4Kish limit on 885 * packet sizes. 886 */ 887
888 staticstructsk_buff *ip_glue(structipq *qp)
/* */ 889 { 890 structsk_buff *skb;
891 structiphdr *iph;
892 structipfrag *fp;
893 unsignedchar *ptr;
894 intcount, len;
895
896 /* 897 * Allocate a new buffer for the datagram. 898 */ 899
900 len = qp->maclen + qp->ihlen + qp->len;
901
902 if ((skb = alloc_skb(len,GFP_ATOMIC)) == NULL)
903 { 904 ip_statistics.IpReasmFails++;
905 printk("IP: queue_glue: no memory for glueing queue 0x%X\n", (int) qp);
906 ip_free(qp);
907 return(NULL);
908 } 909
910 /* Fill in the basic details. */ 911 skb->len = (len - qp->maclen);
912 skb->h.raw = skb->data;
913 skb->free = 1;
914
915 /* Copy the original MAC and IP headers into the new buffer. */ 916 ptr = (unsignedchar *) skb->h.raw;
917 memcpy(ptr, ((unsignedchar *) qp->mac), qp->maclen);
918 ptr += qp->maclen;
919 memcpy(ptr, ((unsignedchar *) qp->iph), qp->ihlen);
920 ptr += qp->ihlen;
921 skb->h.raw += qp->maclen;
922
923 count = 0;
924
925 /* Copy the data portions of all fragments into the new buffer. */ 926 fp = qp->fragments;
927 while(fp != NULL)
928 { 929 if(count+fp->len>skb->len)
930 { 931 printk("Invalid fragment list: Fragment over size.\n");
932 ip_free(qp);
933 kfree_skb(skb,FREE_WRITE);
934 ip_statistics.IpReasmFails++;
935 returnNULL;
936 } 937 memcpy((ptr + fp->offset), fp->ptr, fp->len);
938 count += fp->len;
939 fp = fp->next;
940 } 941
942 /* We glued together all fragments, so remove the queue entry. */ 943 ip_free(qp);
944
945 /* Done with all fragments. Fixup the new IP header. */ 946 iph = skb->h.iph;
947 iph->frag_off = 0;
948 iph->tot_len = htons((iph->ihl * sizeof(unsignedlong)) + count);
949 skb->ip_hdr = iph;
950
951 ip_statistics.IpReasmOKs++;
952 return(skb);
953 } 954
955
956 /* 957 * Process an incoming IP datagram fragment. 958 */ 959
960 staticstructsk_buff *ip_defrag(structiphdr *iph, structsk_buff *skb, structdevice *dev)
/* */ 961 { 962 structipfrag *prev, *next;
963 structipfrag *tfp;
964 structipq *qp;
965 structsk_buff *skb2;
966 unsignedchar *ptr;
967 intflags, offset;
968 inti, ihl, end;
969
970 ip_statistics.IpReasmReqds++;
971
972 /* Find the entry of this IP datagram in the "incomplete datagrams" queue. */ 973 qp = ip_find(iph);
974
975 /* Is this a non-fragmented datagram? */ 976 offset = ntohs(iph->frag_off);
977 flags = offset & ~IP_OFFSET;
978 offset &= IP_OFFSET;
979 if (((flags & IP_MF) == 0) && (offset == 0))
980 { 981 if (qp != NULL)
982 ip_free(qp); /* Huh? How could this exist?? */ 983 return(skb);
984 } 985
986 offset <<= 3; /* offset is in 8-byte chunks */ 987
988 /* 989 * If the queue already existed, keep restarting its timer as long 990 * as we still are receiving fragments. Otherwise, create a fresh 991 * queue entry. 992 */ 993
994 if (qp != NULL)
995 { 996 del_timer(&qp->timer);
997 qp->timer.expires = IP_FRAG_TIME; /* about 30 seconds */ 998 qp->timer.data = (unsignedlong) qp; /* pointer to queue */ 999 qp->timer.function = ip_expire; /* expire function */1000 add_timer(&qp->timer);
1001 }1002 else1003 {1004 /*1005 * If we failed to create it, then discard the frame1006 */1007 if ((qp = ip_create(skb, iph, dev)) == NULL)
1008 {1009 skb->sk = NULL;
1010 kfree_skb(skb, FREE_READ);
1011 ip_statistics.IpReasmFails++;
1012 returnNULL;
1013 }1014 }1015
1016 /*1017 * Determine the position of this fragment. 1018 */1019
1020 ihl = (iph->ihl * sizeof(unsignedlong));
1021 end = offset + ntohs(iph->tot_len) - ihl;
1022
1023 /*1024 * Point into the IP datagram 'data' part. 1025 */1026
1027 ptr = skb->data + dev->hard_header_len + ihl;
1028
1029 /* 1030 * Is this the final fragment? 1031 */1032
1033 if ((flags & IP_MF) == 0)
1034 qp->len = end;
1035
1036 /*1037 * Find out which fragments are in front and at the back of us1038 * in the chain of fragments so far. We must know where to put1039 * this fragment, right?1040 */1041
1042 prev = NULL;
1043 for(next = qp->fragments; next != NULL; next = next->next)
1044 {1045 if (next->offset > offset)
1046 break; /* bingo! */1047 prev = next;
1048 }1049
1050 /*1051 * We found where to put this one.1052 * Check for overlap with preceeding fragment, and, if needed,1053 * align things so that any overlaps are eliminated.1054 */1055 if (prev != NULL && offset < prev->end)
1056 {1057 i = prev->end - offset;
1058 offset += i; /* ptr into datagram */1059 ptr += i; /* ptr into fragment data */1060 DPRINTF((DBG_IP, "IP: defrag: fixed low overlap %d bytes\n", i));
1061 }1062
1063 /*1064 * Look for overlap with succeeding segments.1065 * If we can merge fragments, do it.1066 */1067
1068 for(; next != NULL; next = tfp)
1069 {1070 tfp = next->next;
1071 if (next->offset >= end)
1072 break; /* no overlaps at all */1073
1074 i = end - next->offset; /* overlap is 'i' bytes */1075 next->len -= i; /* so reduce size of */1076 next->offset += i; /* next fragment */1077 next->ptr += i;
1078
1079 /* 1080 * If we get a frag size of <= 0, remove it and the packet1081 * that it goes with.1082 */1083 if (next->len <= 0)
1084 {1085 DPRINTF((DBG_IP, "IP: defrag: removing frag 0x%X (len %d)\n",
1086 next, next->len));
1087 if (next->prev != NULL)
1088 next->prev->next = next->next;
1089 else1090 qp->fragments = next->next;
1091
1092 if (tfp->next != NULL)
1093 next->next->prev = next->prev;
1094
1095 kfree_skb(next->skb,FREE_READ);
1096 kfree_s(next, sizeof(structipfrag));
1097 }1098 DPRINTF((DBG_IP, "IP: defrag: fixed high overlap %d bytes\n", i));
1099 }1100
1101 /* 1102 * Insert this fragment in the chain of fragments. 1103 */1104
1105 tfp = NULL;
1106 tfp = ip_frag_create(offset, end, skb, ptr);
1107
1108 /*1109 * No memory to save the fragment - so throw the lot1110 */1111
1112 if (!tfp)
1113 {1114 skb->sk = NULL;
1115 kfree_skb(skb, FREE_READ);
1116 returnNULL;
1117 }1118 tfp->prev = prev;
1119 tfp->next = next;
1120 if (prev != NULL)
1121 prev->next = tfp;
1122 else1123 qp->fragments = tfp;
1124
1125 if (next != NULL)
1126 next->prev = tfp;
1127
1128 /*1129 * OK, so we inserted this new fragment into the chain.1130 * Check if we now have a full IP datagram which we can1131 * bump up to the IP layer...1132 */1133
1134 if (ip_done(qp))
1135 {1136 skb2 = ip_glue(qp); /* glue together the fragments */1137 return(skb2);
1138 }1139 return(NULL);
1140 }1141
1142
1143 /*1144 * This IP datagram is too large to be sent in one piece. Break it up into1145 * smaller pieces (each of size equal to the MAC header plus IP header plus1146 * a block of the data of the original IP data part) that will yet fit in a1147 * single device frame, and queue such a frame for sending by calling the1148 * ip_queue_xmit(). Note that this is recursion, and bad things will happen1149 * if this function causes a loop...1150 *1151 * Yes this is inefficient, feel free to submit a quicker one.1152 *1153 * **Protocol Violation**1154 * We copy all the options to each fragment. !FIXME!1155 */1156
1157 voidip_fragment(structsock *sk, structsk_buff *skb, structdevice *dev, intis_frag)
/* */1158 {1159 structiphdr *iph;
1160 unsignedchar *raw;
1161 unsignedchar *ptr;
1162 structsk_buff *skb2;
1163 intleft, mtu, hlen, len;
1164 intoffset;
1165
1166 /* 1167 * Point into the IP datagram header. 1168 */1169
1170 raw = skb->data;
1171 iph = (structiphdr *) (raw + dev->hard_header_len);
1172
1173 skb->ip_hdr = iph;
1174
1175 /* 1176 * Setup starting values. 1177 */1178
1179 hlen = (iph->ihl * sizeof(unsignedlong));
1180 left = ntohs(iph->tot_len) - hlen; /* Space per frame */1181 hlen += dev->hard_header_len; /* Total header size */1182 mtu = (dev->mtu - hlen); /* Size of data space */1183 ptr = (raw + hlen); /* Where to start from */1184
1185 DPRINTF((DBG_IP, "IP: Fragmentation Desired\n"));
1186 DPRINTF((DBG_IP, " DEV=%s, MTU=%d, LEN=%d SRC=%s",
1187 dev->name, dev->mtu, left, in_ntoa(iph->saddr)));
1188 DPRINTF((DBG_IP, " DST=%s\n", in_ntoa(iph->daddr)));
1189
1190 /*1191 * Check for any "DF" flag. [DF means do not fragment]1192 */1193
1194 if (ntohs(iph->frag_off) & IP_DF)
1195 {1196 DPRINTF((DBG_IP, "IP: Fragmentation Desired, but DF set !\n"));
1197 DPRINTF((DBG_IP, " DEV=%s, MTU=%d, LEN=%d SRC=%s",
1198 dev->name, dev->mtu, left, in_ntoa(iph->saddr)));
1199 DPRINTF((DBG_IP, " DST=%s\n", in_ntoa(iph->daddr)));
1200
1201 ip_statistics.IpFragFails++;
1202 icmp_send(skb,ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, dev);
1203 return;
1204 }1205
1206 /*1207 * The protocol doesn't seem to say what to do in the case that the1208 * frame + options doesn't fit the mtu. As it used to fall down dead1209 * in this case we were fortunate it didn't happen1210 */1211
1212 if(mtu<8)
1213 {1214 /* It's wrong but its better than nothing */1215 icmp_send(skb,ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED,dev);
1216 ip_statistics.IpFragFails++;
1217 return;
1218 }1219
1220 /* 1221 * Fragment the datagram. 1222 */1223
1224 /*1225 * The initial offset is 0 for a complete frame. When1226 * fragmenting fragments its wherever this one starts.1227 */1228
1229 if (is_frag & 2)
1230 offset = (ntohs(iph->frag_off) & 0x1fff) << 3;
1231 else1232 offset = 0;
1233
1234
1235 /*1236 * Keep copying data until we run out.1237 */1238
1239 while(left > 0)
1240 {1241 len = left;
1242 /* IF: it doesn't fit, use 'mtu' - the data space left */1243 if (len > mtu)
1244 len = mtu;
1245 /* IF: we are not sending upto and including the packet end1246 then align the next start on an eight byte boundary */1247 if (len < left)
1248 {1249 len/=8;
1250 len*=8;
1251 }1252 DPRINTF((DBG_IP,"IP: frag: creating fragment of %d bytes (%d total)\n",
1253 len, len + hlen));
1254
1255 /*1256 * Allocate buffer. 1257 */1258
1259 if ((skb2 = alloc_skb(len + hlen,GFP_ATOMIC)) == NULL)
1260 {1261 printk("IP: frag: no memory for new fragment!\n");
1262 ip_statistics.IpFragFails++;
1263 return;
1264 }1265
1266 /*1267 * Set up data on packet1268 */1269
1270 skb2->arp = skb->arp;
1271 skb2->free = skb->free;
1272 skb2->len = len + hlen;
1273 skb2->h.raw=(char *) skb2->data;
1274
1275 /*1276 * Charge the memory for the fragment to any owner1277 * it might posess1278 */1279
1280 if (sk)
1281 sk->wmem_alloc += skb2->mem_len;
1282
1283 /* 1284 * Copy the packet header into the new buffer. 1285 */1286
1287 memcpy(skb2->h.raw, raw, hlen);
1288
1289 /*1290 * Copy a block of the IP datagram. 1291 */1292 memcpy(skb2->h.raw + hlen, ptr, len);
1293 left -= len;
1294
1295 skb2->h.raw+=dev->hard_header_len;
1296
1297 /*1298 * Fill in the new header fields. 1299 */1300 iph = (structiphdr *)(skb2->h.raw/*+dev->hard_header_len*/);
1301 iph->frag_off = htons((offset >> 3));
1302 /* 1303 * Added AC : If we are fragmenting a fragment thats not the1304 * last fragment then keep MF on each bit 1305 */1306 if (left > 0 || (is_frag & 1))
1307 iph->frag_off |= htons(IP_MF);
1308 ptr += len;
1309 offset += len;
1310
1311 /* 1312 * Put this fragment into the sending queue. 1313 */1314
1315 ip_statistics.IpFragCreates++;
1316
1317 ip_queue_xmit(sk, dev, skb2, 1);
1318 }1319 ip_statistics.IpFragOKs++;
1320 }1321
1322
1323
1324 #ifdefCONFIG_IP_FORWARD1325
1326 /* 1327 * Forward an IP datagram to its next destination. 1328 */1329
1330 staticvoidip_forward(structsk_buff *skb, structdevice *dev, intis_frag)
/* */1331 {1332 structdevice *dev2; /* Output device */1333 structiphdr *iph; /* Our header */1334 structsk_buff *skb2; /* Output packet */1335 structrtable *rt; /* Route we use */1336 unsignedchar *ptr; /* Data pointer */1337 unsignedlongraddr; /* Router IP address */1338
1339 /*1340 * Only forward packets that were fired at us when we are in promiscuous1341 * mode. In standard mode we rely on the driver to filter for us.1342 */1343
1344 if(dev->flags&IFF_PROMISC)
1345 {1346 if(memcmp((char *)&skb[1],dev->dev_addr,dev->addr_len))
1347 return;
1348 }1349
1350
1351
1352 /*1353 * According to the RFC, we must first decrease the TTL field. If1354 * that reaches zero, we must reply an ICMP control message telling1355 * that the packet's lifetime expired.1356 *1357 * Exception:1358 * We may not generate an ICMP for an ICMP. icmp_send does the1359 * enforcement of this so we can forget it here. It is however1360 * sometimes VERY important.1361 */1362
1363 iph = skb->h.iph;
1364 iph->ttl--;
1365 if (iph->ttl <= 0)
1366 {1367 DPRINTF((DBG_IP, "\nIP: *** datagram expired: TTL=0 (ignored) ***\n"));
1368 DPRINTF((DBG_IP, " SRC = %s ", in_ntoa(iph->saddr)));
1369 DPRINTF((DBG_IP, " DST = %s (ignored)\n", in_ntoa(iph->daddr)));
1370
1371 /* Tell the sender its packet died... */1372 icmp_send(skb, ICMP_TIME_EXCEEDED, ICMP_EXC_TTL, dev);
1373 return;
1374 }1375
1376 /* 1377 * Re-compute the IP header checksum. 1378 * This is inefficient. We know what has happened to the header1379 * and could thus adjust the checksum as Phil Karn does in KA9Q1380 */1381
1382 ip_send_check(iph);
1383
1384 /*1385 * OK, the packet is still valid. Fetch its destination address,1386 * and give it to the IP sender for further processing.1387 */1388
1389 rt = ip_rt_route(iph->daddr, NULL, NULL);
1390 if (rt == NULL)
1391 {1392 DPRINTF((DBG_IP, "\nIP: *** routing (phase I) failed ***\n"));
1393
1394 /*1395 * Tell the sender its packet cannot be delivered. Again1396 * ICMP is screened later.1397 */1398 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_NET_UNREACH, dev);
1399 return;
1400 }1401
1402
1403 /*1404 * Gosh. Not only is the packet valid; we even know how to1405 * forward it onto its final destination. Can we say this1406 * is being plain lucky?1407 * If the router told us that there is no GW, use the dest.1408 * IP address itself- we seem to be connected directly...1409 */1410
1411 raddr = rt->rt_gateway;
1412
1413 if (raddr != 0)
1414 {1415 /*1416 * There is a gateway so find the correct route for it.1417 * Gateways cannot in turn be gatewayed.1418 */1419 rt = ip_rt_route(raddr, NULL, NULL);
1420 if (rt == NULL)
1421 {1422 DPRINTF((DBG_IP, "\nIP: *** routing (phase II) failed ***\n"));
1423
1424 /* 1425 * Tell the sender its packet cannot be delivered... 1426 */1427 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, dev);
1428 return;
1429 }1430 if (rt->rt_gateway != 0)
1431 raddr = rt->rt_gateway;
1432 }1433 else1434 raddr = iph->daddr;
1435
1436 /*1437 * Having picked a route we can now send the frame out.1438 */1439
1440 dev2 = rt->rt_dev;
1441
1442 /*1443 * In IP you never forward a frame on the interface that it arrived1444 * upon. We should generate an ICMP HOST REDIRECT giving the route1445 * we calculated.1446 * For now just dropping the packet is an acceptable compromise.1447 */1448
1449 if (dev == dev2)
1450 return;
1451
1452 /*1453 * We now allocate a new buffer, and copy the datagram into it.1454 * If the indicated interface is up and running, kick it.1455 */1456
1457 DPRINTF((DBG_IP, "\nIP: *** fwd %s -> ", in_ntoa(iph->saddr)));
1458 DPRINTF((DBG_IP, "%s (via %s), LEN=%d\n",
1459 in_ntoa(raddr), dev2->name, skb->len));
1460
1461 if (dev2->flags & IFF_UP)
1462 {1463
1464 /*1465 * Current design decrees we copy the packet. For identical header1466 * lengths we could avoid it. The new skb code will let us push1467 * data so the problem goes away then.1468 */1469
1470 skb2 = alloc_skb(dev2->hard_header_len + skb->len, GFP_ATOMIC);
1471 /*1472 * This is rare and since IP is tolerant of network failures1473 * quite harmless.1474 */1475 if (skb2 == NULL)
1476 {1477 printk("\nIP: No memory available for IP forward\n");
1478 return;
1479 }1480 ptr = skb2->data;
1481 skb2->free = 1;
1482 skb2->len = skb->len + dev2->hard_header_len;
1483 skb2->h.raw = ptr;
1484
1485 /* 1486 * Copy the packet data into the new buffer. 1487 */1488 memcpy(ptr + dev2->hard_header_len, skb->h.raw, skb->len);
1489
1490 /* Now build the MAC header. */1491 (void) ip_send(skb2, raddr, skb->len, dev2, dev2->pa_addr);
1492
1493 ip_statistics.IpForwDatagrams++;
1494
1495 /*1496 * See if it needs fragmenting. Note in ip_rcv we tagged1497 * the fragment type. This must be right so that1498 * the fragmenter does the right thing.1499 */1500
1501 if(skb2->len > dev2->mtu)
1502 {1503 ip_fragment(NULL,skb2,dev2, is_frag);
1504 kfree_skb(skb2,FREE_WRITE);
1505 }1506 else1507 {1508 /*1509 * Map service types to priority. We lie about1510 * throughput being low priority, but its a good1511 * choice to help improve general usage.1512 */1513 if(iph->tos & IPTOS_LOWDELAY)
1514 dev_queue_xmit(skb2, dev2, SOPRI_INTERACTIVE);
1515 elseif(iph->tos & IPTOS_THROUGHPUT)
1516 dev_queue_xmit(skb2, dev2, SOPRI_BACKGROUND);
1517 else1518 dev_queue_xmit(skb2, dev2, SOPRI_NORMAL);
1519 }1520 }1521 }1522
1523
1524 #endif1525
1526 /*1527 * This function receives all incoming IP datagrams. 1528 */1529
1530 intip_rcv(structsk_buff *skb, structdevice *dev, structpacket_type *pt)
/* */1531 {1532 structiphdr *iph = skb->h.iph;
1533 unsignedcharhash;
1534 unsignedcharflag = 0;
1535 unsignedcharopts_p = 0; /* Set iff the packet has options. */1536 structinet_protocol *ipprot;
1537 staticstructoptionsopt; /* since we don't use these yet, and they1538 take up stack space. */1539 intbrd;
1540 intis_frag=0;
1541
1542
1543 ip_statistics.IpInReceives++;
1544
1545 DPRINTF((DBG_IP, "<<\n"));
1546
1547 /*1548 * Tag the ip header of this packet so we can find it1549 */1550
1551 skb->ip_hdr = iph;
1552
1553 /*1554 * Is the datagram acceptable? 1555 *1556 * 1. Length at least the size of an ip header1557 * 2. Version of 41558 * 3. Checksums correctly. [Speed optimisation for later, skip loopback checksums]1559 * (4. We ought to check for IP multicast addresses and undefined types.. does this matter ?)1560 */1561
1562 if (skb->len<sizeof(structiphdr) || iph->ihl<5 || iph->version != 4 || ip_fast_csum((unsignedchar *)iph, iph->ihl) !=0)
1563 {1564 ip_statistics.IpInHdrErrors++;
1565 DPRINTF((DBG_IP, "\nIP: *** datagram error ***\n"));
1566 DPRINTF((DBG_IP, " SRC = %s ", in_ntoa(iph->saddr)));
1567 DPRINTF((DBG_IP, " DST = %s (ignored)\n", in_ntoa(iph->daddr)));
1568 kfree_skb(skb, FREE_WRITE);
1569 return(0);
1570 }1571
1572 /*1573 * Our transport medium may have padded the buffer out. Now we know it1574 * is IP we can trim to the true length of the frame.1575 */1576
1577 skb->len=ntohs(iph->tot_len);
1578
1579 /*1580 * Next anaylse the packet for options. Studies show under one packet in1581 * a thousand have options....1582 */1583
1584 if (iph->ihl != 5)
1585 {/* Fast path for the typical optionless IP packet. */1586 ip_print(iph); /* Bogus, only for debugging. */1587 memset((char *) &opt, 0, sizeof(opt));
1588 if (do_options(iph, &opt) != 0)
1589 return 0;
1590 opts_p = 1;
1591 }1592
1593 /*1594 * Remember if the frame is fragmented.1595 */1596
1597 if (iph->frag_off & 0x0020)
1598 is_frag|=1;
1599
1600 /*1601 * Last fragment ?1602 */1603
1604 if (ntohs(iph->frag_off) & 0x1fff)
1605 is_frag|=2;
1606
1607 /* 1608 * Do any IP forwarding required. chk_addr() is expensive -- avoid it someday. 1609 *1610 * This is inefficient. While finding out if it is for us we could also compute1611 * the routing table entry. This is where the great unified cache theory comes1612 * in as and when someone impliments it1613 */1614
1615 if ((brd = ip_chk_addr(iph->daddr)) == 0)
1616 {1617
1618 /*1619 * The packet is for another target. Forward the frame1620 */1621
1622 #ifdefCONFIG_IP_FORWARD1623 ip_forward(skb, dev, is_frag);
1624 #else1625 printk("Machine %x tried to use us as a forwarder to %x but we have forwarding disabled!\n",
1626 iph->saddr,iph->daddr);
1627 ip_statistics.IpInAddrErrors++;
1628 #endif1629 /*1630 * The forwarder is inefficient and copies the packet. We 1631 * free the original now.1632 */1633
1634 kfree_skb(skb, FREE_WRITE);
1635 return(0);
1636 }1637
1638 /*1639 * Reassemble IP fragments. 1640 */1641
1642 if(is_frag)
1643 {1644 #ifdefCONFIG_IP_DEFRAG1645 /* Defragment. Obtain the complete packet if there is one */1646 skb=ip_defrag(iph,skb,dev);
1647 if(skb==NULL)
1648 return 0;
1649 iph=skb->h.iph;
1650 #else1651 printk("\nIP: *** datagram fragmentation not yet implemented ***\n");
1652 printk(" SRC = %s ", in_ntoa(iph->saddr));
1653 printk(" DST = %s (ignored)\n", in_ntoa(iph->daddr));
1654 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PROT_UNREACH, dev);
1655 kfree_skb(skb, FREE_WRITE);
1656 return(0);
1657 #endif1658 }1659
1660 /*1661 * Point into the IP datagram, just past the header. 1662 */1663
1664 skb->ip_hdr = iph;
1665 skb->h.raw += iph->ihl*4;
1666
1667 /*1668 * skb->h.raw now points at the protocol beyond the IP header.1669 */1670
1671 hash = iph->protocol & (MAX_INET_PROTOS -1);
1672 for (ipprot = (structinet_protocol *)inet_protos[hash];ipprot != NULL;ipprot=(structinet_protocol *)ipprot->next)
1673 {1674 structsk_buff *skb2;
1675
1676 if (ipprot->protocol != iph->protocol)
1677 continue;
1678 DPRINTF((DBG_IP, "Using protocol = %X:\n", ipprot));
1679 print_ipprot(ipprot);
1680
1681 /*1682 * See if we need to make a copy of it. This will1683 * only be set if more than one protocol wants it. 1684 * and then not for the last one.1685 *1686 * This is an artifact of poor upper protocol design. 1687 * Because the upper protocols damage the actual packet1688 * we must do copying. In actual fact it's even worse1689 * than this as TCP may hold on to the buffer.1690 */1691 if (ipprot->copy)
1692 {1693 #if 0
1694 skb2 = alloc_skb(skb->mem_len-sizeof(structsk_buff), GFP_ATOMIC);
1695 if (skb2 == NULL)
1696 continue;
1697 memcpy(skb2, skb, skb2->mem_len);
1698 skb2->ip_hdr = (structiphdr *)(
1699 (unsignedlong)skb2 +
1700 (unsignedlong) skb->ip_hdr -
1701 (unsignedlong)skb);
1702 skb2->h.raw = (unsignedchar *)(
1703 (unsignedlong)skb2 +
1704 (unsignedlong) skb->h.raw -
1705 (unsignedlong)skb);
1706 skb2->free=1;
1707 #else1708 skb2 = skb_clone(skb, GFP_ATOMIC);
1709 if(skb2==NULL)
1710 continue;
1711 #endif1712 }1713 else1714 {1715 skb2 = skb;
1716 }1717 flag = 1;
1718
1719 /*1720 * Pass on the datagram to each protocol that wants it,1721 * based on the datagram protocol. We should really1722 * check the protocol handler's return values here...1723 */1724 ipprot->handler(skb2, dev, opts_p ? &opt : 0, iph->daddr,
1725 (ntohs(iph->tot_len) - (iph->ihl * 4)),
1726 iph->saddr, 0, ipprot);
1727
1728 }1729
1730 /*1731 * All protocols checked.1732 * If this packet was a broadcast, we may *not* reply to it, since that1733 * causes (proven, grin) ARP storms and a leakage of memory (i.e. all1734 * ICMP reply messages get queued up for transmission...)1735 */1736
1737 if (!flag)
1738 {1739 if (brd != IS_BROADCAST)
1740 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PROT_UNREACH, dev);
1741 kfree_skb(skb, FREE_WRITE);
1742 }1743
1744 return(0);
1745 }1746
1747
1748 /*1749 * Queues a packet to be sent, and starts the transmitter1750 * if necessary. if free = 1 then we free the block after1751 * transmit, otherwise we don't.1752 * This routine also needs to put in the total length,1753 * and compute the checksum1754 */1755
1756 voidip_queue_xmit(structsock *sk, structdevice *dev,
/* */1757 structsk_buff *skb, intfree)
1758 {1759 structiphdr *iph;
1760 unsignedchar *ptr;
1761
1762 /* All buffers without an owner socket get freed */1763 if (sk == NULL)
1764 free = 1;
1765
1766 /* Sanity check */1767 if (dev == NULL)
1768 {1769 printk("IP: ip_queue_xmit dev = NULL\n");
1770 return;
1771 }1772
1773 IS_SKB(skb);
1774
1775 /*1776 * Do some book-keeping in the packet for later1777 */1778
1779 skb->free = free;
1780 skb->dev = dev;
1781 skb->when = jiffies;
1782
1783 DPRINTF((DBG_IP, ">>\n"));
1784
1785 /*1786 * Find the IP header and set the length. This is bad1787 * but once we get the skb data handling code in the1788 * hardware will push its header sensibly and we will1789 * set skb->ip_hdr to avoid this mess and the fixed1790 * header length problem1791 */1792
1793 ptr = skb->data;
1794 ptr += dev->hard_header_len;
1795 iph = (structiphdr *)ptr;
1796 skb->ip_hdr = iph;
1797 iph->tot_len = ntohs(skb->len-dev->hard_header_len);
1798
1799 /*1800 * Do we need to fragment. Again this is inefficient. 1801 * We need to somehow lock the original buffer and use1802 * bits of it.1803 */1804
1805 if(skb->len > dev->mtu)
1806 {1807 ip_fragment(sk,skb,dev,0);
1808 IS_SKB(skb);
1809 kfree_skb(skb,FREE_WRITE);
1810 return;
1811 }1812
1813 /*1814 * Add an IP checksum1815 */1816
1817 ip_send_check(iph);
1818
1819 /*1820 * Print the frame when debugging1821 */1822 ip_print(iph);
1823
1824 /*1825 * More debugging. You cannot queue a packet already on a list1826 * Spot this and moan loudly.1827 */1828 if (skb->next != NULL)
1829 {1830 printk("ip_queue_xmit: next != NULL\n");
1831 skb_unlink(skb);
1832 }1833
1834 /*1835 * If a sender wishes the packet to remain unfreed1836 * we add it to his send queue. This arguably belongs1837 * in the TCP level since nobody elses uses it. BUT1838 * remember IPng might change all the rules.1839 */1840
1841 if (!free)
1842 {1843 unsignedlongflags;
1844 /* The socket now has more outstanding blocks */1845
1846 sk->packets_out++;
1847
1848 /* Protect the list for a moment */1849 save_flags(flags);
1850 cli();
1851
1852 if (skb->link3 != NULL)
1853 {1854 printk("ip.c: link3 != NULL\n");
1855 skb->link3 = NULL;
1856 }1857 if (sk->send_head == NULL)
1858 {1859 sk->send_tail = skb;
1860 sk->send_head = skb;
1861 }1862 else1863 {1864 sk->send_tail->link3 = skb;
1865 sk->send_tail = skb;
1866 }1867 /* skb->link3 is NULL */1868
1869 /* Interrupt restore */1870 restore_flags(flags);
1871 /* Set the IP write timeout to the round trip time for the packet.1872 If an acknowledge has not arrived by then we may wish to act */1873 reset_timer(sk, TIME_WRITE, sk->rto);
1874 }1875 else1876 /* Remember who owns the buffer */1877 skb->sk = sk;
1878
1879 /*1880 * If the indicated interface is up and running, send the packet. 1881 */1882 ip_statistics.IpOutRequests++;
1883
1884 if (dev->flags & IFF_UP)
1885 {1886 /* 1887 * If we have an owner use its priority setting,1888 * otherwise use NORMAL1889 */1890
1891 if (sk != NULL)
1892 {1893 dev_queue_xmit(skb, dev, sk->priority);
1894 }1895 else1896 {1897 dev_queue_xmit(skb, dev, SOPRI_NORMAL);
1898 }1899 }1900 else1901 {1902 ip_statistics.IpOutDiscards++;
1903 if (free)
1904 kfree_skb(skb, FREE_WRITE);
1905 }1906 }1907
1908
1909 /*1910 * A socket has timed out on its send queue and wants to do a1911 * little retransmitting. Currently this means TCP.1912 */1913
1914 voidip_do_retransmit(structsock *sk, intall)
/* */1915 {1916 structsk_buff * skb;
1917 structproto *prot;
1918 structdevice *dev;
1919 intretransmits;
1920
1921 prot = sk->prot;
1922 skb = sk->send_head;
1923 retransmits = sk->retransmits;
1924
1925 while (skb != NULL)
1926 {1927 dev = skb->dev;
1928 IS_SKB(skb);
1929 #if 0
1930 /********** THIS IS NOW DONE BY THE DEVICE LAYER **********/1931 /*1932 * The rebuild_header function sees if the ARP is done.1933 * If not it sends a new ARP request, and if so it builds1934 * the header. It isn't really needed here, and with the1935 * new ARP pretty much will not happen.1936 */1937
1938 if (!skb->arp)
1939 {1940 if (dev->rebuild_header(skb->data, dev, skb->raddr, NULL))
1941 {1942 if (!all)
1943 break;
1944 skb = skb->link3;
1945 continue;
1946 }1947 }1948 #endif1949 skb->when = jiffies;
1950
1951 /* 1952 * If the interface is (still) up and running, kick it. 1953 */1954
1955 if (dev->flags & IFF_UP)
1956 {1957 /*1958 * If the packet is still being sent by the device/protocol1959 * below then don't retransmit. This is both needed, and good -1960 * especially with connected mode AX.25 where it stops resends1961 * occuring of an as yet unsent anyway frame!1962 * We still add up the counts as the round trip time wants1963 * adjusting.1964 */1965 if (sk && !skb_device_locked(skb))
1966 {1967 /* Remove it from any existing driver queue first! */1968 skb_unlink(skb);
1969 /* Now queue it */1970 ip_statistics.IpOutRequests++;
1971 dev_queue_xmit(skb, dev, sk->priority);
1972 }1973 }1974
1975 /*1976 * Count retransmissions1977 */1978 retransmits++;
1979 sk->prot->retransmits ++;
1980
1981 /*1982 * Only one retransmit requested.1983 */1984 if (!all)
1985 break;
1986
1987 /*1988 * This should cut it off before we send too many packets. 1989 */1990 if (sk->retransmits > sk->cong_window)
1991 break;
1992 skb = skb->link3;
1993 }1994 }1995
1996 /*1997 * This is the normal code called for timeouts. It does the retransmission1998 * and then does backoff. ip_do_retransmit is separated out because1999 * tcp_ack needs to send stuff from the retransmit queue without2000 * initiating a backoff.2001 */2002
2003 voidip_retransmit(structsock *sk, intall)
/* */2004 {2005 ip_do_retransmit(sk, all);
2006
2007 /*2008 * Increase the timeout each time we retransmit. Note that2009 * we do not increase the rtt estimate. rto is initialized2010 * from rtt, but increases here. Jacobson (SIGCOMM 88) suggests2011 * that doubling rto each time is the least we can get away with.2012 * In KA9Q, Karn uses this for the first few times, and then2013 * goes to quadratic. netBSD doubles, but only goes up to *64,2014 * and clamps at 1 to 64 sec afterwards. Note that 120 sec is2015 * defined in the protocol as the maximum possible RTT. I guess2016 * we'll have to use something other than TCP to talk to the2017 * University of Mars.2018 */2019
2020 sk->retransmits++;
2021 sk->backoff++;
2022 sk->rto = min(sk->rto << 1, 120*HZ);
2023 reset_timer(sk, TIME_WRITE, sk->rto);
2024 }2025
/*
 *	Socket option code for IP. This is the end of the line after any TCP, UDP etc.
 *	options on an IP socket.
 *
 *	We implement IP_TOS (type of service) and IP_TTL (time to live).
 *
 *	Next release we will sort out IP_OPTIONS, since for some people they are
 *	kind of important.
 */

2035 intip_setsockopt(structsock *sk, intlevel, intoptname, char *optval, intoptlen)
/* */2036 {2037 intval,err;
2038
2039 if (optval == NULL)
2040 return(-EINVAL);
2041
2042 err=verify_area(VERIFY_READ, optval, sizeof(int));
2043 if(err)
2044 returnerr;
2045
2046 val = get_fs_long((unsignedlong *)optval);
2047
2048 if(level!=SOL_IP)
2049 return -EOPNOTSUPP;
2050
2051 switch(optname)
2052 {2053 caseIP_TOS:
2054 if(val<0||val>255)
2055 return -EINVAL;
2056 sk->ip_tos=val;
2057 return 0;
2058 caseIP_TTL:
2059 if(val<1||val>255)
2060 return -EINVAL;
2061 sk->ip_ttl=val;
2062 return 0;
2063 /* IP_OPTIONS and friends go here eventually */2064 default:
2065 return(-ENOPROTOOPT);
2066 }2067 }2068
2069 /*2070 * Get the options. Note for future reference. The GET of IP options gets the2071 * _received_ ones. The set sets the _sent_ ones.2072 */2073
2074 intip_getsockopt(structsock *sk, intlevel, intoptname, char *optval, int *optlen)
/* */2075 {2076 intval,err;
2077
2078 if(level!=SOL_IP)
2079 return -EOPNOTSUPP;
2080
2081 switch(optname)
2082 {2083 caseIP_TOS:
2084 val=sk->ip_tos;
2085 break;
2086 caseIP_TTL:
2087 val=sk->ip_ttl;
2088 break;
2089 default:
2090 return(-ENOPROTOOPT);
2091 }2092 err=verify_area(VERIFY_WRITE, optlen, sizeof(int));
2093 if(err)
2094 returnerr;
2095 put_fs_long(sizeof(int),(unsignedlong *) optlen);
2096
2097 err=verify_area(VERIFY_WRITE, optval, sizeof(int));
2098 if(err)
2099 returnerr;
2100 put_fs_long(val,(unsignedlong *)optval);
2101
2102 return(0);
2103 }2104
2105 /*2106 * IP protocol layer initialiser2107 */2108
2109 staticstructpacket_typeip_packet_type =
2110 {2111 0, /* MUTTER ntohs(ETH_P_IP),*/2112 0, /* copy */2113 ip_rcv,
2114 NULL,
2115 NULL,
2116 };
2117
2118
2119 /*2120 * IP registers the packet type and then calls the subprotocol initialisers2121 */2122
2123 voidip_init(void)
/* */2124 {2125 ip_packet_type.type=htons(ETH_P_IP);
2126 dev_add_pack(&ip_packet_type);
2127 /* ip_raw_init();2128 ip_packet_init();2129 ip_tcp_init();2130 ip_udp_init();*/2131 }