1 /* 2 * INET An implementation of the TCP/IP protocol suite for the LINUX 3 * operating system. INET is implemented using the BSD Socket 4 * interface as the means of communication with the user level. 5 * 6 * The Internet Protocol (IP) module. 7 * 8 * Version: @(#)ip.c 1.0.16b 9/1/93 9 * 10 * Authors: Ross Biro, <bir7@leland.Stanford.Edu> 11 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> 12 * Donald Becker, <becker@super.org> 13 * Alan Cox, <gw4pts@gw4pts.ampr.org> 14 * 15 * Fixes: 16 * Alan Cox : Commented a couple of minor bits of surplus code 17 * Alan Cox : Undefining IP_FORWARD doesn't include the code 18 * (just stops a compiler warning). 19 * Alan Cox : Frames with >=MAX_ROUTE record routes, strict routes or loose routes 20 * are junked rather than corrupting things. 21 * Alan Cox : Frames to bad broadcast subnets are dumped 22 * We used to process them non broadcast and 23 * boy could that cause havoc. 24 * Alan Cox : ip_forward sets the free flag on the 25 * new frame it queues. Still crap because 26 * it copies the frame but at least it 27 * doesn't eat memory too. 28 * Alan Cox : Generic queue code and memory fixes. 29 * Fred Van Kempen : IP fragment support (borrowed from NET2E) 30 * Gerhard Koerting: Forward fragmented frames correctly. 31 * Gerhard Koerting: Fixes to my fix of the above 8-). 32 * Gerhard Koerting: IP interface addressing fix. 33 * Linus Torvalds : More robustness checks 34 * Alan Cox : Even more checks: Still not as robust as it ought to be 35 * Alan Cox : Save IP header pointer for later 36 * Alan Cox : ip option setting 37 * Alan Cox : Use ip_tos/ip_ttl settings 38 * Alan Cox : Fragmentation bogosity removed 39 * (Thanks to Mark.Bush@prg.ox.ac.uk) 40 * Dmitry Gorodchanin : Send of a raw packet crash fix. 41 * Alan Cox : Silly ip bug when an overlength 42 * fragment turns up. Now frees the 43 * queue. 44 * Linus Torvalds/ : Memory leakage on fragmentation 45 * Alan Cox : handling. 
46 * Gerhard Koerting: Forwarding uses IP priority hints 47 * Teemu Rantanen : Fragment problems. 48 * Alan Cox : General cleanup, comments and reformat 49 * Alan Cox : SNMP statistics 50 * Alan Cox : BSD address rule semantics. Also see 51 * UDP as there is a nasty checksum issue 52 * if you do things the wrong way. 53 * Alan Cox : Always defrag, moved IP_FORWARD to the config.in file 54 * 55 * To Fix: 56 * IP option processing is mostly not needed. ip_forward needs to know about routing rules 57 * and time stamp but that's about all. Use the route mtu field here too 58 * 59 * This program is free software; you can redistribute it and/or 60 * modify it under the terms of the GNU General Public License 61 * as published by the Free Software Foundation; either version 62 * 2 of the License, or (at your option) any later version. 63 */ 64 #include <asm/segment.h>
65 #include <asm/system.h>
66 #include <linux/types.h>
67 #include <linux/kernel.h>
68 #include <linux/sched.h>
69 #include <linux/string.h>
70 #include <linux/errno.h>
71 #include <linux/socket.h>
72 #include <linux/sockios.h>
73 #include <linux/in.h>
74 #include <linux/inet.h>
75 #include <linux/netdevice.h>
76 #include <linux/etherdevice.h>
77 #include "snmp.h"
78 #include "ip.h"
79 #include "protocol.h"
80 #include "route.h"
81 #include "tcp.h"
82 #include <linux/skbuff.h>
83 #include "sock.h"
84 #include "arp.h"
85 #include "icmp.h"
86
87 #define CONFIG_IP_DEFRAG
88
89 externintlast_retran;
90 externvoidsort_send(structsock *sk);
91
92 #definemin(a,b) ((a)<(b)?(a):(b))
93 #defineLOOPBACK(x) (((x) & htonl(0xff000000)) == htonl(0x7f000000))
94
95 /* 96 * SNMP management statistics 97 */ 98
99 structip_mibip_statistics={1,64,}; /* Forwarding=Yes, Default TTL=64 */ 100
101 /* 102 * Handle the issuing of an ioctl() request 103 * for the ip device. This is scheduled to 104 * disappear 105 */ 106
/*
 *	Handle the issuing of an ioctl() request for the IP device.
 *	No commands are currently supported, so every request is
 *	rejected. This hook is scheduled to disappear.
 */

int ip_ioctl(struct sock *sk, int cmd, unsigned long arg)
{
	switch (cmd)
	{
		default:
			return -EINVAL;
	}
}
116
117 /* these two routines will do routing. */ 118
/*
 *	Handle the strict source route option.
 *	Not implemented yet: the datagram is left untouched.
 */
static void strict_route(struct iphdr *iph, struct options *opt)
{
	/* Intentionally empty -- strict source routing is unimplemented. */
}
124
/*
 *	Handle the loose source route option.
 *	Not implemented yet: the datagram is left untouched.
 */
static void loose_route(struct iphdr *iph, struct options *opt)
{
	/* Intentionally empty -- loose source routing is unimplemented. */
}
130
131
132
/*
 *	Check whether we have lost a gateway for this destination.
 *	Currently a no-op placeholder.
 */
void ip_route_check(unsigned long daddr)
{
	/* Nothing to do -- gateway loss detection is not implemented. */
}
139
#if 0
/*
 * Append IP options after the header. Disabled (#if 0): no options are
 * supported, so this would only write a single terminating octet and
 * return the padded option length of 4 bytes (one 32-bit word).
 */
static int
build_options(struct iphdr *iph, struct options *opt)
{
	unsigned char *ptr;
	/* currently we don't support any options. */
	ptr = (unsigned char *)(iph+1);	/* options start right after the fixed header */
	*ptr = 0;			/* IPOPT_END terminates the (empty) list */
	return (4);			/* padded length of the option area */
}
#endif
153
154 /* 155 * Take an skb, and fill in the MAC header. 156 */ 157
158 staticintip_send(structsk_buff *skb, unsignedlongdaddr, intlen, structdevice *dev, unsignedlongsaddr)
/* */ 159 { 160 intmac = 0;
161
162 skb->dev = dev;
163 skb->arp = 1;
164 if (dev->hard_header)
165 { 166 /* 167 * Build a hardware header. Source address is our mac, destination unknown 168 * (rebuild header will sort this out) 169 */ 170 mac = dev->hard_header(skb->data, dev, ETH_P_IP, NULL, NULL, len, skb);
171 if (mac < 0)
172 { 173 mac = -mac;
174 skb->arp = 0;
175 skb->raddr = daddr; /* next routing address */ 176 } 177 } 178 returnmac;
179 } 180
int ip_id_count = 0;	/* Next IP datagram identification value */
182
/*
 * This routine builds the appropriate hardware/IP headers for
 * the routine. It assumes that if *dev != NULL then the
 * protocol knows what it's doing, otherwise it uses the
 * routing/ARP tables to select a device struct.
 *
 * Returns the total number of header bytes written (MAC header, plus
 * 20 bytes of IP header unless type == IPPROTO_RAW), or -ENETUNREACH
 * when no route exists.
 *
 * NOTE(review): tot_len, id and check are NOT filled in here;
 * presumably the caller (ip_queue_xmit) completes them -- verify.
 */
int ip_build_header(struct sk_buff *skb, unsigned long saddr, unsigned long daddr,
		struct device **dev, int type, struct options *opt, int len, int tos, int ttl)
{
	static struct options optmem;
	struct iphdr *iph;
	struct rtable *rt;
	unsigned char *buff;
	unsigned long raddr;		/* next-hop (router) address */
	int tmp;
	unsigned long src;		/* source address suggested by the route */

	/*
	 * If there is no 'from' address as yet, then make it our loopback
	 */
	if (saddr == 0)
		saddr = ip_my_addr();

	buff = skb->data;

	/*
	 * See if we need to look up the device.
	 */
	if (*dev == NULL)
	{
		if (skb->localroute)
			rt = ip_rt_local(daddr, &optmem, &src);
		else
			rt = ip_rt_route(daddr, &optmem, &src);
		if (rt == NULL)
		{
			ip_statistics.IpOutNoRoutes++;
			return(-ENETUNREACH);
		}

		*dev = rt->rt_dev;
		/*
		 * If the frame is from us and going off machine it MUST MUST MUST
		 * have the output device ip address and never the loopback
		 */
		if (LOOPBACK(saddr) && !LOOPBACK(daddr))
			saddr = src;/*rt->rt_dev->pa_addr;*/
		raddr = rt->rt_gateway;

		opt = &optmem;
	}
	else
	{
		/*
		 * We still need the address of the first hop.
		 */
		if (skb->localroute)
			rt = ip_rt_local(daddr, &optmem, &src);
		else
			rt = ip_rt_route(daddr, &optmem, &src);
		/*
		 * If the frame is from us and going off machine it MUST MUST MUST
		 * have the output device ip address and never the loopback
		 */
		/* NOTE(review): if rt == NULL here, 'src' may be left unset by
		   the lookup and the assignment below would read an
		   uninitialized value -- confirm ip_rt_route's contract. */
		if (LOOPBACK(saddr) && !LOOPBACK(daddr))
			saddr = src;/*rt->rt_dev->pa_addr;*/

		raddr = (rt == NULL) ? 0 : rt->rt_gateway;
	}

	/*
	 * No gateway so aim at the real destination
	 */
	if (raddr == 0)
		raddr = daddr;

	/*
	 * Now build the MAC header.
	 */
	tmp = ip_send(skb, raddr, len, *dev, saddr);
	buff += tmp;		/* IP header goes right after the MAC header */
	len -= tmp;

	/*
	 * Book keeping
	 */
	skb->dev = *dev;
	skb->saddr = saddr;
	if (skb->sk)
		skb->sk->saddr = saddr;

	/*
	 * Now build the IP header.
	 */

	/*
	 * If we are using IPPROTO_RAW, then we don't need an IP header, since
	 * one is being supplied to us by the user
	 */
	if (type == IPPROTO_RAW)
		return (tmp);

	iph = (struct iphdr *)buff;
	iph->version = 4;
	iph->tos = tos;
	iph->frag_off = 0;
	iph->ttl = ttl;
	iph->daddr = daddr;
	iph->saddr = saddr;
	iph->protocol = type;
	iph->ihl = 5;		/* no options: 5 * 4 = 20 byte header */

	/* Setup the IP options. */
#ifdef Not_Yet_Avail
	build_options(iph, opt);
#endif

	return(20 + tmp);	/* IP header plus MAC header size */
}
308
/*
 * Parse the IP options of an incoming datagram into *opt.
 *
 * NOTE: the unconditional return(0) after the zeroing block below
 * deliberately DISABLES all option processing (see the "To Fix" note
 * in the file header: "IP option processing is mostly not needed").
 * Everything past that return is retained but unreachable.
 *
 * Returns 0 on success, 1 on a malformed option (in the dead code).
 */
static int
do_options(struct iphdr *iph, struct options *opt)
{
	unsigned char *buff;
	int done = 0;
	int i, len = sizeof(struct iphdr);

	/* Zero out the options. */
	opt->record_route.route_size = 0;
	opt->loose_route.route_size = 0;
	opt->strict_route.route_size = 0;
	opt->tstamp.ptr = 0;
	opt->security = 0;
	opt->compartment = 0;
	opt->handling = 0;
	opt->stream = 0;
	opt->tcc = 0;
	return(0);	/* option parsing intentionally disabled here */

	/* ------- everything below this point is unreachable ------- */

	/* Advance the pointer to start at the options. */
	buff = (unsigned char *)(iph + 1);

	/* Now start the processing. */
	while (!done && len < iph->ihl*4) switch(*buff) {
		case IPOPT_END:
			done = 1;
			break;
		case IPOPT_NOOP:
			buff++;
			len++;
			break;
		case IPOPT_SEC:
			buff++;
			if (*buff != 11) return(1);	/* security option is fixed 11 bytes */
			buff++;
			opt->security = ntohs(*(unsigned short *)buff);
			buff += 2;
			opt->compartment = ntohs(*(unsigned short *)buff);
			buff += 2;
			opt->handling = ntohs(*(unsigned short *)buff);
			buff += 2;
			opt->tcc = ((*buff) << 16) + ntohs(*(unsigned short *)(buff+1));
			buff += 3;
			len += 11;
			break;
		case IPOPT_LSRR:
			buff++;
			if ((*buff - 3)% 4 != 0) return(1);	/* length must be 3 + 4n */
			len += *buff;
			opt->loose_route.route_size = (*buff -3)/4;
			buff++;
			if (*buff % 4 != 0) return(1);		/* pointer must be word aligned */
			opt->loose_route.pointer = *buff/4 - 1;
			buff++;
			buff++;
			for (i = 0; i < opt->loose_route.route_size; i++) {
				if(i>=MAX_ROUTE)
					return(1);	/* junk over-long routes */
				opt->loose_route.route[i] = *(unsigned long *)buff;
				buff += 4;
			}
			break;
		case IPOPT_SSRR:
			buff++;
			if ((*buff - 3)% 4 != 0) return(1);
			len += *buff;
			opt->strict_route.route_size = (*buff -3)/4;
			buff++;
			if (*buff % 4 != 0) return(1);
			opt->strict_route.pointer = *buff/4 - 1;
			buff++;
			buff++;
			for (i = 0; i < opt->strict_route.route_size; i++) {
				if(i>=MAX_ROUTE)
					return(1);
				opt->strict_route.route[i] = *(unsigned long *)buff;
				buff += 4;
			}
			break;
		case IPOPT_RR:
			buff++;
			if ((*buff - 3)% 4 != 0) return(1);
			len += *buff;
			opt->record_route.route_size = (*buff -3)/4;
			buff++;
			if (*buff % 4 != 0) return(1);
			opt->record_route.pointer = *buff/4 - 1;
			buff++;
			buff++;
			for (i = 0; i < opt->record_route.route_size; i++) {
				if(i>=MAX_ROUTE)
					return 1;
				opt->record_route.route[i] = *(unsigned long *)buff;
				buff += 4;
			}
			break;
		case IPOPT_SID:
			len += 4;
			buff +=2;
			/* NOTE(review): unlike the other cases, the stream id is
			   not byte-swapped with ntohs here -- verify intent. */
			opt->stream = *(unsigned short *)buff;
			buff += 2;
			break;
		case IPOPT_TIMESTAMP:
			buff++;
			len += *buff;
			if (*buff % 4 != 0) return(1);
			opt->tstamp.len = *buff / 4 - 1;
			buff++;
			if ((*buff - 1) % 4 != 0) return(1);
			opt->tstamp.ptr = (*buff-1)/4;
			buff++;
			opt->tstamp.x.full_char = *buff;	/* overflow/flags octet */
			buff++;
			for (i = 0; i < opt->tstamp.len; i++) {
				opt->tstamp.data[i] = *(unsigned long *)buff;
				buff += 4;
			}
			break;
		default:
			return(1);	/* unknown option: reject the frame */
	}

	/* If no route was recorded, reuse a source route as the record. */
	if (opt->record_route.route_size == 0) {
		if (opt->strict_route.route_size != 0) {
			memcpy(&(opt->record_route), &(opt->strict_route),
			       sizeof(opt->record_route));
		} else if (opt->loose_route.route_size != 0) {
			memcpy(&(opt->record_route), &(opt->loose_route),
			       sizeof(opt->record_route));
		}
	}

	/* Hand unfinished source routes to the (stub) routing helpers. */
	if (opt->strict_route.route_size != 0 &&
	    opt->strict_route.route_size != opt->strict_route.pointer) {
		strict_route(iph, opt);
		return(0);
	}

	if (opt->loose_route.route_size != 0 &&
	    opt->loose_route.route_size != opt->loose_route.pointer) {
		loose_route(iph, opt);
		return(0);
	}

	return(0);
}
456 /* 457 * This is a version of ip_compute_csum() optimized for IP headers, which 458 * always checksum on 4 octet boundaries. 459 */ 460
/*
 * This is a version of ip_compute_csum() optimized for IP headers, which
 * always checksum on 4 octet boundaries.
 *
 * 'wlen' is the header length in 32-bit words (iph->ihl). Returns the
 * 16-bit one's-complement checksum. i386 inline assembly: sums the
 * words with end-around carry, then folds the 32-bit sum into 16 bits.
 */
static inline unsigned short ip_fast_csum(unsigned char * buff, int wlen)
{
	unsigned long sum = 0;

	if (wlen)
	{
		unsigned long bogus;	/* scratch register for the fold */
		__asm__("clc\n"
		    "1:\t"
		    "lodsl\n\t"			/* load next 32-bit word */
		    "adcl %3, %0\n\t"		/* add with carry into sum */
		    "decl %2\n\t"
		    "jne 1b\n\t"
		    "adcl $0, %0\n\t"		/* absorb final carry */
		    "movl %0, %3\n\t"
		    "shrl $16, %3\n\t"
		    "addw %w3, %w0\n\t"		/* fold high 16 into low 16 */
		    "adcw $0, %w0"
		    : "=r" (sum), "=S" (buff), "=r" (wlen), "=a" (bogus)
		    : "0" (sum), "1" (buff), "2" (wlen));
	}
	return (~sum) & 0xffff;		/* one's complement of the folded sum */
}
485 /* 486 * This routine does all the checksum computations that don't 487 * require anything special (like copying or special headers). 488 */ 489
/*
 * This routine does all the checksum computations that don't
 * require anything special (like copying or special headers).
 *
 * Sums 'len' bytes at 'buff' as 16-bit one's-complement words
 * (i386 inline assembly) and returns the complemented 16-bit result.
 * Handles the bulk as 32-bit words, then a trailing 16-bit word,
 * then a final odd byte.
 */
unsigned short ip_compute_csum(unsigned char * buff, int len)
{
	unsigned long sum = 0;

	/* Do the first multiple of 4 bytes and convert to 16 bits. */
	if (len > 3)
	{
		__asm__("clc\n"
		    "1:\t"
		    "lodsl\n\t"			/* load next 32-bit word */
		    "adcl %%eax, %%ebx\n\t"	/* accumulate with carry */
		    "loop 1b\n\t"
		    "adcl $0, %%ebx\n\t"	/* absorb final carry */
		    "movl %%ebx, %%eax\n\t"
		    "shrl $16, %%eax\n\t"
		    "addw %%ax, %%bx\n\t"	/* fold 32 bits into 16 */
		    "adcw $0, %%bx"
		    : "=b" (sum) , "=S" (buff)
		    : "0" (sum), "c" (len >> 2) ,"1" (buff)
		    : "ax", "cx", "si", "bx" );
	}
	if (len & 2)
	{
		/* Add the remaining 16-bit word. */
		__asm__("lodsw\n\t"
		    "addw %%ax, %%bx\n\t"
		    "adcw $0, %%bx"
		    : "=b" (sum), "=S" (buff)
		    : "0" (sum), "1" (buff)
		    : "bx", "ax", "si");
	}
	if (len & 1)
	{
		/* Add the final odd byte, zero-extended to 16 bits. */
		__asm__("lodsb\n\t"
		    "movb $0, %%ah\n\t"
		    "addw %%ax, %%bx\n\t"
		    "adcw $0, %%bx"
		    : "=b" (sum), "=S" (buff)
		    : "0" (sum), "1" (buff)
		    : "bx", "ax", "si");
	}
	sum =~sum;		/* one's complement */
	return(sum & 0xffff);
}
534 /* 535 * Check the header of an incoming IP datagram. This version is still used in slhc.c. 536 */ 537
538 intip_csum(structiphdr *iph)
/* */ 539 { 540 returnip_fast_csum((unsignedchar *)iph, iph->ihl);
541 } 542
543 /* 544 * Generate a checksym for an outgoing IP datagram. 545 */ 546
547 staticvoidip_send_check(structiphdr *iph)
/* */ 548 { 549 iph->check = 0;
550 iph->check = ip_fast_csum((unsignedchar *)iph, iph->ihl);
551 } 552
553 /************************ Fragment Handlers From NET2E not yet with tweaks to beat 4K **********************************/ 554
555
556 /* 557 * This fragment handler is a bit of a heap. On the other hand it works quite 558 * happily and handles things quite well. 559 */ 560
static struct ipq *ipqueue = NULL;	/* Head of the IP fragment reassembly queue */
563 /* 564 * Create a new fragment entry. 565 */ 566
567 staticstructipfrag *ip_frag_create(intoffset, intend, structsk_buff *skb, unsignedchar *ptr)
/* */ 568 { 569 structipfrag *fp;
570
571 fp = (structipfrag *) kmalloc(sizeof(structipfrag), GFP_ATOMIC);
572 if (fp == NULL)
573 { 574 printk("IP: frag_create: no memory left !\n");
575 return(NULL);
576 } 577 memset(fp, 0, sizeof(structipfrag));
578
579 /* Fill in the structure. */ 580 fp->offset = offset;
581 fp->end = end;
582 fp->len = end - offset;
583 fp->skb = skb;
584 fp->ptr = ptr;
585
586 return(fp);
587 } 588
589
590 /* 591 * Find the correct entry in the "incomplete datagrams" queue for 592 * this IP datagram, and return the queue entry address if found. 593 */ 594
595 staticstructipq *ip_find(structiphdr *iph)
/* */ 596 { 597 structipq *qp;
598 structipq *qplast;
599
600 cli();
601 qplast = NULL;
602 for(qp = ipqueue; qp != NULL; qplast = qp, qp = qp->next)
603 { 604 if (iph->id== qp->iph->id && iph->saddr == qp->iph->saddr &&
605 iph->daddr == qp->iph->daddr && iph->protocol == qp->iph->protocol)
606 { 607 del_timer(&qp->timer); /* So it doesn't vanish on us. The timer will be reset anyway */ 608 sti();
609 return(qp);
610 } 611 } 612 sti();
613 return(NULL);
614 } 615
616
617 /* 618 * Remove an entry from the "incomplete datagrams" queue, either 619 * because we completed, reassembled and processed it, or because 620 * it timed out. 621 */ 622
623 staticvoidip_free(structipq *qp)
/* */ 624 { 625 structipfrag *fp;
626 structipfrag *xp;
627
628 /* 629 * Stop the timer for this entry. 630 */ 631
632 del_timer(&qp->timer);
633
634 /* Remove this entry from the "incomplete datagrams" queue. */ 635 cli();
636 if (qp->prev == NULL)
637 { 638 ipqueue = qp->next;
639 if (ipqueue != NULL)
640 ipqueue->prev = NULL;
641 } 642 else 643 { 644 qp->prev->next = qp->next;
645 if (qp->next != NULL)
646 qp->next->prev = qp->prev;
647 } 648
649 /* Release all fragment data. */ 650
651 fp = qp->fragments;
652 while (fp != NULL)
653 { 654 xp = fp->next;
655 IS_SKB(fp->skb);
656 kfree_skb(fp->skb,FREE_READ);
657 kfree_s(fp, sizeof(structipfrag));
658 fp = xp;
659 } 660
661 /* Release the MAC header. */ 662 kfree_s(qp->mac, qp->maclen);
663
664 /* Release the IP header. */ 665 kfree_s(qp->iph, qp->ihlen + 8);
666
667 /* Finally, release the queue descriptor itself. */ 668 kfree_s(qp, sizeof(structipq));
669 sti();
670 } 671
672
673 /* 674 * Oops- a fragment queue timed out. Kill it and send an ICMP reply. 675 */ 676
677 staticvoidip_expire(unsignedlongarg)
/* */ 678 { 679 structipq *qp;
680
681 qp = (structipq *)arg;
682
683 /* 684 * Send an ICMP "Fragment Reassembly Timeout" message. 685 */ 686
687 ip_statistics.IpReasmTimeout++;
688 ip_statistics.IpReasmFails++;
689 /* This if is always true... shrug */ 690 if(qp->fragments!=NULL)
691 icmp_send(qp->fragments->skb,ICMP_TIME_EXCEEDED,
692 ICMP_EXC_FRAGTIME, qp->dev);
693
694 /* 695 * Nuke the fragment queue. 696 */ 697 ip_free(qp);
698 } 699
700
701 /* 702 * Add an entry to the 'ipq' queue for a newly received IP datagram. 703 * We will (hopefully :-) receive all other fragments of this datagram 704 * in time, so we just create a queue for this datagram, in which we 705 * will insert the received fragments at their respective positions. 706 */ 707
708 staticstructipq *ip_create(structsk_buff *skb, structiphdr *iph, structdevice *dev)
/* */ 709 { 710 structipq *qp;
711 intmaclen;
712 intihlen;
713
714 qp = (structipq *) kmalloc(sizeof(structipq), GFP_ATOMIC);
715 if (qp == NULL)
716 { 717 printk("IP: create: no memory left !\n");
718 return(NULL);
719 skb->dev = qp->dev;
720 } 721 memset(qp, 0, sizeof(structipq));
722
723 /* 724 * Allocate memory for the MAC header. 725 * 726 * FIXME: We have a maximum MAC address size limit and define 727 * elsewhere. We should use it here and avoid the 3 kmalloc() calls 728 */ 729
730 maclen = ((unsignedlong) iph) - ((unsignedlong) skb->data);
731 qp->mac = (unsignedchar *) kmalloc(maclen, GFP_ATOMIC);
732 if (qp->mac == NULL)
733 { 734 printk("IP: create: no memory left !\n");
735 kfree_s(qp, sizeof(structipq));
736 return(NULL);
737 } 738
739 /* 740 * Allocate memory for the IP header (plus 8 octects for ICMP). 741 */ 742
743 ihlen = (iph->ihl * sizeof(unsignedlong));
744 qp->iph = (structiphdr *) kmalloc(ihlen + 8, GFP_ATOMIC);
745 if (qp->iph == NULL)
746 { 747 printk("IP: create: no memory left !\n");
748 kfree_s(qp->mac, maclen);
749 kfree_s(qp, sizeof(structipq));
750 return(NULL);
751 } 752
753 /* Fill in the structure. */ 754 memcpy(qp->mac, skb->data, maclen);
755 memcpy(qp->iph, iph, ihlen + 8);
756 qp->len = 0;
757 qp->ihlen = ihlen;
758 qp->maclen = maclen;
759 qp->fragments = NULL;
760 qp->dev = dev;
761
762 /* Start a timer for this entry. */ 763 qp->timer.expires = IP_FRAG_TIME; /* about 30 seconds */ 764 qp->timer.data = (unsignedlong) qp; /* pointer to queue */ 765 qp->timer.function = ip_expire; /* expire function */ 766 add_timer(&qp->timer);
767
768 /* Add this entry to the queue. */ 769 qp->prev = NULL;
770 cli();
771 qp->next = ipqueue;
772 if (qp->next != NULL)
773 qp->next->prev = qp;
774 ipqueue = qp;
775 sti();
776 return(qp);
777 } 778
779
780 /* 781 * See if a fragment queue is complete. 782 */ 783
784 staticintip_done(structipq *qp)
/* */ 785 { 786 structipfrag *fp;
787 intoffset;
788
789 /* Only possible if we received the final fragment. */ 790 if (qp->len == 0)
791 return(0);
792
793 /* Check all fragment offsets to see if they connect. */ 794 fp = qp->fragments;
795 offset = 0;
796 while (fp != NULL)
797 { 798 if (fp->offset > offset)
799 return(0); /* fragment(s) missing */ 800 offset = fp->end;
801 fp = fp->next;
802 } 803
804 /* All fragments are present. */ 805 return(1);
806 } 807
808
809 /* 810 * Build a new IP datagram from all its fragments. 811 * 812 * FIXME: We copy here because we lack an effective way of handling lists 813 * of bits on input. Until the new skb data handling is in I'm not going 814 * to touch this with a bargepole. This also causes a 4Kish limit on 815 * packet sizes. 816 */ 817
/*
 * Build a new IP datagram from all its fragments.
 *
 * FIXME: We copy here because we lack an effective way of handling lists
 * of bits on input. Until the new skb data handling is in I'm not going
 * to touch this with a bargepole. This also causes a 4Kish limit on
 * packet sizes.
 *
 * Consumes the queue entry (ip_free) on every path. Returns the
 * reassembled skb, or NULL on allocation failure / corrupt fragment
 * list.
 */
static struct sk_buff *ip_glue(struct ipq *qp)
{
	struct sk_buff *skb;
	struct iphdr *iph;
	struct ipfrag *fp;
	unsigned char *ptr;
	int count, len;

	/*
	 * Allocate a new buffer for the datagram.
	 */
	len = qp->maclen + qp->ihlen + qp->len;

	if ((skb = alloc_skb(len,GFP_ATOMIC)) == NULL)
	{
		ip_statistics.IpReasmFails++;
		printk("IP: queue_glue: no memory for glueing queue 0x%X\n", (int) qp);
		ip_free(qp);
		return(NULL);
	}

	/* Fill in the basic details. */
	skb->len = (len - qp->maclen);	/* length excluding the MAC header */
	skb->h.raw = skb->data;
	skb->free = 1;

	/* Copy the original MAC and IP headers into the new buffer. */
	ptr = (unsigned char *) skb->h.raw;
	memcpy(ptr, ((unsigned char *) qp->mac), qp->maclen);
	ptr += qp->maclen;
	memcpy(ptr, ((unsigned char *) qp->iph), qp->ihlen);
	ptr += qp->ihlen;		/* ptr now points at the data area */
	skb->h.raw += qp->maclen;	/* h.raw now points at the IP header */

	count = 0;			/* bytes of payload copied so far */

	/* Copy the data portions of all fragments into the new buffer. */
	fp = qp->fragments;
	while(fp != NULL)
	{
		/* Sanity: a fragment must not run past the buffer we sized. */
		if(count+fp->len > skb->len)
		{
			printk("Invalid fragment list: Fragment over size.\n");
			ip_free(qp);
			kfree_skb(skb,FREE_WRITE);
			ip_statistics.IpReasmFails++;
			return NULL;
		}
		memcpy((ptr + fp->offset), fp->ptr, fp->len);
		count += fp->len;
		fp = fp->next;
	}

	/* We glued together all fragments, so remove the queue entry. */
	ip_free(qp);

	/* Done with all fragments. Fixup the new IP header. */
	iph = skb->h.iph;
	iph->frag_off = 0;		/* reassembled: clear all fragment bits */
	iph->tot_len = htons((iph->ihl * sizeof(unsigned long)) + count);
	skb->ip_hdr = iph;

	ip_statistics.IpReasmOKs++;
	return(skb);
}
885
886 /* 887 * Process an incoming IP datagram fragment. 888 */ 889
890 staticstructsk_buff *ip_defrag(structiphdr *iph, structsk_buff *skb, structdevice *dev)
/* */ 891 { 892 structipfrag *prev, *next;
893 structipfrag *tfp;
894 structipq *qp;
895 structsk_buff *skb2;
896 unsignedchar *ptr;
897 intflags, offset;
898 inti, ihl, end;
899
900 ip_statistics.IpReasmReqds++;
901
902 /* Find the entry of this IP datagram in the "incomplete datagrams" queue. */ 903 qp = ip_find(iph);
904
905 /* Is this a non-fragmented datagram? */ 906 offset = ntohs(iph->frag_off);
907 flags = offset & ~IP_OFFSET;
908 offset &= IP_OFFSET;
909 if (((flags & IP_MF) == 0) && (offset == 0))
910 { 911 if (qp != NULL)
912 ip_free(qp); /* Huh? How could this exist?? */ 913 return(skb);
914 } 915
916 offset <<= 3; /* offset is in 8-byte chunks */ 917
918 /* 919 * If the queue already existed, keep restarting its timer as long 920 * as we still are receiving fragments. Otherwise, create a fresh 921 * queue entry. 922 */ 923
924 if (qp != NULL)
925 { 926 del_timer(&qp->timer);
927 qp->timer.expires = IP_FRAG_TIME; /* about 30 seconds */ 928 qp->timer.data = (unsignedlong) qp; /* pointer to queue */ 929 qp->timer.function = ip_expire; /* expire function */ 930 add_timer(&qp->timer);
931 } 932 else 933 { 934 /* 935 * If we failed to create it, then discard the frame 936 */ 937 if ((qp = ip_create(skb, iph, dev)) == NULL)
938 { 939 skb->sk = NULL;
940 kfree_skb(skb, FREE_READ);
941 ip_statistics.IpReasmFails++;
942 returnNULL;
943 } 944 } 945
946 /* 947 * Determine the position of this fragment. 948 */ 949
950 ihl = (iph->ihl * sizeof(unsignedlong));
951 end = offset + ntohs(iph->tot_len) - ihl;
952
953 /* 954 * Point into the IP datagram 'data' part. 955 */ 956
957 ptr = skb->data + dev->hard_header_len + ihl;
958
959 /* 960 * Is this the final fragment? 961 */ 962
963 if ((flags & IP_MF) == 0)
964 qp->len = end;
965
966 /* 967 * Find out which fragments are in front and at the back of us 968 * in the chain of fragments so far. We must know where to put 969 * this fragment, right? 970 */ 971
972 prev = NULL;
973 for(next = qp->fragments; next != NULL; next = next->next)
974 { 975 if (next->offset > offset)
976 break; /* bingo! */ 977 prev = next;
978 } 979
980 /* 981 * We found where to put this one. 982 * Check for overlap with preceeding fragment, and, if needed, 983 * align things so that any overlaps are eliminated. 984 */ 985 if (prev != NULL && offset < prev->end)
986 { 987 i = prev->end - offset;
988 offset += i; /* ptr into datagram */ 989 ptr += i; /* ptr into fragment data */ 990 } 991
992 /* 993 * Look for overlap with succeeding segments. 994 * If we can merge fragments, do it. 995 */ 996
997 for(; next != NULL; next = tfp)
998 { 999 tfp = next->next;
1000 if (next->offset >= end)
1001 break; /* no overlaps at all */1002
1003 i = end - next->offset; /* overlap is 'i' bytes */1004 next->len -= i; /* so reduce size of */1005 next->offset += i; /* next fragment */1006 next->ptr += i;
1007
1008 /* 1009 * If we get a frag size of <= 0, remove it and the packet1010 * that it goes with.1011 */1012 if (next->len <= 0)
1013 {1014 if (next->prev != NULL)
1015 next->prev->next = next->next;
1016 else1017 qp->fragments = next->next;
1018
1019 if (tfp->next != NULL)
1020 next->next->prev = next->prev;
1021
1022 kfree_skb(next->skb,FREE_READ);
1023 kfree_s(next, sizeof(structipfrag));
1024 }1025 }1026
1027 /* 1028 * Insert this fragment in the chain of fragments. 1029 */1030
1031 tfp = NULL;
1032 tfp = ip_frag_create(offset, end, skb, ptr);
1033
1034 /*1035 * No memory to save the fragment - so throw the lot1036 */1037
1038 if (!tfp)
1039 {1040 skb->sk = NULL;
1041 kfree_skb(skb, FREE_READ);
1042 returnNULL;
1043 }1044 tfp->prev = prev;
1045 tfp->next = next;
1046 if (prev != NULL)
1047 prev->next = tfp;
1048 else1049 qp->fragments = tfp;
1050
1051 if (next != NULL)
1052 next->prev = tfp;
1053
1054 /*1055 * OK, so we inserted this new fragment into the chain.1056 * Check if we now have a full IP datagram which we can1057 * bump up to the IP layer...1058 */1059
1060 if (ip_done(qp))
1061 {1062 skb2 = ip_glue(qp); /* glue together the fragments */1063 return(skb2);
1064 }1065 return(NULL);
1066 }1067
1068
1069 /*1070 * This IP datagram is too large to be sent in one piece. Break it up into1071 * smaller pieces (each of size equal to the MAC header plus IP header plus1072 * a block of the data of the original IP data part) that will yet fit in a1073 * single device frame, and queue such a frame for sending by calling the1074 * ip_queue_xmit(). Note that this is recursion, and bad things will happen1075 * if this function causes a loop...1076 *1077 * Yes this is inefficient, feel free to submit a quicker one.1078 *1079 * **Protocol Violation**1080 * We copy all the options to each fragment. !FIXME!1081 */1082
/*
 * This IP datagram is too large to be sent in one piece. Break it up into
 * smaller pieces (each of size equal to the MAC header plus IP header plus
 * a block of the data of the original IP data part) that will yet fit in a
 * single device frame, and queue such a frame for sending by calling the
 * ip_queue_xmit(). Note that this is recursion, and bad things will happen
 * if this function causes a loop...
 *
 * Yes this is inefficient, feel free to submit a quicker one.
 *
 * **Protocol Violation**
 * We copy all the options to each fragment. !FIXME!
 *
 * NOTE(review): each fragment's tot_len and checksum are not set here;
 * presumably ip_queue_xmit fixes them up -- verify.
 */
void ip_fragment(struct sock *sk, struct sk_buff *skb, struct device *dev, int is_frag)
{
	struct iphdr *iph;
	unsigned char *raw;
	unsigned char *ptr;
	struct sk_buff *skb2;
	int left, mtu, hlen, len;
	int offset;
	unsigned long flags;

	/*
	 * Point into the IP datagram header.
	 */
	raw = skb->data;
	iph = (struct iphdr *) (raw + dev->hard_header_len);

	skb->ip_hdr = iph;

	/*
	 * Setup starting values.
	 */
	hlen = (iph->ihl * sizeof(unsigned long));
	left = ntohs(iph->tot_len) - hlen;	/* Space per frame */
	hlen += dev->hard_header_len;		/* Total header size */
	mtu = (dev->mtu - hlen);		/* Size of data space */
	ptr = (raw + hlen);			/* Where to start from */

	/*
	 * Check for any "DF" flag. [DF means do not fragment]
	 */
	if (ntohs(iph->frag_off) & IP_DF)
	{
		ip_statistics.IpFragFails++;
		icmp_send(skb,ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, dev);
		return;
	}

	/*
	 * The protocol doesn't seem to say what to do in the case that the
	 * frame + options doesn't fit the mtu. As it used to fall down dead
	 * in this case we were fortunate it didn't happen
	 */
	if(mtu<8)
	{
		/* It's wrong but its better than nothing */
		icmp_send(skb,ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED,dev);
		ip_statistics.IpFragFails++;
		return;
	}

	/*
	 * Fragment the datagram.
	 */

	/*
	 * The initial offset is 0 for a complete frame. When
	 * fragmenting fragments its wherever this one starts.
	 */
	if (is_frag & 2)
		offset = (ntohs(iph->frag_off) & 0x1fff) << 3;
	else
		offset = 0;

	/*
	 * Keep copying data until we run out.
	 */
	while(left > 0)
	{
		len = left;
		/* IF: it doesn't fit, use 'mtu' - the data space left */
		if (len > mtu)
			len = mtu;
		/* IF: we are not sending upto and including the packet end
		   then align the next start on an eight byte boundary */
		if (len < left)
		{
			len/=8;		/* fragment offsets are in 8-byte units */
			len*=8;
		}
		/*
		 * Allocate buffer.
		 */
		if ((skb2 = alloc_skb(len + hlen,GFP_ATOMIC)) == NULL)
		{
			printk("IP: frag: no memory for new fragment!\n");
			ip_statistics.IpFragFails++;
			return;
		}

		/*
		 * Set up data on packet
		 */
		skb2->arp = skb->arp;
		if(skb->free==0)
			printk("IP fragmenter: BUG free!=1 in fragmenter\n");
		skb2->free = 1;
		skb2->len = len + hlen;
		skb2->h.raw=(char *) skb2->data;
		/*
		 * Charge the memory for the fragment to any owner
		 * it might posess
		 */
		save_flags(flags);
		if (sk)
		{
			cli();
			sk->wmem_alloc += skb2->mem_len;
			skb2->sk=sk;
		}
		restore_flags(flags);
		skb2->raddr = skb->raddr;	/* For rebuild_header - must be here */

		/*
		 * Copy the packet header into the new buffer.
		 */
		memcpy(skb2->h.raw, raw, hlen);

		/*
		 * Copy a block of the IP datagram.
		 */
		memcpy(skb2->h.raw + hlen, ptr, len);
		left -= len;

		skb2->h.raw+=dev->hard_header_len;	/* point at the IP header */

		/*
		 * Fill in the new header fields.
		 */
		iph = (struct iphdr *)(skb2->h.raw/*+dev->hard_header_len*/);
		iph->frag_off = htons((offset >> 3));
		/*
		 * Added AC : If we are fragmenting a fragment thats not the
		 * last fragment then keep MF on each bit
		 */
		if (left > 0 || (is_frag & 1))
			iph->frag_off |= htons(IP_MF);
		ptr += len;
		offset += len;

		/*
		 * Put this fragment into the sending queue.
		 */
		ip_statistics.IpFragCreates++;

		ip_queue_xmit(sk, dev, skb2, 2);
	}
	ip_statistics.IpFragOKs++;
}
1244
1245
1246 #ifdefCONFIG_IP_FORWARD1247
1248 /* 1249 * Forward an IP datagram to its next destination. 1250 */1251
1252 staticvoidip_forward(structsk_buff *skb, structdevice *dev, intis_frag)
/* */1253 {1254 structdevice *dev2; /* Output device */1255 structiphdr *iph; /* Our header */1256 structsk_buff *skb2; /* Output packet */1257 structrtable *rt; /* Route we use */1258 unsignedchar *ptr; /* Data pointer */1259 unsignedlongraddr; /* Router IP address */1260
1261 /*1262 * According to the RFC, we must first decrease the TTL field. If1263 * that reaches zero, we must reply an ICMP control message telling1264 * that the packet's lifetime expired.1265 *1266 * Exception:1267 * We may not generate an ICMP for an ICMP. icmp_send does the1268 * enforcement of this so we can forget it here. It is however1269 * sometimes VERY important.1270 */1271
1272 iph = skb->h.iph;
1273 iph->ttl--;
1274 if (iph->ttl <= 0)
1275 {1276 /* Tell the sender its packet died... */1277 icmp_send(skb, ICMP_TIME_EXCEEDED, ICMP_EXC_TTL, dev);
1278 return;
1279 }1280
1281 /* 1282 * Re-compute the IP header checksum. 1283 * This is inefficient. We know what has happened to the header1284 * and could thus adjust the checksum as Phil Karn does in KA9Q1285 */1286
1287 ip_send_check(iph);
1288
1289 /*1290 * OK, the packet is still valid. Fetch its destination address,1291 * and give it to the IP sender for further processing.1292 */1293
1294 rt = ip_rt_route(iph->daddr, NULL, NULL);
1295 if (rt == NULL)
1296 {1297 /*1298 * Tell the sender its packet cannot be delivered. Again1299 * ICMP is screened later.1300 */1301 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_NET_UNREACH, dev);
1302 return;
1303 }1304
1305
1306 /*1307 * Gosh. Not only is the packet valid; we even know how to1308 * forward it onto its final destination. Can we say this1309 * is being plain lucky?1310 * If the router told us that there is no GW, use the dest.1311 * IP address itself- we seem to be connected directly...1312 */1313
1314 raddr = rt->rt_gateway;
1315
1316 if (raddr != 0)
1317 {1318 /*1319 * There is a gateway so find the correct route for it.1320 * Gateways cannot in turn be gatewayed.1321 */1322 rt = ip_rt_route(raddr, NULL, NULL);
1323 if (rt == NULL)
1324 {1325 /* 1326 * Tell the sender its packet cannot be delivered... 1327 */1328 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, dev);
1329 return;
1330 }1331 if (rt->rt_gateway != 0)
1332 raddr = rt->rt_gateway;
1333 }1334 else1335 raddr = iph->daddr;
1336
1337 /*1338 * Having picked a route we can now send the frame out.1339 */1340
1341 dev2 = rt->rt_dev;
1342
1343 /*1344 * In IP you never forward a frame on the interface that it arrived1345 * upon. We should generate an ICMP HOST REDIRECT giving the route1346 * we calculated.1347 * For now just dropping the packet is an acceptable compromise.1348 */1349
1350 if (dev == dev2)
1351 return;
1352
1353 /*1354 * We now allocate a new buffer, and copy the datagram into it.1355 * If the indicated interface is up and running, kick it.1356 */1357
1358 if (dev2->flags & IFF_UP)
1359 {1360
1361 /*1362 * Current design decrees we copy the packet. For identical header1363 * lengths we could avoid it. The new skb code will let us push1364 * data so the problem goes away then.1365 */1366
1367 skb2 = alloc_skb(dev2->hard_header_len + skb->len, GFP_ATOMIC);
1368 /*1369 * This is rare and since IP is tolerant of network failures1370 * quite harmless.1371 */1372 if (skb2 == NULL)
1373 {1374 printk("\nIP: No memory available for IP forward\n");
1375 return;
1376 }1377 ptr = skb2->data;
1378 skb2->free = 1;
1379 skb2->len = skb->len + dev2->hard_header_len;
1380 skb2->h.raw = ptr;
1381
1382 /* 1383 * Copy the packet data into the new buffer. 1384 */1385 memcpy(ptr + dev2->hard_header_len, skb->h.raw, skb->len);
1386
1387 /* Now build the MAC header. */1388 (void) ip_send(skb2, raddr, skb->len, dev2, dev2->pa_addr);
1389
1390 ip_statistics.IpForwDatagrams++;
1391
1392 /*1393 * See if it needs fragmenting. Note in ip_rcv we tagged1394 * the fragment type. This must be right so that1395 * the fragmenter does the right thing.1396 */1397
1398 if(skb2->len > dev2->mtu)
1399 {1400 ip_fragment(NULL,skb2,dev2, is_frag);
1401 kfree_skb(skb2,FREE_WRITE);
1402 }1403 else1404 {1405 /*1406 * Map service types to priority. We lie about1407 * throughput being low priority, but its a good1408 * choice to help improve general usage.1409 */1410 if(iph->tos & IPTOS_LOWDELAY)
1411 dev_queue_xmit(skb2, dev2, SOPRI_INTERACTIVE);
1412 elseif(iph->tos & IPTOS_THROUGHPUT)
1413 dev_queue_xmit(skb2, dev2, SOPRI_BACKGROUND);
1414 else1415 dev_queue_xmit(skb2, dev2, SOPRI_NORMAL);
1416 }1417 }1418 }1419
1420
1421 #endif1422
1423 /*1424 * This function receives all incoming IP datagrams. 1425 */1426
1427 intip_rcv(structsk_buff *skb, structdevice *dev, structpacket_type *pt)
/* */1428 {1429 structiphdr *iph = skb->h.iph;
1430 unsignedcharhash;
1431 unsignedcharflag = 0;
1432 unsignedcharopts_p = 0; /* Set iff the packet has options. */1433 structinet_protocol *ipprot;
1434 staticstructoptionsopt; /* since we don't use these yet, and they1435 take up stack space. */1436 intbrd;
1437 intis_frag=0;
1438
1439
1440 ip_statistics.IpInReceives++;
1441
1442 /*1443 * Tag the ip header of this packet so we can find it1444 */1445
1446 skb->ip_hdr = iph;
1447
1448 /*1449 * Is the datagram acceptable? 1450 *1451 * 1. Length at least the size of an ip header1452 * 2. Version of 41453 * 3. Checksums correctly. [Speed optimisation for later, skip loopback checksums]1454 * (4. We ought to check for IP multicast addresses and undefined types.. does this matter ?)1455 */1456
1457 if (skb->len<sizeof(structiphdr) || iph->ihl<5 || iph->version != 4 || ip_fast_csum((unsignedchar *)iph, iph->ihl) !=0)
1458 {1459 ip_statistics.IpInHdrErrors++;
1460 kfree_skb(skb, FREE_WRITE);
1461 return(0);
1462 }1463
1464 /*1465 * Our transport medium may have padded the buffer out. Now we know it1466 * is IP we can trim to the true length of the frame.1467 */1468
1469 skb->len=ntohs(iph->tot_len);
1470
1471 /*1472 * Next anaylse the packet for options. Studies show under one packet in1473 * a thousand have options....1474 */1475
1476 if (iph->ihl != 5)
1477 {/* Fast path for the typical optionless IP packet. */1478 memset((char *) &opt, 0, sizeof(opt));
1479 if (do_options(iph, &opt) != 0)
1480 return 0;
1481 opts_p = 1;
1482 }1483
1484 /*1485 * Remember if the frame is fragmented.1486 */1487
1488 if (iph->frag_off & 0x0020)
1489 is_frag|=1;
1490
1491 /*1492 * Last fragment ?1493 */1494
1495 if (ntohs(iph->frag_off) & 0x1fff)
1496 is_frag|=2;
1497
1498 /* 1499 * Do any IP forwarding required. chk_addr() is expensive -- avoid it someday. 1500 *1501 * This is inefficient. While finding out if it is for us we could also compute1502 * the routing table entry. This is where the great unified cache theory comes1503 * in as and when someone impliments it1504 */1505
1506 if ((brd = ip_chk_addr(iph->daddr)) == 0)
1507 {1508 /*1509 * Don't forward multicast or broadcast frames.1510 */1511
1512 if(skb->pkt_type!=PACKET_HOST)
1513 {1514 kfree_skb(skb,FREE_WRITE);
1515 return 0;
1516 }1517
1518 /*1519 * The packet is for another target. Forward the frame1520 */1521
1522 #ifdefCONFIG_IP_FORWARD1523 ip_forward(skb, dev, is_frag);
1524 #else1525 printk("Machine %lx tried to use us as a forwarder to %lx but we have forwarding disabled!\n",
1526 iph->saddr,iph->daddr);
1527 ip_statistics.IpInAddrErrors++;
1528 #endif1529 /*1530 * The forwarder is inefficient and copies the packet. We 1531 * free the original now.1532 */1533
1534 kfree_skb(skb, FREE_WRITE);
1535 return(0);
1536 }1537
1538 /*1539 * Reassemble IP fragments. 1540 */1541
1542 if(is_frag)
1543 {1544 /* Defragment. Obtain the complete packet if there is one */1545 skb=ip_defrag(iph,skb,dev);
1546 if(skb==NULL)
1547 return 0;
1548 iph=skb->h.iph;
1549 }1550
1551 /*1552 * Point into the IP datagram, just past the header. 1553 */1554
1555 skb->ip_hdr = iph;
1556 skb->h.raw += iph->ihl*4;
1557
1558 /*1559 * skb->h.raw now points at the protocol beyond the IP header.1560 */1561
1562 hash = iph->protocol & (MAX_INET_PROTOS -1);
1563 for (ipprot = (structinet_protocol *)inet_protos[hash];ipprot != NULL;ipprot=(structinet_protocol *)ipprot->next)
1564 {1565 structsk_buff *skb2;
1566
1567 if (ipprot->protocol != iph->protocol)
1568 continue;
1569 /*1570 * See if we need to make a copy of it. This will1571 * only be set if more than one protocol wants it. 1572 * and then not for the last one.1573 *1574 * This is an artifact of poor upper protocol design. 1575 * Because the upper protocols damage the actual packet1576 * we must do copying. In actual fact it's even worse1577 * than this as TCP may hold on to the buffer.1578 */1579 if (ipprot->copy)
1580 {1581 #if 0
1582 skb2 = alloc_skb(skb->mem_len-sizeof(structsk_buff), GFP_ATOMIC);
1583 if (skb2 == NULL)
1584 continue;
1585 memcpy(skb2, skb, skb2->mem_len);
1586 skb2->ip_hdr = (structiphdr *)(
1587 (unsignedlong)skb2 +
1588 (unsignedlong) skb->ip_hdr -
1589 (unsignedlong)skb);
1590 skb2->h.raw = (unsignedchar *)(
1591 (unsignedlong)skb2 +
1592 (unsignedlong) skb->h.raw -
1593 (unsignedlong)skb);
1594 skb2->free=1;
1595 #else1596 skb2 = skb_clone(skb, GFP_ATOMIC);
1597 if(skb2==NULL)
1598 continue;
1599 #endif1600 }1601 else1602 {1603 skb2 = skb;
1604 }1605 flag = 1;
1606
1607 /*1608 * Pass on the datagram to each protocol that wants it,1609 * based on the datagram protocol. We should really1610 * check the protocol handler's return values here...1611 */1612 ipprot->handler(skb2, dev, opts_p ? &opt : 0, iph->daddr,
1613 (ntohs(iph->tot_len) - (iph->ihl * 4)),
1614 iph->saddr, 0, ipprot);
1615
1616 }1617
1618 /*1619 * All protocols checked.1620 * If this packet was a broadcast, we may *not* reply to it, since that1621 * causes (proven, grin) ARP storms and a leakage of memory (i.e. all1622 * ICMP reply messages get queued up for transmission...)1623 */1624
1625 if (!flag)
1626 {1627 if (brd != IS_BROADCAST)
1628 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PROT_UNREACH, dev);
1629 kfree_skb(skb, FREE_WRITE);
1630 }1631
1632 return(0);
1633 }1634
1635
/*
 *	Queues a packet to be sent, and starts the transmitter
 *	if necessary. if free = 1 then we free the block after
 *	transmit, otherwise we don't. If free==2 we not only
 *	free the block but also don't assign a new ip seq number.
 *	This routine also needs to put in the total length,
 *	and compute the checksum.
 */

void ip_queue_xmit(struct sock *sk, struct device *dev,
	     struct sk_buff *skb, int free)
{
	struct iphdr *iph;
	unsigned char *ptr;

	/* All buffers without an owner socket get freed after transmit. */
	if (sk == NULL)
		free = 1;

	/* Sanity check */
	if (dev == NULL)
	{
		printk("IP: ip_queue_xmit dev = NULL\n");
		return;
	}

	IS_SKB(skb);

	/*
	 *	Do some book-keeping in the packet for later.
	 */

	skb->dev = dev;
	skb->when = jiffies;		/* transmit timestamp, for RTT/timeout */

	/*
	 *	Find the IP header and set the length. This is bad
	 *	but once we get the skb data handling code in the
	 *	hardware will push its header sensibly and we will
	 *	set skb->ip_hdr to avoid this mess and the fixed
	 *	header length problem.
	 */

	ptr = skb->data;
	ptr += dev->hard_header_len;	/* skip the MAC header */
	iph = (struct iphdr *)ptr;
	skb->ip_hdr = iph;
	/* NOTE: ntohs here is numerically identical to htons; this stores
	   network byte order as intended. */
	iph->tot_len = ntohs(skb->len-dev->hard_header_len);

	/*
	 *	No reassigning numbers to fragments (free==2 marks a fragment
	 *	that must keep its original IP id; downgrade it to plain free).
	 */

	if(free!=2)
		iph->id      = htons(ip_id_count++);
	else
		free=1;

	skb->free = free;

	/*
	 *	Do we need to fragment? Again this is inefficient.
	 *	We need to somehow lock the original buffer and use
	 *	bits of it.
	 */

	if(skb->len > dev->mtu)
	{
		ip_fragment(sk,skb,dev,0);	/* queues the fragments itself */
		IS_SKB(skb);
		kfree_skb(skb,FREE_WRITE);	/* original is no longer needed */
		return;
	}

	/*
	 *	Add an IP checksum.
	 */

	ip_send_check(iph);

	/*
	 *	More debugging. You cannot queue a packet already on a list.
	 *	Spot this and moan loudly.
	 */
	if (skb->next != NULL)
	{
		printk("ip_queue_xmit: next != NULL\n");
		skb_unlink(skb);
	}

	/*
	 *	If a sender wishes the packet to remain unfreed
	 *	we add it to his send queue. This arguably belongs
	 *	in the TCP level since nobody else uses it. BUT
	 *	remember IPng might change all the rules.
	 */

	if (!free)
	{
		unsigned long flags;
		/* The socket now has more outstanding blocks. */

		sk->packets_out++;

		/* Protect the send list against interrupts while we link. */
		save_flags(flags);
		cli();

		if (skb->link3 != NULL)
		{
			printk("ip.c: link3 != NULL\n");
			skb->link3 = NULL;
		}
		if (sk->send_head == NULL)
		{
			sk->send_tail = skb;
			sk->send_head = skb;
		}
		else
		{
			sk->send_tail->link3 = skb;
			sk->send_tail = skb;
		}
		/* skb->link3 is NULL */

		/* Interrupt restore */
		restore_flags(flags);
		/* Set the IP write timeout to the round trip time for the packet.
		   If an acknowledge has not arrived by then we may wish to act. */
		reset_timer(sk, TIME_WRITE, sk->rto);
	}
	else
		/* Remember who owns the buffer */
		skb->sk = sk;

	/*
	 *	If the indicated interface is up and running, send the packet.
	 */
	ip_statistics.IpOutRequests++;

	if (dev->flags & IFF_UP)
	{
		/*
		 *	If we have an owner use its priority setting,
		 *	otherwise use NORMAL.
		 */

		if (sk != NULL)
		{
			dev_queue_xmit(skb, dev, sk->priority);
		}
		else
		{
			dev_queue_xmit(skb, dev, SOPRI_NORMAL);
		}
	}
	else
	{
		/* Device down: count the discard. A queued (!free) skb stays
		   on the socket's send list for later retransmission. */
		ip_statistics.IpOutDiscards++;
		if (free)
			kfree_skb(skb, FREE_WRITE);
	}
}
1805
1806 /*1807 * A socket has timed out on its send queue and wants to do a1808 * little retransmitting. Currently this means TCP.1809 */1810
1811 voidip_do_retransmit(structsock *sk, intall)
/* */1812 {1813 structsk_buff * skb;
1814 structproto *prot;
1815 structdevice *dev;
1816 intretransmits;
1817
1818 prot = sk->prot;
1819 skb = sk->send_head;
1820 retransmits = sk->retransmits;
1821
1822 while (skb != NULL)
1823 {1824 dev = skb->dev;
1825 IS_SKB(skb);
1826 skb->when = jiffies;
1827
1828 /* 1829 * In general it's OK just to use the old packet. However we1830 * need to use the current ack and window fields. Urg and 1831 * urg_ptr could possibly stand to be updated as well, but we 1832 * don't keep the necessary data. That shouldn't be a problem,1833 * if the other end is doing the right thing. Since we're 1834 * changing the packet, we have to issue a new IP identifier.1835 */1836
1837 /* this check may be unnecessary - retransmit only for TCP */1838 if (sk->protocol == IPPROTO_TCP) {1839 structtcphdr *th;
1840 structiphdr *iph;
1841 intsize;
1842
1843 iph = (structiphdr *)(skb->data + dev->hard_header_len);
1844 th = (structtcphdr *)(((char *)iph) + (iph->ihl << 2));
1845 size = skb->len - (((unsignedchar *) th) - skb->data);
1846
1847 iph->id = htons(ip_id_count++);
1848 ip_send_check(iph);
1849
1850 th->ack_seq = ntohl(sk->acked_seq);
1851 th->window = ntohs(tcp_select_window(sk));
1852 tcp_send_check(th, sk->saddr, sk->daddr, size, sk);
1853 }1854
1855 /* 1856 * If the interface is (still) up and running, kick it. 1857 */1858
1859 if (dev->flags & IFF_UP)
1860 {1861 /*1862 * If the packet is still being sent by the device/protocol1863 * below then don't retransmit. This is both needed, and good -1864 * especially with connected mode AX.25 where it stops resends1865 * occuring of an as yet unsent anyway frame!1866 * We still add up the counts as the round trip time wants1867 * adjusting.1868 */1869 if (sk && !skb_device_locked(skb))
1870 {1871 /* Remove it from any existing driver queue first! */1872 skb_unlink(skb);
1873 /* Now queue it */1874 ip_statistics.IpOutRequests++;
1875 dev_queue_xmit(skb, dev, sk->priority);
1876 }1877 }1878
1879 /*1880 * Count retransmissions1881 */1882 retransmits++;
1883 sk->prot->retransmits ++;
1884
1885 /*1886 * Only one retransmit requested.1887 */1888 if (!all)
1889 break;
1890
1891 /*1892 * This should cut it off before we send too many packets. 1893 */1894 if (sk->retransmits > sk->cong_window)
1895 break;
1896 skb = skb->link3;
1897 }1898 }1899
1900 /*1901 * This is the normal code called for timeouts. It does the retransmission1902 * and then does backoff. ip_do_retransmit is separated out because1903 * tcp_ack needs to send stuff from the retransmit queue without1904 * initiating a backoff.1905 */1906
1907 voidip_retransmit(structsock *sk, intall)
/* */1908 {1909 ip_do_retransmit(sk, all);
1910
1911 /*1912 * Increase the timeout each time we retransmit. Note that1913 * we do not increase the rtt estimate. rto is initialized1914 * from rtt, but increases here. Jacobson (SIGCOMM 88) suggests1915 * that doubling rto each time is the least we can get away with.1916 * In KA9Q, Karn uses this for the first few times, and then1917 * goes to quadratic. netBSD doubles, but only goes up to *64,1918 * and clamps at 1 to 64 sec afterwards. Note that 120 sec is1919 * defined in the protocol as the maximum possible RTT. I guess1920 * we'll have to use something other than TCP to talk to the1921 * University of Mars.1922 */1923
1924 sk->retransmits++;
1925 sk->backoff++;
1926 sk->rto = min(sk->rto << 1, 120*HZ);
1927 reset_timer(sk, TIME_WRITE, sk->rto);
1928 }1929
1930 /*1931 * Socket option code for IP. This is the end of the line after any TCP,UDP etc options on1932 * an IP socket.1933 *1934 * We impliment IP_TOS (type of service), IP_TTL (time to live).1935 *1936 * Next release we will sort out IP_OPTIONS since for some people are kind of important.1937 */1938
1939 intip_setsockopt(structsock *sk, intlevel, intoptname, char *optval, intoptlen)
/* */1940 {1941 intval,err;
1942
1943 if (optval == NULL)
1944 return(-EINVAL);
1945
1946 err=verify_area(VERIFY_READ, optval, sizeof(int));
1947 if(err)
1948 returnerr;
1949
1950 val = get_fs_long((unsignedlong *)optval);
1951
1952 if(level!=SOL_IP)
1953 return -EOPNOTSUPP;
1954
1955 switch(optname)
1956 {1957 caseIP_TOS:
1958 if(val<0||val>255)
1959 return -EINVAL;
1960 sk->ip_tos=val;
1961 return 0;
1962 caseIP_TTL:
1963 if(val<1||val>255)
1964 return -EINVAL;
1965 sk->ip_ttl=val;
1966 return 0;
1967 /* IP_OPTIONS and friends go here eventually */1968 default:
1969 return(-ENOPROTOOPT);
1970 }1971 }1972
1973 /*1974 * Get the options. Note for future reference. The GET of IP options gets the1975 * _received_ ones. The set sets the _sent_ ones.1976 */1977
1978 intip_getsockopt(structsock *sk, intlevel, intoptname, char *optval, int *optlen)
/* */1979 {1980 intval,err;
1981
1982 if(level!=SOL_IP)
1983 return -EOPNOTSUPP;
1984
1985 switch(optname)
1986 {1987 caseIP_TOS:
1988 val=sk->ip_tos;
1989 break;
1990 caseIP_TTL:
1991 val=sk->ip_ttl;
1992 break;
1993 default:
1994 return(-ENOPROTOOPT);
1995 }1996 err=verify_area(VERIFY_WRITE, optlen, sizeof(int));
1997 if(err)
1998 returnerr;
1999 put_fs_long(sizeof(int),(unsignedlong *) optlen);
2000
2001 err=verify_area(VERIFY_WRITE, optval, sizeof(int));
2002 if(err)
2003 returnerr;
2004 put_fs_long(val,(unsignedlong *)optval);
2005
2006 return(0);
2007 }2008
/*
 *	IP protocol layer initialiser: the packet_type entry that hooks IP
 *	into the device layer's demultiplexer.
 */

static struct packet_type ip_packet_type =
{
	0,		/* type: filled in by ip_init() with htons(ETH_P_IP);
			   htons() is not a compile-time constant here
			   (original comment: "MUTTER ntohs(ETH_P_IP)"). */
	0,		/* copy */
	ip_rcv,		/* receive handler for every inbound IP frame */
	NULL,		/* NOTE(review): presumably private data - unused; confirm
			   against struct packet_type's declaration. */
	NULL,		/* presumably the next packet_type in the chain,
			   linked by dev_add_pack() - confirm likewise. */
};
2021
2022
/*
 *	IP registers the packet type and then calls the subprotocol initialisers.
 */

void ip_init(void)
{
	/* The type field cannot be statically initialised to htons(ETH_P_IP)
	   (not a constant expression), so patch it in at run time. */
	ip_packet_type.type=htons(ETH_P_IP);
	dev_add_pack(&ip_packet_type);
	/* Subprotocol initialisers - currently disabled:
	   ip_raw_init();
	   ip_packet_init();
	   ip_tcp_init();
	   ip_udp_init();*/
}