1 /* 2 * INET An implementation of the TCP/IP protocol suite for the LINUX 3 * operating system. INET is implemented using the BSD Socket 4 * interface as the means of communication with the user level. 5 * 6 * The Internet Protocol (IP) module. 7 * 8 * Version: @(#)ip.c 1.0.16b 9/1/93 9 * 10 * Authors: Ross Biro, <bir7@leland.Stanford.Edu> 11 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> 12 * Donald Becker, <becker@super.org> 13 * Alan Cox, <gw4pts@gw4pts.ampr.org> 14 * 15 * Fixes: 16 * Alan Cox : Commented a couple of minor bits of surplus code 17 * Alan Cox : Undefining IP_FORWARD doesn't include the code 18 * (just stops a compiler warning). 19 * Alan Cox : Frames with >=MAX_ROUTE record routes, strict routes or loose routes 20 * are junked rather than corrupting things. 21 * Alan Cox : Frames to bad broadcast subnets are dumped 22 * We used to process them non broadcast and 23 * boy could that cause havoc. 24 * Alan Cox : ip_forward sets the free flag on the 25 * new frame it queues. Still crap because 26 * it copies the frame but at least it 27 * doesn't eat memory too. 28 * Alan Cox : Generic queue code and memory fixes. 29 * Fred Van Kempen : IP fragment support (borrowed from NET2E) 30 * Gerhard Koerting: Forward fragmented frames correctly. 31 * Gerhard Koerting: Fixes to my fix of the above 8-). 32 * Gerhard Koerting: IP interface addressing fix. 33 * Linus Torvalds : More robustness checks 34 * Alan Cox : Even more checks: Still not as robust as it ought to be 35 * Alan Cox : Save IP header pointer for later 36 * Alan Cox : ip option setting 37 * Alan Cox : Use ip_tos/ip_ttl settings 38 * Alan Cox : Fragmentation bogosity removed 39 * (Thanks to Mark.Bush@prg.ox.ac.uk) 40 * Dmitry Gorodchanin : Send of a raw packet crash fix. 41 * Alan Cox : Silly ip bug when an overlength 42 * fragment turns up. Now frees the 43 * queue. 44 * Linus Torvalds/ : Memory leakage on fragmentation 45 * Alan Cox : handling. 
46 * Gerhard Koerting: Forwarding uses IP priority hints 47 * Teemu Rantanen : Fragment problems. 48 * Alan Cox : General cleanup, comments and reformat 49 * Alan Cox : SNMP statistics 50 * Alan Cox : BSD address rule semantics. Also see 51 * UDP as there is a nasty checksum issue 52 * if you do things the wrong way. 53 * Alan Cox : Always defrag, moved IP_FORWARD to the config.in file 54 * Alan Cox : IP options adjust sk->priority. 55 * Pedro Roque : Fix mtu/length error in ip_forward. 56 * Alan Cox : Avoid ip_chk_addr when possible. 57 * 58 * To Fix: 59 * IP option processing is mostly not needed. ip_forward needs to know about routing rules 60 * and time stamp but that's about all. Use the route mtu field here too 61 * 62 * This program is free software; you can redistribute it and/or 63 * modify it under the terms of the GNU General Public License 64 * as published by the Free Software Foundation; either version 65 * 2 of the License, or (at your option) any later version. 66 */ 67 #include <asm/segment.h>
68 #include <asm/system.h>
69 #include <linux/types.h>
70 #include <linux/kernel.h>
71 #include <linux/sched.h>
72 #include <linux/string.h>
73 #include <linux/errno.h>
74 #include <linux/socket.h>
75 #include <linux/sockios.h>
76 #include <linux/in.h>
77 #include <linux/inet.h>
78 #include <linux/netdevice.h>
79 #include <linux/etherdevice.h>
80 #include "snmp.h"
81 #include "ip.h"
82 #include "protocol.h"
83 #include "route.h"
84 #include "tcp.h"
85 #include <linux/skbuff.h>
86 #include "sock.h"
87 #include "arp.h"
88 #include "icmp.h"
89
#define CONFIG_IP_DEFRAG

/* Declared elsewhere in the TCP code; referenced from this file. */
extern int last_retran;
extern void sort_send(struct sock *sk);

#define min(a,b)    ((a)<(b)?(a):(b))
/* True when the address lies in the 127.0.0.0/8 loopback network. */
#define LOOPBACK(x) (((x) & htonl(0xff000000)) == htonl(0x7f000000))

/*
 * SNMP management statistics
 */
struct ip_mib ip_statistics = {1, 64,};   /* Forwarding=Yes, Default TTL=64 */
104 /* 105 * Handle the issuing of an ioctl() request 106 * for the ip device. This is scheduled to 107 * disappear 108 */ 109
/*
 * Handle the issuing of an ioctl() request for the ip device.
 * No IP-level ioctls are currently implemented, so every command
 * is rejected. This entry point is scheduled to disappear.
 */
int ip_ioctl(struct sock *sk, int cmd, unsigned long arg)
{
    (void) sk;
    (void) cmd;
    (void) arg;
    return -EINVAL;
}
119
/* these two routines will do routing. */

/*
 * Process a strict source route. Currently an empty stub: called from
 * do_options() when a strict route option is present but never acts.
 */
static void
strict_route(struct iphdr *iph, struct options *opt)
{
}
127
/*
 * Process a loose source route. Currently an empty stub: called from
 * do_options() when a loose route option is present but never acts.
 */
static void
loose_route(struct iphdr *iph, struct options *opt)
{
}
133
134
135
/*
 * This routine will check to see if we have lost a gateway.
 * Currently an empty stub — the check is not implemented.
 */
void
ip_route_check(unsigned long daddr)
{
}
142
#if 0
/* this routine puts the options at the end of an ip header. */
/*
 * Compiled out (#if 0). Writes a single terminating zero byte just past
 * the fixed IP header and reports 4 bytes of option space.
 */
static int
build_options(struct iphdr *iph, struct options *opt)
{
    unsigned char *ptr;

    /* currently we don't support any options. */
    ptr = (unsigned char *)(iph + 1);
    *ptr = 0;
    return (4);
}
#endif
156
/*
 * Take an skb, and fill in the MAC header.
 *
 * Binds the skb to 'dev' and, when the device has a hard_header routine,
 * asks it to build the link-level header for protocol ETH_P_IP.
 * Returns the number of MAC header bytes written. A negative result
 * from hard_header means the destination MAC is not yet resolved: the
 * magnitude is still the header length, skb->arp is cleared and the next
 * hop IP is saved in skb->raddr so rebuild_header can finish the job later.
 */
static int ip_send(struct sk_buff *skb, unsigned long daddr, int len, struct device *dev, unsigned long saddr)
{
    int mac = 0;

    skb->dev = dev;
    skb->arp = 1;   /* assume resolved until the driver says otherwise */
    if (dev->hard_header)
    {
        /*
         * Build a hardware header. Source address is our mac, destination unknown
         * (rebuild header will sort this out)
         */
        mac = dev->hard_header(skb->data, dev, ETH_P_IP, NULL, NULL, len, skb);
        if (mac < 0)
        {
            mac = -mac;
            skb->arp = 0;
            skb->raddr = daddr;     /* next routing address */
        }
    }
    return mac;
}
/* Rolling counter used to generate IP datagram identification values. */
int ip_id_count = 0;
185
/*
 * This routine builds the appropriate hardware/IP headers for
 * the routine. It assumes that if *dev != NULL then the
 * protocol knows what it's doing, otherwise it uses the
 * routing/ARP tables to select a device struct.
 *
 * Returns the total header length written (MAC + 20-byte IP header),
 * just the MAC length for IPPROTO_RAW (caller supplies its own IP
 * header), or -ENETUNREACH when no route exists.
 *
 * NOTE(review): iph->id, iph->tot_len and iph->check are NOT filled in
 * here — callers appear to be responsible for those fields; confirm
 * against the transport-layer callers.
 */
int ip_build_header(struct sk_buff *skb, unsigned long saddr, unsigned long daddr,
        struct device **dev, int type, struct options *opt, int len, int tos, int ttl)
{
    static struct options optmem;
    struct iphdr *iph;
    struct rtable *rt;
    unsigned char *buff;
    unsigned long raddr;
    int tmp;
    unsigned long src;

    /*
     * If there is no 'from' address as yet, then make it our loopback
     */
    if (saddr == 0)
        saddr = ip_my_addr();

    buff = skb->data;

    /*
     * See if we need to look up the device.
     */
    if (*dev == NULL)
    {
        if (skb->localroute)
            rt = ip_rt_local(daddr, &optmem, &src);
        else
            rt = ip_rt_route(daddr, &optmem, &src);
        if (rt == NULL)
        {
            ip_statistics.IpOutNoRoutes++;
            return(-ENETUNREACH);
        }

        *dev = rt->rt_dev;
        /*
         * If the frame is from us and going off machine it MUST MUST MUST
         * have the output device ip address and never the loopback
         */
        if (LOOPBACK(saddr) && !LOOPBACK(daddr))
            saddr = src;    /* rt->rt_dev->pa_addr; */
        raddr = rt->rt_gateway;

        opt = &optmem;
    }
    else
    {
        /*
         * We still need the address of the first hop.
         */
        if (skb->localroute)
            rt = ip_rt_local(daddr, &optmem, &src);
        else
            rt = ip_rt_route(daddr, &optmem, &src);
        /*
         * If the frame is from us and going off machine it MUST MUST MUST
         * have the output device ip address and never the loopback
         */
        if (LOOPBACK(saddr) && !LOOPBACK(daddr))
            saddr = src;    /* rt->rt_dev->pa_addr; */

        raddr = (rt == NULL) ? 0 : rt->rt_gateway;
    }

    /*
     * No gateway so aim at the real destination
     */
    if (raddr == 0)
        raddr = daddr;

    /*
     * Now build the MAC header.
     */
    tmp = ip_send(skb, raddr, len, *dev, saddr);
    buff += tmp;
    len -= tmp;

    /*
     * Book keeping
     */
    skb->dev = *dev;
    skb->saddr = saddr;
    if (skb->sk)
        skb->sk->saddr = saddr;

    /*
     * Now build the IP header.
     */

    /*
     * If we are using IPPROTO_RAW, then we don't need an IP header, since
     * one is being supplied to us by the user
     */
    if (type == IPPROTO_RAW)
        return (tmp);

    iph = (struct iphdr *)buff;
    iph->version = 4;
    iph->tos = tos;
    iph->frag_off = 0;
    iph->ttl = ttl;
    iph->daddr = daddr;
    iph->saddr = saddr;
    iph->protocol = type;
    iph->ihl = 5;       /* no options: fixed 20-byte header */

    /* Setup the IP options. */
#ifdef Not_Yet_Avail
    build_options(iph, opt);
#endif

    return(20 + tmp);   /* IP header plus MAC header size */
}
311
/*
 * Parse the IP options of an incoming datagram into *opt.
 *
 * NOTE: the function currently zeroes *opt and returns 0 immediately —
 * everything after the early "return(0)" below is intentionally dead
 * code (option processing is disabled). It is kept as the template for
 * a future implementation. Return convention of the dead parser:
 * 0 = options OK, 1 = malformed options (caller should drop the frame).
 */
static int
do_options(struct iphdr *iph, struct options *opt)
{
    unsigned char *buff;
    int done = 0;
    int i, len = sizeof(struct iphdr);

    /* Zero out the options. */
    opt->record_route.route_size = 0;
    opt->loose_route.route_size = 0;
    opt->strict_route.route_size = 0;
    opt->tstamp.ptr = 0;
    opt->security = 0;
    opt->compartment = 0;
    opt->handling = 0;
    opt->stream = 0;
    opt->tcc = 0;
    return(0);

    /* ------- everything below here is currently unreachable ------- */

    /* Advance the pointer to start at the options. */
    buff = (unsigned char *)(iph + 1);

    /* Now start the processing. */
    while (!done && len < iph->ihl*4) switch(*buff) {
        case IPOPT_END:
            done = 1;
            break;
        case IPOPT_NOOP:
            buff++;
            len++;
            break;
        case IPOPT_SEC:
            buff++;
            if (*buff != 11) return(1);     /* security option is fixed 11 bytes */
            buff++;
            opt->security = ntohs(*(unsigned short *)buff);
            buff += 2;
            opt->compartment = ntohs(*(unsigned short *)buff);
            buff += 2;
            opt->handling = ntohs(*(unsigned short *)buff);
            buff += 2;
            opt->tcc = ((*buff) << 16) + ntohs(*(unsigned short *)(buff+1));
            buff += 3;
            len += 11;
            break;
        case IPOPT_LSRR:
            buff++;
            if ((*buff - 3) % 4 != 0) return(1);    /* length must be 3 + 4*n */
            len += *buff;
            opt->loose_route.route_size = (*buff - 3)/4;
            buff++;
            if (*buff % 4 != 0) return(1);
            opt->loose_route.pointer = *buff/4 - 1;
            buff++;
            buff++;
            for (i = 0; i < opt->loose_route.route_size; i++) {
                if (i >= MAX_ROUTE)
                    return(1);      /* oversized route list: junk the frame */
                opt->loose_route.route[i] = *(unsigned long *)buff;
                buff += 4;
            }
            break;
        case IPOPT_SSRR:
            buff++;
            if ((*buff - 3) % 4 != 0) return(1);
            len += *buff;
            opt->strict_route.route_size = (*buff - 3)/4;
            buff++;
            if (*buff % 4 != 0) return(1);
            opt->strict_route.pointer = *buff/4 - 1;
            buff++;
            buff++;
            for (i = 0; i < opt->strict_route.route_size; i++) {
                if (i >= MAX_ROUTE)
                    return(1);
                opt->strict_route.route[i] = *(unsigned long *)buff;
                buff += 4;
            }
            break;
        case IPOPT_RR:
            buff++;
            if ((*buff - 3) % 4 != 0) return(1);
            len += *buff;
            opt->record_route.route_size = (*buff - 3)/4;
            buff++;
            if (*buff % 4 != 0) return(1);
            opt->record_route.pointer = *buff/4 - 1;
            buff++;
            buff++;
            for (i = 0; i < opt->record_route.route_size; i++) {
                if (i >= MAX_ROUTE)
                    return 1;
                opt->record_route.route[i] = *(unsigned long *)buff;
                buff += 4;
            }
            break;
        case IPOPT_SID:
            len += 4;
            buff += 2;
            opt->stream = *(unsigned short *)buff;
            buff += 2;
            break;
        case IPOPT_TIMESTAMP:
            buff++;
            len += *buff;
            if (*buff % 4 != 0) return(1);
            opt->tstamp.len = *buff / 4 - 1;
            buff++;
            if ((*buff - 1) % 4 != 0) return(1);
            opt->tstamp.ptr = (*buff-1)/4;
            buff++;
            opt->tstamp.x.full_char = *buff;
            buff++;
            for (i = 0; i < opt->tstamp.len; i++) {
                opt->tstamp.data[i] = *(unsigned long *)buff;
                buff += 4;
            }
            break;
        default:
            return(1);      /* unknown option: reject */
    }

    /* If no record route was given, borrow the strict/loose one. */
    if (opt->record_route.route_size == 0) {
        if (opt->strict_route.route_size != 0) {
            memcpy(&(opt->record_route), &(opt->strict_route),
                   sizeof(opt->record_route));
        } else if (opt->loose_route.route_size != 0) {
            memcpy(&(opt->record_route), &(opt->loose_route),
                   sizeof(opt->record_route));
        }
    }

    if (opt->strict_route.route_size != 0 &&
        opt->strict_route.route_size != opt->strict_route.pointer) {
        strict_route(iph, opt);
        return(0);
    }

    if (opt->loose_route.route_size != 0 &&
        opt->loose_route.route_size != opt->loose_route.pointer) {
        loose_route(iph, opt);
        return(0);
    }

    return(0);
}
/*
 * This is a version of ip_compute_csum() optimized for IP headers, which
 * always checksum on 4 octet boundaries.
 *
 * x86 inline assembly: sums 'wlen' 32-bit words starting at 'buff' with
 * end-around carry, folds the 32-bit sum to 16 bits, and returns the
 * one's complement. 'wlen' is a count of 32-bit words, not bytes
 * (callers pass iph->ihl). i386-specific; do not touch without checking
 * the register constraints.
 */
static inline unsigned short ip_fast_csum(unsigned char * buff, int wlen)
{
    unsigned long sum = 0;

    if (wlen)
    {
        unsigned long bogus;
        __asm__("clc\n"
            "1:\t"
            "lodsl\n\t"
            "adcl %3, %0\n\t"
            "decl %2\n\t"
            "jne 1b\n\t"
            "adcl $0, %0\n\t"
            "movl %0, %3\n\t"
            "shrl $16, %3\n\t"
            "addw %w3, %w0\n\t"
            "adcw $0, %w0"
            : "=r" (sum), "=S" (buff), "=r" (wlen), "=a" (bogus)
            : "0" (sum), "1" (buff), "2" (wlen));
    }
    return (~sum) & 0xffff;
}
/*
 * This routine does all the checksum computations that don't
 * require anything special (like copying or special headers).
 *
 * Computes the Internet checksum over 'len' bytes at 'buff':
 * bulk 32-bit word loop first, then a trailing 16-bit word and a
 * trailing byte if present. i386 inline assembly throughout.
 */
unsigned short ip_compute_csum(unsigned char * buff, int len)
{
    unsigned long sum = 0;

    /* Do the first multiple of 4 bytes and convert to 16 bits. */
    if (len > 3)
    {
        __asm__("clc\n"
            "1:\t"
            "lodsl\n\t"
            "adcl %%eax, %%ebx\n\t"
            "loop 1b\n\t"
            "adcl $0, %%ebx\n\t"
            "movl %%ebx, %%eax\n\t"
            "shrl $16, %%eax\n\t"
            "addw %%ax, %%bx\n\t"
            "adcw $0, %%bx"
            : "=b" (sum) , "=S" (buff)
            : "0" (sum), "c" (len >> 2) ,"1" (buff)
            : "ax", "cx", "si", "bx" );
    }
    /* Remaining 16-bit word, if any. */
    if (len & 2)
    {
        __asm__("lodsw\n\t"
            "addw %%ax, %%bx\n\t"
            "adcw $0, %%bx"
            : "=b" (sum), "=S" (buff)
            : "0" (sum), "1" (buff)
            : "bx", "ax", "si");
    }
    /* Remaining byte, if any. */
    if (len & 1)
    {
        __asm__("lodsb\n\t"
            "movb $0, %%ah\n\t"
            "addw %%ax, %%bx\n\t"
            "adcw $0, %%bx"
            : "=b" (sum), "=S" (buff)
            : "0" (sum), "1" (buff)
            : "bx", "ax", "si");
    }
    sum = ~sum;
    return(sum & 0xffff);
}
/*
 * Check the header of an incoming IP datagram. This version is still used in slhc.c.
 * Returns the folded one's-complement sum over the header; a valid
 * header checksums to zero.
 */
int ip_csum(struct iphdr *iph)
{
    return ip_fast_csum((unsigned char *)iph, iph->ihl);
}
/*
 * Generate a checksum for an outgoing IP datagram.
 * The check field must be zeroed before summing, since it is part of
 * the region being checksummed.
 */
static void ip_send_check(struct iphdr *iph)
{
    iph->check = 0;
    iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
}
556 /************************ Fragment Handlers From NET2E not yet with tweaks to beat 4K **********************************/ 557
558
559 /* 560 * This fragment handler is a bit of a heap. On the other hand it works quite 561 * happily and handles things quite well. 562 */ 563
/* Head of the doubly-linked list of incomplete (fragmented) datagrams. */
static struct ipq *ipqueue = NULL;      /* IP fragment queue */
566 /* 567 * Create a new fragment entry. 568 */ 569
570 staticstructipfrag *ip_frag_create(intoffset, intend, structsk_buff *skb, unsignedchar *ptr)
/* */ 571 { 572 structipfrag *fp;
573
574 fp = (structipfrag *) kmalloc(sizeof(structipfrag), GFP_ATOMIC);
575 if (fp == NULL)
576 { 577 printk("IP: frag_create: no memory left !\n");
578 return(NULL);
579 } 580 memset(fp, 0, sizeof(structipfrag));
581
582 /* Fill in the structure. */ 583 fp->offset = offset;
584 fp->end = end;
585 fp->len = end - offset;
586 fp->skb = skb;
587 fp->ptr = ptr;
588
589 return(fp);
590 } 591
592
/*
 * Find the correct entry in the "incomplete datagrams" queue for
 * this IP datagram, and return the queue entry address if found.
 *
 * A datagram is identified by the (id, saddr, daddr, protocol) tuple.
 * Interrupts are disabled (cli/sti) while walking the list; on a hit
 * the entry's timer is stopped before returning so it cannot expire
 * while the caller is using it (the caller restarts it).
 */
static struct ipq *ip_find(struct iphdr *iph)
{
    struct ipq *qp;
    struct ipq *qplast;

    cli();
    qplast = NULL;
    for (qp = ipqueue; qp != NULL; qplast = qp, qp = qp->next)
    {
        if (iph->id == qp->iph->id && iph->saddr == qp->iph->saddr &&
            iph->daddr == qp->iph->daddr && iph->protocol == qp->iph->protocol)
        {
            del_timer(&qp->timer);  /* So it doesn't vanish on us. The timer will be reset anyway */
            sti();
            return(qp);
        }
    }
    sti();
    return(NULL);
}
619
/*
 * Remove an entry from the "incomplete datagrams" queue, either
 * because we completed, reassembled and processed it, or because
 * it timed out.
 *
 * Stops the entry's timer, unlinks it from ipqueue, then releases the
 * fragment skbs, the saved MAC and IP headers, and finally the queue
 * descriptor itself. Runs with interrupts disabled during the unlink
 * and the frees.
 */
static void ip_free(struct ipq *qp)
{
    struct ipfrag *fp;
    struct ipfrag *xp;

    /*
     * Stop the timer for this entry.
     */
    del_timer(&qp->timer);

    /* Remove this entry from the "incomplete datagrams" queue. */
    cli();
    if (qp->prev == NULL)
    {
        /* Entry is at the head of the list. */
        ipqueue = qp->next;
        if (ipqueue != NULL)
            ipqueue->prev = NULL;
    }
    else
    {
        qp->prev->next = qp->next;
        if (qp->next != NULL)
            qp->next->prev = qp->prev;
    }

    /* Release all fragment data. */
    fp = qp->fragments;
    while (fp != NULL)
    {
        xp = fp->next;      /* save link before freeing fp */
        IS_SKB(fp->skb);
        kfree_skb(fp->skb, FREE_READ);
        kfree_s(fp, sizeof(struct ipfrag));
        fp = xp;
    }

    /* Release the MAC header. */
    kfree_s(qp->mac, qp->maclen);

    /* Release the IP header (allocated with 8 extra octets for ICMP). */
    kfree_s(qp->iph, qp->ihlen + 8);

    /* Finally, release the queue descriptor itself. */
    kfree_s(qp, sizeof(struct ipq));
    sti();
}
675
/*
 * Oops- a fragment queue timed out. Kill it and send an ICMP reply.
 *
 * Timer callback: 'arg' is the queue entry itself (set up when the
 * timer was armed). Sends ICMP_EXC_FRAGTIME using the first queued
 * fragment's skb, then destroys the queue.
 */
static void ip_expire(unsigned long arg)
{
    struct ipq *qp;

    qp = (struct ipq *)arg;

    /*
     * Send an ICMP "Fragment Reassembly Timeout" message.
     */
    ip_statistics.IpReasmTimeout++;
    ip_statistics.IpReasmFails++;
    /* This if is always true... shrug */
    if (qp->fragments != NULL)
        icmp_send(qp->fragments->skb, ICMP_TIME_EXCEEDED,
                  ICMP_EXC_FRAGTIME, qp->dev);

    /*
     * Nuke the fragment queue.
     */
    ip_free(qp);
}
703
704 /* 705 * Add an entry to the 'ipq' queue for a newly received IP datagram. 706 * We will (hopefully :-) receive all other fragments of this datagram 707 * in time, so we just create a queue for this datagram, in which we 708 * will insert the received fragments at their respective positions. 709 */ 710
711 staticstructipq *ip_create(structsk_buff *skb, structiphdr *iph, structdevice *dev)
/* */ 712 { 713 structipq *qp;
714 intmaclen;
715 intihlen;
716
717 qp = (structipq *) kmalloc(sizeof(structipq), GFP_ATOMIC);
718 if (qp == NULL)
719 { 720 printk("IP: create: no memory left !\n");
721 return(NULL);
722 skb->dev = qp->dev;
723 } 724 memset(qp, 0, sizeof(structipq));
725
726 /* 727 * Allocate memory for the MAC header. 728 * 729 * FIXME: We have a maximum MAC address size limit and define 730 * elsewhere. We should use it here and avoid the 3 kmalloc() calls 731 */ 732
733 maclen = ((unsignedlong) iph) - ((unsignedlong) skb->data);
734 qp->mac = (unsignedchar *) kmalloc(maclen, GFP_ATOMIC);
735 if (qp->mac == NULL)
736 { 737 printk("IP: create: no memory left !\n");
738 kfree_s(qp, sizeof(structipq));
739 return(NULL);
740 } 741
742 /* 743 * Allocate memory for the IP header (plus 8 octets for ICMP). 744 */ 745
746 ihlen = (iph->ihl * sizeof(unsignedlong));
747 qp->iph = (structiphdr *) kmalloc(ihlen + 8, GFP_ATOMIC);
748 if (qp->iph == NULL)
749 { 750 printk("IP: create: no memory left !\n");
751 kfree_s(qp->mac, maclen);
752 kfree_s(qp, sizeof(structipq));
753 return(NULL);
754 } 755
756 /* Fill in the structure. */ 757 memcpy(qp->mac, skb->data, maclen);
758 memcpy(qp->iph, iph, ihlen + 8);
759 qp->len = 0;
760 qp->ihlen = ihlen;
761 qp->maclen = maclen;
762 qp->fragments = NULL;
763 qp->dev = dev;
764
765 /* Start a timer for this entry. */ 766 qp->timer.expires = IP_FRAG_TIME; /* about 30 seconds */ 767 qp->timer.data = (unsignedlong) qp; /* pointer to queue */ 768 qp->timer.function = ip_expire; /* expire function */ 769 add_timer(&qp->timer);
770
771 /* Add this entry to the queue. */ 772 qp->prev = NULL;
773 cli();
774 qp->next = ipqueue;
775 if (qp->next != NULL)
776 qp->next->prev = qp;
777 ipqueue = qp;
778 sti();
779 return(qp);
780 } 781
782
783 /* 784 * See if a fragment queue is complete. 785 */ 786
787 staticintip_done(structipq *qp)
/* */ 788 { 789 structipfrag *fp;
790 intoffset;
791
792 /* Only possible if we received the final fragment. */ 793 if (qp->len == 0)
794 return(0);
795
796 /* Check all fragment offsets to see if they connect. */ 797 fp = qp->fragments;
798 offset = 0;
799 while (fp != NULL)
800 { 801 if (fp->offset > offset)
802 return(0); /* fragment(s) missing */ 803 offset = fp->end;
804 fp = fp->next;
805 } 806
807 /* All fragments are present. */ 808 return(1);
809 } 810
811
/*
 * Build a new IP datagram from all its fragments.
 *
 * FIXME: We copy here because we lack an effective way of handling lists
 * of bits on input. Until the new skb data handling is in I'm not going
 * to touch this with a bargepole. This also causes a 4Kish limit on
 * packet sizes.
 *
 * Allocates one skb big enough for MAC header + IP header + payload,
 * copies the saved headers and every fragment's data into it, frees the
 * queue entry, fixes up frag_off/tot_len in the rebuilt header and
 * returns the skb (NULL on failure; the queue is freed either way).
 */
static struct sk_buff *ip_glue(struct ipq *qp)
{
    struct sk_buff *skb;
    struct iphdr *iph;
    struct ipfrag *fp;
    unsigned char *ptr;
    int count, len;

    /*
     * Allocate a new buffer for the datagram.
     */
    len = qp->maclen + qp->ihlen + qp->len;

    if ((skb = alloc_skb(len, GFP_ATOMIC)) == NULL)
    {
        ip_statistics.IpReasmFails++;
        printk("IP: queue_glue: no memory for gluing queue 0x%X\n", (int) qp);
        ip_free(qp);
        return(NULL);
    }

    /* Fill in the basic details. */
    skb->len = (len - qp->maclen);
    skb->h.raw = skb->data;
    skb->free = 1;

    /* Copy the original MAC and IP headers into the new buffer. */
    ptr = (unsigned char *) skb->h.raw;
    memcpy(ptr, ((unsigned char *) qp->mac), qp->maclen);
    ptr += qp->maclen;
    memcpy(ptr, ((unsigned char *) qp->iph), qp->ihlen);
    ptr += qp->ihlen;
    skb->h.raw += qp->maclen;       /* h.raw now points at the IP header */

    count = 0;

    /* Copy the data portions of all fragments into the new buffer. */
    fp = qp->fragments;
    while (fp != NULL)
    {
        if (count + fp->len > skb->len)
        {
            /* Corrupt fragment list — bail out and drop everything. */
            printk("Invalid fragment list: Fragment over size.\n");
            ip_free(qp);
            kfree_skb(skb, FREE_WRITE);
            ip_statistics.IpReasmFails++;
            return NULL;
        }
        memcpy((ptr + fp->offset), fp->ptr, fp->len);
        count += fp->len;
        fp = fp->next;
    }

    /* We glued together all fragments, so remove the queue entry. */
    ip_free(qp);

    /* Done with all fragments. Fixup the new IP header. */
    iph = skb->h.iph;
    iph->frag_off = 0;
    iph->tot_len = htons((iph->ihl * sizeof(unsigned long)) + count);
    skb->ip_hdr = iph;

    ip_statistics.IpReasmOKs++;
    return(skb);
}
888
889 /* 890 * Process an incoming IP datagram fragment. 891 */ 892
893 staticstructsk_buff *ip_defrag(structiphdr *iph, structsk_buff *skb, structdevice *dev)
/* */ 894 { 895 structipfrag *prev, *next;
896 structipfrag *tfp;
897 structipq *qp;
898 structsk_buff *skb2;
899 unsignedchar *ptr;
900 intflags, offset;
901 inti, ihl, end;
902
903 ip_statistics.IpReasmReqds++;
904
905 /* Find the entry of this IP datagram in the "incomplete datagrams" queue. */ 906 qp = ip_find(iph);
907
908 /* Is this a non-fragmented datagram? */ 909 offset = ntohs(iph->frag_off);
910 flags = offset & ~IP_OFFSET;
911 offset &= IP_OFFSET;
912 if (((flags & IP_MF) == 0) && (offset == 0))
913 { 914 if (qp != NULL)
915 ip_free(qp); /* Huh? How could this exist?? */ 916 return(skb);
917 } 918
919 offset <<= 3; /* offset is in 8-byte chunks */ 920
921 /* 922 * If the queue already existed, keep restarting its timer as long 923 * as we still are receiving fragments. Otherwise, create a fresh 924 * queue entry. 925 */ 926
927 if (qp != NULL)
928 { 929 del_timer(&qp->timer);
930 qp->timer.expires = IP_FRAG_TIME; /* about 30 seconds */ 931 qp->timer.data = (unsignedlong) qp; /* pointer to queue */ 932 qp->timer.function = ip_expire; /* expire function */ 933 add_timer(&qp->timer);
934 } 935 else 936 { 937 /* 938 * If we failed to create it, then discard the frame 939 */ 940 if ((qp = ip_create(skb, iph, dev)) == NULL)
941 { 942 skb->sk = NULL;
943 kfree_skb(skb, FREE_READ);
944 ip_statistics.IpReasmFails++;
945 returnNULL;
946 } 947 } 948
949 /* 950 * Determine the position of this fragment. 951 */ 952
953 ihl = (iph->ihl * sizeof(unsignedlong));
954 end = offset + ntohs(iph->tot_len) - ihl;
955
956 /* 957 * Point into the IP datagram 'data' part. 958 */ 959
960 ptr = skb->data + dev->hard_header_len + ihl;
961
962 /* 963 * Is this the final fragment? 964 */ 965
966 if ((flags & IP_MF) == 0)
967 qp->len = end;
968
969 /* 970 * Find out which fragments are in front and at the back of us 971 * in the chain of fragments so far. We must know where to put 972 * this fragment, right? 973 */ 974
975 prev = NULL;
976 for(next = qp->fragments; next != NULL; next = next->next)
977 { 978 if (next->offset > offset)
979 break; /* bingo! */ 980 prev = next;
981 } 982
983 /* 984 * We found where to put this one. 985 * Check for overlap with preceding fragment, and, if needed, 986 * align things so that any overlaps are eliminated. 987 */ 988 if (prev != NULL && offset < prev->end)
989 { 990 i = prev->end - offset;
991 offset += i; /* ptr into datagram */ 992 ptr += i; /* ptr into fragment data */ 993 } 994
995 /* 996 * Look for overlap with succeeding segments. 997 * If we can merge fragments, do it. 998 */ 999
1000 for(; next != NULL; next = tfp)
1001 {1002 tfp = next->next;
1003 if (next->offset >= end)
1004 break; /* no overlaps at all */1005
1006 i = end - next->offset; /* overlap is 'i' bytes */1007 next->len -= i; /* so reduce size of */1008 next->offset += i; /* next fragment */1009 next->ptr += i;
1010
1011 /*1012 * If we get a frag size of <= 0, remove it and the packet1013 * that it goes with.1014 */1015 if (next->len <= 0)
1016 {1017 if (next->prev != NULL)
1018 next->prev->next = next->next;
1019 else1020 qp->fragments = next->next;
1021
1022 if (tfp->next != NULL)
1023 next->next->prev = next->prev;
1024
1025 kfree_skb(next->skb,FREE_READ);
1026 kfree_s(next, sizeof(structipfrag));
1027 }1028 }1029
1030 /*1031 * Insert this fragment in the chain of fragments.1032 */1033
1034 tfp = NULL;
1035 tfp = ip_frag_create(offset, end, skb, ptr);
1036
1037 /*1038 * No memory to save the fragment - so throw the lot1039 */1040
1041 if (!tfp)
1042 {1043 skb->sk = NULL;
1044 kfree_skb(skb, FREE_READ);
1045 returnNULL;
1046 }1047 tfp->prev = prev;
1048 tfp->next = next;
1049 if (prev != NULL)
1050 prev->next = tfp;
1051 else1052 qp->fragments = tfp;
1053
1054 if (next != NULL)
1055 next->prev = tfp;
1056
1057 /*1058 * OK, so we inserted this new fragment into the chain.1059 * Check if we now have a full IP datagram which we can1060 * bump up to the IP layer...1061 */1062
1063 if (ip_done(qp))
1064 {1065 skb2 = ip_glue(qp); /* glue together the fragments */1066 return(skb2);
1067 }1068 return(NULL);
1069 }1070
1071
/*
 * This IP datagram is too large to be sent in one piece. Break it up into
 * smaller pieces (each of size equal to the MAC header plus IP header plus
 * a block of the data of the original IP data part) that will yet fit in a
 * single device frame, and queue such a frame for sending by calling the
 * ip_queue_xmit(). Note that this is recursion, and bad things will happen
 * if this function causes a loop...
 *
 * Yes this is inefficient, feel free to submit a quicker one.
 *
 * **Protocol Violation**
 * We copy all the options to each fragment. !FIXME!
 */
void ip_fragment(struct sock *sk, struct sk_buff *skb, struct device *dev, int is_frag)
{
    struct iphdr *iph;
    unsigned char *raw;
    unsigned char *ptr;
    struct sk_buff *skb2;
    int left, mtu, hlen, len;
    int offset;
    unsigned long flags;

    /*
     * Point into the IP datagram header.
     */
    raw = skb->data;
    iph = (struct iphdr *) (raw + dev->hard_header_len);

    skb->ip_hdr = iph;

    /*
     * Setup starting values.
     */
    hlen = (iph->ihl * sizeof(unsigned long));
    left = ntohs(iph->tot_len) - hlen;  /* Space per frame */
    hlen += dev->hard_header_len;       /* Total header size */
    mtu = (dev->mtu - hlen);            /* Size of data space */
    ptr = (raw + hlen);                 /* Where to start from */

    /*
     * Check for any "DF" flag. [DF means do not fragment]
     */
    if (ntohs(iph->frag_off) & IP_DF)
    {
        ip_statistics.IpFragFails++;
        icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, dev);
        return;
    }

    /*
     * The protocol doesn't seem to say what to do in the case that the
     * frame + options doesn't fit the mtu. As it used to fall down dead
     * in this case we were fortunate it didn't happen
     */
    if (mtu < 8)
    {
        /* It's wrong but its better than nothing */
        icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, dev);
        ip_statistics.IpFragFails++;
        return;
    }

    /*
     * Fragment the datagram.
     */

    /*
     * The initial offset is 0 for a complete frame. When
     * fragmenting fragments its wherever this one starts.
     */
    if (is_frag & 2)
        offset = (ntohs(iph->frag_off) & 0x1fff) << 3;
    else
        offset = 0;

    /*
     * Keep copying data until we run out.
     */
    while (left > 0)
    {
        len = left;
        /* IF: it doesn't fit, use 'mtu' - the data space left */
        if (len > mtu)
            len = mtu;
        /* IF: we are not sending upto and including the packet end
           then align the next start on an eight byte boundary */
        if (len < left)
        {
            len /= 8;
            len *= 8;
        }

        /*
         * Allocate buffer.
         */
        if ((skb2 = alloc_skb(len + hlen, GFP_ATOMIC)) == NULL)
        {
            printk("IP: frag: no memory for new fragment!\n");
            ip_statistics.IpFragFails++;
            return;
        }

        /*
         * Set up data on packet
         */
        skb2->arp = skb->arp;
        if (skb->free == 0)
            printk("IP fragmenter: BUG free!=1 in fragmenter\n");
        skb2->free = 1;
        skb2->len = len + hlen;
        skb2->h.raw = (char *) skb2->data;

        /*
         * Charge the memory for the fragment to any owner
         * it might possess
         */
        save_flags(flags);
        if (sk)
        {
            cli();
            sk->wmem_alloc += skb2->mem_len;
            skb2->sk = sk;
        }
        restore_flags(flags);
        skb2->raddr = skb->raddr;   /* For rebuild_header - must be here */

        /*
         * Copy the packet header into the new buffer.
         */
        memcpy(skb2->h.raw, raw, hlen);

        /*
         * Copy a block of the IP datagram.
         */
        memcpy(skb2->h.raw + hlen, ptr, len);
        left -= len;

        skb2->h.raw += dev->hard_header_len;

        /*
         * Fill in the new header fields.
         */
        iph = (struct iphdr *)(skb2->h.raw /*+dev->hard_header_len*/);
        iph->frag_off = htons((offset >> 3));
        /*
         * Added AC : If we are fragmenting a fragment thats not the
         * last fragment then keep MF on each bit
         */
        if (left > 0 || (is_frag & 1))
            iph->frag_off |= htons(IP_MF);
        ptr += len;
        offset += len;

        /*
         * Put this fragment into the sending queue.
         */
        ip_statistics.IpFragCreates++;

        ip_queue_xmit(sk, dev, skb2, 2);
    }
    ip_statistics.IpFragOKs++;
}
1246
1247
1248 #ifdefCONFIG_IP_FORWARD1249
1250 /*1251 * Forward an IP datagram to its next destination.1252 */1253
1254 staticvoidip_forward(structsk_buff *skb, structdevice *dev, intis_frag)
/* */1255 {1256 structdevice *dev2; /* Output device */1257 structiphdr *iph; /* Our header */1258 structsk_buff *skb2; /* Output packet */1259 structrtable *rt; /* Route we use */1260 unsignedchar *ptr; /* Data pointer */1261 unsignedlongraddr; /* Router IP address */1262
1263 /*1264 * According to the RFC, we must first decrease the TTL field. If1265 * that reaches zero, we must reply an ICMP control message telling1266 * that the packet's lifetime expired.1267 *1268 * Exception:1269 * We may not generate an ICMP for an ICMP. icmp_send does the1270 * enforcement of this so we can forget it here. It is however1271 * sometimes VERY important.1272 */1273
1274 iph = skb->h.iph;
1275 iph->ttl--;
1276 if (iph->ttl <= 0)
1277 {1278 /* Tell the sender its packet died... */1279 icmp_send(skb, ICMP_TIME_EXCEEDED, ICMP_EXC_TTL, dev);
1280 return;
1281 }1282
1283 /*1284 * Re-compute the IP header checksum.1285 * This is inefficient. We know what has happened to the header1286 * and could thus adjust the checksum as Phil Karn does in KA9Q1287 */1288
1289 ip_send_check(iph);
1290
1291 /*1292 * OK, the packet is still valid. Fetch its destination address,1293 * and give it to the IP sender for further processing.1294 */1295
1296 rt = ip_rt_route(iph->daddr, NULL, NULL);
1297 if (rt == NULL)
1298 {1299 /*1300 * Tell the sender its packet cannot be delivered. Again1301 * ICMP is screened later.1302 */1303 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_NET_UNREACH, dev);
1304 return;
1305 }1306
1307
1308 /*1309 * Gosh. Not only is the packet valid; we even know how to1310 * forward it onto its final destination. Can we say this1311 * is being plain lucky?1312 * If the router told us that there is no GW, use the dest.1313 * IP address itself- we seem to be connected directly...1314 */1315
1316 raddr = rt->rt_gateway;
1317
1318 if (raddr != 0)
1319 {1320 /*1321 * There is a gateway so find the correct route for it.1322 * Gateways cannot in turn be gatewayed.1323 */1324 rt = ip_rt_route(raddr, NULL, NULL);
1325 if (rt == NULL)
1326 {1327 /*1328 * Tell the sender its packet cannot be delivered...1329 */1330 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, dev);
1331 return;
1332 }1333 if (rt->rt_gateway != 0)
1334 raddr = rt->rt_gateway;
1335 }1336 else1337 raddr = iph->daddr;
1338
1339 /*1340 * Having picked a route we can now send the frame out.1341 */1342
1343 dev2 = rt->rt_dev;
1344
1345 /*1346 * In IP you never forward a frame on the interface that it arrived1347 * upon. We should generate an ICMP HOST REDIRECT giving the route1348 * we calculated.1349 * For now just dropping the packet is an acceptable compromise.1350 */1351
1352 if (dev == dev2)
1353 return;
1354
1355 /*1356 * We now allocate a new buffer, and copy the datagram into it.1357 * If the indicated interface is up and running, kick it.1358 */1359
1360 if (dev2->flags & IFF_UP)
1361 {1362
1363 /*1364 * Current design decrees we copy the packet. For identical header1365 * lengths we could avoid it. The new skb code will let us push1366 * data so the problem goes away then.1367 */1368
1369 skb2 = alloc_skb(dev2->hard_header_len + skb->len, GFP_ATOMIC);
1370 /*1371 * This is rare and since IP is tolerant of network failures1372 * quite harmless.1373 */1374 if (skb2 == NULL)
1375 {1376 printk("\nIP: No memory available for IP forward\n");
1377 return;
1378 }1379 ptr = skb2->data;
1380 skb2->free = 1;
1381 skb2->len = skb->len + dev2->hard_header_len;
1382 skb2->h.raw = ptr;
1383
1384 /*1385 * Copy the packet data into the new buffer.1386 */1387 memcpy(ptr + dev2->hard_header_len, skb->h.raw, skb->len);
1388
1389 /* Now build the MAC header. */1390 (void) ip_send(skb2, raddr, skb->len, dev2, dev2->pa_addr);
1391
1392 ip_statistics.IpForwDatagrams++;
1393
1394 /*1395 * See if it needs fragmenting. Note in ip_rcv we tagged1396 * the fragment type. This must be right so that1397 * the fragmenter does the right thing.1398 */1399
1400 if(skb2->len > dev2->mtu + dev2->hard_header_len)
1401 {1402 ip_fragment(NULL,skb2,dev2, is_frag);
1403 kfree_skb(skb2,FREE_WRITE);
1404 }1405 else1406 {1407 /*1408 * Map service types to priority. We lie about1409 * throughput being low priority, but its a good1410 * choice to help improve general usage.1411 */1412 if(iph->tos & IPTOS_LOWDELAY)
1413 dev_queue_xmit(skb2, dev2, SOPRI_INTERACTIVE);
1414 elseif(iph->tos & IPTOS_THROUGHPUT)
1415 dev_queue_xmit(skb2, dev2, SOPRI_BACKGROUND);
1416 else1417 dev_queue_xmit(skb2, dev2, SOPRI_NORMAL);
1418 }1419 }1420 }1421
1422
1423 #endif1424
1425 /*1426 * This function receives all incoming IP datagrams.1427 */1428
1429 intip_rcv(structsk_buff *skb, structdevice *dev, structpacket_type *pt)
/* */1430 {1431 structiphdr *iph = skb->h.iph;
1432 unsignedcharhash;
1433 unsignedcharflag = 0;
1434 unsignedcharopts_p = 0; /* Set iff the packet has options. */1435 structinet_protocol *ipprot;
1436 staticstructoptionsopt; /* since we don't use these yet, and they1437 take up stack space. */1438 intbrd=IS_MYADDR;
1439 intis_frag=0;
1440
1441
1442 ip_statistics.IpInReceives++;
1443
1444 /*1445 * Tag the ip header of this packet so we can find it1446 */1447
1448 skb->ip_hdr = iph;
1449
1450 /*1451 * Is the datagram acceptable?1452 *1453 * 1. Length at least the size of an ip header1454 * 2. Version of 41455 * 3. Checksums correctly. [Speed optimisation for later, skip loopback checksums]1456 * (4. We ought to check for IP multicast addresses and undefined types.. does this matter ?)1457 */1458
1459 if (skb->len<sizeof(structiphdr) || iph->ihl<5 || iph->version != 4 || ip_fast_csum((unsignedchar *)iph, iph->ihl) !=0)
1460 {1461 ip_statistics.IpInHdrErrors++;
1462 kfree_skb(skb, FREE_WRITE);
1463 return(0);
1464 }1465
1466 /*1467 * Our transport medium may have padded the buffer out. Now we know it1468 * is IP we can trim to the true length of the frame.1469 */1470
1471 skb->len=ntohs(iph->tot_len);
1472
1473 /*1474 * Next analyse the packet for options. Studies show under one packet in1475 * a thousand have options....1476 */1477
1478 if (iph->ihl != 5)
1479 {/* Fast path for the typical optionless IP packet. */1480 memset((char *) &opt, 0, sizeof(opt));
1481 if (do_options(iph, &opt) != 0)
1482 return 0;
1483 opts_p = 1;
1484 }1485
1486 /*1487 * Remember if the frame is fragmented.1488 */1489
1490 if (iph->frag_off & 0x0020)
1491 is_frag|=1;
1492
1493 /*1494 * Last fragment ?1495 */1496
1497 if (ntohs(iph->frag_off) & 0x1fff)
1498 is_frag|=2;
1499
1500 /*1501 * Do any IP forwarding required. chk_addr() is expensive -- avoid it someday.1502 *1503 * This is inefficient. While finding out if it is for us we could also compute1504 * the routing table entry. This is where the great unified cache theory comes1505 * in as and when someone implements it1506 *1507 * For most hosts over 99% of packets match the first conditional1508 * and don't go via ip_chk_addr. Note: brd is set to IS_MYADDR at1509 * function entry.1510 */1511
1512 if ( iph->daddr != skb->dev->pa_addr && (brd = ip_chk_addr(iph->daddr)) == 0)
1513 {1514 /*1515 * Don't forward multicast or broadcast frames.1516 */1517
1518 if(skb->pkt_type!=PACKET_HOST)
1519 {1520 kfree_skb(skb,FREE_WRITE);
1521 return 0;
1522 }1523
1524 /*1525 * The packet is for another target. Forward the frame1526 */1527
1528 #ifdefCONFIG_IP_FORWARD1529 ip_forward(skb, dev, is_frag);
1530 #else1531 /* printk("Machine %lx tried to use us as a forwarder to %lx but we have forwarding disabled!\n",1532 iph->saddr,iph->daddr);*/1533 ip_statistics.IpInAddrErrors++;
1534 #endif1535 /*1536 * The forwarder is inefficient and copies the packet. We1537 * free the original now.1538 */1539
1540 kfree_skb(skb, FREE_WRITE);
1541 return(0);
1542 }1543
1544 /*1545 * Reassemble IP fragments.1546 */1547
1548 if(is_frag)
1549 {1550 /* Defragment. Obtain the complete packet if there is one */1551 skb=ip_defrag(iph,skb,dev);
1552 if(skb==NULL)
1553 return 0;
1554 iph=skb->h.iph;
1555 }1556
1557 /*1558 * Point into the IP datagram, just past the header.1559 */1560
1561 skb->ip_hdr = iph;
1562 skb->h.raw += iph->ihl*4;
1563
1564 /*1565 * skb->h.raw now points at the protocol beyond the IP header.1566 */1567
1568 hash = iph->protocol & (MAX_INET_PROTOS -1);
1569 for (ipprot = (structinet_protocol *)inet_protos[hash];ipprot != NULL;ipprot=(structinet_protocol *)ipprot->next)
1570 {1571 structsk_buff *skb2;
1572
1573 if (ipprot->protocol != iph->protocol)
1574 continue;
1575 /*1576 * See if we need to make a copy of it. This will1577 * only be set if more than one protocol wants it.1578 * and then not for the last one.1579 *1580 * This is an artifact of poor upper protocol design.1581 * Because the upper protocols damage the actual packet1582 * we must do copying. In actual fact it's even worse1583 * than this as TCP may hold on to the buffer.1584 */1585 if (ipprot->copy)
1586 {1587 skb2 = skb_clone(skb, GFP_ATOMIC);
1588 if(skb2==NULL)
1589 continue;
1590 }1591 else1592 {1593 skb2 = skb;
1594 }1595 flag = 1;
1596
1597 /*1598 * Pass on the datagram to each protocol that wants it,1599 * based on the datagram protocol. We should really1600 * check the protocol handler's return values here...1601 */1602 ipprot->handler(skb2, dev, opts_p ? &opt : 0, iph->daddr,
1603 (ntohs(iph->tot_len) - (iph->ihl * 4)),
1604 iph->saddr, 0, ipprot);
1605
1606 }1607
1608 /*1609 * All protocols checked.1610 * If this packet was a broadcast, we may *not* reply to it, since that1611 * causes (proven, grin) ARP storms and a leakage of memory (i.e. all1612 * ICMP reply messages get queued up for transmission...)1613 */1614
1615 if (!flag)
1616 {1617 if (brd != IS_BROADCAST)
1618 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PROT_UNREACH, dev);
1619 kfree_skb(skb, FREE_WRITE);
1620 }1621
1622 return(0);
1623 }1624
1625
1626 /*1627 * Queues a packet to be sent, and starts the transmitter1628 * if necessary. if free = 1 then we free the block after1629 * transmit, otherwise we don't. If free==2 we not only1630 * free the block but also don't assign a new ip seq number.1631 * This routine also needs to put in the total length,1632 * and compute the checksum1633 */1634
1635 voidip_queue_xmit(structsock *sk, structdevice *dev,
/* */1636 structsk_buff *skb, intfree)
1637 {1638 structiphdr *iph;
1639 unsignedchar *ptr;
1640
1641 /* Sanity check */1642 if (dev == NULL)
1643 {1644 printk("IP: ip_queue_xmit dev = NULL\n");
1645 return;
1646 }1647
1648 IS_SKB(skb);
1649
1650 /*1651 * Do some book-keeping in the packet for later1652 */1653
1654
1655 skb->dev = dev;
1656 skb->when = jiffies;
1657
1658 /*1659 * Find the IP header and set the length. This is bad1660 * but once we get the skb data handling code in the1661 * hardware will push its header sensibly and we will1662 * set skb->ip_hdr to avoid this mess and the fixed1663 * header length problem1664 */1665
1666 ptr = skb->data;
1667 ptr += dev->hard_header_len;
1668 iph = (structiphdr *)ptr;
1669 skb->ip_hdr = iph;
1670 iph->tot_len = ntohs(skb->len-dev->hard_header_len);
1671
1672 /*1673 * No reassigning numbers to fragments...1674 */1675
1676 if(free!=2)
1677 iph->id = htons(ip_id_count++);
1678 else1679 free=1;
1680
1681 /* All buffers without an owner socket get freed */1682 if (sk == NULL)
1683 free = 1;
1684
1685 skb->free = free;
1686
1687 /*1688 * Do we need to fragment. Again this is inefficient.1689 * We need to somehow lock the original buffer and use1690 * bits of it.1691 */1692
1693 if(skb->len > dev->mtu + dev->hard_header_len)
1694 {1695 ip_fragment(sk,skb,dev,0);
1696 IS_SKB(skb);
1697 kfree_skb(skb,FREE_WRITE);
1698 return;
1699 }1700
1701 /*1702 * Add an IP checksum1703 */1704
1705 ip_send_check(iph);
1706
1707 /*1708 * Print the frame when debugging1709 */1710
1711 /*1712 * More debugging. You cannot queue a packet already on a list1713 * Spot this and moan loudly.1714 */1715 if (skb->next != NULL)
1716 {1717 printk("ip_queue_xmit: next != NULL\n");
1718 skb_unlink(skb);
1719 }1720
1721 /*1722 * If a sender wishes the packet to remain unfreed1723 * we add it to his send queue. This arguably belongs1724 * in the TCP level since nobody else uses it. BUT1725 * remember IPng might change all the rules.1726 */1727
1728 if (!free)
1729 {1730 unsignedlongflags;
1731 /* The socket now has more outstanding blocks */1732
1733 sk->packets_out++;
1734
1735 /* Protect the list for a moment */1736 save_flags(flags);
1737 cli();
1738
1739 if (skb->link3 != NULL)
1740 {1741 printk("ip.c: link3 != NULL\n");
1742 skb->link3 = NULL;
1743 }1744 if (sk->send_head == NULL)
1745 {1746 sk->send_tail = skb;
1747 sk->send_head = skb;
1748 }1749 else1750 {1751 sk->send_tail->link3 = skb;
1752 sk->send_tail = skb;
1753 }1754 /* skb->link3 is NULL */1755
1756 /* Interrupt restore */1757 restore_flags(flags);
1758 /* Set the IP write timeout to the round trip time for the packet.1759 If an acknowledge has not arrived by then we may wish to act */1760 reset_timer(sk, TIME_WRITE, sk->rto);
1761 }1762 else1763 /* Remember who owns the buffer */1764 skb->sk = sk;
1765
1766 /*1767 * If the indicated interface is up and running, send the packet.1768 */1769 ip_statistics.IpOutRequests++;
1770
1771 if (dev->flags & IFF_UP)
1772 {1773 /*1774 * If we have an owner use its priority setting,1775 * otherwise use NORMAL1776 */1777
1778 if (sk != NULL)
1779 {1780 dev_queue_xmit(skb, dev, sk->priority);
1781 }1782 else1783 {1784 dev_queue_xmit(skb, dev, SOPRI_NORMAL);
1785 }1786 }1787 else1788 {1789 ip_statistics.IpOutDiscards++;
1790 if (free)
1791 kfree_skb(skb, FREE_WRITE);
1792 }1793 }1794
1795
1796 /*1797 * A socket has timed out on its send queue and wants to do a1798 * little retransmitting. Currently this means TCP.1799 */1800
/*
 *	A socket has timed out on its send queue and wants to do a
 *	little retransmitting. Currently this means TCP.
 *
 *	Walks sk->send_head (linked via skb->link3), refreshing each
 *	queued frame and handing it back to the device.  If 'all' is
 *	zero only the first frame is retransmitted; otherwise we keep
 *	going until the queue ends or we exceed the congestion window.
 */

void ip_do_retransmit(struct sock *sk, int all)
{
	struct sk_buff * skb;
	struct proto *prot;	/* NOTE(review): set below but never used */
	struct device *dev;
	int retransmits;	/* NOTE(review): counted locally, never read back */

	prot = sk->prot;
	skb = sk->send_head;
	retransmits = sk->retransmits;

	while (skb != NULL)
	{
		dev = skb->dev;
		IS_SKB(skb);
		skb->when = jiffies;	/* restart the round-trip clock for this frame */

		/*
		 *	In general it's OK just to use the old packet.  However we
		 *	need to use the current ack and window fields.  Urg and
		 *	urg_ptr could possibly stand to be updated as well, but we
		 *	don't keep the necessary data.  That shouldn't be a problem,
		 *	if the other end is doing the right thing.  Since we're
		 *	changing the packet, we have to issue a new IP identifier.
		 */

		/* this check may be unnecessary - retransmit only for TCP */
		if (sk->protocol == IPPROTO_TCP) {
			struct tcphdr *th;
			struct iphdr *iph;
			int size;

			/* Locate the IP and TCP headers inside the stored frame;
			   the link-layer header precedes them. */
			iph = (struct iphdr *)(skb->data + dev->hard_header_len);
			th = (struct tcphdr *)(((char *)iph) + (iph->ihl << 2));
			size = skb->len - (((unsigned char *) th) - skb->data);

			/* Fresh IP id, then redo the IP header checksum. */
			iph->id = htons(ip_id_count++);
			ip_send_check(iph);

			/* Refresh ack/window to current state and redo the TCP
			   checksum.  (ntohl here is the same bit operation as
			   htonl - conversion direction is cosmetic.) */
			th->ack_seq = ntohl(sk->acked_seq);
			th->window = ntohs(tcp_select_window(sk));
			tcp_send_check(th, sk->saddr, sk->daddr, size, sk);
		}

		/*
		 *	If the interface is (still) up and running, kick it.
		 */

		if (dev->flags & IFF_UP)
		{
			/*
			 *	If the packet is still being sent by the device/protocol
			 *	below then don't retransmit. This is both needed, and good -
			 *	especially with connected mode AX.25 where it stops resends
			 *	occuring of an as yet unsent anyway frame!
			 *	We still add up the counts as the round trip time wants
			 *	adjusting.
			 */
			if (sk && !skb_device_locked(skb))
			{
				/* Remove it from any existing driver queue first! */
				skb_unlink(skb);
				/* Now queue it */
				ip_statistics.IpOutRequests++;
				dev_queue_xmit(skb, dev, sk->priority);
			}
		}

		/*
		 *	Count retransmissions
		 */
		retransmits++;
		sk->prot->retransmits ++;

		/*
		 *	Only one retransmit requested.
		 */
		if (!all)
			break;

		/*
		 *	This should cut it off before we send too many packets.
		 */
		if (sk->retransmits > sk->cong_window)
			break;
		skb = skb->link3;	/* next frame on the send queue */
	}
}
1890 /*1891 * This is the normal code called for timeouts. It does the retransmission1892 * and then does backoff. ip_do_retransmit is separated out because1893 * tcp_ack needs to send stuff from the retransmit queue without1894 * initiating a backoff.1895 */1896
1897 voidip_retransmit(structsock *sk, intall)
/* */1898 {1899 ip_do_retransmit(sk, all);
1900
1901 /*1902 * Increase the timeout each time we retransmit. Note that1903 * we do not increase the rtt estimate. rto is initialized1904 * from rtt, but increases here. Jacobson (SIGCOMM 88) suggests1905 * that doubling rto each time is the least we can get away with.1906 * In KA9Q, Karn uses this for the first few times, and then1907 * goes to quadratic. netBSD doubles, but only goes up to *64,1908 * and clamps at 1 to 64 sec afterwards. Note that 120 sec is1909 * defined in the protocol as the maximum possible RTT. I guess1910 * we'll have to use something other than TCP to talk to the1911 * University of Mars.1912 */1913
1914 sk->retransmits++;
1915 sk->backoff++;
1916 sk->rto = min(sk->rto << 1, 120*HZ);
1917 reset_timer(sk, TIME_WRITE, sk->rto);
1918 }1919
1920 /*1921 * Socket option code for IP. This is the end of the line after any TCP,UDP etc options on1922 * an IP socket.1923 *1924 * We implement IP_TOS (type of service), IP_TTL (time to live).1925 *1926 * Next release we will sort out IP_OPTIONS since for some people are kind of important.1927 */1928
1929 intip_setsockopt(structsock *sk, intlevel, intoptname, char *optval, intoptlen)
/* */1930 {1931 intval,err;
1932
1933 if (optval == NULL)
1934 return(-EINVAL);
1935
1936 err=verify_area(VERIFY_READ, optval, sizeof(int));
1937 if(err)
1938 returnerr;
1939
1940 val = get_fs_long((unsignedlong *)optval);
1941
1942 if(level!=SOL_IP)
1943 return -EOPNOTSUPP;
1944
1945 switch(optname)
1946 {1947 caseIP_TOS:
1948 if(val<0||val>255)
1949 return -EINVAL;
1950 sk->ip_tos=val;
1951 if(val==IPTOS_LOWDELAY)
1952 sk->priority=SOPRI_INTERACTIVE;
1953 if(val==IPTOS_THROUGHPUT)
1954 sk->priority=SOPRI_BACKGROUND;
1955 return 0;
1956 caseIP_TTL:
1957 if(val<1||val>255)
1958 return -EINVAL;
1959 sk->ip_ttl=val;
1960 return 0;
1961 /* IP_OPTIONS and friends go here eventually */1962 default:
1963 return(-ENOPROTOOPT);
1964 }1965 }1966
1967 /*1968 * Get the options. Note for future reference. The GET of IP options gets the1969 * _received_ ones. The set sets the _sent_ ones.1970 */1971
1972 intip_getsockopt(structsock *sk, intlevel, intoptname, char *optval, int *optlen)
/* */1973 {1974 intval,err;
1975
1976 if(level!=SOL_IP)
1977 return -EOPNOTSUPP;
1978
1979 switch(optname)
1980 {1981 caseIP_TOS:
1982 val=sk->ip_tos;
1983 break;
1984 caseIP_TTL:
1985 val=sk->ip_ttl;
1986 break;
1987 default:
1988 return(-ENOPROTOOPT);
1989 }1990 err=verify_area(VERIFY_WRITE, optlen, sizeof(int));
1991 if(err)
1992 returnerr;
1993 put_fs_long(sizeof(int),(unsignedlong *) optlen);
1994
1995 err=verify_area(VERIFY_WRITE, optval, sizeof(int));
1996 if(err)
1997 returnerr;
1998 put_fs_long(val,(unsignedlong *)optval);
1999
2000 return(0);
2001 }2002
2003 /*2004 * IP protocol layer initialiser2005 */2006
/*
 *	Packet-type descriptor handed to the device layer by ip_init().
 *	Positional initializer - field meanings follow struct packet_type.
 */
static struct packet_type ip_packet_type =
{
	0,		/* MUTTER ntohs(ETH_P_IP),*/
			/* type: filled in at run time by ip_init() because
			   htons() is not a constant expression here */
	0,		/* copy */
	ip_rcv,		/* receive handler for every incoming IP frame */
	NULL,		/* NOTE(review): remaining two fields presumably
			   private data and list linkage - confirm against
			   the struct packet_type declaration */
	NULL,
};
2015
2016
2017 /*2018 * IP registers the packet type and then calls the subprotocol initialisers2019 */2020
2021 voidip_init(void)
/* */2022 {2023 ip_packet_type.type=htons(ETH_P_IP);
2024 dev_add_pack(&ip_packet_type);
2025 /* ip_raw_init();2026 ip_packet_init();2027 ip_tcp_init();2028 ip_udp_init();*/2029 }