1 /* 2 * INET An implementation of the TCP/IP protocol suite for the LINUX 3 * operating system. INET is implemented using the BSD Socket 4 * interface as the means of communication with the user level. 5 * 6 * The Internet Protocol (IP) module. 7 * 8 * Version: @(#)ip.c 1.0.16b 9/1/93 9 * 10 * Authors: Ross Biro, <bir7@leland.Stanford.Edu> 11 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> 12 * Donald Becker, <becker@super.org> 13 * Alan Cox, <gw4pts@gw4pts.ampr.org> 14 * 15 * Fixes: 16 * Alan Cox : Commented a couple of minor bits of surplus code 17 * Alan Cox : Undefining IP_FORWARD doesn't include the code 18 * (just stops a compiler warning). 19 * Alan Cox : Frames with >=MAX_ROUTE record routes, strict routes or loose routes 20 * are junked rather than corrupting things. 21 * Alan Cox : Frames to bad broadcast subnets are dumped 22 * We used to process them non broadcast and 23 * boy could that cause havoc. 24 * Alan Cox : ip_forward sets the free flag on the 25 * new frame it queues. Still crap because 26 * it copies the frame but at least it 27 * doesn't eat memory too. 28 * Alan Cox : Generic queue code and memory fixes. 29 * Fred Van Kempen : IP fragment support (borrowed from NET2E) 30 * Gerhard Koerting: Forward fragmented frames correctly. 31 * Gerhard Koerting: Fixes to my fix of the above 8-). 32 * Gerhard Koerting: IP interface addressing fix. 33 * Linus Torvalds : More robustness checks 34 * Alan Cox : Even more checks: Still not as robust as it ought to be 35 * Alan Cox : Save IP header pointer for later 36 * Alan Cox : ip option setting 37 * Alan Cox : Use ip_tos/ip_ttl settings 38 * Alan Cox : Fragmentation bogosity removed 39 * (Thanks to Mark.Bush@prg.ox.ac.uk) 40 * Dmitry Gorodchanin : Send of a raw packet crash fix. 41 * Alan Cox : Silly ip bug when an overlength 42 * fragment turns up. Now frees the 43 * queue. 44 * Linus Torvalds/ : Memory leakage on fragmentation 45 * Alan Cox : handling. 
46 * Gerhard Koerting: Forwarding uses IP priority hints 47 * Teemu Rantanen : Fragment problems. 48 * Alan Cox : General cleanup, comments and reformat 49 * Alan Cox : SNMP statistics 50 * Alan Cox : BSD address rule semantics. Also see 51 * UDP as there is a nasty checksum issue 52 * if you do things the wrong way. 53 * Alan Cox : Always defrag, moved IP_FORWARD to the config.in file 54 * Alan Cox : IP options adjust sk->priority. 55 * Pedro Roque : Fix mtu/length error in ip_forward. 56 * Alan Cox : Avoid ip_chk_addr when possible. 57 * 58 * To Fix: 59 * IP option processing is mostly not needed. ip_forward needs to know about routing rules 60 * and time stamp but that's about all. Use the route mtu field here too 61 * 62 * This program is free software; you can redistribute it and/or 63 * modify it under the terms of the GNU General Public License 64 * as published by the Free Software Foundation; either version 65 * 2 of the License, or (at your option) any later version. 66 */ 67 #include <asm/segment.h>
68 #include <asm/system.h>
69 #include <linux/types.h>
70 #include <linux/kernel.h>
71 #include <linux/sched.h>
72 #include <linux/string.h>
73 #include <linux/errno.h>
74 #include <linux/socket.h>
75 #include <linux/sockios.h>
76 #include <linux/in.h>
77 #include <linux/inet.h>
78 #include <linux/netdevice.h>
79 #include <linux/etherdevice.h>
80 #include "snmp.h"
81 #include "ip.h"
82 #include "protocol.h"
83 #include "route.h"
84 #include "tcp.h"
85 #include <linux/skbuff.h>
86 #include "sock.h"
87 #include "arp.h"
88 #include "icmp.h"
89
/* Always build the defragmentation code into this file. */
#define CONFIG_IP_DEFRAG

/* TCP retransmit bookkeeping, defined elsewhere. */
extern int last_retran;
extern void sort_send(struct sock *sk);

#define min(a,b)	((a)<(b)?(a):(b))
/* True when x (network byte order) is in the 127.0.0.0/8 loopback net. */
#define LOOPBACK(x)	(((x) & htonl(0xff000000)) == htonl(0x7f000000))

/*
 *	SNMP management statistics
 */

struct ip_mib ip_statistics={1,64,};	/* Forwarding=Yes, Default TTL=64 */
104 /* 105 * Handle the issuing of an ioctl() request 106 * for the ip device. This is scheduled to 107 * disappear 108 */ 109
/*
 *	Handle an ioctl() request for the IP layer. No IP-level commands
 *	are implemented, so every request fails identically with -EINVAL.
 */
int ip_ioctl(struct sock *sk, int cmd, unsigned long arg)
{
	/* Nothing is recognised; reject whatever 'cmd' carries. */
	return -EINVAL;
}
119
120 /* these two routines will do routing. */ 121
/*
 *	Strict source routing handler. Currently a stub: do_options()
 *	never reaches the point of calling it (it returns early), and
 *	no strict-route forwarding is performed.
 */
static void
strict_route(struct iphdr *iph, struct options *opt)
{
}
127
/*
 *	Loose source routing handler. Currently a stub — see strict_route();
 *	the option-parsing code that would invoke it is disabled.
 */
static void
loose_route(struct iphdr *iph, struct options *opt)
{
}
133
134
135
/* This routine will check to see if we have lost a gateway. */
/* Currently a stub: no gateway-loss detection is implemented. */
void
ip_route_check(unsigned long daddr)
{
}
142
#if 0
/* this routine puts the options at the end of an ip header. */
/* Compiled out: writes a single IPOPT_END byte after the fixed header
   and reports 4 bytes of (padded) option space. Kept for reference only;
   the live call site is guarded by '#ifdef Not_Yet_Avail'. */
static int
build_options(struct iphdr *iph, struct options *opt)
{
	unsigned char *ptr;
	/* currently we don't support any options. */
	ptr = (unsigned char *)(iph+1);
	*ptr = 0;
	return (4);
}
#endif
156
157 /* 158 * Take an skb, and fill in the MAC header. 159 */ 160
161 staticintip_send(structsk_buff *skb, unsignedlongdaddr, intlen, structdevice *dev, unsignedlongsaddr)
/* */ 162 { 163 intmac = 0;
164
165 skb->dev = dev;
166 skb->arp = 1;
167 if (dev->hard_header)
168 { 169 /* 170 * Build a hardware header. Source address is our mac, destination unknown 171 * (rebuild header will sort this out) 172 */ 173 mac = dev->hard_header(skb->data, dev, ETH_P_IP, NULL, NULL, len, skb);
174 if (mac < 0)
175 { 176 mac = -mac;
177 skb->arp = 0;
178 skb->raddr = daddr; /* next routing address */ 179 } 180 } 181 returnmac;
182 } 183
184 intip_id_count = 0;
185
/*
 *	This routine builds the appropriate hardware/IP headers for
 *	the routine. It assumes that if *dev != NULL then the
 *	protocol knows what it's doing, otherwise it uses the
 *	routing/ARP tables to select a device struct.
 *
 *	On success returns the number of header bytes written at the start
 *	of skb->data: MAC header only for IPPROTO_RAW, otherwise MAC header
 *	plus a 20-byte option-less IP header. Returns -ENETUNREACH when no
 *	route exists and a device must be chosen here.
 *
 *	NOTE(review): iph->tot_len, iph->id and iph->check are NOT filled in
 *	here — the caller is expected to complete them; verify at call sites.
 */
int ip_build_header(struct sk_buff *skb, unsigned long saddr, unsigned long daddr,
		struct device **dev, int type, struct options *opt, int len, int tos, int ttl)
{
	static struct options optmem;
	struct iphdr *iph;
	struct rtable *rt;
	unsigned char *buff;
	unsigned long raddr;	/* first-hop (router) address */
	int tmp;
	unsigned long src;	/* source address suggested by the route */

	buff = skb->data;

	/*
	 *	See if we need to look up the device.
	 */

	if (*dev == NULL)
	{
		if(skb->localroute)
			rt = ip_rt_local(daddr, &optmem, &src);
		else
			rt = ip_rt_route(daddr, &optmem, &src);
		if (rt == NULL)
		{
			ip_statistics.IpOutNoRoutes++;
			return(-ENETUNREACH);
		}

		*dev = rt->rt_dev;
		/*
		 *	If the frame is from us and going off machine it MUST MUST MUST
		 *	have the output device ip address and never the loopback
		 */
		if (LOOPBACK(saddr) && !LOOPBACK(daddr))
			saddr = src;/*rt->rt_dev->pa_addr;*/
		raddr = rt->rt_gateway;

		opt = &optmem;
	}
	else
	{
		/*
		 *	We still need the address of the first hop.
		 */
		if(skb->localroute)
			rt = ip_rt_local(daddr, &optmem, &src);
		else
			rt = ip_rt_route(daddr, &optmem, &src);
		/*
		 *	If the frame is from us and going off machine it MUST MUST MUST
		 *	have the output device ip address and never the loopback
		 */
		if (LOOPBACK(saddr) && !LOOPBACK(daddr))
			saddr = src;/*rt->rt_dev->pa_addr;*/

		/* NOTE(review): unlike the branch above, 'opt' is NOT redirected
		   to &optmem here. Harmless while build_options() is compiled
		   out, but verify before enabling option emission. */
		raddr = (rt == NULL) ? 0 : rt->rt_gateway;
	}

	/*
	 *	No source addr so make it our addr
	 */
	if (saddr == 0)
		saddr = src;

	/*
	 *	No gateway so aim at the real destination
	 */
	if (raddr == 0)
		raddr = daddr;

	/*
	 *	Now build the MAC header.
	 */

	tmp = ip_send(skb, raddr, len, *dev, saddr);
	buff += tmp;
	len -= tmp;

	/*
	 *	Book keeping
	 */

	skb->dev = *dev;
	skb->saddr = saddr;
	if (skb->sk)
		skb->sk->saddr = saddr;

	/*
	 *	Now build the IP header.
	 */

	/*
	 *	If we are using IPPROTO_RAW, then we don't need an IP header, since
	 *	one is being supplied to us by the user
	 */

	if(type == IPPROTO_RAW)
		return (tmp);

	iph = (struct iphdr *)buff;
	iph->version = 4;
	iph->tos = tos;
	iph->frag_off = 0;
	iph->ttl = ttl;
	iph->daddr = daddr;
	iph->saddr = saddr;
	iph->protocol = type;
	iph->ihl = 5;		/* 5 words = 20 bytes, i.e. no IP options */

	/* Setup the IP options. */
#ifdef Not_Yet_Avail
	build_options(iph, opt);
#endif

	return(20 + tmp);	/* IP header plus MAC header size */
}
310
/*
 *	Parse the options of an IP header into *opt.
 *
 *	NOTE(review): option processing is deliberately DISABLED — the
 *	unconditional 'return(0)' below, after zeroing *opt, makes the
 *	whole parser unreachable (matching the "To Fix" note in the file
 *	header that says option processing is mostly not needed). The
 *	dead code is retained as the reference implementation.
 *
 *	Returns 0 on success, 1 on a malformed option (dead path).
 */
static int
do_options(struct iphdr *iph, struct options *opt)
{
	unsigned char *buff;
	int done = 0;
	int i, len = sizeof(struct iphdr);

	/* Zero out the options. */
	opt->record_route.route_size = 0;
	opt->loose_route.route_size = 0;
	opt->strict_route.route_size = 0;
	opt->tstamp.ptr = 0;
	opt->security = 0;
	opt->compartment = 0;
	opt->handling = 0;
	opt->stream = 0;
	opt->tcc = 0;
	return(0);	/* <-- option parsing intentionally short-circuited */

	/* ------- everything below here is currently unreachable ------- */

	/* Advance the pointer to start at the options. */
	buff = (unsigned char *)(iph + 1);

	/* Now start the processing. */
	while (!done && len < iph->ihl*4) switch(*buff) {
		case IPOPT_END:
			done = 1;
			break;
		case IPOPT_NOOP:
			buff++;
			len++;
			break;
		case IPOPT_SEC:
			/* Security option: fixed length 11. */
			buff++;
			if (*buff != 11) return(1);
			buff++;
			opt->security = ntohs(*(unsigned short *)buff);
			buff += 2;
			opt->compartment = ntohs(*(unsigned short *)buff);
			buff += 2;
			opt->handling = ntohs(*(unsigned short *)buff);
			buff += 2;
			opt->tcc = ((*buff) << 16) + ntohs(*(unsigned short *)(buff+1));
			buff += 3;
			len += 11;
			break;
		case IPOPT_LSRR:
			/* Loose source and record route. */
			buff++;
			if ((*buff - 3)% 4 != 0) return(1);
			len += *buff;
			opt->loose_route.route_size = (*buff -3)/4;
			buff++;
			if (*buff % 4 != 0) return(1);
			opt->loose_route.pointer = *buff/4 - 1;
			buff++;
			buff++;
			for (i = 0; i < opt->loose_route.route_size; i++) {
				if(i>=MAX_ROUTE)
					return(1);	/* too many hops: junk the frame */
				opt->loose_route.route[i] = *(unsigned long *)buff;
				buff += 4;
			}
			break;
		case IPOPT_SSRR:
			/* Strict source and record route. */
			buff++;
			if ((*buff - 3)% 4 != 0) return(1);
			len += *buff;
			opt->strict_route.route_size = (*buff -3)/4;
			buff++;
			if (*buff % 4 != 0) return(1);
			opt->strict_route.pointer = *buff/4 - 1;
			buff++;
			buff++;
			for (i = 0; i < opt->strict_route.route_size; i++) {
				if(i>=MAX_ROUTE)
					return(1);
				opt->strict_route.route[i] = *(unsigned long *)buff;
				buff += 4;
			}
			break;
		case IPOPT_RR:
			/* Record route. */
			buff++;
			if ((*buff - 3)% 4 != 0) return(1);
			len += *buff;
			opt->record_route.route_size = (*buff -3)/4;
			buff++;
			if (*buff % 4 != 0) return(1);
			opt->record_route.pointer = *buff/4 - 1;
			buff++;
			buff++;
			for (i = 0; i < opt->record_route.route_size; i++) {
				if(i>=MAX_ROUTE)
					return 1;
				opt->record_route.route[i] = *(unsigned long *)buff;
				buff += 4;
			}
			break;
		case IPOPT_SID:
			/* Stream identifier. */
			len += 4;
			buff +=2;
			opt->stream = *(unsigned short *)buff;
			buff += 2;
			break;
		case IPOPT_TIMESTAMP:
			/* Internet timestamp. */
			buff++;
			len += *buff;
			if (*buff % 4 != 0) return(1);
			opt->tstamp.len = *buff / 4 - 1;
			buff++;
			if ((*buff - 1) % 4 != 0) return(1);
			opt->tstamp.ptr = (*buff-1)/4;
			buff++;
			opt->tstamp.x.full_char = *buff;
			buff++;
			for (i = 0; i < opt->tstamp.len; i++) {
				opt->tstamp.data[i] = *(unsigned long *)buff;
				buff += 4;
			}
			break;
		default:
			return(1);	/* unknown option: reject the frame */
	}

	/* If no explicit record route was given, borrow the strict or
	   loose source route list as the record route. */
	if (opt->record_route.route_size == 0) {
		if (opt->strict_route.route_size != 0) {
			memcpy(&(opt->record_route), &(opt->strict_route),
			       sizeof(opt->record_route));
		} else if (opt->loose_route.route_size != 0) {
			memcpy(&(opt->record_route), &(opt->loose_route),
			       sizeof(opt->record_route));
		}
	}

	if (opt->strict_route.route_size != 0 &&
	    opt->strict_route.route_size != opt->strict_route.pointer) {
		strict_route(iph, opt);
		return(0);
	}

	if (opt->loose_route.route_size != 0 &&
	    opt->loose_route.route_size != opt->loose_route.pointer) {
		loose_route(iph, opt);
		return(0);
	}

	return(0);
}
458 /* 459 * This is a version of ip_compute_csum() optimized for IP headers, which 460 * always checksum on 4 octet boundaries. 461 */ 462
/*
 *	Fold 'wlen' 32-bit words at 'buff' into the ones-complement
 *	Internet checksum and return its complement (low 16 bits).
 *	Callers pass iph->ihl, i.e. the header length in 32-bit words.
 *
 *	i386-specific inline assembly: lodsl/adcl accumulate with carry,
 *	then the high and low halves are folded together. 'bogus' is a
 *	scratch register for the fold.
 */
static inline unsigned short ip_fast_csum(unsigned char * buff, int wlen)
{
	unsigned long sum = 0;

	if (wlen)
	{
		unsigned long bogus;
		__asm__("clc\n"
		"1:\t"
		"lodsl\n\t"
		"adcl %3, %0\n\t"
		"decl %2\n\t"
		"jne 1b\n\t"
		"adcl $0, %0\n\t"
		"movl %0, %3\n\t"
		"shrl $16, %3\n\t"
		"addw %w3, %w0\n\t"
		"adcw $0, %w0"
		: "=r" (sum), "=S" (buff), "=r" (wlen), "=a" (bogus)
		: "0" (sum), "1" (buff), "2" (wlen));
	}
	return (~sum) & 0xffff;
}
487 /* 488 * This routine does all the checksum computations that don't 489 * require anything special (like copying or special headers). 490 */ 491
/*
 *	Compute the complemented Internet checksum over an arbitrary
 *	byte buffer. Processes whole 32-bit words first, then a trailing
 *	16-bit word, then a final odd byte, using i386 inline assembly
 *	(the running sum lives in %ebx/%bx between the three fragments).
 */
unsigned short ip_compute_csum(unsigned char * buff, int len)
{
	unsigned long sum = 0;

	/* Do the first multiple of 4 bytes and convert to 16 bits. */
	if (len > 3)
	{
		__asm__("clc\n"
		"1:\t"
		"lodsl\n\t"
		"adcl %%eax, %%ebx\n\t"
		"loop 1b\n\t"
		"adcl $0, %%ebx\n\t"
		"movl %%ebx, %%eax\n\t"
		"shrl $16, %%eax\n\t"
		"addw %%ax, %%bx\n\t"
		"adcw $0, %%bx"
		: "=b" (sum) , "=S" (buff)
		: "0" (sum), "c" (len >> 2) ,"1" (buff)
		: "ax", "cx", "si", "bx" );
	}
	/* Trailing 16-bit word, if any. */
	if (len & 2)
	{
		__asm__("lodsw\n\t"
		"addw %%ax, %%bx\n\t"
		"adcw $0, %%bx"
		: "=b" (sum), "=S" (buff)
		: "0" (sum), "1" (buff)
		: "bx", "ax", "si");
	}
	/* Trailing odd byte, if any. */
	if (len & 1)
	{
		__asm__("lodsb\n\t"
		"movb $0, %%ah\n\t"
		"addw %%ax, %%bx\n\t"
		"adcw $0, %%bx"
		: "=b" (sum), "=S" (buff)
		: "0" (sum), "1" (buff)
		: "bx", "ax", "si");
	}
	sum =~sum;
	return(sum & 0xffff);
}
536 /* 537 * Check the header of an incoming IP datagram. This version is still used in slhc.c. 538 */ 539
/*
 *	Checksum an IP header in place. By the usual ones-complement
 *	property, a result of 0 means the header's stored checksum is
 *	valid. Still used by slhc.c.
 */
int ip_csum(struct iphdr *iph)
{
	return ip_fast_csum((unsigned char *)iph, iph->ihl);
}
545 /* 546 * Generate a checksum for an outgoing IP datagram. 547 */ 548
/*
 *	Fill in the checksum field of an outgoing IP header.
 *	The field must be zeroed before summing, per the checksum spec.
 */
static void ip_send_check(struct iphdr *iph)
{
	iph->check = 0;
	iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
}
555 /************************ Fragment Handlers From NET2E not yet with tweaks to beat 4K **********************************/ 556
557
558 /* 559 * This fragment handler is a bit of a heap. On the other hand it works quite 560 * happily and handles things quite well. 561 */ 562
563 staticstructipq *ipqueue = NULL; /* IP fragment queue */ 564
565 /* 566 * Create a new fragment entry. 567 */ 568
569 staticstructipfrag *ip_frag_create(intoffset, intend, structsk_buff *skb, unsignedchar *ptr)
/* */ 570 { 571 structipfrag *fp;
572
573 fp = (structipfrag *) kmalloc(sizeof(structipfrag), GFP_ATOMIC);
574 if (fp == NULL)
575 { 576 printk("IP: frag_create: no memory left !\n");
577 return(NULL);
578 } 579 memset(fp, 0, sizeof(structipfrag));
580
581 /* Fill in the structure. */ 582 fp->offset = offset;
583 fp->end = end;
584 fp->len = end - offset;
585 fp->skb = skb;
586 fp->ptr = ptr;
587
588 return(fp);
589 } 590
591
592 /* 593 * Find the correct entry in the "incomplete datagrams" queue for 594 * this IP datagram, and return the queue entry address if found. 595 */ 596
597 staticstructipq *ip_find(structiphdr *iph)
/* */ 598 { 599 structipq *qp;
600 structipq *qplast;
601
602 cli();
603 qplast = NULL;
604 for(qp = ipqueue; qp != NULL; qplast = qp, qp = qp->next)
605 { 606 if (iph->id== qp->iph->id && iph->saddr == qp->iph->saddr &&
607 iph->daddr == qp->iph->daddr && iph->protocol == qp->iph->protocol)
608 { 609 del_timer(&qp->timer); /* So it doesn't vanish on us. The timer will be reset anyway */ 610 sti();
611 return(qp);
612 } 613 } 614 sti();
615 return(NULL);
616 } 617
618
619 /* 620 * Remove an entry from the "incomplete datagrams" queue, either 621 * because we completed, reassembled and processed it, or because 622 * it timed out. 623 */ 624
/*
 *	Remove an entry from the "incomplete datagrams" queue, either
 *	because we completed, reassembled and processed it, or because
 *	it timed out.
 *
 *	Tears down everything the entry owns: its timer, its place in the
 *	global ipqueue list, every queued fragment (and that fragment's
 *	skb), the saved MAC and IP headers, and the descriptor itself.
 */
static void ip_free(struct ipq *qp)
{
	struct ipfrag *fp;
	struct ipfrag *xp;

	/*
	 *	Stop the timer for this entry.
	 */

	del_timer(&qp->timer);

	/* Remove this entry from the "incomplete datagrams" queue. */
	cli();
	if (qp->prev == NULL)
	{
		/* Entry was the list head. */
		ipqueue = qp->next;
		if (ipqueue != NULL)
			ipqueue->prev = NULL;
	}
	else
	{
		qp->prev->next = qp->next;
		if (qp->next != NULL)
			qp->next->prev = qp->prev;
	}

	/* Release all fragment data. */

	fp = qp->fragments;
	while (fp != NULL)
	{
		xp = fp->next;		/* save the link before freeing fp */
		IS_SKB(fp->skb);
		kfree_skb(fp->skb,FREE_READ);
		kfree_s(fp, sizeof(struct ipfrag));
		fp = xp;
	}

	/* Release the MAC header. */
	kfree_s(qp->mac, qp->maclen);

	/* Release the IP header (allocated with 8 extra octets for ICMP). */
	kfree_s(qp->iph, qp->ihlen + 8);

	/* Finally, release the queue descriptor itself. */
	kfree_s(qp, sizeof(struct ipq));
	sti();
}
674
675 /* 676 * Oops- a fragment queue timed out. Kill it and send an ICMP reply. 677 */ 678
679 staticvoidip_expire(unsignedlongarg)
/* */ 680 { 681 structipq *qp;
682
683 qp = (structipq *)arg;
684
685 /* 686 * Send an ICMP "Fragment Reassembly Timeout" message. 687 */ 688
689 ip_statistics.IpReasmTimeout++;
690 ip_statistics.IpReasmFails++;
691 /* This if is always true... shrug */ 692 if(qp->fragments!=NULL)
693 icmp_send(qp->fragments->skb,ICMP_TIME_EXCEEDED,
694 ICMP_EXC_FRAGTIME, qp->dev);
695
696 /* 697 * Nuke the fragment queue. 698 */ 699 ip_free(qp);
700 } 701
702
703 /* 704 * Add an entry to the 'ipq' queue for a newly received IP datagram. 705 * We will (hopefully :-) receive all other fragments of this datagram 706 * in time, so we just create a queue for this datagram, in which we 707 * will insert the received fragments at their respective positions. 708 */ 709
710 staticstructipq *ip_create(structsk_buff *skb, structiphdr *iph, structdevice *dev)
/* */ 711 { 712 structipq *qp;
713 intmaclen;
714 intihlen;
715
716 qp = (structipq *) kmalloc(sizeof(structipq), GFP_ATOMIC);
717 if (qp == NULL)
718 { 719 printk("IP: create: no memory left !\n");
720 return(NULL);
721 skb->dev = qp->dev;
722 } 723 memset(qp, 0, sizeof(structipq));
724
725 /* 726 * Allocate memory for the MAC header. 727 * 728 * FIXME: We have a maximum MAC address size limit and define 729 * elsewhere. We should use it here and avoid the 3 kmalloc() calls 730 */ 731
732 maclen = ((unsignedlong) iph) - ((unsignedlong) skb->data);
733 qp->mac = (unsignedchar *) kmalloc(maclen, GFP_ATOMIC);
734 if (qp->mac == NULL)
735 { 736 printk("IP: create: no memory left !\n");
737 kfree_s(qp, sizeof(structipq));
738 return(NULL);
739 } 740
741 /* 742 * Allocate memory for the IP header (plus 8 octets for ICMP). 743 */ 744
745 ihlen = (iph->ihl * sizeof(unsignedlong));
746 qp->iph = (structiphdr *) kmalloc(ihlen + 8, GFP_ATOMIC);
747 if (qp->iph == NULL)
748 { 749 printk("IP: create: no memory left !\n");
750 kfree_s(qp->mac, maclen);
751 kfree_s(qp, sizeof(structipq));
752 return(NULL);
753 } 754
755 /* Fill in the structure. */ 756 memcpy(qp->mac, skb->data, maclen);
757 memcpy(qp->iph, iph, ihlen + 8);
758 qp->len = 0;
759 qp->ihlen = ihlen;
760 qp->maclen = maclen;
761 qp->fragments = NULL;
762 qp->dev = dev;
763
764 /* Start a timer for this entry. */ 765 qp->timer.expires = IP_FRAG_TIME; /* about 30 seconds */ 766 qp->timer.data = (unsignedlong) qp; /* pointer to queue */ 767 qp->timer.function = ip_expire; /* expire function */ 768 add_timer(&qp->timer);
769
770 /* Add this entry to the queue. */ 771 qp->prev = NULL;
772 cli();
773 qp->next = ipqueue;
774 if (qp->next != NULL)
775 qp->next->prev = qp;
776 ipqueue = qp;
777 sti();
778 return(qp);
779 } 780
781
782 /* 783 * See if a fragment queue is complete. 784 */ 785
786 staticintip_done(structipq *qp)
/* */ 787 { 788 structipfrag *fp;
789 intoffset;
790
791 /* Only possible if we received the final fragment. */ 792 if (qp->len == 0)
793 return(0);
794
795 /* Check all fragment offsets to see if they connect. */ 796 fp = qp->fragments;
797 offset = 0;
798 while (fp != NULL)
799 { 800 if (fp->offset > offset)
801 return(0); /* fragment(s) missing */ 802 offset = fp->end;
803 fp = fp->next;
804 } 805
806 /* All fragments are present. */ 807 return(1);
808 } 809
810
811 /* 812 * Build a new IP datagram from all its fragments. 813 * 814 * FIXME: We copy here because we lack an effective way of handling lists 815 * of bits on input. Until the new skb data handling is in I'm not going 816 * to touch this with a bargepole. This also causes a 4Kish limit on 817 * packet sizes. 818 */ 819
/*
 *	Build a new IP datagram from all its fragments.
 *
 *	FIXME: We copy here because we lack an effective way of handling lists
 *	of bits on input. Until the new skb data handling is in I'm not going
 *	to touch this with a bargepole. This also causes a 4Kish limit on
 *	packet sizes.
 *
 *	Allocates one skb large enough for the saved MAC header, saved IP
 *	header and full payload, copies everything in, frees the queue entry,
 *	and fixes up frag_off/tot_len on the rebuilt header. Returns NULL
 *	(queue freed, stats bumped) on allocation failure or a corrupt
 *	fragment list.
 */
static struct sk_buff *ip_glue(struct ipq *qp)
{
	struct sk_buff *skb;
	struct iphdr *iph;
	struct ipfrag *fp;
	unsigned char *ptr;
	int count, len;

	/*
	 *	Allocate a new buffer for the datagram.
	 */

	len = qp->maclen + qp->ihlen + qp->len;

	if ((skb = alloc_skb(len,GFP_ATOMIC)) == NULL)
	{
		ip_statistics.IpReasmFails++;
		printk("IP: queue_glue: no memory for gluing queue 0x%X\n", (int) qp);
		ip_free(qp);
		return(NULL);
	}

	/* Fill in the basic details. */
	skb->len = (len - qp->maclen);		/* IP header + data, MAC excluded */
	skb->h.raw = skb->data;
	skb->free = 1;

	/* Copy the original MAC and IP headers into the new buffer. */
	ptr = (unsigned char *) skb->h.raw;
	memcpy(ptr, ((unsigned char *) qp->mac), qp->maclen);
	ptr += qp->maclen;
	memcpy(ptr, ((unsigned char *) qp->iph), qp->ihlen);
	ptr += qp->ihlen;
	skb->h.raw += qp->maclen;		/* h.raw now points at the IP header */

	count = 0;				/* payload bytes copied so far */

	/* Copy the data portions of all fragments into the new buffer. */
	fp = qp->fragments;
	while(fp != NULL)
	{
		/* Guard against a corrupt chain overrunning the buffer. */
		if(count+fp->len>skb->len)
		{
			printk("Invalid fragment list: Fragment over size.\n");
			ip_free(qp);
			kfree_skb(skb,FREE_WRITE);
			ip_statistics.IpReasmFails++;
			return NULL;
		}
		memcpy((ptr + fp->offset), fp->ptr, fp->len);
		count += fp->len;
		fp = fp->next;
	}

	/* We glued together all fragments, so remove the queue entry. */
	ip_free(qp);

	/* Done with all fragments. Fixup the new IP header. */
	iph = skb->h.iph;
	iph->frag_off = 0;
	iph->tot_len = htons((iph->ihl * sizeof(unsigned long)) + count);
	skb->ip_hdr = iph;

	ip_statistics.IpReasmOKs++;
	return(skb);
}
887
888 /* 889 * Process an incoming IP datagram fragment. 890 */ 891
892 staticstructsk_buff *ip_defrag(structiphdr *iph, structsk_buff *skb, structdevice *dev)
/* */ 893 { 894 structipfrag *prev, *next;
895 structipfrag *tfp;
896 structipq *qp;
897 structsk_buff *skb2;
898 unsignedchar *ptr;
899 intflags, offset;
900 inti, ihl, end;
901
902 ip_statistics.IpReasmReqds++;
903
904 /* Find the entry of this IP datagram in the "incomplete datagrams" queue. */ 905 qp = ip_find(iph);
906
907 /* Is this a non-fragmented datagram? */ 908 offset = ntohs(iph->frag_off);
909 flags = offset & ~IP_OFFSET;
910 offset &= IP_OFFSET;
911 if (((flags & IP_MF) == 0) && (offset == 0))
912 { 913 if (qp != NULL)
914 ip_free(qp); /* Huh? How could this exist?? */ 915 return(skb);
916 } 917
918 offset <<= 3; /* offset is in 8-byte chunks */ 919
920 /* 921 * If the queue already existed, keep restarting its timer as long 922 * as we still are receiving fragments. Otherwise, create a fresh 923 * queue entry. 924 */ 925
926 if (qp != NULL)
927 { 928 del_timer(&qp->timer);
929 qp->timer.expires = IP_FRAG_TIME; /* about 30 seconds */ 930 qp->timer.data = (unsignedlong) qp; /* pointer to queue */ 931 qp->timer.function = ip_expire; /* expire function */ 932 add_timer(&qp->timer);
933 } 934 else 935 { 936 /* 937 * If we failed to create it, then discard the frame 938 */ 939 if ((qp = ip_create(skb, iph, dev)) == NULL)
940 { 941 skb->sk = NULL;
942 kfree_skb(skb, FREE_READ);
943 ip_statistics.IpReasmFails++;
944 returnNULL;
945 } 946 } 947
948 /* 949 * Determine the position of this fragment. 950 */ 951
952 ihl = (iph->ihl * sizeof(unsignedlong));
953 end = offset + ntohs(iph->tot_len) - ihl;
954
955 /* 956 * Point into the IP datagram 'data' part. 957 */ 958
959 ptr = skb->data + dev->hard_header_len + ihl;
960
961 /* 962 * Is this the final fragment? 963 */ 964
965 if ((flags & IP_MF) == 0)
966 qp->len = end;
967
968 /* 969 * Find out which fragments are in front and at the back of us 970 * in the chain of fragments so far. We must know where to put 971 * this fragment, right? 972 */ 973
974 prev = NULL;
975 for(next = qp->fragments; next != NULL; next = next->next)
976 { 977 if (next->offset > offset)
978 break; /* bingo! */ 979 prev = next;
980 } 981
982 /* 983 * We found where to put this one. 984 * Check for overlap with preceding fragment, and, if needed, 985 * align things so that any overlaps are eliminated. 986 */ 987 if (prev != NULL && offset < prev->end)
988 { 989 i = prev->end - offset;
990 offset += i; /* ptr into datagram */ 991 ptr += i; /* ptr into fragment data */ 992 } 993
994 /* 995 * Look for overlap with succeeding segments. 996 * If we can merge fragments, do it. 997 */ 998
999 for(; next != NULL; next = tfp)
1000 {1001 tfp = next->next;
1002 if (next->offset >= end)
1003 break; /* no overlaps at all */1004
1005 i = end - next->offset; /* overlap is 'i' bytes */1006 next->len -= i; /* so reduce size of */1007 next->offset += i; /* next fragment */1008 next->ptr += i;
1009
1010 /*1011 * If we get a frag size of <= 0, remove it and the packet1012 * that it goes with.1013 */1014 if (next->len <= 0)
1015 {1016 if (next->prev != NULL)
1017 next->prev->next = next->next;
1018 else1019 qp->fragments = next->next;
1020
1021 if (tfp->next != NULL)
1022 next->next->prev = next->prev;
1023
1024 kfree_skb(next->skb,FREE_READ);
1025 kfree_s(next, sizeof(structipfrag));
1026 }1027 }1028
1029 /*1030 * Insert this fragment in the chain of fragments.1031 */1032
1033 tfp = NULL;
1034 tfp = ip_frag_create(offset, end, skb, ptr);
1035
1036 /*1037 * No memory to save the fragment - so throw the lot1038 */1039
1040 if (!tfp)
1041 {1042 skb->sk = NULL;
1043 kfree_skb(skb, FREE_READ);
1044 returnNULL;
1045 }1046 tfp->prev = prev;
1047 tfp->next = next;
1048 if (prev != NULL)
1049 prev->next = tfp;
1050 else1051 qp->fragments = tfp;
1052
1053 if (next != NULL)
1054 next->prev = tfp;
1055
1056 /*1057 * OK, so we inserted this new fragment into the chain.1058 * Check if we now have a full IP datagram which we can1059 * bump up to the IP layer...1060 */1061
1062 if (ip_done(qp))
1063 {1064 skb2 = ip_glue(qp); /* glue together the fragments */1065 return(skb2);
1066 }1067 return(NULL);
1068 }1069
1070
/*
 *	This IP datagram is too large to be sent in one piece. Break it up into
 *	smaller pieces (each of size equal to the MAC header plus IP header plus
 *	a block of the data of the original IP data part) that will yet fit in a
 *	single device frame, and queue such a frame for sending by calling the
 *	ip_queue_xmit(). Note that this is recursion, and bad things will happen
 *	if this function causes a loop...
 *
 *	Yes this is inefficient, feel free to submit a quicker one.
 *
 *	**Protocol Violation**
 *	We copy all the options to each fragment. !FIXME!
 *
 *	NOTE(review): the copied IP header's checksum is not recomputed here
 *	after frag_off is rewritten — presumably ip_queue_xmit (or a later
 *	stage) fixes it up; confirm at the call site.
 */
void ip_fragment(struct sock *sk, struct sk_buff *skb, struct device *dev, int is_frag)
{
	struct iphdr *iph;
	unsigned char *raw;
	unsigned char *ptr;
	struct sk_buff *skb2;
	int left, mtu, hlen, len;
	int offset;
	unsigned long flags;

	/*
	 *	Point into the IP datagram header.
	 */

	raw = skb->data;
	iph = (struct iphdr *) (raw + dev->hard_header_len);

	skb->ip_hdr = iph;

	/*
	 *	Setup starting values.
	 */

	hlen = (iph->ihl * sizeof(unsigned long));
	left = ntohs(iph->tot_len) - hlen;	/* Space per frame */
	hlen += dev->hard_header_len;		/* Total header size */
	mtu = (dev->mtu - hlen);		/* Size of data space */
	ptr = (raw + hlen);			/* Where to start from */

	/*
	 *	Check for any "DF" flag. [DF means do not fragment]
	 */

	if (ntohs(iph->frag_off) & IP_DF)
	{
		ip_statistics.IpFragFails++;
		icmp_send(skb,ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, dev);
		return;
	}

	/*
	 *	The protocol doesn't seem to say what to do in the case that the
	 *	frame + options doesn't fit the mtu. As it used to fall down dead
	 *	in this case we were fortunate it didn't happen
	 */

	if(mtu<8)
	{
		/* It's wrong but its better than nothing */
		icmp_send(skb,ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED,dev);
		ip_statistics.IpFragFails++;
		return;
	}

	/*
	 *	Fragment the datagram.
	 */

	/*
	 *	The initial offset is 0 for a complete frame. When
	 *	fragmenting fragments its wherever this one starts.
	 */

	if (is_frag & 2)
		offset = (ntohs(iph->frag_off) & 0x1fff) << 3;
	else
		offset = 0;


	/*
	 *	Keep copying data until we run out.
	 */

	while(left > 0)
	{
		len = left;
		/* IF: it doesn't fit, use 'mtu' - the data space left */
		if (len > mtu)
			len = mtu;
		/* IF: we are not sending upto and including the packet end
		   then align the next start on an eight byte boundary */
		if (len < left)
		{
			len/=8;
			len*=8;
		}
		/*
		 *	Allocate buffer.
		 */

		if ((skb2 = alloc_skb(len + hlen,GFP_ATOMIC)) == NULL)
		{
			printk("IP: frag: no memory for new fragment!\n");
			ip_statistics.IpFragFails++;
			return;
		}

		/*
		 *	Set up data on packet
		 */

		skb2->arp = skb->arp;
		if(skb->free==0)
			printk("IP fragmenter: BUG free!=1 in fragmenter\n");
		skb2->free = 1;
		skb2->len = len + hlen;
		skb2->h.raw=(char *) skb2->data;
		/*
		 *	Charge the memory for the fragment to any owner
		 *	it might possess
		 */

		save_flags(flags);
		if (sk)
		{
			cli();
			sk->wmem_alloc += skb2->mem_len;
			skb2->sk=sk;
		}
		restore_flags(flags);
		skb2->raddr = skb->raddr;	/* For rebuild_header - must be here */

		/*
		 *	Copy the packet header into the new buffer.
		 */

		memcpy(skb2->h.raw, raw, hlen);

		/*
		 *	Copy a block of the IP datagram.
		 */
		memcpy(skb2->h.raw + hlen, ptr, len);
		left -= len;

		skb2->h.raw+=dev->hard_header_len;

		/*
		 *	Fill in the new header fields.
		 */
		iph = (struct iphdr *)(skb2->h.raw/*+dev->hard_header_len*/);
		iph->frag_off = htons((offset >> 3));
		/*
		 *	Added AC : If we are fragmenting a fragment thats not the
		 *		   last fragment then keep MF on each bit
		 */
		if (left > 0 || (is_frag & 1))
			iph->frag_off |= htons(IP_MF);
		ptr += len;
		offset += len;

		/*
		 *	Put this fragment into the sending queue.
		 */

		ip_statistics.IpFragCreates++;

		ip_queue_xmit(sk, dev, skb2, 2);
	}
	ip_statistics.IpFragOKs++;
}
1245
1246
1247 #ifdefCONFIG_IP_FORWARD1248
1249 /*1250 * Forward an IP datagram to its next destination.1251 */1252
/*
 *	Forward an IP datagram to its next destination.
 *
 *	skb/dev are the received frame and the interface it arrived on;
 *	is_frag carries the fragment tagging done in ip_rcv() so that
 *	ip_fragment() does the right thing if we must refragment.
 *	The datagram is always copied into a freshly allocated skb; the
 *	caller (ip_rcv) remains responsible for freeing the original.
 */
static void ip_forward(struct sk_buff *skb, struct device *dev, int is_frag)
{
	struct device *dev2;	/* Output device */
	struct iphdr *iph;	/* Our header */
	struct sk_buff *skb2;	/* Output packet */
	struct rtable *rt;	/* Route we use */
	unsigned char *ptr;	/* Data pointer */
	unsigned long raddr;	/* Router IP address */

	/*
	 *	According to the RFC, we must first decrease the TTL field. If
	 *	that reaches zero, we must reply an ICMP control message telling
	 *	that the packet's lifetime expired.
	 *
	 *	Exception:
	 *	We may not generate an ICMP for an ICMP. icmp_send does the
	 *	enforcement of this so we can forget it here. It is however
	 *	sometimes VERY important.
	 */

	iph = skb->h.iph;
	iph->ttl--;
	/* NOTE(review): if ttl is an unsigned byte, a frame arriving with
	   ttl==0 wraps to 255 here and this test only catches ttl that was
	   exactly 1 -- TODO confirm against struct iphdr. */
	if (iph->ttl <= 0)
	{
		/* Tell the sender its packet died... */
		icmp_send(skb, ICMP_TIME_EXCEEDED, ICMP_EXC_TTL, dev);
		return;
	}

	/*
	 *	Re-compute the IP header checksum.
	 *	This is inefficient. We know what has happened to the header
	 *	and could thus adjust the checksum as Phil Karn does in KA9Q
	 */

	ip_send_check(iph);

	/*
	 *	OK, the packet is still valid. Fetch its destination address,
	 *	and give it to the IP sender for further processing.
	 */

	rt = ip_rt_route(iph->daddr, NULL, NULL);
	if (rt == NULL)
	{
		/*
		 *	Tell the sender its packet cannot be delivered. Again
		 *	ICMP is screened later.
		 */
		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_NET_UNREACH, dev);
		return;
	}

	/*
	 *	Gosh. Not only is the packet valid; we even know how to
	 *	forward it onto its final destination. Can we say this
	 *	is being plain lucky?
	 *	If the router told us that there is no GW, use the dest.
	 *	IP address itself- we seem to be connected directly...
	 */

	raddr = rt->rt_gateway;

	if (raddr != 0)
	{
		/*
		 *	There is a gateway so find the correct route for it.
		 *	Gateways cannot in turn be gatewayed.
		 */
		rt = ip_rt_route(raddr, NULL, NULL);
		if (rt == NULL)
		{
			/*
			 *	Tell the sender its packet cannot be delivered...
			 */
			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, dev);
			return;
		}
		if (rt->rt_gateway != 0)
			raddr = rt->rt_gateway;
	}
	else
		raddr = iph->daddr;

	/*
	 *	Having picked a route we can now send the frame out.
	 */

	dev2 = rt->rt_dev;

	/*
	 *	In IP you never forward a frame on the interface that it arrived
	 *	upon. We should generate an ICMP HOST REDIRECT giving the route
	 *	we calculated.
	 *	For now just dropping the packet is an acceptable compromise.
	 */

	if (dev == dev2)
		return;

	/*
	 *	We now allocate a new buffer, and copy the datagram into it.
	 *	If the indicated interface is up and running, kick it.
	 */

	if (dev2->flags & IFF_UP)
	{

		/*
		 *	Current design decrees we copy the packet. For identical header
		 *	lengths we could avoid it. The new skb code will let us push
		 *	data so the problem goes away then.
		 */

		skb2 = alloc_skb(dev2->hard_header_len + skb->len, GFP_ATOMIC);
		/*
		 *	This is rare and since IP is tolerant of network failures
		 *	quite harmless.
		 */
		if (skb2 == NULL)
		{
			printk("\nIP: No memory available for IP forward\n");
			return;
		}
		ptr = skb2->data;
		skb2->free = 1;		/* freed once transmitted */
		skb2->len = skb->len + dev2->hard_header_len;
		skb2->h.raw = ptr;

		/*
		 *	Copy the packet data into the new buffer.
		 */
		memcpy(ptr + dev2->hard_header_len, skb->h.raw, skb->len);

		/* Now build the MAC header. */
		(void) ip_send(skb2, raddr, skb->len, dev2, dev2->pa_addr);

		ip_statistics.IpForwDatagrams++;

		/*
		 *	See if it needs fragmenting. Note in ip_rcv we tagged
		 *	the fragment type. This must be right so that
		 *	the fragmenter does the right thing.
		 */

		if(skb2->len > dev2->mtu + dev2->hard_header_len)
		{
			/* ip_fragment queues the pieces itself; the oversized
			   copy is then discarded. */
			ip_fragment(NULL,skb2,dev2, is_frag);
			kfree_skb(skb2,FREE_WRITE);
		}
		else
		{
			/*
			 *	Map service types to priority. We lie about
			 *	throughput being low priority, but its a good
			 *	choice to help improve general usage.
			 */
			if(iph->tos & IPTOS_LOWDELAY)
				dev_queue_xmit(skb2, dev2, SOPRI_INTERACTIVE);
			else if(iph->tos & IPTOS_THROUGHPUT)
				dev_queue_xmit(skb2, dev2, SOPRI_BACKGROUND);
			else
				dev_queue_xmit(skb2, dev2, SOPRI_NORMAL);
		}
	}
}
1421
1422 #endif1423
1424 /*1425 * This function receives all incoming IP datagrams.1426 */1427
/*
 *	Receive one IP datagram from a device.
 *
 *	Validates the header, trims link padding, records fragment state,
 *	forwards frames not addressed to us (when CONFIG_IP_FORWARD),
 *	reassembles fragments, and finally hands the datagram to every
 *	registered transport protocol that wants it. Always returns 0.
 */
int ip_rcv(struct sk_buff *skb, struct device *dev, struct packet_type *pt)
{
	struct iphdr *iph = skb->h.iph;
	unsigned char hash;
	unsigned char flag = 0;		/* set once any protocol accepts the frame */
	unsigned char opts_p = 0;	/* Set iff the packet has options. */
	struct inet_protocol *ipprot;
	static struct options opt; /* since we don't use these yet, and they
				take up stack space. */
	int brd=IS_MYADDR;
	int is_frag=0;			/* bit 0: more-fragments, bit 1: has offset */


	ip_statistics.IpInReceives++;

	/*
	 *	Tag the ip header of this packet so we can find it
	 */

	skb->ip_hdr = iph;

	/*
	 *	Is the datagram acceptable?
	 *
	 *	1.	Length at least the size of an ip header
	 *	2.	Version of 4
	 *	3.	Checksums correctly. [Speed optimisation for later, skip loopback checksums]
	 *	(4.	We ought to check for IP multicast addresses and undefined types.. does this matter ?)
	 */

	if (skb->len<sizeof(struct iphdr) || iph->ihl<5 || iph->version != 4 || ip_fast_csum((unsigned char *)iph, iph->ihl) !=0)
	{
		ip_statistics.IpInHdrErrors++;
		kfree_skb(skb, FREE_WRITE);
		return(0);
	}

	/*
	 *	Our transport medium may have padded the buffer out. Now we know it
	 *	is IP we can trim to the true length of the frame.
	 */

	skb->len=ntohs(iph->tot_len);

	/*
	 *	Next analyse the packet for options. Studies show under one packet in
	 *	a thousand have options....
	 */

	if (iph->ihl != 5)
	{	/* Fast path for the typical optionless IP packet. */
		memset((char *) &opt, 0, sizeof(opt));
		/* NOTE(review): on a bad option we return without freeing skb --
		   looks like a buffer leak; do_options never sees the skb so it
		   cannot free it. TODO confirm. */
		if (do_options(iph, &opt) != 0)
			return 0;
		opts_p = 1;
	}

	/*
	 *	Remember if the frame is fragmented.
	 */

	if(iph->frag_off)
	{
		/* 0x0020 masks the MF bit in the first (network-order) byte of
		   frag_off without a byte swap -- presumably correct only on
		   little-endian hosts; TODO confirm portability. */
		if (iph->frag_off & 0x0020)
			is_frag|=1;
		/*
		 *	Last fragment ?
		 */

		if (ntohs(iph->frag_off) & 0x1fff)
			is_frag|=2;
	}

	/*
	 *	Do any IP forwarding required. chk_addr() is expensive -- avoid it someday.
	 *
	 *	This is inefficient. While finding out if it is for us we could also compute
	 *	the routing table entry. This is where the great unified cache theory comes
	 *	in as and when someone implements it
	 *
	 *	For most hosts over 99% of packets match the first conditional
	 *	and don't go via ip_chk_addr. Note: brd is set to IS_MYADDR at
	 *	function entry.
	 */

	if ( iph->daddr != skb->dev->pa_addr && (brd = ip_chk_addr(iph->daddr)) == 0)
	{
		/*
		 *	Don't forward multicast or broadcast frames.
		 */

		if(skb->pkt_type!=PACKET_HOST || brd==IS_BROADCAST)
		{
			kfree_skb(skb,FREE_WRITE);
			return 0;
		}

		/*
		 *	The packet is for another target. Forward the frame
		 */

#ifdef CONFIG_IP_FORWARD
		ip_forward(skb, dev, is_frag);
#else
/*		printk("Machine %lx tried to use us as a forwarder to %lx but we have forwarding disabled!\n",
			iph->saddr,iph->daddr);*/
		ip_statistics.IpInAddrErrors++;
#endif
		/*
		 *	The forwarder is inefficient and copies the packet. We
		 *	free the original now.
		 */

		kfree_skb(skb, FREE_WRITE);
		return(0);
	}

	/*
	 *	Reassemble IP fragments.
	 */

	if(is_frag)
	{
		/* Defragment. Obtain the complete packet if there is one */
		skb=ip_defrag(iph,skb,dev);
		if(skb==NULL)
			return 0;	/* datagram still incomplete; held by ip_defrag */
		iph=skb->h.iph;
	}

	/*
	 *	Point into the IP datagram, just past the header.
	 */

	skb->ip_hdr = iph;
	skb->h.raw += iph->ihl*4;

	/*
	 *	skb->h.raw now points at the protocol beyond the IP header.
	 */

	hash = iph->protocol & (MAX_INET_PROTOS -1);
	for (ipprot = (struct inet_protocol *)inet_protos[hash];ipprot != NULL;ipprot=(struct inet_protocol *)ipprot->next)
	{
		struct sk_buff *skb2;

		if (ipprot->protocol != iph->protocol)
			continue;
		/*
		 *	See if we need to make a copy of it. This will
		 *	only be set if more than one protocol wants it.
		 *	and then not for the last one.
		 *
		 *	This is an artifact of poor upper protocol design.
		 *	Because the upper protocols damage the actual packet
		 *	we must do copying. In actual fact it's even worse
		 *	than this as TCP may hold on to the buffer.
		 */
		if (ipprot->copy)
		{
			skb2 = skb_clone(skb, GFP_ATOMIC);
			if(skb2==NULL)
				continue;
		}
		else
		{
			skb2 = skb;
		}
		flag = 1;

		/*
		 *	Pass on the datagram to each protocol that wants it,
		 *	based on the datagram protocol. We should really
		 *	check the protocol handler's return values here...
		 */
		ipprot->handler(skb2, dev, opts_p ? &opt : 0, iph->daddr,
				(ntohs(iph->tot_len) - (iph->ihl * 4)),
				iph->saddr, 0, ipprot);

	}

	/*
	 *	All protocols checked.
	 *	If this packet was a broadcast, we may *not* reply to it, since that
	 *	causes (proven, grin) ARP storms and a leakage of memory (i.e. all
	 *	ICMP reply messages get queued up for transmission...)
	 */

	if (!flag)
	{
		/* No protocol claimed the frame: send protocol-unreachable
		   (never to broadcast/multicast) and free it ourselves. */
		if (brd != IS_BROADCAST && brd!=IS_MULTICAST)
			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PROT_UNREACH, dev);
		kfree_skb(skb, FREE_WRITE);
	}

	return(0);
}
1626
1627 /*1628 * Queues a packet to be sent, and starts the transmitter1629 * if necessary. if free = 1 then we free the block after1630 * transmit, otherwise we don't. If free==2 we not only1631 * free the block but also don't assign a new ip seq number.1632 * This routine also needs to put in the total length,1633 * and compute the checksum1634 */1635
/*
 *	Queues a packet to be sent, and starts the transmitter
 *	if necessary. if free = 1 then we free the block after
 *	transmit, otherwise we don't. If free==2 we not only
 *	free the block but also don't assign a new ip seq number.
 *	This routine also needs to put in the total length,
 *	and compute the checksum
 *
 *	The skb is expected to already contain the hardware header and a
 *	fully built IP header (bar tot_len/id/checksum, filled in here).
 */
void ip_queue_xmit(struct sock *sk, struct device *dev,
	   struct sk_buff *skb, int free)
{
	struct iphdr *iph;
	unsigned char *ptr;

	/* Sanity check */
	if (dev == NULL)
	{
		printk("IP: ip_queue_xmit dev = NULL\n");
		return;
	}

	IS_SKB(skb);

	/*
	 *	Do some book-keeping in the packet for later
	 */


	skb->dev = dev;
	skb->when = jiffies;	/* timestamp for RTT / retransmit accounting */

	/*
	 *	Find the IP header and set the length. This is bad
	 *	but once we get the skb data handling code in the
	 *	hardware will push its header sensibly and we will
	 *	set skb->ip_hdr to avoid this mess and the fixed
	 *	header length problem
	 */

	ptr = skb->data;
	ptr += dev->hard_header_len;
	iph = (struct iphdr *)ptr;
	skb->ip_hdr = iph;
	/* NOTE(review): ntohs here should morally be htons; the two are the
	   same operation on a 16-bit value so the result is correct. */
	iph->tot_len = ntohs(skb->len-dev->hard_header_len);

	/*
	 *	No reassigning numbers to fragments...
	 */

	if(free!=2)
		iph->id      = htons(ip_id_count++);
	else
		free=1;		/* fragments are always freed after transmit */

	/* All buffers without an owner socket get freed */
	if (sk == NULL)
		free = 1;

	skb->free = free;

	/*
	 *	Do we need to fragment. Again this is inefficient.
	 *	We need to somehow lock the original buffer and use
	 *	bits of it.
	 */

	if(skb->len > dev->mtu + dev->hard_header_len)
	{
		/* ip_fragment queues the pieces; the oversize original dies. */
		ip_fragment(sk,skb,dev,0);
		IS_SKB(skb);
		kfree_skb(skb,FREE_WRITE);
		return;
	}

	/*
	 *	Add an IP checksum
	 */

	ip_send_check(iph);

	/*
	 *	Print the frame when debugging
	 */

	/*
	 *	More debugging. You cannot queue a packet already on a list
	 *	Spot this and moan loudly.
	 */
	if (skb->next != NULL)
	{
		printk("ip_queue_xmit: next != NULL\n");
		skb_unlink(skb);
	}

	/*
	 *	If a sender wishes the packet to remain unfreed
	 *	we add it to his send queue. This arguably belongs
	 *	in the TCP level since nobody else uses it. BUT
	 *	remember IPng might change all the rules.
	 */

	if (!free)
	{
		unsigned long flags;
		/* The socket now has more outstanding blocks */

		sk->packets_out++;

		/* Protect the list for a moment */
		save_flags(flags);
		cli();

		if (skb->link3 != NULL)
		{
			printk("ip.c: link3 != NULL\n");
			skb->link3 = NULL;
		}
		/* Append to the socket's send (retransmit) queue. */
		if (sk->send_head == NULL)
		{
			sk->send_tail = skb;
			sk->send_head = skb;
		}
		else
		{
			sk->send_tail->link3 = skb;
			sk->send_tail = skb;
		}
		/* skb->link3 is NULL */

		/* Interrupt restore */
		restore_flags(flags);
		/* Set the IP write timeout to the round trip time for the packet.
		   If an acknowledge has not arrived by then we may wish to act */
		reset_timer(sk, TIME_WRITE, sk->rto);
	}
	else
		/* Remember who owns the buffer */
		skb->sk = sk;

	/*
	 *	If the indicated interface is up and running, send the packet.
	 */
	ip_statistics.IpOutRequests++;

	if (dev->flags & IFF_UP)
	{
		/*
		 *	If we have an owner use its priority setting,
		 *	otherwise use NORMAL
		 */

		if (sk != NULL)
		{
			dev_queue_xmit(skb, dev, sk->priority);
		}
		else
		{
			dev_queue_xmit(skb, dev, SOPRI_NORMAL);
		}
	}
	else
	{
		/* Interface down: count the discard. Unfreed skbs stay on the
		   socket queue for a later retransmit. */
		ip_statistics.IpOutDiscards++;
		if (free)
			kfree_skb(skb, FREE_WRITE);
	}
}
1796
1797 /*1798 * A socket has timed out on its send queue and wants to do a1799 * little retransmitting. Currently this means TCP.1800 */1801
/*
 *	A socket has timed out on its send queue and wants to do a
 *	little retransmitting. Currently this means TCP.
 *
 *	Walks sk->send_head, refreshing the TCP ack/window fields and the
 *	IP id/checksums, then requeues each frame. If 'all' is zero only
 *	the head of the queue is retransmitted. Does NOT back off the
 *	timer -- see ip_retransmit() for that.
 */
void ip_do_retransmit(struct sock *sk, int all)
{
	struct sk_buff * skb;
	struct proto *prot;
	struct device *dev;

	prot = sk->prot;
	skb = sk->send_head;

	while (skb != NULL)
	{
		dev = skb->dev;
		IS_SKB(skb);
		skb->when = jiffies;	/* restamp for RTT measurement */

		/*
		 * In general it's OK just to use the old packet.  However we
		 * need to use the current ack and window fields.  Urg and
		 * urg_ptr could possibly stand to be updated as well, but we
		 * don't keep the necessary data.  That shouldn't be a problem,
		 * if the other end is doing the right thing.  Since we're
		 * changing the packet, we have to issue a new IP identifier.
		 */

		/* this check may be unnecessary - retransmit only for TCP */
		if (sk->protocol == IPPROTO_TCP) {
			struct tcphdr *th;
			struct iphdr *iph;
			int size;

			/* Locate the IP and TCP headers inside the queued frame. */
			iph = (struct iphdr *)(skb->data + dev->hard_header_len);
			th = (struct tcphdr *)(((char *)iph) + (iph->ihl << 2));
			size = skb->len - (((unsigned char *) th) - skb->data);

			iph->id = htons(ip_id_count++);
			ip_send_check(iph);

			/* Refresh ack and advertised window, then redo the
			   TCP checksum over the updated segment. */
			th->ack_seq = ntohl(sk->acked_seq);
			th->window = ntohs(tcp_select_window(sk));
			tcp_send_check(th, sk->saddr, sk->daddr, size, sk);
		}

		/*
		 *	If the interface is (still) up and running, kick it.
		 */

		if (dev->flags & IFF_UP)
		{
			/*
			 *	If the packet is still being sent by the device/protocol
			 *	below then don't retransmit. This is both needed, and good -
			 *	especially with connected mode AX.25 where it stops resends
			 *	occurring of an as yet unsent anyway frame!
			 *	We still add up the counts as the round trip time wants
			 *	adjusting.
			 */
			if (sk && !skb_device_locked(skb))
			{
				/* Remove it from any existing driver queue first! */
				skb_unlink(skb);
				/* Now queue it */
				ip_statistics.IpOutRequests++;
				dev_queue_xmit(skb, dev, sk->priority);
			}
		}

		/*
		 *	Count retransmissions
		 */
		sk->retransmits++;
		sk->prot->retransmits ++;

		/*
		 *	Only one retransmit requested.
		 */
		if (!all)
			break;

		/*
		 *	This should cut it off before we send too many packets.
		 */
		if (sk->retransmits >= sk->cong_window)
			break;
		skb = skb->link3;	/* next frame on the send queue */
	}
}
1889 /*1890 * This is the normal code called for timeouts. It does the retransmission1891 * and then does backoff. ip_do_retransmit is separated out because1892 * tcp_ack needs to send stuff from the retransmit queue without1893 * initiating a backoff.1894 */1895
1896 voidip_retransmit(structsock *sk, intall)
/* */1897 {1898 ip_do_retransmit(sk, all);
1899
1900 /*1901 * Increase the timeout each time we retransmit. Note that1902 * we do not increase the rtt estimate. rto is initialized1903 * from rtt, but increases here. Jacobson (SIGCOMM 88) suggests1904 * that doubling rto each time is the least we can get away with.1905 * In KA9Q, Karn uses this for the first few times, and then1906 * goes to quadratic. netBSD doubles, but only goes up to *64,1907 * and clamps at 1 to 64 sec afterwards. Note that 120 sec is1908 * defined in the protocol as the maximum possible RTT. I guess1909 * we'll have to use something other than TCP to talk to the1910 * University of Mars.1911 */1912
1913 sk->retransmits++;
1914 sk->backoff++;
1915 sk->rto = min(sk->rto << 1, 120*HZ);
1916 reset_timer(sk, TIME_WRITE, sk->rto);
1917 }1918
1919 /*1920 * Socket option code for IP. This is the end of the line after any TCP,UDP etc options on1921 * an IP socket.1922 *1923 * We implement IP_TOS (type of service), IP_TTL (time to live).1924 *1925 * Next release we will sort out IP_OPTIONS since for some people are kind of important.1926 */1927
1928 intip_setsockopt(structsock *sk, intlevel, intoptname, char *optval, intoptlen)
/* */1929 {1930 intval,err;
1931
1932 if (optval == NULL)
1933 return(-EINVAL);
1934
1935 err=verify_area(VERIFY_READ, optval, sizeof(int));
1936 if(err)
1937 returnerr;
1938
1939 val = get_fs_long((unsignedlong *)optval);
1940
1941 if(level!=SOL_IP)
1942 return -EOPNOTSUPP;
1943
1944 switch(optname)
1945 {1946 caseIP_TOS:
1947 if(val<0||val>255)
1948 return -EINVAL;
1949 sk->ip_tos=val;
1950 if(val==IPTOS_LOWDELAY)
1951 sk->priority=SOPRI_INTERACTIVE;
1952 if(val==IPTOS_THROUGHPUT)
1953 sk->priority=SOPRI_BACKGROUND;
1954 return 0;
1955 caseIP_TTL:
1956 if(val<1||val>255)
1957 return -EINVAL;
1958 sk->ip_ttl=val;
1959 return 0;
1960 /* IP_OPTIONS and friends go here eventually */1961 default:
1962 return(-ENOPROTOOPT);
1963 }1964 }1965
1966 /*1967 * Get the options. Note for future reference. The GET of IP options gets the1968 * _received_ ones. The set sets the _sent_ ones.1969 */1970
1971 intip_getsockopt(structsock *sk, intlevel, intoptname, char *optval, int *optlen)
/* */1972 {1973 intval,err;
1974
1975 if(level!=SOL_IP)
1976 return -EOPNOTSUPP;
1977
1978 switch(optname)
1979 {1980 caseIP_TOS:
1981 val=sk->ip_tos;
1982 break;
1983 caseIP_TTL:
1984 val=sk->ip_ttl;
1985 break;
1986 default:
1987 return(-ENOPROTOOPT);
1988 }1989 err=verify_area(VERIFY_WRITE, optlen, sizeof(int));
1990 if(err)
1991 returnerr;
1992 put_fs_long(sizeof(int),(unsignedlong *) optlen);
1993
1994 err=verify_area(VERIFY_WRITE, optval, sizeof(int));
1995 if(err)
1996 returnerr;
1997 put_fs_long(val,(unsignedlong *)optval);
1998
1999 return(0);
2000 }2001
2002 /*2003 * IP protocol layer initialiser2004 */2005
/*
 *	Packet handler registration record for IP. The protocol type is
 *	left zero here and filled in with htons(ETH_P_IP) by ip_init()
 *	before dev_add_pack() is called (htons() is not usable as a
 *	static initialiser here).
 */
static struct packet_type ip_packet_type =
{
	0,		/* MUTTER ntohs(ETH_P_IP),*/
	0,		/* copy */
	ip_rcv,		/* receive handler for every IP frame */
	NULL,		/* presumably private data -- confirm against struct packet_type */
	NULL,		/* presumably list link -- confirm against struct packet_type */
};
2014
2015
2016 /*2017 * IP registers the packet type and then calls the subprotocol initialisers2018 */2019
/*
 *	IP protocol layer initialiser: registers the IP packet type with
 *	the device layer. The sub-protocol initialisers are currently
 *	called elsewhere (see the commented-out calls below).
 */
void ip_init(void)
{
	/* Must fill in the type before registration -- htons() cannot be
	   used in the static initialiser of ip_packet_type. */
	ip_packet_type.type=htons(ETH_P_IP);
	dev_add_pack(&ip_packet_type);
/*	ip_raw_init();
	ip_packet_init();
	ip_tcp_init();
	ip_udp_init();*/
}