1 /* 2 * INET An implementation of the TCP/IP protocol suite for the LINUX 3 * operating system. INET is implemented using the BSD Socket 4 * interface as the means of communication with the user level. 5 * 6 * The Internet Protocol (IP) module. 7 * 8 * Version: @(#)ip.c 1.0.16b 9/1/93 9 * 10 * Authors: Ross Biro, <bir7@leland.Stanford.Edu> 11 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> 12 * Donald Becker, <becker@super.org> 13 * Alan Cox, <gw4pts@gw4pts.ampr.org> 14 * 15 * Fixes: 16 * Alan Cox : Commented a couple of minor bits of surplus code 17 * Alan Cox : Undefining IP_FORWARD doesn't include the code 18 * (just stops a compiler warning). 19 * Alan Cox : Frames with >=MAX_ROUTE record routes, strict routes or loose routes 20 * are junked rather than corrupting things. 21 * Alan Cox : Frames to bad broadcast subnets are dumped 22 * We used to process them non broadcast and 23 * boy could that cause havoc. 24 * Alan Cox : ip_forward sets the free flag on the 25 * new frame it queues. Still crap because 26 * it copies the frame but at least it 27 * doesn't eat memory too. 28 * Alan Cox : Generic queue code and memory fixes. 29 * Fred Van Kempen : IP fragment support (borrowed from NET2E) 30 * Gerhard Koerting: Forward fragmented frames correctly. 31 * Gerhard Koerting: Fixes to my fix of the above 8-). 32 * Gerhard Koerting: IP interface addressing fix. 33 * Linus Torvalds : More robustness checks 34 * Alan Cox : Even more checks: Still not as robust as it ought to be 35 * Alan Cox : Save IP header pointer for later 36 * Alan Cox : ip option setting 37 * Alan Cox : Use ip_tos/ip_ttl settings 38 * Alan Cox : Fragmentation bogosity removed 39 * (Thanks to Mark.Bush@prg.ox.ac.uk) 40 * Dmitry Gorodchanin : Send of a raw packet crash fix. 41 * Alan Cox : Silly ip bug when an overlength 42 * fragment turns up. Now frees the 43 * queue. 44 * Linus Torvalds/ : Memory leakage on fragmentation 45 * Alan Cox : handling. 
46 * Gerhard Koerting: Forwarding uses IP priority hints 47 * Teemu Rantanen : Fragment problems. 48 * Alan Cox : General cleanup, comments and reformat 49 * Alan Cox : SNMP statistics 50 * Alan Cox : BSD address rule semantics. Also see 51 * UDP as there is a nasty checksum issue 52 * if you do things the wrong way. 53 * Alan Cox : Always defrag, moved IP_FORWARD to the config.in file 54 * Alan Cox : IP options adjust sk->priority. 55 * Pedro Roque : Fix mtu/length error in ip_forward. 56 * Alan Cox : Avoid ip_chk_addr when possible. 57 * 58 * To Fix: 59 * IP option processing is mostly not needed. ip_forward needs to know about routing rules 60 * and time stamp but that's about all. Use the route mtu field here too 61 * 62 * This program is free software; you can redistribute it and/or 63 * modify it under the terms of the GNU General Public License 64 * as published by the Free Software Foundation; either version 65 * 2 of the License, or (at your option) any later version. 66 */ 67 #include <asm/segment.h>
68 #include <asm/system.h>
69 #include <linux/types.h>
70 #include <linux/kernel.h>
71 #include <linux/sched.h>
72 #include <linux/string.h>
73 #include <linux/errno.h>
74 #include <linux/socket.h>
75 #include <linux/sockios.h>
76 #include <linux/in.h>
77 #include <linux/inet.h>
78 #include <linux/netdevice.h>
79 #include <linux/etherdevice.h>
80 #include "snmp.h"
81 #include "ip.h"
82 #include "protocol.h"
83 #include "route.h"
84 #include "tcp.h"
85 #include <linux/skbuff.h>
86 #include "sock.h"
87 #include "arp.h"
88 #include "icmp.h"
89
/* Always build the defragmentation code into this file. */
#define CONFIG_IP_DEFRAG

/* TCP retransmit bookkeeping, defined elsewhere. */
extern int last_retran;
extern void sort_send(struct sock *sk);

#define min(a,b)	((a)<(b)?(a):(b))
/* True when x (network byte order) is in the 127.0.0.0/8 loopback net. */
#define LOOPBACK(x)	(((x) & htonl(0xff000000)) == htonl(0x7f000000))

/*
 *	SNMP management statistics
 */

struct ip_mib ip_statistics={1,64,};	/* Forwarding=Yes, Default TTL=64 */
104 /* 105 * Handle the issuing of an ioctl() request 106 * for the ip device. This is scheduled to 107 * disappear 108 */ 109
/*
 *	Handle an ioctl() request for the IP layer. No IP-level commands
 *	are implemented, so every request fails identically with -EINVAL.
 */
int ip_ioctl(struct sock *sk, int cmd, unsigned long arg)
{
	/* Nothing is recognised; reject whatever 'cmd' carries. */
	return -EINVAL;
}
119
120 /* these two routines will do routing. */ 121
/*
 *	Strict source routing handler. Currently a stub: do_options()
 *	never reaches the point of calling it (it returns early), and
 *	no strict-route forwarding is performed.
 */
static void
strict_route(struct iphdr *iph, struct options *opt)
{
}
127
/*
 *	Loose source routing handler. Currently a stub — see strict_route();
 *	the option-parsing code that would invoke it is disabled.
 */
static void
loose_route(struct iphdr *iph, struct options *opt)
{
}
133
134
135
/* This routine will check to see if we have lost a gateway. */
/* Currently a stub: no gateway-loss detection is implemented. */
void
ip_route_check(unsigned long daddr)
{
}
142
#if 0
/* this routine puts the options at the end of an ip header. */
/* Compiled out: writes a single IPOPT_END byte after the fixed header
   and reports 4 bytes of (padded) option space. Kept for reference only;
   the live call site is guarded by '#ifdef Not_Yet_Avail'. */
static int
build_options(struct iphdr *iph, struct options *opt)
{
	unsigned char *ptr;
	/* currently we don't support any options. */
	ptr = (unsigned char *)(iph+1);
	*ptr = 0;
	return (4);
}
#endif
156
157 /* 158 * Take an skb, and fill in the MAC header. 159 */ 160
161 staticintip_send(structsk_buff *skb, unsignedlongdaddr, intlen, structdevice *dev, unsignedlongsaddr)
/* */ 162 { 163 intmac = 0;
164
165 skb->dev = dev;
166 skb->arp = 1;
167 if (dev->hard_header)
168 { 169 /* 170 * Build a hardware header. Source address is our mac, destination unknown 171 * (rebuild header will sort this out) 172 */ 173 mac = dev->hard_header(skb->data, dev, ETH_P_IP, NULL, NULL, len, skb);
174 if (mac < 0)
175 { 176 mac = -mac;
177 skb->arp = 0;
178 skb->raddr = daddr; /* next routing address */ 179 } 180 } 181 returnmac;
182 } 183
184 intip_id_count = 0;
185
/*
 *	This routine builds the appropriate hardware/IP headers for
 *	the routine. It assumes that if *dev != NULL then the
 *	protocol knows what it's doing, otherwise it uses the
 *	routing/ARP tables to select a device struct.
 *
 *	On success returns the number of header bytes written at the start
 *	of skb->data: MAC header only for IPPROTO_RAW, otherwise MAC header
 *	plus a 20-byte option-less IP header. Returns -ENETUNREACH when no
 *	route exists and a device must be chosen here.
 *
 *	NOTE(review): iph->tot_len, iph->id and iph->check are NOT filled in
 *	here — the caller is expected to complete them; verify at call sites.
 */
int ip_build_header(struct sk_buff *skb, unsigned long saddr, unsigned long daddr,
		struct device **dev, int type, struct options *opt, int len, int tos, int ttl)
{
	static struct options optmem;
	struct iphdr *iph;
	struct rtable *rt;
	unsigned char *buff;
	unsigned long raddr;	/* first-hop (router) address */
	int tmp;
	unsigned long src;	/* source address suggested by the route */

	buff = skb->data;

	/*
	 *	See if we need to look up the device.
	 */

	if (*dev == NULL)
	{
		if(skb->localroute)
			rt = ip_rt_local(daddr, &optmem, &src);
		else
			rt = ip_rt_route(daddr, &optmem, &src);
		if (rt == NULL)
		{
			ip_statistics.IpOutNoRoutes++;
			return(-ENETUNREACH);
		}

		*dev = rt->rt_dev;
		/*
		 *	If the frame is from us and going off machine it MUST MUST MUST
		 *	have the output device ip address and never the loopback
		 */
		if (LOOPBACK(saddr) && !LOOPBACK(daddr))
			saddr = src;/*rt->rt_dev->pa_addr;*/
		raddr = rt->rt_gateway;

		opt = &optmem;
	}
	else
	{
		/*
		 *	We still need the address of the first hop.
		 */
		if(skb->localroute)
			rt = ip_rt_local(daddr, &optmem, &src);
		else
			rt = ip_rt_route(daddr, &optmem, &src);
		/*
		 *	If the frame is from us and going off machine it MUST MUST MUST
		 *	have the output device ip address and never the loopback
		 */
		if (LOOPBACK(saddr) && !LOOPBACK(daddr))
			saddr = src;/*rt->rt_dev->pa_addr;*/

		/* NOTE(review): unlike the branch above, 'opt' is NOT redirected
		   to &optmem here. Harmless while build_options() is compiled
		   out, but verify before enabling option emission. */
		raddr = (rt == NULL) ? 0 : rt->rt_gateway;
	}

	/*
	 *	No source addr so make it our addr
	 */
	if (saddr == 0)
		saddr = src;

	/*
	 *	No gateway so aim at the real destination
	 */
	if (raddr == 0)
		raddr = daddr;

	/*
	 *	Now build the MAC header.
	 */

	tmp = ip_send(skb, raddr, len, *dev, saddr);
	buff += tmp;
	len -= tmp;

	/*
	 *	Book keeping
	 */

	skb->dev = *dev;
	skb->saddr = saddr;
	if (skb->sk)
		skb->sk->saddr = saddr;

	/*
	 *	Now build the IP header.
	 */

	/*
	 *	If we are using IPPROTO_RAW, then we don't need an IP header, since
	 *	one is being supplied to us by the user
	 */

	if(type == IPPROTO_RAW)
		return (tmp);

	iph = (struct iphdr *)buff;
	iph->version = 4;
	iph->tos = tos;
	iph->frag_off = 0;
	iph->ttl = ttl;
	iph->daddr = daddr;
	iph->saddr = saddr;
	iph->protocol = type;
	iph->ihl = 5;		/* 5 words = 20 bytes, i.e. no IP options */

	/* Setup the IP options. */
#ifdef Not_Yet_Avail
	build_options(iph, opt);
#endif

	return(20 + tmp);	/* IP header plus MAC header size */
}
310
/*
 *	Parse the options of an IP header into *opt.
 *
 *	NOTE(review): option processing is deliberately DISABLED — the
 *	unconditional 'return(0)' below, after zeroing *opt, makes the
 *	whole parser unreachable (matching the "To Fix" note in the file
 *	header that says option processing is mostly not needed). The
 *	dead code is retained as the reference implementation.
 *
 *	Returns 0 on success, 1 on a malformed option (dead path).
 */
static int
do_options(struct iphdr *iph, struct options *opt)
{
	unsigned char *buff;
	int done = 0;
	int i, len = sizeof(struct iphdr);

	/* Zero out the options. */
	opt->record_route.route_size = 0;
	opt->loose_route.route_size = 0;
	opt->strict_route.route_size = 0;
	opt->tstamp.ptr = 0;
	opt->security = 0;
	opt->compartment = 0;
	opt->handling = 0;
	opt->stream = 0;
	opt->tcc = 0;
	return(0);	/* <-- option parsing intentionally short-circuited */

	/* ------- everything below here is currently unreachable ------- */

	/* Advance the pointer to start at the options. */
	buff = (unsigned char *)(iph + 1);

	/* Now start the processing. */
	while (!done && len < iph->ihl*4) switch(*buff) {
		case IPOPT_END:
			done = 1;
			break;
		case IPOPT_NOOP:
			buff++;
			len++;
			break;
		case IPOPT_SEC:
			/* Security option: fixed length 11. */
			buff++;
			if (*buff != 11) return(1);
			buff++;
			opt->security = ntohs(*(unsigned short *)buff);
			buff += 2;
			opt->compartment = ntohs(*(unsigned short *)buff);
			buff += 2;
			opt->handling = ntohs(*(unsigned short *)buff);
			buff += 2;
			opt->tcc = ((*buff) << 16) + ntohs(*(unsigned short *)(buff+1));
			buff += 3;
			len += 11;
			break;
		case IPOPT_LSRR:
			/* Loose source and record route. */
			buff++;
			if ((*buff - 3)% 4 != 0) return(1);
			len += *buff;
			opt->loose_route.route_size = (*buff -3)/4;
			buff++;
			if (*buff % 4 != 0) return(1);
			opt->loose_route.pointer = *buff/4 - 1;
			buff++;
			buff++;
			for (i = 0; i < opt->loose_route.route_size; i++) {
				if(i>=MAX_ROUTE)
					return(1);	/* too many hops: junk the frame */
				opt->loose_route.route[i] = *(unsigned long *)buff;
				buff += 4;
			}
			break;
		case IPOPT_SSRR:
			/* Strict source and record route. */
			buff++;
			if ((*buff - 3)% 4 != 0) return(1);
			len += *buff;
			opt->strict_route.route_size = (*buff -3)/4;
			buff++;
			if (*buff % 4 != 0) return(1);
			opt->strict_route.pointer = *buff/4 - 1;
			buff++;
			buff++;
			for (i = 0; i < opt->strict_route.route_size; i++) {
				if(i>=MAX_ROUTE)
					return(1);
				opt->strict_route.route[i] = *(unsigned long *)buff;
				buff += 4;
			}
			break;
		case IPOPT_RR:
			/* Record route. */
			buff++;
			if ((*buff - 3)% 4 != 0) return(1);
			len += *buff;
			opt->record_route.route_size = (*buff -3)/4;
			buff++;
			if (*buff % 4 != 0) return(1);
			opt->record_route.pointer = *buff/4 - 1;
			buff++;
			buff++;
			for (i = 0; i < opt->record_route.route_size; i++) {
				if(i>=MAX_ROUTE)
					return 1;
				opt->record_route.route[i] = *(unsigned long *)buff;
				buff += 4;
			}
			break;
		case IPOPT_SID:
			/* Stream identifier. */
			len += 4;
			buff +=2;
			opt->stream = *(unsigned short *)buff;
			buff += 2;
			break;
		case IPOPT_TIMESTAMP:
			/* Internet timestamp. */
			buff++;
			len += *buff;
			if (*buff % 4 != 0) return(1);
			opt->tstamp.len = *buff / 4 - 1;
			buff++;
			if ((*buff - 1) % 4 != 0) return(1);
			opt->tstamp.ptr = (*buff-1)/4;
			buff++;
			opt->tstamp.x.full_char = *buff;
			buff++;
			for (i = 0; i < opt->tstamp.len; i++) {
				opt->tstamp.data[i] = *(unsigned long *)buff;
				buff += 4;
			}
			break;
		default:
			return(1);	/* unknown option: reject the frame */
	}

	/* If no explicit record route was given, borrow the strict or
	   loose source route list as the record route. */
	if (opt->record_route.route_size == 0) {
		if (opt->strict_route.route_size != 0) {
			memcpy(&(opt->record_route), &(opt->strict_route),
			       sizeof(opt->record_route));
		} else if (opt->loose_route.route_size != 0) {
			memcpy(&(opt->record_route), &(opt->loose_route),
			       sizeof(opt->record_route));
		}
	}

	if (opt->strict_route.route_size != 0 &&
	    opt->strict_route.route_size != opt->strict_route.pointer) {
		strict_route(iph, opt);
		return(0);
	}

	if (opt->loose_route.route_size != 0 &&
	    opt->loose_route.route_size != opt->loose_route.pointer) {
		loose_route(iph, opt);
		return(0);
	}

	return(0);
}
458 /* 459 * This is a version of ip_compute_csum() optimized for IP headers, which 460 * always checksum on 4 octet boundaries. 461 */ 462
/*
 *	Fold 'wlen' 32-bit words at 'buff' into the ones-complement
 *	Internet checksum and return its complement (low 16 bits).
 *	Callers pass iph->ihl, i.e. the header length in 32-bit words.
 *
 *	i386-specific inline assembly: lodsl/adcl accumulate with carry,
 *	then the high and low halves are folded together. 'bogus' is a
 *	scratch register for the fold.
 */
static inline unsigned short ip_fast_csum(unsigned char * buff, int wlen)
{
	unsigned long sum = 0;

	if (wlen)
	{
		unsigned long bogus;
		__asm__("clc\n"
		"1:\t"
		"lodsl\n\t"
		"adcl %3, %0\n\t"
		"decl %2\n\t"
		"jne 1b\n\t"
		"adcl $0, %0\n\t"
		"movl %0, %3\n\t"
		"shrl $16, %3\n\t"
		"addw %w3, %w0\n\t"
		"adcw $0, %w0"
		: "=r" (sum), "=S" (buff), "=r" (wlen), "=a" (bogus)
		: "0" (sum), "1" (buff), "2" (wlen));
	}
	return (~sum) & 0xffff;
}
487 /* 488 * This routine does all the checksum computations that don't 489 * require anything special (like copying or special headers). 490 */ 491
/*
 *	Compute the complemented Internet checksum over an arbitrary
 *	byte buffer. Processes whole 32-bit words first, then a trailing
 *	16-bit word, then a final odd byte, using i386 inline assembly
 *	(the running sum lives in %ebx/%bx between the three fragments).
 */
unsigned short ip_compute_csum(unsigned char * buff, int len)
{
	unsigned long sum = 0;

	/* Do the first multiple of 4 bytes and convert to 16 bits. */
	if (len > 3)
	{
		__asm__("clc\n"
		"1:\t"
		"lodsl\n\t"
		"adcl %%eax, %%ebx\n\t"
		"loop 1b\n\t"
		"adcl $0, %%ebx\n\t"
		"movl %%ebx, %%eax\n\t"
		"shrl $16, %%eax\n\t"
		"addw %%ax, %%bx\n\t"
		"adcw $0, %%bx"
		: "=b" (sum) , "=S" (buff)
		: "0" (sum), "c" (len >> 2) ,"1" (buff)
		: "ax", "cx", "si", "bx" );
	}
	/* Trailing 16-bit word, if any. */
	if (len & 2)
	{
		__asm__("lodsw\n\t"
		"addw %%ax, %%bx\n\t"
		"adcw $0, %%bx"
		: "=b" (sum), "=S" (buff)
		: "0" (sum), "1" (buff)
		: "bx", "ax", "si");
	}
	/* Trailing odd byte, if any. */
	if (len & 1)
	{
		__asm__("lodsb\n\t"
		"movb $0, %%ah\n\t"
		"addw %%ax, %%bx\n\t"
		"adcw $0, %%bx"
		: "=b" (sum), "=S" (buff)
		: "0" (sum), "1" (buff)
		: "bx", "ax", "si");
	}
	sum =~sum;
	return(sum & 0xffff);
}
536 /* 537 * Check the header of an incoming IP datagram. This version is still used in slhc.c. 538 */ 539
/*
 *	Checksum an IP header in place. By the usual ones-complement
 *	property, a result of 0 means the header's stored checksum is
 *	valid. Still used by slhc.c.
 */
int ip_csum(struct iphdr *iph)
{
	return ip_fast_csum((unsigned char *)iph, iph->ihl);
}
545 /* 546 * Generate a checksum for an outgoing IP datagram. 547 */ 548
/*
 *	Fill in the checksum field of an outgoing IP header.
 *	The field must be zeroed before summing, per the checksum spec.
 */
static void ip_send_check(struct iphdr *iph)
{
	iph->check = 0;
	iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
}
555 /************************ Fragment Handlers From NET2E not yet with tweaks to beat 4K **********************************/ 556
557
558 /* 559 * This fragment handler is a bit of a heap. On the other hand it works quite 560 * happily and handles things quite well. 561 */ 562
563 staticstructipq *ipqueue = NULL; /* IP fragment queue */ 564
565 /* 566 * Create a new fragment entry. 567 */ 568
569 staticstructipfrag *ip_frag_create(intoffset, intend, structsk_buff *skb, unsignedchar *ptr)
/* */ 570 { 571 structipfrag *fp;
572
573 fp = (structipfrag *) kmalloc(sizeof(structipfrag), GFP_ATOMIC);
574 if (fp == NULL)
575 { 576 printk("IP: frag_create: no memory left !\n");
577 return(NULL);
578 } 579 memset(fp, 0, sizeof(structipfrag));
580
581 /* Fill in the structure. */ 582 fp->offset = offset;
583 fp->end = end;
584 fp->len = end - offset;
585 fp->skb = skb;
586 fp->ptr = ptr;
587
588 return(fp);
589 } 590
591
592 /* 593 * Find the correct entry in the "incomplete datagrams" queue for 594 * this IP datagram, and return the queue entry address if found. 595 */ 596
597 staticstructipq *ip_find(structiphdr *iph)
/* */ 598 { 599 structipq *qp;
600 structipq *qplast;
601
602 cli();
603 qplast = NULL;
604 for(qp = ipqueue; qp != NULL; qplast = qp, qp = qp->next)
605 { 606 if (iph->id== qp->iph->id && iph->saddr == qp->iph->saddr &&
607 iph->daddr == qp->iph->daddr && iph->protocol == qp->iph->protocol)
608 { 609 del_timer(&qp->timer); /* So it doesn't vanish on us. The timer will be reset anyway */ 610 sti();
611 return(qp);
612 } 613 } 614 sti();
615 return(NULL);
616 } 617
618
619 /* 620 * Remove an entry from the "incomplete datagrams" queue, either 621 * because we completed, reassembled and processed it, or because 622 * it timed out. 623 */ 624
/*
 *	Remove an entry from the "incomplete datagrams" queue, either
 *	because we completed, reassembled and processed it, or because
 *	it timed out.
 *
 *	Tears down everything the entry owns: its timer, its place in the
 *	global ipqueue list, every queued fragment (and that fragment's
 *	skb), the saved MAC and IP headers, and the descriptor itself.
 */
static void ip_free(struct ipq *qp)
{
	struct ipfrag *fp;
	struct ipfrag *xp;

	/*
	 *	Stop the timer for this entry.
	 */

	del_timer(&qp->timer);

	/* Remove this entry from the "incomplete datagrams" queue. */
	cli();
	if (qp->prev == NULL)
	{
		/* Entry was the list head. */
		ipqueue = qp->next;
		if (ipqueue != NULL)
			ipqueue->prev = NULL;
	}
	else
	{
		qp->prev->next = qp->next;
		if (qp->next != NULL)
			qp->next->prev = qp->prev;
	}

	/* Release all fragment data. */

	fp = qp->fragments;
	while (fp != NULL)
	{
		xp = fp->next;		/* save the link before freeing fp */
		IS_SKB(fp->skb);
		kfree_skb(fp->skb,FREE_READ);
		kfree_s(fp, sizeof(struct ipfrag));
		fp = xp;
	}

	/* Release the MAC header. */
	kfree_s(qp->mac, qp->maclen);

	/* Release the IP header (allocated with 8 extra octets for ICMP). */
	kfree_s(qp->iph, qp->ihlen + 8);

	/* Finally, release the queue descriptor itself. */
	kfree_s(qp, sizeof(struct ipq));
	sti();
}
674
675 /* 676 * Oops- a fragment queue timed out. Kill it and send an ICMP reply. 677 */ 678
679 staticvoidip_expire(unsignedlongarg)
/* */ 680 { 681 structipq *qp;
682
683 qp = (structipq *)arg;
684
685 /* 686 * Send an ICMP "Fragment Reassembly Timeout" message. 687 */ 688
689 ip_statistics.IpReasmTimeout++;
690 ip_statistics.IpReasmFails++;
691 /* This if is always true... shrug */ 692 if(qp->fragments!=NULL)
693 icmp_send(qp->fragments->skb,ICMP_TIME_EXCEEDED,
694 ICMP_EXC_FRAGTIME, qp->dev);
695
696 /* 697 * Nuke the fragment queue. 698 */ 699 ip_free(qp);
700 } 701
702
703 /* 704 * Add an entry to the 'ipq' queue for a newly received IP datagram. 705 * We will (hopefully :-) receive all other fragments of this datagram 706 * in time, so we just create a queue for this datagram, in which we 707 * will insert the received fragments at their respective positions. 708 */ 709
710 staticstructipq *ip_create(structsk_buff *skb, structiphdr *iph, structdevice *dev)
/* */ 711 { 712 structipq *qp;
713 intmaclen;
714 intihlen;
715
716 qp = (structipq *) kmalloc(sizeof(structipq), GFP_ATOMIC);
717 if (qp == NULL)
718 { 719 printk("IP: create: no memory left !\n");
720 return(NULL);
721 skb->dev = qp->dev;
722 } 723 memset(qp, 0, sizeof(structipq));
724
725 /* 726 * Allocate memory for the MAC header. 727 * 728 * FIXME: We have a maximum MAC address size limit and define 729 * elsewhere. We should use it here and avoid the 3 kmalloc() calls 730 */ 731
732 maclen = ((unsignedlong) iph) - ((unsignedlong) skb->data);
733 qp->mac = (unsignedchar *) kmalloc(maclen, GFP_ATOMIC);
734 if (qp->mac == NULL)
735 { 736 printk("IP: create: no memory left !\n");
737 kfree_s(qp, sizeof(structipq));
738 return(NULL);
739 } 740
741 /* 742 * Allocate memory for the IP header (plus 8 octets for ICMP). 743 */ 744
745 ihlen = (iph->ihl * sizeof(unsignedlong));
746 qp->iph = (structiphdr *) kmalloc(ihlen + 8, GFP_ATOMIC);
747 if (qp->iph == NULL)
748 { 749 printk("IP: create: no memory left !\n");
750 kfree_s(qp->mac, maclen);
751 kfree_s(qp, sizeof(structipq));
752 return(NULL);
753 } 754
755 /* Fill in the structure. */ 756 memcpy(qp->mac, skb->data, maclen);
757 memcpy(qp->iph, iph, ihlen + 8);
758 qp->len = 0;
759 qp->ihlen = ihlen;
760 qp->maclen = maclen;
761 qp->fragments = NULL;
762 qp->dev = dev;
763
764 /* Start a timer for this entry. */ 765 qp->timer.expires = IP_FRAG_TIME; /* about 30 seconds */ 766 qp->timer.data = (unsignedlong) qp; /* pointer to queue */ 767 qp->timer.function = ip_expire; /* expire function */ 768 add_timer(&qp->timer);
769
770 /* Add this entry to the queue. */ 771 qp->prev = NULL;
772 cli();
773 qp->next = ipqueue;
774 if (qp->next != NULL)
775 qp->next->prev = qp;
776 ipqueue = qp;
777 sti();
778 return(qp);
779 } 780
781
782 /* 783 * See if a fragment queue is complete. 784 */ 785
786 staticintip_done(structipq *qp)
/* */ 787 { 788 structipfrag *fp;
789 intoffset;
790
791 /* Only possible if we received the final fragment. */ 792 if (qp->len == 0)
793 return(0);
794
795 /* Check all fragment offsets to see if they connect. */ 796 fp = qp->fragments;
797 offset = 0;
798 while (fp != NULL)
799 { 800 if (fp->offset > offset)
801 return(0); /* fragment(s) missing */ 802 offset = fp->end;
803 fp = fp->next;
804 } 805
806 /* All fragments are present. */ 807 return(1);
808 } 809
810
811 /* 812 * Build a new IP datagram from all its fragments. 813 * 814 * FIXME: We copy here because we lack an effective way of handling lists 815 * of bits on input. Until the new skb data handling is in I'm not going 816 * to touch this with a bargepole. This also causes a 4Kish limit on 817 * packet sizes. 818 */ 819
/*
 *	Build a new IP datagram from all its fragments.
 *
 *	FIXME: We copy here because we lack an effective way of handling lists
 *	of bits on input. Until the new skb data handling is in I'm not going
 *	to touch this with a bargepole. This also causes a 4Kish limit on
 *	packet sizes.
 *
 *	Allocates one skb large enough for the saved MAC header, saved IP
 *	header and full payload, copies everything in, frees the queue entry,
 *	and fixes up frag_off/tot_len on the rebuilt header. Returns NULL
 *	(queue freed, stats bumped) on allocation failure or a corrupt
 *	fragment list.
 */
static struct sk_buff *ip_glue(struct ipq *qp)
{
	struct sk_buff *skb;
	struct iphdr *iph;
	struct ipfrag *fp;
	unsigned char *ptr;
	int count, len;

	/*
	 *	Allocate a new buffer for the datagram.
	 */

	len = qp->maclen + qp->ihlen + qp->len;

	if ((skb = alloc_skb(len,GFP_ATOMIC)) == NULL)
	{
		ip_statistics.IpReasmFails++;
		printk("IP: queue_glue: no memory for gluing queue 0x%X\n", (int) qp);
		ip_free(qp);
		return(NULL);
	}

	/* Fill in the basic details. */
	skb->len = (len - qp->maclen);		/* IP header + data, MAC excluded */
	skb->h.raw = skb->data;
	skb->free = 1;

	/* Copy the original MAC and IP headers into the new buffer. */
	ptr = (unsigned char *) skb->h.raw;
	memcpy(ptr, ((unsigned char *) qp->mac), qp->maclen);
	ptr += qp->maclen;
	memcpy(ptr, ((unsigned char *) qp->iph), qp->ihlen);
	ptr += qp->ihlen;
	skb->h.raw += qp->maclen;		/* h.raw now points at the IP header */

	count = 0;				/* payload bytes copied so far */

	/* Copy the data portions of all fragments into the new buffer. */
	fp = qp->fragments;
	while(fp != NULL)
	{
		/* Guard against a corrupt chain overrunning the buffer. */
		if(count+fp->len>skb->len)
		{
			printk("Invalid fragment list: Fragment over size.\n");
			ip_free(qp);
			kfree_skb(skb,FREE_WRITE);
			ip_statistics.IpReasmFails++;
			return NULL;
		}
		memcpy((ptr + fp->offset), fp->ptr, fp->len);
		count += fp->len;
		fp = fp->next;
	}

	/* We glued together all fragments, so remove the queue entry. */
	ip_free(qp);

	/* Done with all fragments. Fixup the new IP header. */
	iph = skb->h.iph;
	iph->frag_off = 0;
	iph->tot_len = htons((iph->ihl * sizeof(unsigned long)) + count);
	skb->ip_hdr = iph;

	ip_statistics.IpReasmOKs++;
	return(skb);
}
887
888 /* 889 * Process an incoming IP datagram fragment. 890 */ 891
892 staticstructsk_buff *ip_defrag(structiphdr *iph, structsk_buff *skb, structdevice *dev)
/* */ 893 { 894 structipfrag *prev, *next;
895 structipfrag *tfp;
896 structipq *qp;
897 structsk_buff *skb2;
898 unsignedchar *ptr;
899 intflags, offset;
900 inti, ihl, end;
901
902 ip_statistics.IpReasmReqds++;
903
904 /* Find the entry of this IP datagram in the "incomplete datagrams" queue. */ 905 qp = ip_find(iph);
906
907 /* Is this a non-fragmented datagram? */ 908 offset = ntohs(iph->frag_off);
909 flags = offset & ~IP_OFFSET;
910 offset &= IP_OFFSET;
911 if (((flags & IP_MF) == 0) && (offset == 0))
912 { 913 if (qp != NULL)
914 ip_free(qp); /* Huh? How could this exist?? */ 915 return(skb);
916 } 917
918 offset <<= 3; /* offset is in 8-byte chunks */ 919
920 /* 921 * If the queue already existed, keep restarting its timer as long 922 * as we still are receiving fragments. Otherwise, create a fresh 923 * queue entry. 924 */ 925
926 if (qp != NULL)
927 { 928 del_timer(&qp->timer);
929 qp->timer.expires = IP_FRAG_TIME; /* about 30 seconds */ 930 qp->timer.data = (unsignedlong) qp; /* pointer to queue */ 931 qp->timer.function = ip_expire; /* expire function */ 932 add_timer(&qp->timer);
933 } 934 else 935 { 936 /* 937 * If we failed to create it, then discard the frame 938 */ 939 if ((qp = ip_create(skb, iph, dev)) == NULL)
940 { 941 skb->sk = NULL;
942 kfree_skb(skb, FREE_READ);
943 ip_statistics.IpReasmFails++;
944 returnNULL;
945 } 946 } 947
948 /* 949 * Determine the position of this fragment. 950 */ 951
952 ihl = (iph->ihl * sizeof(unsignedlong));
953 end = offset + ntohs(iph->tot_len) - ihl;
954
955 /* 956 * Point into the IP datagram 'data' part. 957 */ 958
959 ptr = skb->data + dev->hard_header_len + ihl;
960
961 /* 962 * Is this the final fragment? 963 */ 964
965 if ((flags & IP_MF) == 0)
966 qp->len = end;
967
968 /* 969 * Find out which fragments are in front and at the back of us 970 * in the chain of fragments so far. We must know where to put 971 * this fragment, right? 972 */ 973
974 prev = NULL;
975 for(next = qp->fragments; next != NULL; next = next->next)
976 { 977 if (next->offset > offset)
978 break; /* bingo! */ 979 prev = next;
980 } 981
982 /* 983 * We found where to put this one. 984 * Check for overlap with preceding fragment, and, if needed, 985 * align things so that any overlaps are eliminated. 986 */ 987 if (prev != NULL && offset < prev->end)
988 { 989 i = prev->end - offset;
990 offset += i; /* ptr into datagram */ 991 ptr += i; /* ptr into fragment data */ 992 } 993
994 /* 995 * Look for overlap with succeeding segments. 996 * If we can merge fragments, do it. 997 */ 998
999 for(; next != NULL; next = tfp)
1000 {1001 tfp = next->next;
1002 if (next->offset >= end)
1003 break; /* no overlaps at all */1004
1005 i = end - next->offset; /* overlap is 'i' bytes */1006 next->len -= i; /* so reduce size of */1007 next->offset += i; /* next fragment */1008 next->ptr += i;
1009
1010 /*1011 * If we get a frag size of <= 0, remove it and the packet1012 * that it goes with.1013 */1014 if (next->len <= 0)
1015 {1016 if (next->prev != NULL)
1017 next->prev->next = next->next;
1018 else1019 qp->fragments = next->next;
1020
1021 if (tfp->next != NULL)
1022 next->next->prev = next->prev;
1023
1024 kfree_skb(next->skb,FREE_READ);
1025 kfree_s(next, sizeof(structipfrag));
1026 }1027 }1028
1029 /*1030 * Insert this fragment in the chain of fragments.1031 */1032
1033 tfp = NULL;
1034 tfp = ip_frag_create(offset, end, skb, ptr);
1035
1036 /*1037 * No memory to save the fragment - so throw the lot1038 */1039
1040 if (!tfp)
1041 {1042 skb->sk = NULL;
1043 kfree_skb(skb, FREE_READ);
1044 returnNULL;
1045 }1046 tfp->prev = prev;
1047 tfp->next = next;
1048 if (prev != NULL)
1049 prev->next = tfp;
1050 else1051 qp->fragments = tfp;
1052
1053 if (next != NULL)
1054 next->prev = tfp;
1055
1056 /*1057 * OK, so we inserted this new fragment into the chain.1058 * Check if we now have a full IP datagram which we can1059 * bump up to the IP layer...1060 */1061
1062 if (ip_done(qp))
1063 {1064 skb2 = ip_glue(qp); /* glue together the fragments */1065 return(skb2);
1066 }1067 return(NULL);
1068 }1069
1070
/*
 *	This IP datagram is too large to be sent in one piece. Break it up into
 *	smaller pieces (each of size equal to the MAC header plus IP header plus
 *	a block of the data of the original IP data part) that will yet fit in a
 *	single device frame, and queue such a frame for sending by calling the
 *	ip_queue_xmit(). Note that this is recursion, and bad things will happen
 *	if this function causes a loop...
 *
 *	Yes this is inefficient, feel free to submit a quicker one.
 *
 *	**Protocol Violation**
 *	We copy all the options to each fragment. !FIXME!
 *
 *	NOTE(review): the copied IP header's checksum is not recomputed here
 *	after frag_off is rewritten — presumably ip_queue_xmit (or a later
 *	stage) fixes it up; confirm at the call site.
 */
void ip_fragment(struct sock *sk, struct sk_buff *skb, struct device *dev, int is_frag)
{
	struct iphdr *iph;
	unsigned char *raw;
	unsigned char *ptr;
	struct sk_buff *skb2;
	int left, mtu, hlen, len;
	int offset;
	unsigned long flags;

	/*
	 *	Point into the IP datagram header.
	 */

	raw = skb->data;
	iph = (struct iphdr *) (raw + dev->hard_header_len);

	skb->ip_hdr = iph;

	/*
	 *	Setup starting values.
	 */

	hlen = (iph->ihl * sizeof(unsigned long));
	left = ntohs(iph->tot_len) - hlen;	/* Space per frame */
	hlen += dev->hard_header_len;		/* Total header size */
	mtu = (dev->mtu - hlen);		/* Size of data space */
	ptr = (raw + hlen);			/* Where to start from */

	/*
	 *	Check for any "DF" flag. [DF means do not fragment]
	 */

	if (ntohs(iph->frag_off) & IP_DF)
	{
		ip_statistics.IpFragFails++;
		icmp_send(skb,ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, dev);
		return;
	}

	/*
	 *	The protocol doesn't seem to say what to do in the case that the
	 *	frame + options doesn't fit the mtu. As it used to fall down dead
	 *	in this case we were fortunate it didn't happen
	 */

	if(mtu<8)
	{
		/* It's wrong but its better than nothing */
		icmp_send(skb,ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED,dev);
		ip_statistics.IpFragFails++;
		return;
	}

	/*
	 *	Fragment the datagram.
	 */

	/*
	 *	The initial offset is 0 for a complete frame. When
	 *	fragmenting fragments its wherever this one starts.
	 */

	if (is_frag & 2)
		offset = (ntohs(iph->frag_off) & 0x1fff) << 3;
	else
		offset = 0;


	/*
	 *	Keep copying data until we run out.
	 */

	while(left > 0)
	{
		len = left;
		/* IF: it doesn't fit, use 'mtu' - the data space left */
		if (len > mtu)
			len = mtu;
		/* IF: we are not sending upto and including the packet end
		   then align the next start on an eight byte boundary */
		if (len < left)
		{
			len/=8;
			len*=8;
		}
		/*
		 *	Allocate buffer.
		 */

		if ((skb2 = alloc_skb(len + hlen,GFP_ATOMIC)) == NULL)
		{
			printk("IP: frag: no memory for new fragment!\n");
			ip_statistics.IpFragFails++;
			return;
		}

		/*
		 *	Set up data on packet
		 */

		skb2->arp = skb->arp;
		if(skb->free==0)
			printk("IP fragmenter: BUG free!=1 in fragmenter\n");
		skb2->free = 1;
		skb2->len = len + hlen;
		skb2->h.raw=(char *) skb2->data;
		/*
		 *	Charge the memory for the fragment to any owner
		 *	it might possess
		 */

		save_flags(flags);
		if (sk)
		{
			cli();
			sk->wmem_alloc += skb2->mem_len;
			skb2->sk=sk;
		}
		restore_flags(flags);
		skb2->raddr = skb->raddr;	/* For rebuild_header - must be here */

		/*
		 *	Copy the packet header into the new buffer.
		 */

		memcpy(skb2->h.raw, raw, hlen);

		/*
		 *	Copy a block of the IP datagram.
		 */
		memcpy(skb2->h.raw + hlen, ptr, len);
		left -= len;

		skb2->h.raw+=dev->hard_header_len;

		/*
		 *	Fill in the new header fields.
		 */
		iph = (struct iphdr *)(skb2->h.raw/*+dev->hard_header_len*/);
		iph->frag_off = htons((offset >> 3));
		/*
		 *	Added AC : If we are fragmenting a fragment thats not the
		 *		   last fragment then keep MF on each bit
		 */
		if (left > 0 || (is_frag & 1))
			iph->frag_off |= htons(IP_MF);
		ptr += len;
		offset += len;

		/*
		 *	Put this fragment into the sending queue.
		 */

		ip_statistics.IpFragCreates++;

		ip_queue_xmit(sk, dev, skb2, 2);
	}
	ip_statistics.IpFragOKs++;
}
1245
1246
1247 #ifdefCONFIG_IP_FORWARD1248
1249 /*1250 * Forward an IP datagram to its next destination.1251 */1252
/*
 *	Forward an IP datagram to its next destination.
 *
 *	skb/dev are the received frame and the interface it arrived on;
 *	is_frag carries the fragment tagging done in ip_rcv() so that
 *	ip_fragment() does the right thing if we must refragment.
 *	The datagram is always copied into a freshly allocated skb; the
 *	caller (ip_rcv) remains responsible for freeing the original.
 */
static void ip_forward(struct sk_buff *skb, struct device *dev, int is_frag)
{
	struct device *dev2;	/* Output device */
	struct iphdr *iph;	/* Our header */
	struct sk_buff *skb2;	/* Output packet */
	struct rtable *rt;	/* Route we use */
	unsigned char *ptr;	/* Data pointer */
	unsigned long raddr;	/* Router IP address */

	/*
	 *	According to the RFC, we must first decrease the TTL field. If
	 *	that reaches zero, we must reply an ICMP control message telling
	 *	that the packet's lifetime expired.
	 *
	 *	Exception:
	 *	We may not generate an ICMP for an ICMP. icmp_send does the
	 *	enforcement of this so we can forget it here. It is however
	 *	sometimes VERY important.
	 */

	iph = skb->h.iph;
	iph->ttl--;
	/* NOTE(review): if ttl is an unsigned byte, a frame arriving with
	   ttl==0 wraps to 255 here and this test only catches ttl that was
	   exactly 1 -- TODO confirm against struct iphdr. */
	if (iph->ttl <= 0)
	{
		/* Tell the sender its packet died... */
		icmp_send(skb, ICMP_TIME_EXCEEDED, ICMP_EXC_TTL, dev);
		return;
	}

	/*
	 *	Re-compute the IP header checksum.
	 *	This is inefficient. We know what has happened to the header
	 *	and could thus adjust the checksum as Phil Karn does in KA9Q
	 */

	ip_send_check(iph);

	/*
	 *	OK, the packet is still valid. Fetch its destination address,
	 *	and give it to the IP sender for further processing.
	 */

	rt = ip_rt_route(iph->daddr, NULL, NULL);
	if (rt == NULL)
	{
		/*
		 *	Tell the sender its packet cannot be delivered. Again
		 *	ICMP is screened later.
		 */
		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_NET_UNREACH, dev);
		return;
	}

	/*
	 *	Gosh. Not only is the packet valid; we even know how to
	 *	forward it onto its final destination. Can we say this
	 *	is being plain lucky?
	 *	If the router told us that there is no GW, use the dest.
	 *	IP address itself- we seem to be connected directly...
	 */

	raddr = rt->rt_gateway;

	if (raddr != 0)
	{
		/*
		 *	There is a gateway so find the correct route for it.
		 *	Gateways cannot in turn be gatewayed.
		 */
		rt = ip_rt_route(raddr, NULL, NULL);
		if (rt == NULL)
		{
			/*
			 *	Tell the sender its packet cannot be delivered...
			 */
			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, dev);
			return;
		}
		if (rt->rt_gateway != 0)
			raddr = rt->rt_gateway;
	}
	else
		raddr = iph->daddr;

	/*
	 *	Having picked a route we can now send the frame out.
	 */

	dev2 = rt->rt_dev;

	/*
	 *	In IP you never forward a frame on the interface that it arrived
	 *	upon. We should generate an ICMP HOST REDIRECT giving the route
	 *	we calculated.
	 *	For now just dropping the packet is an acceptable compromise.
	 */

	if (dev == dev2)
		return;

	/*
	 *	We now allocate a new buffer, and copy the datagram into it.
	 *	If the indicated interface is up and running, kick it.
	 */

	if (dev2->flags & IFF_UP)
	{

		/*
		 *	Current design decrees we copy the packet. For identical header
		 *	lengths we could avoid it. The new skb code will let us push
		 *	data so the problem goes away then.
		 */

		skb2 = alloc_skb(dev2->hard_header_len + skb->len, GFP_ATOMIC);
		/*
		 *	This is rare and since IP is tolerant of network failures
		 *	quite harmless.
		 */
		if (skb2 == NULL)
		{
			printk("\nIP: No memory available for IP forward\n");
			return;
		}
		ptr = skb2->data;
		skb2->free = 1;		/* freed once transmitted */
		skb2->len = skb->len + dev2->hard_header_len;
		skb2->h.raw = ptr;

		/*
		 *	Copy the packet data into the new buffer.
		 */
		memcpy(ptr + dev2->hard_header_len, skb->h.raw, skb->len);

		/* Now build the MAC header. */
		(void) ip_send(skb2, raddr, skb->len, dev2, dev2->pa_addr);

		ip_statistics.IpForwDatagrams++;

		/*
		 *	See if it needs fragmenting. Note in ip_rcv we tagged
		 *	the fragment type. This must be right so that
		 *	the fragmenter does the right thing.
		 */

		if(skb2->len > dev2->mtu + dev2->hard_header_len)
		{
			/* ip_fragment queues the pieces itself; the oversized
			   copy is then discarded. */
			ip_fragment(NULL,skb2,dev2, is_frag);
			kfree_skb(skb2,FREE_WRITE);
		}
		else
		{
			/*
			 *	Map service types to priority. We lie about
			 *	throughput being low priority, but its a good
			 *	choice to help improve general usage.
			 */
			if(iph->tos & IPTOS_LOWDELAY)
				dev_queue_xmit(skb2, dev2, SOPRI_INTERACTIVE);
			else if(iph->tos & IPTOS_THROUGHPUT)
				dev_queue_xmit(skb2, dev2, SOPRI_BACKGROUND);
			else
				dev_queue_xmit(skb2, dev2, SOPRI_NORMAL);
		}
	}
}
1421
1422 #endif1423
1424 /*1425 * This function receives all incoming IP datagrams.1426 */1427
/*
 *	Receive one IP datagram from a device.
 *
 *	Validates the header, trims link padding, records fragment state,
 *	forwards frames not addressed to us (when CONFIG_IP_FORWARD),
 *	reassembles fragments, and finally hands the datagram to every
 *	registered transport protocol that wants it. Always returns 0.
 */
int ip_rcv(struct sk_buff *skb, struct device *dev, struct packet_type *pt)
{
	struct iphdr *iph = skb->h.iph;
	unsigned char hash;
	unsigned char flag = 0;		/* set once any protocol accepts the frame */
	unsigned char opts_p = 0;	/* Set iff the packet has options. */
	struct inet_protocol *ipprot;
	static struct options opt; /* since we don't use these yet, and they
				take up stack space. */
	int brd=IS_MYADDR;
	int is_frag=0;			/* bit 0: more-fragments, bit 1: has offset */


	ip_statistics.IpInReceives++;

	/*
	 *	Tag the ip header of this packet so we can find it
	 */

	skb->ip_hdr = iph;

	/*
	 *	Is the datagram acceptable?
	 *
	 *	1.	Length at least the size of an ip header
	 *	2.	Version of 4
	 *	3.	Checksums correctly. [Speed optimisation for later, skip loopback checksums]
	 *	(4.	We ought to check for IP multicast addresses and undefined types.. does this matter ?)
	 */

	if (skb->len<sizeof(struct iphdr) || iph->ihl<5 || iph->version != 4 || ip_fast_csum((unsigned char *)iph, iph->ihl) !=0)
	{
		ip_statistics.IpInHdrErrors++;
		kfree_skb(skb, FREE_WRITE);
		return(0);
	}

	/*
	 *	Our transport medium may have padded the buffer out. Now we know it
	 *	is IP we can trim to the true length of the frame.
	 */

	skb->len=ntohs(iph->tot_len);

	/*
	 *	Next analyse the packet for options. Studies show under one packet in
	 *	a thousand have options....
	 */

	if (iph->ihl != 5)
	{	/* Fast path for the typical optionless IP packet. */
		memset((char *) &opt, 0, sizeof(opt));
		/* NOTE(review): on a bad option we return without freeing skb --
		   looks like a buffer leak; do_options never sees the skb so it
		   cannot free it. TODO confirm. */
		if (do_options(iph, &opt) != 0)
			return 0;
		opts_p = 1;
	}

	/*
	 *	Remember if the frame is fragmented.
	 */

	if(iph->frag_off)
	{
		/* 0x0020 masks the MF bit in the first (network-order) byte of
		   frag_off without a byte swap -- presumably correct only on
		   little-endian hosts; TODO confirm portability. */
		if (iph->frag_off & 0x0020)
			is_frag|=1;
		/*
		 *	Last fragment ?
		 */

		if (ntohs(iph->frag_off) & 0x1fff)
			is_frag|=2;
	}

	/*
	 *	Do any IP forwarding required. chk_addr() is expensive -- avoid it someday.
	 *
	 *	This is inefficient. While finding out if it is for us we could also compute
	 *	the routing table entry. This is where the great unified cache theory comes
	 *	in as and when someone implements it
	 *
	 *	For most hosts over 99% of packets match the first conditional
	 *	and don't go via ip_chk_addr. Note: brd is set to IS_MYADDR at
	 *	function entry.
	 */

	if ( iph->daddr != skb->dev->pa_addr && (brd = ip_chk_addr(iph->daddr)) == 0)
	{
		/*
		 *	Don't forward multicast or broadcast frames.
		 */

		if(skb->pkt_type!=PACKET_HOST || brd==IS_BROADCAST)
		{
			kfree_skb(skb,FREE_WRITE);
			return 0;
		}

		/*
		 *	The packet is for another target. Forward the frame
		 */

#ifdef CONFIG_IP_FORWARD
		ip_forward(skb, dev, is_frag);
#else
/*		printk("Machine %lx tried to use us as a forwarder to %lx but we have forwarding disabled!\n",
			iph->saddr,iph->daddr);*/
		ip_statistics.IpInAddrErrors++;
#endif
		/*
		 *	The forwarder is inefficient and copies the packet. We
		 *	free the original now.
		 */

		kfree_skb(skb, FREE_WRITE);
		return(0);
	}

	/*
	 *	Reassemble IP fragments.
	 */

	if(is_frag)
	{
		/* Defragment. Obtain the complete packet if there is one */
		skb=ip_defrag(iph,skb,dev);
		if(skb==NULL)
			return 0;	/* datagram still incomplete; held by ip_defrag */
		iph=skb->h.iph;
	}

	/*
	 *	Point into the IP datagram, just past the header.
	 */

	skb->ip_hdr = iph;
	skb->h.raw += iph->ihl*4;

	/*
	 *	skb->h.raw now points at the protocol beyond the IP header.
	 */

	hash = iph->protocol & (MAX_INET_PROTOS -1);
	for (ipprot = (struct inet_protocol *)inet_protos[hash];ipprot != NULL;ipprot=(struct inet_protocol *)ipprot->next)
	{
		struct sk_buff *skb2;

		if (ipprot->protocol != iph->protocol)
			continue;
		/*
		 *	See if we need to make a copy of it. This will
		 *	only be set if more than one protocol wants it.
		 *	and then not for the last one.
		 *
		 *	This is an artifact of poor upper protocol design.
		 *	Because the upper protocols damage the actual packet
		 *	we must do copying. In actual fact it's even worse
		 *	than this as TCP may hold on to the buffer.
		 */
		if (ipprot->copy)
		{
			skb2 = skb_clone(skb, GFP_ATOMIC);
			if(skb2==NULL)
				continue;
		}
		else
		{
			skb2 = skb;
		}
		flag = 1;

		/*
		 *	Pass on the datagram to each protocol that wants it,
		 *	based on the datagram protocol. We should really
		 *	check the protocol handler's return values here...
		 */
		ipprot->handler(skb2, dev, opts_p ? &opt : 0, iph->daddr,
				(ntohs(iph->tot_len) - (iph->ihl * 4)),
				iph->saddr, 0, ipprot);

	}

	/*
	 *	All protocols checked.
	 *	If this packet was a broadcast, we may *not* reply to it, since that
	 *	causes (proven, grin) ARP storms and a leakage of memory (i.e. all
	 *	ICMP reply messages get queued up for transmission...)
	 */

	if (!flag)
	{
		/* No protocol claimed the frame: send protocol-unreachable
		   (never to broadcast/multicast) and free it ourselves. */
		if (brd != IS_BROADCAST && brd!=IS_MULTICAST)
			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PROT_UNREACH, dev);
		kfree_skb(skb, FREE_WRITE);
	}

	return(0);
}
1626
1627 /*1628 * Queues a packet to be sent, and starts the transmitter1629 * if necessary. if free = 1 then we free the block after1630 * transmit, otherwise we don't. If free==2 we not only1631 * free the block but also don't assign a new ip seq number.1632 * This routine also needs to put in the total length,1633 * and compute the checksum1634 */1635
/*
 *	Queues a packet to be sent, and starts the transmitter
 *	if necessary. if free = 1 then we free the block after
 *	transmit, otherwise we don't. If free==2 we not only
 *	free the block but also don't assign a new ip seq number.
 *	This routine also needs to put in the total length,
 *	and compute the checksum
 *
 *	The skb is expected to already contain the hardware header and a
 *	fully built IP header (bar tot_len/id/checksum, filled in here).
 */
void ip_queue_xmit(struct sock *sk, struct device *dev,
	   struct sk_buff *skb, int free)
{
	struct iphdr *iph;
	unsigned char *ptr;

	/* Sanity check */
	if (dev == NULL)
	{
		printk("IP: ip_queue_xmit dev = NULL\n");
		return;
	}

	IS_SKB(skb);

	/*
	 *	Do some book-keeping in the packet for later
	 */


	skb->dev = dev;
	skb->when = jiffies;	/* timestamp for RTT / retransmit accounting */

	/*
	 *	Find the IP header and set the length. This is bad
	 *	but once we get the skb data handling code in the
	 *	hardware will push its header sensibly and we will
	 *	set skb->ip_hdr to avoid this mess and the fixed
	 *	header length problem
	 */

	ptr = skb->data;
	ptr += dev->hard_header_len;
	iph = (struct iphdr *)ptr;
	skb->ip_hdr = iph;
	/* NOTE(review): ntohs here should morally be htons; the two are the
	   same operation on a 16-bit value so the result is correct. */
	iph->tot_len = ntohs(skb->len-dev->hard_header_len);

	/*
	 *	No reassigning numbers to fragments...
	 */

	if(free!=2)
		iph->id      = htons(ip_id_count++);
	else
		free=1;		/* fragments are always freed after transmit */

	/* All buffers without an owner socket get freed */
	if (sk == NULL)
		free = 1;

	skb->free = free;

	/*
	 *	Do we need to fragment. Again this is inefficient.
	 *	We need to somehow lock the original buffer and use
	 *	bits of it.
	 */

	if(skb->len > dev->mtu + dev->hard_header_len)
	{
		/* ip_fragment queues the pieces; the oversize original dies. */
		ip_fragment(sk,skb,dev,0);
		IS_SKB(skb);
		kfree_skb(skb,FREE_WRITE);
		return;
	}

	/*
	 *	Add an IP checksum
	 */

	ip_send_check(iph);

	/*
	 *	Print the frame when debugging
	 */

	/*
	 *	More debugging. You cannot queue a packet already on a list
	 *	Spot this and moan loudly.
	 */
	if (skb->next != NULL)
	{
		printk("ip_queue_xmit: next != NULL\n");
		skb_unlink(skb);
	}

	/*
	 *	If a sender wishes the packet to remain unfreed
	 *	we add it to his send queue. This arguably belongs
	 *	in the TCP level since nobody else uses it. BUT
	 *	remember IPng might change all the rules.
	 */

	if (!free)
	{
		unsigned long flags;
		/* The socket now has more outstanding blocks */

		sk->packets_out++;

		/* Protect the list for a moment */
		save_flags(flags);
		cli();

		if (skb->link3 != NULL)
		{
			printk("ip.c: link3 != NULL\n");
			skb->link3 = NULL;
		}
		/* Append to the socket's send (retransmit) queue. */
		if (sk->send_head == NULL)
		{
			sk->send_tail = skb;
			sk->send_head = skb;
		}
		else
		{
			sk->send_tail->link3 = skb;
			sk->send_tail = skb;
		}
		/* skb->link3 is NULL */

		/* Interrupt restore */
		restore_flags(flags);
		/* Set the IP write timeout to the round trip time for the packet.
		   If an acknowledge has not arrived by then we may wish to act */
		reset_timer(sk, TIME_WRITE, sk->rto);
	}
	else
		/* Remember who owns the buffer */
		skb->sk = sk;

	/*
	 *	If the indicated interface is up and running, send the packet.
	 */
	ip_statistics.IpOutRequests++;

	if (dev->flags & IFF_UP)
	{
		/*
		 *	If we have an owner use its priority setting,
		 *	otherwise use NORMAL
		 */

		if (sk != NULL)
		{
			dev_queue_xmit(skb, dev, sk->priority);
		}
		else
		{
			dev_queue_xmit(skb, dev, SOPRI_NORMAL);
		}
	}
	else
	{
		/* Interface down: count the discard. Unfreed skbs stay on the
		   socket queue for a later retransmit. */
		ip_statistics.IpOutDiscards++;
		if (free)
			kfree_skb(skb, FREE_WRITE);
	}
}
1796
1797 /*1798 * A socket has timed out on its send queue and wants to do a1799 * little retransmitting. Currently this means TCP.1800 */1801
/*
 *	A socket has timed out on its send queue and wants to do a
 *	little retransmitting. Currently this means TCP.
 *
 *	Walks sk->send_head, refreshing the TCP ack/window fields and the
 *	IP id/checksums, then requeues each frame. If 'all' is zero only
 *	the head of the queue is retransmitted. Does NOT back off the
 *	timer -- see ip_retransmit() for that.
 */
void ip_do_retransmit(struct sock *sk, int all)
{
	struct sk_buff * skb;
	struct proto *prot;
	struct device *dev;

	prot = sk->prot;
	skb = sk->send_head;

	while (skb != NULL)
	{
		dev = skb->dev;
		IS_SKB(skb);
		skb->when = jiffies;	/* restamp for RTT measurement */

		/*
		 * In general it's OK just to use the old packet.  However we
		 * need to use the current ack and window fields.  Urg and
		 * urg_ptr could possibly stand to be updated as well, but we
		 * don't keep the necessary data.  That shouldn't be a problem,
		 * if the other end is doing the right thing.  Since we're
		 * changing the packet, we have to issue a new IP identifier.
		 */

		/* this check may be unnecessary - retransmit only for TCP */
		if (sk->protocol == IPPROTO_TCP) {
			struct tcphdr *th;
			struct iphdr *iph;
			int size;

			/* Locate the IP and TCP headers inside the queued frame. */
			iph = (struct iphdr *)(skb->data + dev->hard_header_len);
			th = (struct tcphdr *)(((char *)iph) + (iph->ihl << 2));
			size = skb->len - (((unsigned char *) th) - skb->data);

			iph->id = htons(ip_id_count++);
			ip_send_check(iph);

			/* Refresh ack and advertised window, then redo the
			   TCP checksum over the updated segment. */
			th->ack_seq = ntohl(sk->acked_seq);
			th->window = ntohs(tcp_select_window(sk));
			tcp_send_check(th, sk->saddr, sk->daddr, size, sk);
		}

		/*
		 *	If the interface is (still) up and running, kick it.
		 */

		if (dev->flags & IFF_UP)
		{
			/*
			 *	If the packet is still being sent by the device/protocol
			 *	below then don't retransmit. This is both needed, and good -
			 *	especially with connected mode AX.25 where it stops resends
			 *	occurring of an as yet unsent anyway frame!
			 *	We still add up the counts as the round trip time wants
			 *	adjusting.
			 */
			if (sk && !skb_device_locked(skb))
			{
				/* Remove it from any existing driver queue first! */
				skb_unlink(skb);
				/* Now queue it */
				ip_statistics.IpOutRequests++;
				dev_queue_xmit(skb, dev, sk->priority);
			}
		}

		/*
		 *	Count retransmissions
		 */
		sk->retransmits++;
		sk->prot->retransmits ++;

		/*
		 *	Only one retransmit requested.
		 */
		if (!all)
			break;

		/*
		 *	This should cut it off before we send too many packets.
		 */
		if (sk->retransmits >= sk->cong_window)
			break;
		skb = skb->link3;	/* next frame on the send queue */
	}
}
1889 /*1890 * This is the normal code called for timeouts. It does the retransmission1891 * and then does backoff. ip_do_retransmit is separated out because1892 * tcp_ack needs to send stuff from the retransmit queue without1893 * initiating a backoff.1894 */1895
1896 voidip_retransmit(structsock *sk, intall)
/* */1897 {1898 ip_do_retransmit(sk, all);
1899
1900 /*1901 * Increase the timeout each time we retransmit. Note that1902 * we do not increase the rtt estimate. rto is initialized1903 * from rtt, but increases here. Jacobson (SIGCOMM 88) suggests1904 * that doubling rto each time is the least we can get away with.1905 * In KA9Q, Karn uses this for the first few times, and then1906 * goes to quadratic. netBSD doubles, but only goes up to *64,1907 * and clamps at 1 to 64 sec afterwards. Note that 120 sec is1908 * defined in the protocol as the maximum possible RTT. I guess1909 * we'll have to use something other than TCP to talk to the1910 * University of Mars.1911 */1912
1913 sk->retransmits++;
1914 sk->backoff++;
1915 sk->rto = min(sk->rto << 1, 120*HZ);
1916 reset_timer(sk, TIME_WRITE, sk->rto);
1917 }1918
1919 /*1920 * Socket option code for IP. This is the end of the line after any TCP,UDP etc options on1921 * an IP socket.1922 *1923 * We implement IP_TOS (type of service), IP_TTL (time to live).1924 *1925 * Next release we will sort out IP_OPTIONS since for some people are kind of important.1926 */1927
1928 intip_setsockopt(structsock *sk, intlevel, intoptname, char *optval, intoptlen)
/* */1929 {1930 intval,err;
1931
1932 if (optval == NULL)
1933 return(-EINVAL);
1934
1935 err=verify_area(VERIFY_READ, optval, sizeof(int));
1936 if(err)
1937 returnerr;
1938
1939 val = get_fs_long((unsignedlong *)optval);
1940
1941 if(level!=SOL_IP)
1942 return -EOPNOTSUPP;
1943
1944 switch(optname)
1945 {1946 caseIP_TOS:
1947 if(val<0||val>255)
1948 return -EINVAL;
1949 sk->ip_tos=val;
1950 if(val==IPTOS_LOWDELAY)
1951 sk->priority=SOPRI_INTERACTIVE;
1952 if(val==IPTOS_THROUGHPUT)
1953 sk->priority=SOPRI_BACKGROUND;
1954 return 0;
1955 caseIP_TTL:
1956 if(val<1||val>255)
1957 return -EINVAL;
1958 sk->ip_ttl=val;
1959 return 0;
1960 /* IP_OPTIONS and friends go here eventually */1961 default:
1962 return(-ENOPROTOOPT);
1963 }1964 }1965
1966 /*1967 * Get the options. Note for future reference. The GET of IP options gets the1968 * _received_ ones. The set sets the _sent_ ones.1969 */1970
1971 intip_getsockopt(structsock *sk, intlevel, intoptname, char *optval, int *optlen)
/* */1972 {1973 intval,err;
1974
1975 if(level!=SOL_IP)
1976 return -EOPNOTSUPP;
1977
1978 switch(optname)
1979 {1980 caseIP_TOS:
1981 val=sk->ip_tos;
1982 break;
1983 caseIP_TTL:
1984 val=sk->ip_ttl;
1985 break;
1986 default:
1987 return(-ENOPROTOOPT);
1988 }1989 err=verify_area(VERIFY_WRITE, optlen, sizeof(int));
1990 if(err)
1991 returnerr;
1992 put_fs_long(sizeof(int),(unsignedlong *) optlen);
1993
1994 err=verify_area(VERIFY_WRITE, optval, sizeof(int));
1995 if(err)
1996 returnerr;
1997 put_fs_long(val,(unsignedlong *)optval);
1998
1999 return(0);
2000 }2001
2002 /*2003 * IP protocol layer initialiser2004 */2005
/*
 *	Packet handler registration record for IP. The protocol type is
 *	left zero here and filled in with htons(ETH_P_IP) by ip_init()
 *	before dev_add_pack() is called (htons() is not usable as a
 *	static initialiser here).
 */
static struct packet_type ip_packet_type =
{
	0,		/* MUTTER ntohs(ETH_P_IP),*/
	0,		/* copy */
	ip_rcv,		/* receive handler for every IP frame */
	NULL,		/* presumably private data -- confirm against struct packet_type */
	NULL,		/* presumably list link -- confirm against struct packet_type */
};
2014
2015
2016 /*2017 * IP registers the packet type and then calls the subprotocol initialisers2018 */2019
/*
 *	IP protocol layer initialiser: registers the IP packet type with
 *	the device layer. The sub-protocol initialisers are currently
 *	called elsewhere (see the commented-out calls below).
 */
void ip_init(void)
{
	/* Must fill in the type before registration -- htons() cannot be
	   used in the static initialiser of ip_packet_type. */
	ip_packet_type.type=htons(ETH_P_IP);
	dev_add_pack(&ip_packet_type);
/*	ip_raw_init();
	ip_packet_init();
	ip_tcp_init();
	ip_udp_init();*/
}