net/inet/ip.c

/* */
This source file includes the following definitions.
ip_ioctl
strict_route
loose_route
ip_route_check
build_options
ip_send
ip_build_header
do_options
ip_fast_csum
ip_compute_csum
ip_csum
ip_send_check
ip_frag_create
ip_find
ip_free
ip_expire
ip_create
ip_done
ip_glue
ip_defrag
ip_fragment
ip_forward
ip_rcv
ip_queue_xmit
ip_do_retransmit
ip_retransmit
ip_setsockopt
ip_getsockopt
ip_init
   1 /*
   2  * INET         An implementation of the TCP/IP protocol suite for the LINUX
   3  *              operating system.  INET is implemented using the  BSD Socket
   4  *              interface as the means of communication with the user level.
   5  *
   6  *              The Internet Protocol (IP) module.
   7  *
   8  * Version:     @(#)ip.c        1.0.16b 9/1/93
   9  *
  10  * Authors:     Ross Biro, <bir7@leland.Stanford.Edu>
  11  *              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
  12  *              Donald Becker, <becker@super.org>
  13  *              Alan Cox, <gw4pts@gw4pts.ampr.org>
  14  *
  15  * Fixes:
  16  *              Alan Cox        :       Commented a couple of minor bits of surplus code
  17  *              Alan Cox        :       Undefining IP_FORWARD doesn't include the code
  18  *                                      (just stops a compiler warning).
  19  *              Alan Cox        :       Frames with >=MAX_ROUTE record routes, strict routes or loose routes
  20  *                                      are junked rather than corrupting things.
  21  *              Alan Cox        :       Frames to bad broadcast subnets are dumped
  22  *                                      We used to process them non broadcast and
  23  *                                      boy could that cause havoc.
  24  *              Alan Cox        :       ip_forward sets the free flag on the 
  25  *                                      new frame it queues. Still crap because
  26  *                                      it copies the frame but at least it 
  27  *                                      doesn't eat memory too.
  28  *              Alan Cox        :       Generic queue code and memory fixes.
  29  *              Fred Van Kempen :       IP fragment support (borrowed from NET2E)
  30  *              Gerhard Koerting:       Forward fragmented frames correctly.
  31  *              Gerhard Koerting:       Fixes to my fix of the above 8-).
  32  *              Gerhard Koerting:       IP interface addressing fix.
  33  *              Linus Torvalds  :       More robustness checks
  34  *              Alan Cox        :       Even more checks: Still not as robust as it ought to be
  35  *              Alan Cox        :       Save IP header pointer for later
  36  *              Alan Cox        :       ip option setting
  37  *              Alan Cox        :       Use ip_tos/ip_ttl settings
  38  *              Alan Cox        :       Fragmentation bogosity removed
  39  *                                      (Thanks to Mark.Bush@prg.ox.ac.uk)
  40  *              Dmitry Gorodchanin :    Send of a raw packet crash fix.
  41  *              Alan Cox        :       Silly ip bug when an overlength
  42  *                                      fragment turns up. Now frees the
  43  *                                      queue.
  44  *              Linus Torvalds/ :       Memory leakage on fragmentation 
  45  *              Alan Cox        :       handling.
  46  *              Gerhard Koerting:       Forwarding uses IP priority hints
  47  *              Teemu Rantanen  :       Fragment problems.
  48  *              Alan Cox        :       General cleanup, comments and reformat
  49  *              Alan Cox        :       SNMP statistics
  50  *              Alan Cox        :       BSD address rule semantics. Also see
  51  *                                      UDP as there is a nasty checksum issue
  52  *                                      if you do things the wrong way.
  53  *              Alan Cox        :       Always defrag, moved IP_FORWARD to the config.in file
  54  *              Alan Cox        :       IP options adjust sk->priority.
  55  *
  56  * To Fix:
  57  *              IP option processing is mostly not needed. ip_forward needs to know about routing rules
  58  *              and time stamp but that's about all. Use the route mtu field here too
  59  *
  60  *              This program is free software; you can redistribute it and/or
  61  *              modify it under the terms of the GNU General Public License
  62  *              as published by the Free Software Foundation; either version
  63  *              2 of the License, or (at your option) any later version.
  64  */
  65 #include <asm/segment.h>
  66 #include <asm/system.h>
  67 #include <linux/types.h>
  68 #include <linux/kernel.h>
  69 #include <linux/sched.h>
  70 #include <linux/string.h>
  71 #include <linux/errno.h>
  72 #include <linux/socket.h>
  73 #include <linux/sockios.h>
  74 #include <linux/in.h>
  75 #include <linux/inet.h>
  76 #include <linux/netdevice.h>
  77 #include <linux/etherdevice.h>
  78 #include "snmp.h"
  79 #include "ip.h"
  80 #include "protocol.h"
  81 #include "route.h"
  82 #include "tcp.h"
  83 #include <linux/skbuff.h>
  84 #include "sock.h"
  85 #include "arp.h"
  86 #include "icmp.h"
  87 
  88 #define CONFIG_IP_DEFRAG
  89 
  90 extern int last_retran;
  91 extern void sort_send(struct sock *sk);
  92 
  93 #define min(a,b)        ((a)<(b)?(a):(b))
  94 #define LOOPBACK(x)     (((x) & htonl(0xff000000)) == htonl(0x7f000000))
  95 
  96 /*
  97  *      SNMP management statistics
  98  */
  99  
 100 struct ip_mib ip_statistics={1,64,};    /* Forwarding=Yes, Default TTL=64 */
 101  
 102 /*
 103  *      Handle the issuing of an ioctl() request 
 104  *      for the ip device. This is scheduled to
 105  *      disappear
 106  */
 107 
 108 int ip_ioctl(struct sock *sk, int cmd, unsigned long arg)
     /*  */
 109 {
 110         switch(cmd) 
 111         {
 112                 default:
 113                         return(-EINVAL);
 114         }
 115 }
 116 
 117 
 118 /* these two routines will do routing. */
 119 
 120 static void
 121 strict_route(struct iphdr *iph, struct options *opt)
     /*  */
 122 {
 123 }
 124 
 125 
 126 static void
 127 loose_route(struct iphdr *iph, struct options *opt)
     /*  */
 128 {
 129 }
 130 
 131 
 132 
 133 
 134 /* This routine will check to see if we have lost a gateway. */
 135 void
 136 ip_route_check(unsigned long daddr)
     /*  */
 137 {
 138 }
 139 
 140 
 141 #if 0
 142 /* this routine puts the options at the end of an ip header. */
 143 static int
 144 build_options(struct iphdr *iph, struct options *opt)
     /*  */
 145 {
 146   unsigned char *ptr;
 147   /* currently we don't support any options. */
 148   ptr = (unsigned char *)(iph+1);
 149   *ptr = 0;
 150   return (4);
 151 }
 152 #endif
 153 
 154 
 155 /*
 156  *      Take an skb, and fill in the MAC header. 
 157  */
 158  
 159 static int ip_send(struct sk_buff *skb, unsigned long daddr, int len, struct device *dev, unsigned long saddr)
     /*  */
 160 {
 161         int mac = 0;
 162 
 163         skb->dev = dev;
 164         skb->arp = 1;
 165         if (dev->hard_header) 
 166         {
 167                 /*
 168                  *      Build a hardware header. Source address is our mac, destination unknown
 169                  *      (rebuild header will sort this out) 
 170                  */
 171                 mac = dev->hard_header(skb->data, dev, ETH_P_IP, NULL, NULL, len, skb);
 172                 if (mac < 0) 
 173                 {
 174                         mac = -mac;
 175                         skb->arp = 0;
 176                         skb->raddr = daddr;     /* next routing address */                      
 177                 }       
 178         }
 179         return mac;
 180 }
 181 
 182 int ip_id_count = 0;
 183 
 184 /*
 185  * This routine builds the appropriate hardware/IP headers for
 186  * the routine.  It assumes that if *dev != NULL then the
 187  * protocol knows what it's doing, otherwise it uses the
 188  * routing/ARP tables to select a device struct.
 189  */
 190 int ip_build_header(struct sk_buff *skb, unsigned long saddr, unsigned long daddr,
     /*  */
 191                 struct device **dev, int type, struct options *opt, int len, int tos, int ttl)
 192 {
 193         static struct options optmem;
 194         struct iphdr *iph;
 195         struct rtable *rt;
 196         unsigned char *buff;
 197         unsigned long raddr;
 198         int tmp;
 199         unsigned long src;
 200 
 201         /*
 202          *      If there is no 'from' address as yet, then make it our loopback
 203          */
 204          
 205         if (saddr == 0) 
 206                 saddr = ip_my_addr();
 207         
 208         buff = skb->data;
 209 
 210         /* 
 211          *      See if we need to look up the device. 
 212          */
 213          
 214         if (*dev == NULL) 
 215         {
 216                 if(skb->localroute)
 217                         rt = ip_rt_local(daddr, &optmem, &src);
 218                 else
 219                         rt = ip_rt_route(daddr, &optmem, &src);
 220                 if (rt == NULL) 
 221                 {
 222                         ip_statistics.IpOutNoRoutes++;
 223                         return(-ENETUNREACH);
 224                 }
 225         
 226                 *dev = rt->rt_dev;
 227                 /*
 228                  *      If the frame is from us and going off machine it MUST MUST MUST
 229                  *      have the output device ip address and never the loopback
 230                  */
 231                 if (LOOPBACK(saddr) && !LOOPBACK(daddr))
 232                         saddr = src;/*rt->rt_dev->pa_addr;*/
 233                 raddr = rt->rt_gateway;
 234 
 235                 opt = &optmem;
 236         } 
 237         else 
 238         {
 239                 /* 
 240                  *      We still need the address of the first hop. 
 241                  */
 242                 if(skb->localroute)
 243                         rt = ip_rt_local(daddr, &optmem, &src);
 244                 else
 245                         rt = ip_rt_route(daddr, &optmem, &src);
 246                 /*
 247                  *      If the frame is from us and going off machine it MUST MUST MUST
 248                  *      have the output device ip address and never the loopback
 249                  */
 250                 if (LOOPBACK(saddr) && !LOOPBACK(daddr))
 251                         saddr = src;/*rt->rt_dev->pa_addr;*/
 252 
 253                 raddr = (rt == NULL) ? 0 : rt->rt_gateway;
 254         }
 255   
 256           /*
 257            *    No gateway so aim at the real destination
 258            */
 259         if (raddr == 0)
 260                 raddr = daddr;
 261 
 262         /* 
 263          *      Now build the MAC header. 
 264          */
 265          
 266         tmp = ip_send(skb, raddr, len, *dev, saddr);
 267         buff += tmp;
 268         len -= tmp;
 269 
 270         /*
 271          *      Book keeping
 272          */
 273 
 274         skb->dev = *dev;
 275         skb->saddr = saddr;
 276         if (skb->sk) 
 277                 skb->sk->saddr = saddr;
 278 
 279         /*
 280          *      Now build the IP header. 
 281          */
 282 
 283         /* 
 284          *      If we are using IPPROTO_RAW, then we don't need an IP header, since
 285          *      one is being supplied to us by the user 
 286          */
 287 
 288         if(type == IPPROTO_RAW) 
 289                 return (tmp);
 290 
 291         iph = (struct iphdr *)buff;
 292         iph->version  = 4;
 293         iph->tos      = tos;
 294         iph->frag_off = 0;
 295         iph->ttl      = ttl;
 296         iph->daddr    = daddr;
 297         iph->saddr    = saddr;
 298         iph->protocol = type;
 299         iph->ihl      = 5;
 300   
 301         /* Setup the IP options. */
 302 #ifdef Not_Yet_Avail
 303         build_options(iph, opt);
 304 #endif
 305 
 306         return(20 + tmp);       /* IP header plus MAC header size */
 307 }
 308 
 309 
 310 static int
 311 do_options(struct iphdr *iph, struct options *opt)
     /*  */
 312 {
 313   unsigned char *buff;
 314   int done = 0;
 315   int i, len = sizeof(struct iphdr);
 316 
 317   /* Zero out the options. */
 318   opt->record_route.route_size = 0;
 319   opt->loose_route.route_size  = 0;
 320   opt->strict_route.route_size = 0;
 321   opt->tstamp.ptr              = 0;
 322   opt->security                = 0;
 323   opt->compartment             = 0;
 324   opt->handling                = 0;
 325   opt->stream                  = 0;
 326   opt->tcc                     = 0;
 327   return(0);
 328 
 329   /* Advance the pointer to start at the options. */
 330   buff = (unsigned char *)(iph + 1);
 331 
 332   /* Now start the processing. */
 333   while (!done && len < iph->ihl*4) switch(*buff) {
 334         case IPOPT_END:
 335                 done = 1;
 336                 break;
 337         case IPOPT_NOOP:
 338                 buff++;
 339                 len++;
 340                 break;
 341         case IPOPT_SEC:
 342                 buff++;
 343                 if (*buff != 11) return(1);
 344                 buff++;
 345                 opt->security = ntohs(*(unsigned short *)buff);
 346                 buff += 2;
 347                 opt->compartment = ntohs(*(unsigned short *)buff);
 348                 buff += 2;
 349                 opt->handling = ntohs(*(unsigned short *)buff);
 350                 buff += 2;
 351                 opt->tcc = ((*buff) << 16) + ntohs(*(unsigned short *)(buff+1));
 352                 buff += 3;
 353                 len += 11;
 354                 break;
 355         case IPOPT_LSRR:
 356                 buff++;
 357                 if ((*buff - 3)% 4 != 0) return(1);
 358                 len += *buff;
 359                 opt->loose_route.route_size = (*buff -3)/4;
 360                 buff++;
 361                 if (*buff % 4 != 0) return(1);
 362                 opt->loose_route.pointer = *buff/4 - 1;
 363                 buff++;
 364                 buff++;
 365                 for (i = 0; i < opt->loose_route.route_size; i++) {
 366                         if(i>=MAX_ROUTE)
 367                                 return(1);
 368                         opt->loose_route.route[i] = *(unsigned long *)buff;
 369                         buff += 4;
 370                 }
 371                 break;
 372         case IPOPT_SSRR:
 373                 buff++;
 374                 if ((*buff - 3)% 4 != 0) return(1);
 375                 len += *buff;
 376                 opt->strict_route.route_size = (*buff -3)/4;
 377                 buff++;
 378                 if (*buff % 4 != 0) return(1);
 379                 opt->strict_route.pointer = *buff/4 - 1;
 380                 buff++;
 381                 buff++;
 382                 for (i = 0; i < opt->strict_route.route_size; i++) {
 383                         if(i>=MAX_ROUTE)
 384                                 return(1);
 385                         opt->strict_route.route[i] = *(unsigned long *)buff;
 386                         buff += 4;
 387                 }
 388                 break;
 389         case IPOPT_RR:
 390                 buff++;
 391                 if ((*buff - 3)% 4 != 0) return(1);
 392                 len += *buff;
 393                 opt->record_route.route_size = (*buff -3)/4;
 394                 buff++;
 395                 if (*buff % 4 != 0) return(1);
 396                 opt->record_route.pointer = *buff/4 - 1;
 397                 buff++;
 398                 buff++;
 399                 for (i = 0; i < opt->record_route.route_size; i++) {
 400                         if(i>=MAX_ROUTE)
 401                                 return 1;
 402                         opt->record_route.route[i] = *(unsigned long *)buff;
 403                         buff += 4;
 404                 }
 405                 break;
 406         case IPOPT_SID:
 407                 len += 4;
 408                 buff +=2;
 409                 opt->stream = *(unsigned short *)buff;
 410                 buff += 2;
 411                 break;
 412         case IPOPT_TIMESTAMP:
 413                 buff++;
 414                 len += *buff;
 415                 if (*buff % 4 != 0) return(1);
 416                 opt->tstamp.len = *buff / 4 - 1;
 417                 buff++;
 418                 if ((*buff - 1) % 4 != 0) return(1);
 419                 opt->tstamp.ptr = (*buff-1)/4;
 420                 buff++;
 421                 opt->tstamp.x.full_char = *buff;
 422                 buff++;
 423                 for (i = 0; i < opt->tstamp.len; i++) {
 424                         opt->tstamp.data[i] = *(unsigned long *)buff;
 425                         buff += 4;
 426                 }
 427                 break;
 428         default:
 429                 return(1);
 430   }
 431 
 432   if (opt->record_route.route_size == 0) {
 433         if (opt->strict_route.route_size != 0) {
 434                 memcpy(&(opt->record_route), &(opt->strict_route),
 435                                              sizeof(opt->record_route));
 436         } else if (opt->loose_route.route_size != 0) {
 437                 memcpy(&(opt->record_route), &(opt->loose_route),
 438                                              sizeof(opt->record_route));
 439         }
 440   }
 441 
 442   if (opt->strict_route.route_size != 0 &&
 443       opt->strict_route.route_size != opt->strict_route.pointer) {
 444         strict_route(iph, opt);
 445         return(0);
 446   }
 447 
 448   if (opt->loose_route.route_size != 0 &&
 449       opt->loose_route.route_size != opt->loose_route.pointer) {
 450         loose_route(iph, opt);
 451         return(0);
 452   }
 453 
 454   return(0);
 455 }
 456 
 457 /* 
 458  *      This is a version of ip_compute_csum() optimized for IP headers, which
 459  *      always checksum on 4 octet boundaries. 
 460  */
 461  
 462 static inline unsigned short ip_fast_csum(unsigned char * buff, int wlen)
     /*  */
 463 {
 464         unsigned long sum = 0;
 465 
 466         if (wlen) 
 467         {
 468         unsigned long bogus;
 469          __asm__("clc\n"
 470                 "1:\t"
 471                 "lodsl\n\t"
 472                 "adcl %3, %0\n\t"
 473                 "decl %2\n\t"
 474                 "jne 1b\n\t"
 475                 "adcl $0, %0\n\t"
 476                 "movl %0, %3\n\t"
 477                 "shrl $16, %3\n\t"
 478                 "addw %w3, %w0\n\t"
 479                 "adcw $0, %w0"
 480             : "=r" (sum), "=S" (buff), "=r" (wlen), "=a" (bogus)
 481             : "0"  (sum),  "1" (buff),  "2" (wlen));
 482         }
 483         return (~sum) & 0xffff;
 484 }
 485 
 486 /*
 487  * This routine does all the checksum computations that don't
 488  * require anything special (like copying or special headers).
 489  */
 490 
 491 unsigned short ip_compute_csum(unsigned char * buff, int len)
     /*  */
 492 {
 493         unsigned long sum = 0;
 494 
 495         /* Do the first multiple of 4 bytes and convert to 16 bits. */
 496         if (len > 3) 
 497         {
 498                 __asm__("clc\n"
 499                 "1:\t"
 500                 "lodsl\n\t"
 501                 "adcl %%eax, %%ebx\n\t"
 502                 "loop 1b\n\t"
 503                 "adcl $0, %%ebx\n\t"
 504                 "movl %%ebx, %%eax\n\t"
 505                 "shrl $16, %%eax\n\t"
 506                 "addw %%ax, %%bx\n\t"
 507                 "adcw $0, %%bx"
 508                 : "=b" (sum) , "=S" (buff)
 509                 : "0" (sum), "c" (len >> 2) ,"1" (buff)
 510                 : "ax", "cx", "si", "bx" );
 511         }
 512         if (len & 2) 
 513         {
 514                 __asm__("lodsw\n\t"
 515                 "addw %%ax, %%bx\n\t"
 516                 "adcw $0, %%bx"
 517                 : "=b" (sum), "=S" (buff)
 518                 : "0" (sum), "1" (buff)
 519                 : "bx", "ax", "si");
 520         }
 521         if (len & 1) 
 522         {
 523                 __asm__("lodsb\n\t"
 524                 "movb $0, %%ah\n\t"
 525                 "addw %%ax, %%bx\n\t"
 526                 "adcw $0, %%bx"
 527                 : "=b" (sum), "=S" (buff)
 528                 : "0" (sum), "1" (buff)
 529                 : "bx", "ax", "si");
 530         }
 531         sum =~sum;
 532         return(sum & 0xffff);
 533 }
 534 
 535 /* 
 536  *      Check the header of an incoming IP datagram.  This version is still used in slhc.c. 
 537  */
 538  
 539 int ip_csum(struct iphdr *iph)
     /*  */
 540 {
 541         return ip_fast_csum((unsigned char *)iph, iph->ihl);
 542 }
 543 
 544 /* 
 545  *      Generate a checksym for an outgoing IP datagram. 
 546  */
 547 
 548 static void ip_send_check(struct iphdr *iph)
     /*  */
 549 {
 550         iph->check = 0;
 551         iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
 552 }
 553 
 554 /************************ Fragment Handlers From NET2E not yet with tweaks to beat 4K **********************************/
 555 
 556 
 557 /*
 558  *      This fragment handler is a bit of a heap. On the other hand it works quite
 559  *      happily and handles things quite well.
 560  */
 561  
 562 static struct ipq *ipqueue = NULL;              /* IP fragment queue    */
 563 
 564 /*
 565  *      Create a new fragment entry. 
 566  */
 567  
 568 static struct ipfrag *ip_frag_create(int offset, int end, struct sk_buff *skb, unsigned char *ptr)
     /*  */
 569 {
 570         struct ipfrag *fp;
 571  
 572         fp = (struct ipfrag *) kmalloc(sizeof(struct ipfrag), GFP_ATOMIC);
 573         if (fp == NULL) 
 574         {
 575                 printk("IP: frag_create: no memory left !\n");
 576                 return(NULL);
 577         }
 578         memset(fp, 0, sizeof(struct ipfrag));
 579 
 580         /* Fill in the structure. */
 581         fp->offset = offset;
 582         fp->end = end;
 583         fp->len = end - offset;
 584         fp->skb = skb;
 585         fp->ptr = ptr;
 586  
 587         return(fp);
 588 }
 589  
 590  
 591 /*
 592  *      Find the correct entry in the "incomplete datagrams" queue for
 593  *      this IP datagram, and return the queue entry address if found.
 594  */
 595  
 596 static struct ipq *ip_find(struct iphdr *iph)
     /*  */
 597 {
 598         struct ipq *qp;
 599         struct ipq *qplast;
 600  
 601         cli();
 602         qplast = NULL;
 603         for(qp = ipqueue; qp != NULL; qplast = qp, qp = qp->next) 
 604         {
 605                 if (iph->id== qp->iph->id && iph->saddr == qp->iph->saddr &&
 606                         iph->daddr == qp->iph->daddr && iph->protocol == qp->iph->protocol) 
 607                 {
 608                         del_timer(&qp->timer);  /* So it doesn't vanish on us. The timer will be reset anyway */
 609                         sti();
 610                         return(qp);
 611                 }
 612         }
 613         sti();
 614         return(NULL);
 615 }
 616  
 617  
 618 /*
 619  *      Remove an entry from the "incomplete datagrams" queue, either
 620  *      because we completed, reassembled and processed it, or because
 621  *      it timed out.
 622  */
 623 
 624 static void ip_free(struct ipq *qp)
     /*  */
 625 {
 626         struct ipfrag *fp;
 627         struct ipfrag *xp;
 628 
 629         /*
 630          * Stop the timer for this entry. 
 631          */
 632          
 633         del_timer(&qp->timer);
 634 
 635         /* Remove this entry from the "incomplete datagrams" queue. */
 636         cli();
 637         if (qp->prev == NULL) 
 638         {
 639                 ipqueue = qp->next;
 640                 if (ipqueue != NULL) 
 641                         ipqueue->prev = NULL;
 642         } 
 643         else 
 644         {
 645                 qp->prev->next = qp->next;
 646                 if (qp->next != NULL) 
 647                         qp->next->prev = qp->prev;
 648         }
 649  
 650         /* Release all fragment data. */
 651 
 652         fp = qp->fragments;
 653         while (fp != NULL) 
 654         {
 655                 xp = fp->next;
 656                 IS_SKB(fp->skb);
 657                 kfree_skb(fp->skb,FREE_READ);
 658                 kfree_s(fp, sizeof(struct ipfrag));
 659                 fp = xp;
 660         }
 661         
 662         /* Release the MAC header. */
 663         kfree_s(qp->mac, qp->maclen);
 664  
 665         /* Release the IP header. */
 666         kfree_s(qp->iph, qp->ihlen + 8);
 667  
 668         /* Finally, release the queue descriptor itself. */
 669         kfree_s(qp, sizeof(struct ipq));
 670         sti();
 671  }
 672  
 673  
 674 /*
 675  *      Oops- a fragment queue timed out.  Kill it and send an ICMP reply. 
 676  */
 677  
 678 static void ip_expire(unsigned long arg)
     /*  */
 679 {
 680         struct ipq *qp;
 681  
 682         qp = (struct ipq *)arg;
 683 
 684         /*
 685          *      Send an ICMP "Fragment Reassembly Timeout" message. 
 686          */
 687 
 688         ip_statistics.IpReasmTimeout++;
 689         ip_statistics.IpReasmFails++;            
 690         /* This if is always true... shrug */
 691         if(qp->fragments!=NULL)
 692                 icmp_send(qp->fragments->skb,ICMP_TIME_EXCEEDED,
 693                                 ICMP_EXC_FRAGTIME, qp->dev);
 694  
 695         /* 
 696          *      Nuke the fragment queue. 
 697          */
 698         ip_free(qp);
 699 }
 700  
 701  
 702 /*
 703  *      Add an entry to the 'ipq' queue for a newly received IP datagram.
 704  *      We will (hopefully :-) receive all other fragments of this datagram
 705  *      in time, so we just create a queue for this datagram, in which we
 706  *      will insert the received fragments at their respective positions.
 707  */
 708 
 709 static struct ipq *ip_create(struct sk_buff *skb, struct iphdr *iph, struct device *dev)
     /*  */
 710 {
 711         struct ipq *qp;
 712         int maclen;
 713         int ihlen;
 714 
 715         qp = (struct ipq *) kmalloc(sizeof(struct ipq), GFP_ATOMIC);
 716         if (qp == NULL) 
 717         {
 718                 printk("IP: create: no memory left !\n");
 719                 return(NULL);
 720                 skb->dev = qp->dev;
 721         }
 722         memset(qp, 0, sizeof(struct ipq));
 723 
 724         /*
 725          *      Allocate memory for the MAC header. 
 726          *
 727          *      FIXME: We have a maximum MAC address size limit and define 
 728          *      elsewhere. We should use it here and avoid the 3 kmalloc() calls
 729          */
 730          
 731         maclen = ((unsigned long) iph) - ((unsigned long) skb->data);
 732         qp->mac = (unsigned char *) kmalloc(maclen, GFP_ATOMIC);
 733         if (qp->mac == NULL) 
 734         {
 735                 printk("IP: create: no memory left !\n");
 736                 kfree_s(qp, sizeof(struct ipq));
 737                 return(NULL);
 738         }
 739 
 740         /* 
 741          *      Allocate memory for the IP header (plus 8 octects for ICMP). 
 742          */
 743          
 744         ihlen = (iph->ihl * sizeof(unsigned long));
 745         qp->iph = (struct iphdr *) kmalloc(ihlen + 8, GFP_ATOMIC);
 746         if (qp->iph == NULL) 
 747         {
 748                 printk("IP: create: no memory left !\n");
 749                 kfree_s(qp->mac, maclen);
 750                 kfree_s(qp, sizeof(struct ipq));
 751                 return(NULL);
 752         }
 753 
 754         /* Fill in the structure. */
 755         memcpy(qp->mac, skb->data, maclen);
 756         memcpy(qp->iph, iph, ihlen + 8);
 757         qp->len = 0;
 758         qp->ihlen = ihlen;
 759         qp->maclen = maclen;
 760         qp->fragments = NULL;
 761         qp->dev = dev;
 762         
 763         /* Start a timer for this entry. */
 764         qp->timer.expires = IP_FRAG_TIME;               /* about 30 seconds     */
 765         qp->timer.data = (unsigned long) qp;            /* pointer to queue     */
 766         qp->timer.function = ip_expire;                 /* expire function      */
 767         add_timer(&qp->timer);
 768 
 769         /* Add this entry to the queue. */
 770         qp->prev = NULL;
 771         cli();
 772         qp->next = ipqueue;
 773         if (qp->next != NULL) 
 774                 qp->next->prev = qp;
 775         ipqueue = qp;
 776         sti();
 777         return(qp);
 778 }
 779  
 780  
 781 /*
 782  *      See if a fragment queue is complete. 
 783  */
 784  
 785 static int ip_done(struct ipq *qp)
     /*  */
 786 {
 787         struct ipfrag *fp;
 788         int offset;
 789  
 790         /* Only possible if we received the final fragment. */
 791         if (qp->len == 0) 
 792                 return(0);
 793  
 794         /* Check all fragment offsets to see if they connect. */
 795         fp = qp->fragments;
 796         offset = 0;
 797         while (fp != NULL) 
 798         {
 799                 if (fp->offset > offset) 
 800                         return(0);      /* fragment(s) missing */
 801                 offset = fp->end;
 802                 fp = fp->next;
 803         }
 804  
 805         /* All fragments are present. */
 806         return(1);
 807  }
 808  
 809  
 810 /* 
 811  *      Build a new IP datagram from all its fragments. 
 812  *
 813  *      FIXME: We copy here because we lack an effective way of handling lists
 814  *      of bits on input. Until the new skb data handling is in I'm not going
 815  *      to touch this with a bargepole. This also causes a 4Kish limit on
 816  *      packet sizes.
 817  */
 818  
 819 static struct sk_buff *ip_glue(struct ipq *qp)
     /*  */
 820 {
 821         struct sk_buff *skb;
 822         struct iphdr *iph;
 823         struct ipfrag *fp;
 824         unsigned char *ptr;
 825         int count, len;
 826  
 827         /*
 828          *      Allocate a new buffer for the datagram. 
 829          */
 830          
 831         len = qp->maclen + qp->ihlen + qp->len;
 832 
 833         if ((skb = alloc_skb(len,GFP_ATOMIC)) == NULL) 
 834         {
 835                 ip_statistics.IpReasmFails++;
 836                 printk("IP: queue_glue: no memory for glueing queue 0x%X\n", (int) qp);
 837                 ip_free(qp);
 838                 return(NULL);
 839         }
 840  
 841         /* Fill in the basic details. */
 842         skb->len = (len - qp->maclen);
 843         skb->h.raw = skb->data;
 844         skb->free = 1;
 845  
 846         /* Copy the original MAC and IP headers into the new buffer. */
 847         ptr = (unsigned char *) skb->h.raw;
 848         memcpy(ptr, ((unsigned char *) qp->mac), qp->maclen);
 849         ptr += qp->maclen;
 850         memcpy(ptr, ((unsigned char *) qp->iph), qp->ihlen);
 851         ptr += qp->ihlen;
 852         skb->h.raw += qp->maclen;
 853         
 854         count = 0;
 855  
 856         /* Copy the data portions of all fragments into the new buffer. */
 857         fp = qp->fragments;
 858         while(fp != NULL) 
 859         {
 860                 if(count+fp->len>skb->len)
 861                 {
 862                         printk("Invalid fragment list: Fragment over size.\n");
 863                         ip_free(qp);
 864                         kfree_skb(skb,FREE_WRITE);
 865                         ip_statistics.IpReasmFails++;
 866                         return NULL;
 867                 }
 868                 memcpy((ptr + fp->offset), fp->ptr, fp->len);
 869                 count += fp->len;
 870                 fp = fp->next;
 871         }
 872  
 873         /* We glued together all fragments, so remove the queue entry. */
 874         ip_free(qp);
 875  
 876         /* Done with all fragments. Fixup the new IP header. */
 877         iph = skb->h.iph;
 878         iph->frag_off = 0;
 879         iph->tot_len = htons((iph->ihl * sizeof(unsigned long)) + count);
 880         skb->ip_hdr = iph;
 881         
 882         ip_statistics.IpReasmOKs++;
 883         return(skb);
 884 }
 885  
 886 
 887 /*
 888  *      Process an incoming IP datagram fragment. 
 889  */
 890  
 891 static struct sk_buff *ip_defrag(struct iphdr *iph, struct sk_buff *skb, struct device *dev)
     /*  */
 892 {
 893         struct ipfrag *prev, *next;
 894         struct ipfrag *tfp;
 895         struct ipq *qp;
 896         struct sk_buff *skb2;
 897         unsigned char *ptr;
 898         int flags, offset;
 899         int i, ihl, end;
 900 
 901         ip_statistics.IpReasmReqds++;
 902         
 903         /* Find the entry of this IP datagram in the "incomplete datagrams" queue. */
 904         qp = ip_find(iph);
 905  
 906         /* Is this a non-fragmented datagram? */
 907         offset = ntohs(iph->frag_off);
 908         flags = offset & ~IP_OFFSET;
 909         offset &= IP_OFFSET;
 910         if (((flags & IP_MF) == 0) && (offset == 0)) 
 911         {
 912                 if (qp != NULL)
 913                         ip_free(qp);    /* Huh? How could this exist?? */
 914                 return(skb);
 915         }
 916 
 917         offset <<= 3;           /* offset is in 8-byte chunks */
 918  
 919         /*
 920          * If the queue already existed, keep restarting its timer as long
 921          * as we still are receiving fragments.  Otherwise, create a fresh
 922          * queue entry.
 923          */
 924 
 925         if (qp != NULL) 
 926         {
 927                 del_timer(&qp->timer);
 928                 qp->timer.expires = IP_FRAG_TIME;       /* about 30 seconds */
 929                 qp->timer.data = (unsigned long) qp;    /* pointer to queue */
 930                 qp->timer.function = ip_expire;         /* expire function */
 931                 add_timer(&qp->timer);
 932         } 
 933         else 
 934         {
 935                 /*
 936                  *      If we failed to create it, then discard the frame
 937                  */
 938                 if ((qp = ip_create(skb, iph, dev)) == NULL) 
 939                 {
 940                         skb->sk = NULL;
 941                         kfree_skb(skb, FREE_READ);
 942                         ip_statistics.IpReasmFails++;
 943                         return NULL;
 944                 }
 945         }
 946 
 947         /*
 948          *      Determine the position of this fragment. 
 949          */
 950          
 951         ihl = (iph->ihl * sizeof(unsigned long));
 952         end = offset + ntohs(iph->tot_len) - ihl;
 953 
 954         /*
 955          *      Point into the IP datagram 'data' part. 
 956          */
 957 
 958         ptr = skb->data + dev->hard_header_len + ihl;
 959  
 960         /* 
 961          *      Is this the final fragment? 
 962          */
 963 
 964         if ((flags & IP_MF) == 0) 
 965                 qp->len = end;
 966  
 967         /*
 968          *      Find out which fragments are in front and at the back of us
 969          *      in the chain of fragments so far.  We must know where to put
 970          *      this fragment, right?
 971          */
 972          
 973         prev = NULL;
 974         for(next = qp->fragments; next != NULL; next = next->next) 
 975         {
 976                 if (next->offset > offset) 
 977                         break;  /* bingo! */
 978                 prev = next;
 979         }       
 980  
 981         /*
 982          *      We found where to put this one.
 983          *      Check for overlap with preceeding fragment, and, if needed,
 984          *      align things so that any overlaps are eliminated.
 985          */
 986         if (prev != NULL && offset < prev->end) 
 987         {
 988                 i = prev->end - offset;
 989                 offset += i;    /* ptr into datagram */
 990                 ptr += i;       /* ptr into fragment data */
 991         }       
 992  
 993         /*
 994          * Look for overlap with succeeding segments.
 995          * If we can merge fragments, do it.
 996          */
 997    
 998         for(; next != NULL; next = tfp) 
 999         {
1000                 tfp = next->next;
1001                 if (next->offset >= end) 
1002                         break;          /* no overlaps at all */
1003  
1004                 i = end - next->offset;                 /* overlap is 'i' bytes */
1005                 next->len -= i;                         /* so reduce size of    */
1006                 next->offset += i;                      /* next fragment        */
1007                 next->ptr += i;
1008                 
1009                 /* 
1010                  *      If we get a frag size of <= 0, remove it and the packet
1011                  *      that it goes with.
1012                  */
1013                 if (next->len <= 0) 
1014                 {
1015                         if (next->prev != NULL) 
1016                                 next->prev->next = next->next;
1017                         else 
1018                                 qp->fragments = next->next;
1019                 
1020                         if (tfp->next != NULL) 
1021                                 next->next->prev = next->prev;
1022                                 
1023                         kfree_skb(next->skb,FREE_READ);                         
1024                         kfree_s(next, sizeof(struct ipfrag));
1025                 }
1026         }
1027  
1028         /* 
1029          *      Insert this fragment in the chain of fragments. 
1030          */
1031          
1032         tfp = NULL;
1033         tfp = ip_frag_create(offset, end, skb, ptr);
1034         
1035         /*
1036          *      No memory to save the fragment - so throw the lot
1037          */
1038         
1039         if (!tfp) 
1040         {
1041                 skb->sk = NULL;
1042                 kfree_skb(skb, FREE_READ);
1043                 return NULL;
1044         }
1045         tfp->prev = prev;
1046         tfp->next = next;
1047         if (prev != NULL) 
1048                 prev->next = tfp;
1049         else 
1050                 qp->fragments = tfp;
1051    
1052         if (next != NULL) 
1053                 next->prev = tfp;
1054  
1055         /*
1056          *      OK, so we inserted this new fragment into the chain.
1057          *      Check if we now have a full IP datagram which we can
1058          *      bump up to the IP layer...
1059          */
1060    
1061         if (ip_done(qp)) 
1062         {
1063                 skb2 = ip_glue(qp);             /* glue together the fragments */
1064                 return(skb2);
1065         }
1066         return(NULL);
1067  }
1068  
1069  
1070  /*
1071   *     This IP datagram is too large to be sent in one piece.  Break it up into
1072   *     smaller pieces (each of size equal to the MAC header plus IP header plus
1073   *     a block of the data of the original IP data part) that will yet fit in a
1074   *     single device frame, and queue such a frame for sending by calling the
1075   *     ip_queue_xmit().  Note that this is recursion, and bad things will happen
1076   *     if this function causes a loop...
1077   *
1078   *     Yes this is inefficient, feel free to submit a quicker one.
1079   *
1080   *     **Protocol Violation**
1081   *     We copy all the options to each fragment. !FIXME!
1082   */
1083   
1084  void ip_fragment(struct sock *sk, struct sk_buff *skb, struct device *dev, int is_frag)
     /*  */
1085  {
1086         struct iphdr *iph;
1087         unsigned char *raw;
1088         unsigned char *ptr;
1089         struct sk_buff *skb2;
1090         int left, mtu, hlen, len;
1091         int offset;
1092         unsigned long flags;
1093  
1094         /* 
1095          *      Point into the IP datagram header. 
1096          */
1097          
1098         raw = skb->data;
1099         iph = (struct iphdr *) (raw + dev->hard_header_len);
1100 
1101         skb->ip_hdr = iph;
1102                 
1103         /* 
1104          *      Setup starting values. 
1105          */
1106          
1107         hlen = (iph->ihl * sizeof(unsigned long));
1108         left = ntohs(iph->tot_len) - hlen;      /* Space per frame */
1109         hlen += dev->hard_header_len;           /* Total header size */
1110         mtu = (dev->mtu - hlen);                /* Size of data space */
1111         ptr = (raw + hlen);                     /* Where to start from */
1112         
1113         /*
1114          *      Check for any "DF" flag. [DF means do not fragment]
1115          */
1116          
1117         if (ntohs(iph->frag_off) & IP_DF) 
1118         {
1119                 ip_statistics.IpFragFails++;
1120                 icmp_send(skb,ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, dev); 
1121                 return;
1122         }
1123  
1124         /*
1125          *      The protocol doesn't seem to say what to do in the case that the
1126          *      frame + options doesn't fit the mtu. As it used to fall down dead
1127          *      in this case we were fortunate it didn't happen
1128          */
1129          
1130         if(mtu<8)
1131         {
1132                 /* It's wrong but its better than nothing */
1133                 icmp_send(skb,ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED,dev);
1134                 ip_statistics.IpFragFails++;
1135                 return;
1136         }
1137         
1138         /* 
1139          *      Fragment the datagram. 
1140          */
1141          
1142         /*
1143          *      The initial offset is 0 for a complete frame. When
1144          *      fragmenting fragments its wherever this one starts.
1145          */
1146 
1147         if (is_frag & 2)
1148                 offset = (ntohs(iph->frag_off) & 0x1fff) << 3;
1149         else
1150                 offset = 0;
1151 
1152 
1153         /*
1154          *      Keep copying data until we run out.
1155          */
1156                         
1157         while(left > 0) 
1158         {
1159                 len = left;
1160                 /* IF: it doesn't fit, use 'mtu' - the data space left */
1161                 if (len > mtu)
1162                         len = mtu;
1163                 /* IF: we are not sending upto and including the packet end
1164                    then align the next start on an eight byte boundary */
1165                 if (len < left)
1166                 {
1167                         len/=8;
1168                         len*=8;
1169                 }
1170                 /*
1171                  *      Allocate buffer. 
1172                  */
1173                  
1174                 if ((skb2 = alloc_skb(len + hlen,GFP_ATOMIC)) == NULL) 
1175                 {
1176                         printk("IP: frag: no memory for new fragment!\n");
1177                         ip_statistics.IpFragFails++;
1178                         return;
1179                 }
1180                 
1181                 /*
1182                  *      Set up data on packet
1183                  */
1184 
1185                 skb2->arp = skb->arp;
1186                 if(skb->free==0)
1187                         printk("IP fragmenter: BUG free!=1 in fragmenter\n");
1188                 skb2->free = 1;
1189                 skb2->len = len + hlen;
1190                 skb2->h.raw=(char *) skb2->data;
1191                 /*
1192                  *      Charge the memory for the fragment to any owner
1193                  *      it might posess
1194                  */
1195                  
1196                 save_flags(flags);
1197                 if (sk) 
1198                 {
1199                         cli();
1200                         sk->wmem_alloc += skb2->mem_len;
1201                         skb2->sk=sk;
1202                 }
1203                 restore_flags(flags);
1204                 skb2->raddr = skb->raddr;       /* For rebuild_header - must be here */ 
1205 
1206                 /* 
1207                  *      Copy the packet header into the new buffer. 
1208                  */
1209                  
1210                 memcpy(skb2->h.raw, raw, hlen);
1211  
1212                 /*
1213                  *      Copy a block of the IP datagram. 
1214                  */
1215                 memcpy(skb2->h.raw + hlen, ptr, len);
1216                 left -= len;
1217 
1218                 skb2->h.raw+=dev->hard_header_len; 
1219 
1220                 /*
1221                  *      Fill in the new header fields. 
1222                  */
1223                 iph = (struct iphdr *)(skb2->h.raw/*+dev->hard_header_len*/);
1224                 iph->frag_off = htons((offset >> 3));
1225                 /* 
1226                  *      Added AC : If we are fragmenting a fragment thats not the
1227                  *                 last fragment then keep MF on each bit 
1228                  */
1229                 if (left > 0 || (is_frag & 1)) 
1230                         iph->frag_off |= htons(IP_MF);
1231                 ptr += len;
1232                 offset += len;
1233  
1234                 /* 
1235                  *      Put this fragment into the sending queue. 
1236                  */
1237                  
1238                 ip_statistics.IpFragCreates++;
1239                 
1240                 ip_queue_xmit(sk, dev, skb2, 2);
1241         }
1242         ip_statistics.IpFragOKs++;
1243 }
1244  
1245 
1246 
1247 #ifdef CONFIG_IP_FORWARD
1248 
1249 /*      
1250  *      Forward an IP datagram to its next destination. 
1251  */
1252 
1253 static void ip_forward(struct sk_buff *skb, struct device *dev, int is_frag)
     /*  */
1254 {
1255         struct device *dev2;    /* Output device */
1256         struct iphdr *iph;      /* Our header */
1257         struct sk_buff *skb2;   /* Output packet */
1258         struct rtable *rt;      /* Route we use */
1259         unsigned char *ptr;     /* Data pointer */
1260         unsigned long raddr;    /* Router IP address */
1261 
1262         /*
1263          *      According to the RFC, we must first decrease the TTL field. If
1264          *      that reaches zero, we must reply an ICMP control message telling
1265          *      that the packet's lifetime expired.
1266          *
1267          *      Exception:
1268          *      We may not generate an ICMP for an ICMP. icmp_send does the
1269          *      enforcement of this so we can forget it here. It is however
1270          *      sometimes VERY important.
1271          */
1272 
1273         iph = skb->h.iph;
1274         iph->ttl--;
1275         if (iph->ttl <= 0) 
1276         {
1277                 /* Tell the sender its packet died... */
1278                 icmp_send(skb, ICMP_TIME_EXCEEDED, ICMP_EXC_TTL, dev);
1279                 return;
1280         }
1281 
1282         /* 
1283          *      Re-compute the IP header checksum. 
1284          *      This is inefficient. We know what has happened to the header
1285          *      and could thus adjust the checksum as Phil Karn does in KA9Q
1286          */
1287          
1288         ip_send_check(iph);
1289 
1290         /*
1291          * OK, the packet is still valid.  Fetch its destination address,
1292          * and give it to the IP sender for further processing.
1293          */
1294 
1295         rt = ip_rt_route(iph->daddr, NULL, NULL);
1296         if (rt == NULL) 
1297         {
1298                 /*
1299                  *      Tell the sender its packet cannot be delivered. Again
1300                  *      ICMP is screened later.
1301                  */
1302                 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_NET_UNREACH, dev);
1303                 return;
1304         }
1305 
1306 
1307         /*
1308          * Gosh.  Not only is the packet valid; we even know how to
1309          * forward it onto its final destination.  Can we say this
1310          * is being plain lucky?
1311          * If the router told us that there is no GW, use the dest.
1312          * IP address itself- we seem to be connected directly...
1313          */
1314 
1315         raddr = rt->rt_gateway;
1316 
1317         if (raddr != 0) 
1318         {
1319                 /*
1320                  *      There is a gateway so find the correct route for it.
1321                  *      Gateways cannot in turn be gatewayed.
1322                  */
1323                 rt = ip_rt_route(raddr, NULL, NULL);
1324                 if (rt == NULL) 
1325                 {
1326                         /* 
1327                          *      Tell the sender its packet cannot be delivered... 
1328                          */
1329                         icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, dev);
1330                         return;
1331                 }
1332                 if (rt->rt_gateway != 0) 
1333                         raddr = rt->rt_gateway;
1334         } 
1335         else 
1336                 raddr = iph->daddr;
1337                 
1338         /*
1339          *      Having picked a route we can now send the frame out.
1340          */
1341 
1342         dev2 = rt->rt_dev;
1343 
1344         /*
1345          *      In IP you never forward a frame on the interface that it arrived
1346          *      upon. We should generate an ICMP HOST REDIRECT giving the route
1347          *      we calculated.
1348          *      For now just dropping the packet is an acceptable compromise.
1349          */
1350 
1351         if (dev == dev2)
1352                 return;
1353 
1354         /*
1355          * We now allocate a new buffer, and copy the datagram into it.
1356          * If the indicated interface is up and running, kick it.
1357          */
1358 
1359         if (dev2->flags & IFF_UP) 
1360         {
1361         
1362                 /*
1363                  *      Current design decrees we copy the packet. For identical header
1364                  *      lengths we could avoid it. The new skb code will let us push
1365                  *      data so the problem goes away then.
1366                  */
1367                  
1368                 skb2 = alloc_skb(dev2->hard_header_len + skb->len, GFP_ATOMIC);
1369                 /*
1370                  *      This is rare and since IP is tolerant of network failures
1371                  *      quite harmless.
1372                  */
1373                 if (skb2 == NULL) 
1374                 {
1375                         printk("\nIP: No memory available for IP forward\n");
1376                         return;
1377                 }
1378                 ptr = skb2->data;
1379                 skb2->free = 1;
1380                 skb2->len = skb->len + dev2->hard_header_len;
1381                 skb2->h.raw = ptr;
1382 
1383                 /* 
1384                  *      Copy the packet data into the new buffer. 
1385                  */
1386                 memcpy(ptr + dev2->hard_header_len, skb->h.raw, skb->len);
1387                 
1388                 /* Now build the MAC header. */
1389                 (void) ip_send(skb2, raddr, skb->len, dev2, dev2->pa_addr);
1390 
1391                 ip_statistics.IpForwDatagrams++;
1392 
1393                 /*
1394                  *      See if it needs fragmenting. Note in ip_rcv we tagged
1395                  *      the fragment type. This must be right so that
1396                  *      the fragmenter does the right thing.
1397                  */
1398                  
1399                 if(skb2->len > dev2->mtu)
1400                 {
1401                         ip_fragment(NULL,skb2,dev2, is_frag);
1402                         kfree_skb(skb2,FREE_WRITE);
1403                 }
1404                 else
1405                 {
1406                         /*
1407                          *      Map service types to priority. We lie about
1408                          *      throughput being low priority, but its a good
1409                          *      choice to help improve general usage.
1410                          */
1411                         if(iph->tos & IPTOS_LOWDELAY)
1412                                 dev_queue_xmit(skb2, dev2, SOPRI_INTERACTIVE);
1413                         else if(iph->tos & IPTOS_THROUGHPUT)
1414                                 dev_queue_xmit(skb2, dev2, SOPRI_BACKGROUND);
1415                         else
1416                                 dev_queue_xmit(skb2, dev2, SOPRI_NORMAL);
1417                 }
1418         }
1419 }
1420 
1421 
1422 #endif
1423 
1424 /*
1425  *      This function receives all incoming IP datagrams. 
1426  */
1427  
1428 int ip_rcv(struct sk_buff *skb, struct device *dev, struct packet_type *pt)
     /*  */
1429 {
1430         struct iphdr *iph = skb->h.iph;
1431         unsigned char hash;
1432         unsigned char flag = 0;
1433         unsigned char opts_p = 0;       /* Set iff the packet has options. */
1434         struct inet_protocol *ipprot;
1435         static struct options opt; /* since we don't use these yet, and they
1436                                 take up stack space. */
1437         int brd;
1438         int is_frag=0;
1439 
1440 
1441         ip_statistics.IpInReceives++;
1442         
1443         /*
1444          *      Tag the ip header of this packet so we can find it
1445          */
1446          
1447         skb->ip_hdr = iph;
1448 
1449         /*
1450          *      Is the datagram acceptable? 
1451          *
1452          *      1.      Length at least the size of an ip header
1453          *      2.      Version of 4
1454          *      3.      Checksums correctly. [Speed optimisation for later, skip loopback checksums]
1455          *      (4.     We ought to check for IP multicast addresses and undefined types.. does this matter ?)
1456          */
1457 
1458         if (skb->len<sizeof(struct iphdr) || iph->ihl<5 || iph->version != 4 || ip_fast_csum((unsigned char *)iph, iph->ihl) !=0) 
1459         {
1460                 ip_statistics.IpInHdrErrors++;
1461                 kfree_skb(skb, FREE_WRITE);
1462                 return(0);
1463         }
1464         
1465         /*
1466          *      Our transport medium may have padded the buffer out. Now we know it
1467          *      is IP we can trim to the true length of the frame.
1468          */
1469          
1470         skb->len=ntohs(iph->tot_len);
1471 
1472         /*
1473          *      Next anaylse the packet for options. Studies show under one packet in
1474          *      a thousand have options....
1475          */
1476            
1477         if (iph->ihl != 5) 
1478         {       /* Fast path for the typical optionless IP packet. */
1479                 memset((char *) &opt, 0, sizeof(opt));
1480                 if (do_options(iph, &opt) != 0)
1481                         return 0;
1482                 opts_p = 1;
1483         }
1484 
1485         /*
1486          *      Remember if the frame is fragmented.
1487          */
1488 
1489         if (iph->frag_off & 0x0020)
1490                 is_frag|=1;
1491         
1492         /*
1493          *      Last fragment ?
1494          */
1495          
1496         if (ntohs(iph->frag_off) & 0x1fff)
1497                 is_frag|=2;
1498         
1499         /* 
1500          *      Do any IP forwarding required.  chk_addr() is expensive -- avoid it someday. 
1501          *
1502          *      This is inefficient. While finding out if it is for us we could also compute
1503          *      the routing table entry. This is where the great unified cache theory comes
1504          *      in as and when someone impliments it
1505          */
1506 
1507         if ((brd = ip_chk_addr(iph->daddr)) == 0) 
1508         {
1509                 /*
1510                  *      Don't forward multicast or broadcast frames.
1511                  */
1512         
1513                 if(skb->pkt_type!=PACKET_HOST)
1514                 {
1515                         kfree_skb(skb,FREE_WRITE);
1516                         return 0;
1517                 }
1518                 
1519                 /*
1520                  *      The packet is for another target. Forward the frame
1521                  */
1522                  
1523 #ifdef CONFIG_IP_FORWARD
1524                 ip_forward(skb, dev, is_frag);
1525 #else
1526 /*              printk("Machine %lx tried to use us as a forwarder to %lx but we have forwarding disabled!\n",
1527                         iph->saddr,iph->daddr);*/
1528                 ip_statistics.IpInAddrErrors++;
1529 #endif                  
1530                 /*
1531                  *      The forwarder is inefficient and copies the packet. We 
1532                  *      free the original now.
1533                  */
1534                  
1535                 kfree_skb(skb, FREE_WRITE);
1536                 return(0);
1537         }
1538 
1539         /*
1540          * Reassemble IP fragments. 
1541          */
1542 
1543         if(is_frag)
1544         {
1545                 /* Defragment. Obtain the complete packet if there is one */
1546                 skb=ip_defrag(iph,skb,dev);
1547                 if(skb==NULL)
1548                         return 0;
1549                 iph=skb->h.iph;
1550         }
1551 
1552         /*
1553          *      Point into the IP datagram, just past the header. 
1554          */
1555 
1556         skb->ip_hdr = iph;
1557         skb->h.raw += iph->ihl*4;
1558         
1559         /*
1560          *      skb->h.raw now points at the protocol beyond the IP header.
1561          */
1562          
1563         hash = iph->protocol & (MAX_INET_PROTOS -1);
1564         for (ipprot = (struct inet_protocol *)inet_protos[hash];ipprot != NULL;ipprot=(struct inet_protocol *)ipprot->next)
1565         {
1566                 struct sk_buff *skb2;
1567 
1568                 if (ipprot->protocol != iph->protocol) 
1569                         continue;
1570        /*
1571         *       See if we need to make a copy of it.  This will
1572         *       only be set if more than one protocol wants it. 
1573         *       and then not for the last one.
1574         *
1575         *       This is an artifact of poor upper protocol design. 
1576         *       Because the upper protocols damage the actual packet
1577         *       we must do copying. In actual fact it's even worse
1578         *       than this as TCP may hold on to the buffer.
1579         */
1580                 if (ipprot->copy) 
1581                 {
1582                         skb2 = skb_clone(skb, GFP_ATOMIC);
1583                         if(skb2==NULL)
1584                                 continue;
1585                 } 
1586                 else 
1587                 {
1588                         skb2 = skb;
1589                 }
1590                 flag = 1;
1591 
1592                /*
1593                 * Pass on the datagram to each protocol that wants it,
1594                 * based on the datagram protocol.  We should really
1595                 * check the protocol handler's return values here...
1596                 */
1597                 ipprot->handler(skb2, dev, opts_p ? &opt : 0, iph->daddr,
1598                                 (ntohs(iph->tot_len) - (iph->ihl * 4)),
1599                                 iph->saddr, 0, ipprot);
1600 
1601         }
1602 
1603         /*
1604          * All protocols checked.
1605          * If this packet was a broadcast, we may *not* reply to it, since that
1606          * causes (proven, grin) ARP storms and a leakage of memory (i.e. all
1607          * ICMP reply messages get queued up for transmission...)
1608          */
1609 
1610         if (!flag) 
1611         {
1612                 if (brd != IS_BROADCAST)
1613                         icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PROT_UNREACH, dev);
1614                 kfree_skb(skb, FREE_WRITE);
1615         }
1616 
1617         return(0);
1618 }
1619 
1620 
1621 /*
1622  * Queues a packet to be sent, and starts the transmitter
1623  * if necessary.  if free = 1 then we free the block after
1624  * transmit, otherwise we don't. If free==2 we not only
1625  * free the block but also dont assign a new ip seq number.
1626  * This routine also needs to put in the total length,
1627  * and compute the checksum
1628  */
1629  
1630 void ip_queue_xmit(struct sock *sk, struct device *dev, 
     /*  */
1631               struct sk_buff *skb, int free)
1632 {
1633         struct iphdr *iph;
1634         unsigned char *ptr;
1635 
1636         /* Sanity check */
1637         if (dev == NULL) 
1638         {
1639                 printk("IP: ip_queue_xmit dev = NULL\n");
1640                 return;
1641         }
1642   
1643         IS_SKB(skb);
1644         
1645         /*
1646          *      Do some book-keeping in the packet for later
1647          */
1648 
1649 
1650         skb->dev = dev;
1651         skb->when = jiffies;
1652   
1653         /*
1654          *      Find the IP header and set the length. This is bad
1655          *      but once we get the skb data handling code in the
1656          *      hardware will push its header sensibly and we will
1657          *      set skb->ip_hdr to avoid this mess and the fixed
1658          *      header length problem
1659          */
1660          
1661         ptr = skb->data;
1662         ptr += dev->hard_header_len;
1663         iph = (struct iphdr *)ptr;
1664         skb->ip_hdr = iph;
1665         iph->tot_len = ntohs(skb->len-dev->hard_header_len);
1666 
1667         /*
1668          *      No reassigning numbers to fragments...
1669          */
1670          
1671         if(free!=2)
1672                 iph->id      = htons(ip_id_count++);
1673         else
1674                 free=1;
1675                 
1676         /* All buffers without an owner socket get freed */
1677         if (sk == NULL) 
1678                 free = 1;
1679         
1680         skb->free = free;               
1681 
1682         /*
1683          *      Do we need to fragment. Again this is inefficient. 
1684          *      We need to somehow lock the original buffer and use
1685          *      bits of it.
1686          */
1687          
1688         if(skb->len > dev->mtu)
1689         {
1690                 ip_fragment(sk,skb,dev,0);
1691                 IS_SKB(skb);
1692                 kfree_skb(skb,FREE_WRITE);
1693                 return;
1694         }
1695   
1696         /*
1697          *      Add an IP checksum
1698          */
1699          
1700         ip_send_check(iph);
1701         
1702         /*
1703          *      Print the frame when debugging
1704          */
1705 
1706         /*
1707          *      More debugging. You cannot queue a packet already on a list
1708          *      Spot this and moan loudly.
1709          */
1710         if (skb->next != NULL) 
1711         {
1712                 printk("ip_queue_xmit: next != NULL\n");
1713                 skb_unlink(skb);
1714         }
1715 
1716         /*
1717          *      If a sender wishes the packet to remain unfreed
1718          *      we add it to his send queue. This arguably belongs
1719          *      in the TCP level since nobody elses uses it. BUT
1720          *      remember IPng might change all the rules.
1721          */
1722          
1723         if (!free) 
1724         {
1725                 unsigned long flags;
1726                 /* The socket now has more outstanding blocks */
1727                 
1728                 sk->packets_out++;
1729                 
1730                 /* Protect the list for a moment */
1731                 save_flags(flags);
1732                 cli();
1733                 
1734                 if (skb->link3 != NULL) 
1735                 {
1736                         printk("ip.c: link3 != NULL\n");
1737                         skb->link3 = NULL;
1738                 }
1739                 if (sk->send_head == NULL) 
1740                 {
1741                         sk->send_tail = skb;
1742                         sk->send_head = skb;
1743                 }
1744                 else 
1745                 {
1746                         sk->send_tail->link3 = skb;
1747                         sk->send_tail = skb;
1748                 }
1749                 /* skb->link3 is NULL */
1750                 
1751                 /* Interrupt restore */
1752                 restore_flags(flags);
1753                 /* Set the IP write timeout to the round trip time for the packet.
1754                    If an acknowledge has not arrived by then we may wish to act */      
1755                 reset_timer(sk, TIME_WRITE, sk->rto);
1756         } 
1757         else 
1758                 /* Remember who owns the buffer */
1759                 skb->sk = sk;
1760 
1761         /*
1762          *      If the indicated interface is up and running, send the packet. 
1763          */
1764         ip_statistics.IpOutRequests++;
1765          
1766         if (dev->flags & IFF_UP) 
1767         {
1768                 /* 
1769                  *      If we have an owner use its priority setting,
1770                  *      otherwise use NORMAL
1771                  */
1772                 
1773                 if (sk != NULL) 
1774                 {
1775                         dev_queue_xmit(skb, dev, sk->priority);
1776                 }
1777                 else 
1778                 {
1779                         dev_queue_xmit(skb, dev, SOPRI_NORMAL);
1780                 }
1781         } 
1782         else 
1783         {
1784                 ip_statistics.IpOutDiscards++;
1785                 if (free) 
1786                         kfree_skb(skb, FREE_WRITE);
1787         }
1788 }
1789 
1790 
1791 /*
1792  *      A socket has timed out on its send queue and wants to do a
1793  *      little retransmitting. Currently this means TCP.
1794  */
1795 
1796 void ip_do_retransmit(struct sock *sk, int all)
     /*
      *      Resend buffers from the socket's retransmit list without touching
      *      the backoff state (sk->backoff and sk->rto are not written here).
      *
      *      sk  - socket whose send_head list (chained through skb->link3) is
      *            walked. It is dereferenced unconditionally, so it must not
      *            be NULL.
      *      all - zero: stop after the first buffer. Non-zero: carry on down
      *            the list until it ends or the cut-off test at the bottom of
      *            the loop fires.
      *
      *      Every buffer visited gets a fresh skb->when stamp and, on TCP
      *      sockets, a new IP id, the socket's current ack/window values and
      *      recomputed IP and TCP checksums. It is handed to dev_queue_xmit()
      *      only when its device is IFF_UP and skb_device_locked() is false.
      *      sk->prot->retransmits is bumped once per buffer visited, whether
      *      or not the buffer was actually queued. sk->retransmits is read
      *      but never written by this function.
      */
1797 {
1798         struct sk_buff * skb;
1799         struct proto *prot;     /* NOTE(review): assigned below but never read in this function */
1800         struct device *dev;
1801         int retransmits;        /* local copy of sk->retransmits; see the note at the cut-off test */
1802 
1803         prot = sk->prot;
1804         skb = sk->send_head;
1805         retransmits = sk->retransmits;
1806         
1807         while (skb != NULL) 
1808         {
1809                 dev = skb->dev;
1810                 IS_SKB(skb);
                     /* Restamp the buffer with the time of this (re)send attempt */
1811                 skb->when = jiffies;
1812 
1813                 /* 
1814                  * In general it's OK just to use the old packet.  However we
1815                  * need to use the current ack and window fields.  Urg and 
1816                  * urg_ptr could possibly stand to be updated as well, but we 
1817                  * don't keep the necessary data.  That shouldn't be a problem,
1818                  * if the other end is doing the right thing.  Since we're 
1819                  * changing the packet, we have to issue a new IP identifier.
1820                  */
1821 
1822                 /* this check may be unnecessary - retransmit only for TCP */
1823                 if (sk->protocol == IPPROTO_TCP) {
1824                   struct tcphdr *th;
1825                   struct iphdr *iph;
1826                   int size;
1827 
                       /*
                        * The IP header sits just past the link-level header; the
                        * TCP header follows the IP header, whose length field
                        * (ihl) is shifted left by 2 to give a byte offset.
                        * size is what remains of the buffer from the TCP header on.
                        */
1828                   iph = (struct iphdr *)(skb->data + dev->hard_header_len);
1829                   th = (struct tcphdr *)(((char *)iph) + (iph->ihl << 2));
1830                   size = skb->len - (((unsigned char *) th) - skb->data);
1831 
1832                   iph->id = htons(ip_id_count++);
1833                   ip_send_check(iph);
1834 
                       /*
                        * NOTE(review): these store socket values into header
                        * fields using ntohl()/ntohs(). Presumably the socket
                        * values are host order, in which case htonl()/htons()
                        * would be the conventional spelling - confirm.
                        */
1835                   th->ack_seq = ntohl(sk->acked_seq);
1836                   th->window = ntohs(tcp_select_window(sk));
1837                   tcp_send_check(th, sk->saddr, sk->daddr, size, sk);
1838                 }
1839 
1840                 /* 
1841                  *      If the interface is (still) up and running, kick it. 
1842                  */
1843                 
1844                 if (dev->flags & IFF_UP) 
1845                 {
1846                         /*
1847                          *      If the packet is still being sent by the device/protocol
1848                          *      below then don't retransmit. This is both needed, and good -
1849                          *      especially with connected mode AX.25 where it stops resends
1850                          *      occurring of an as yet unsent anyway frame!
1851                          *      We still add up the counts as the round trip time wants
1852                          *      adjusting.
1853                          *
                              *      NOTE(review): the "sk &&" half of the test below is
                              *      redundant; sk has already been dereferenced above.
                              */
1854                         if (sk && !skb_device_locked(skb))
1855                         {
1856                                 /* Remove it from any existing driver queue first! */
1857                                 skb_unlink(skb);
1858                                 /* Now queue it */
1859                                 ip_statistics.IpOutRequests++;
1860                                 dev_queue_xmit(skb, dev, sk->priority);
1861                         }
1862                 }
1863                 
1864                 /*
1865                  *      Count retransmissions
1866                  *
                      *      Only the local counter and the per-protocol total are
                      *      bumped; sk->retransmits itself is left alone.
                      */
1867                 retransmits++;
1868                 sk->prot->retransmits ++;
1869                 
1870                 /*
1871                  *      Only one retransmit requested.
1872                  */
1873                 if (!all) 
1874                         break;
1875 
1876                 /*
1877                  *      This should cut it off before we send too many packets. 
1878                  *
                      *      NOTE(review): the test reads sk->retransmits, which this
                      *      loop never changes, while the local 'retransmits' that is
                      *      incremented above is never read. As written the result
                      *      cannot change from one iteration to the next unless
                      *      something outside this function updates the socket field.
                      *      Possibly the local counter was meant here - confirm.
                      */
1879                 if (sk->retransmits > sk->cong_window) 
1880                         break;
1881                 skb = skb->link3;
1882         }
1883 }
1884 
1885 /*
1886  *      This is the normal code called for timeouts.  It does the retransmission
1887  *      and then does backoff.  ip_do_retransmit is separated out because
1888  *      tcp_ack needs to send stuff from the retransmit queue without
1889  *      initiating a backoff.
1890  */
1891 
1892 void ip_retransmit(struct sock *sk, int all)
     /*
      *      Timeout-driven retransmit: resend through ip_do_retransmit() and
      *      then back the socket's retransmit timer off.
      *
      *      sk  - socket being retransmitted for.
      *      all - passed straight through to ip_do_retransmit().
      *
      *      After the resend, sk->retransmits and sk->backoff are each
      *      incremented, sk->rto is doubled but limited to 120*HZ, and the
      *      TIME_WRITE timer is re-armed with the new rto.
      */
1893 {
1894         ip_do_retransmit(sk, all);
1895 
1896         /*
1897          * Increase the timeout each time we retransmit.  Note that
1898          * we do not increase the rtt estimate.  rto is initialized
1899          * from rtt, but increases here.  Jacobson (SIGCOMM 88) suggests
1900          * that doubling rto each time is the least we can get away with.
1901          * In KA9Q, Karn uses this for the first few times, and then
1902          * goes to quadratic.  netBSD doubles, but only goes up to *64,
1903          * and clamps at 1 to 64 sec afterwards.  Note that 120 sec is
1904          * defined in the protocol as the maximum possible RTT.  I guess
1905          * we'll have to use something other than TCP to talk to the
1906          * University of Mars.
1907          */
1908 
1909         sk->retransmits++;
1910         sk->backoff++;
             /* Double the timeout, but never let it exceed 120*HZ */
1911         sk->rto = min(sk->rto << 1, 120*HZ);
1912         reset_timer(sk, TIME_WRITE, sk->rto);
1913 }
1914 
1915 /*
1916  *      Socket option code for IP. This is the end of the line after any TCP,UDP etc options on
1917  *      an IP socket.
1918  *
1919  *      We implement IP_TOS (type of service), IP_TTL (time to live).
1920  *
1921  *      Next release we will sort out IP_OPTIONS since for some people they are kind of important.
1922  */
1923  
1924 int ip_setsockopt(struct sock *sk, int level, int optname, char *optval, int optlen)
     /*
      *      Set an IP-level option on a socket.
      *
      *      sk      - socket to modify.
      *      level   - must be SOL_IP, otherwise -EOPNOTSUPP is returned.
      *      optname - IP_TOS or IP_TTL; anything else yields -ENOPROTOOPT.
      *      optval  - user-space pointer; one int-sized value is read from it.
      *                NULL yields -EINVAL; a verify_area() failure is returned
      *                unchanged.
      *      optlen  - NOTE(review): never examined; sizeof(int) bytes are
      *                always read from optval regardless of what the caller
      *                passed - confirm that is intended.
      *
      *      Returns 0 on success, -EINVAL for an out-of-range value
      *      (IP_TOS accepts 0..255, IP_TTL accepts 1..255).
      */
1925 {
1926         int val,err;
1927         
1928         if (optval == NULL) 
1929                 return(-EINVAL);
1930 
1931         err=verify_area(VERIFY_READ, optval, sizeof(int));
1932         if(err)
1933                 return err;
1934         
1935         val = get_fs_long((unsigned long *)optval);
1936 
             /* Note the level is only checked after the user value has been fetched */
1937         if(level!=SOL_IP)
1938                 return -EOPNOTSUPP;
1939 
1940         switch(optname)
1941         {
1942                 case IP_TOS:
1943                         if(val<0||val>255)
1944                                 return -EINVAL;
1945                         sk->ip_tos=val;
                             /*
                              * Two TOS values also adjust the queueing priority.
                              * Any other value leaves sk->priority as it was.
                              */
1946                         if(val==IPTOS_LOWDELAY)
1947                                 sk->priority=SOPRI_INTERACTIVE;
1948                         if(val==IPTOS_THROUGHPUT)
1949                                 sk->priority=SOPRI_BACKGROUND;
1950                         return 0;
1951                 case IP_TTL:
1952                         if(val<1||val>255)
1953                                 return -EINVAL;
1954                         sk->ip_ttl=val;
1955                         return 0;
1956                 /* IP_OPTIONS and friends go here eventually */
1957                 default:
1958                         return(-ENOPROTOOPT);
1959         }
1960 }
1961 
1962 /*
1963  *      Get the options. Note for future reference. The GET of IP options gets the
1964  *      _received_ ones. The set sets the _sent_ ones.
1965  */
1966  
1967 int ip_getsockopt(struct sock *sk, int level, int optname, char *optval, int *optlen)
     /*
      *      Read back an IP-level option from a socket.
      *
      *      sk      - socket to query.
      *      level   - must be SOL_IP, otherwise -EOPNOTSUPP is returned.
      *      optname - IP_TOS (returns sk->ip_tos) or IP_TTL (returns
      *                sk->ip_ttl); anything else yields -ENOPROTOOPT.
      *      optval  - user-space pointer that receives the int-sized value.
      *      optlen  - user-space pointer that receives sizeof(int). The value
      *                it holds on entry is not read.
      *
      *      Returns 0 on success, or the verify_area() error for whichever
      *      user pointer failed the check.
      */
1968 {
1969         int val,err;
1970         
1971         if(level!=SOL_IP)
1972                 return -EOPNOTSUPP;
1973                 
1974         switch(optname)
1975         {
1976                 case IP_TOS:
1977                         val=sk->ip_tos;
1978                         break;
1979                 case IP_TTL:
1980                         val=sk->ip_ttl;
1981                         break;
1982                 default:
1983                         return(-ENOPROTOOPT);
1984         }
             /*
              * NOTE(review): *optlen is written before optval is verified, so
              * if the optval check below fails the caller's length has
              * already been overwritten even though an error is returned.
              */
1985         err=verify_area(VERIFY_WRITE, optlen, sizeof(int));
1986         if(err)
1987                 return err;
1988         put_fs_long(sizeof(int),(unsigned long *) optlen);
1989 
1990         err=verify_area(VERIFY_WRITE, optval, sizeof(int));
1991         if(err)
1992                 return err;
1993         put_fs_long(val,(unsigned long *)optval);
1994 
1995         return(0);
1996 }
1997 
1998 /*
1999  *      IP protocol layer initialiser
2000  */
2001  
2002 static struct packet_type ip_packet_type = 
     /*
      * The packet-type record that ip_init() completes and passes to
      * dev_add_pack(). Member names other than 'type' are not visible
      * from this file section, so the notes below are hedged.
      */
2003 {
2004         0,      /* presumably 'type': left 0 here, ip_init() stores htons(ETH_P_IP) at run time (was: MUTTER ntohs(ETH_P_IP),) */
2005         0,              /* copy */
2006         ip_rcv,         /* the IP receive routine defined earlier in this file */
2007         NULL,           /* remaining members left NULL; their meaning is not shown here */
2008         NULL,
2009 };
2010  
2011  
2012 /*
2013  *      IP registers the packet type. The subprotocol initialiser calls are currently commented out.
2014  */
2015  
2016 void ip_init(void)
     /*
      * Fill in the type of ip_packet_type with htons(ETH_P_IP) and register
      * the record through dev_add_pack(). Takes no arguments and returns
      * nothing. The four sub-protocol initialiser calls at the bottom are
      * commented out, so nothing else is invoked from here.
      */
2017 {
             /* The static initialiser leaves this 0, so it is set at run time */
2018         ip_packet_type.type=htons(ETH_P_IP);
2019         dev_add_pack(&ip_packet_type);
2020 /*      ip_raw_init();
2021         ip_packet_init();
2022         ip_tcp_init();
2023         ip_udp_init();*/
2024 }
/* */
root/net/inet/ip.c

DEFINITIONS