root/net/inet/ip.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. ip_ioctl
  2. strict_route
  3. loose_route
  4. ip_route_check
  5. build_options
  6. ip_send
  7. ip_build_header
  8. do_options
  9. ip_fast_csum
  10. ip_compute_csum
  11. ip_csum
  12. ip_send_check
  13. ip_frag_create
  14. ip_find
  15. ip_free
  16. ip_expire
  17. ip_create
  18. ip_done
  19. ip_glue
  20. ip_defrag
  21. ip_fragment
  22. ip_forward
  23. ip_rcv
  24. ip_queue_xmit
  25. ip_do_retransmit
  26. ip_retransmit
  27. ip_setsockopt
  28. ip_getsockopt
  29. ip_init

   1 /*
   2  * INET         An implementation of the TCP/IP protocol suite for the LINUX
   3  *              operating system.  INET is implemented using the  BSD Socket
   4  *              interface as the means of communication with the user level.
   5  *
   6  *              The Internet Protocol (IP) module.
   7  *
   8  * Version:     @(#)ip.c        1.0.16b 9/1/93
   9  *
  10  * Authors:     Ross Biro, <bir7@leland.Stanford.Edu>
  11  *              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
  12  *              Donald Becker, <becker@super.org>
  13  *              Alan Cox, <gw4pts@gw4pts.ampr.org>
  14  *
  15  * Fixes:
  16  *              Alan Cox        :       Commented a couple of minor bits of surplus code
  17  *              Alan Cox        :       Undefining IP_FORWARD doesn't include the code
  18  *                                      (just stops a compiler warning).
  19  *              Alan Cox        :       Frames with >=MAX_ROUTE record routes, strict routes or loose routes
  20  *                                      are junked rather than corrupting things.
  21  *              Alan Cox        :       Frames to bad broadcast subnets are dumped
  22  *                                      We used to process them non broadcast and
  23  *                                      boy could that cause havoc.
  24  *              Alan Cox        :       ip_forward sets the free flag on the
  25  *                                      new frame it queues. Still crap because
  26  *                                      it copies the frame but at least it
  27  *                                      doesn't eat memory too.
  28  *              Alan Cox        :       Generic queue code and memory fixes.
  29  *              Fred Van Kempen :       IP fragment support (borrowed from NET2E)
  30  *              Gerhard Koerting:       Forward fragmented frames correctly.
  31  *              Gerhard Koerting:       Fixes to my fix of the above 8-).
  32  *              Gerhard Koerting:       IP interface addressing fix.
  33  *              Linus Torvalds  :       More robustness checks
  34  *              Alan Cox        :       Even more checks: Still not as robust as it ought to be
  35  *              Alan Cox        :       Save IP header pointer for later
  36  *              Alan Cox        :       ip option setting
  37  *              Alan Cox        :       Use ip_tos/ip_ttl settings
  38  *              Alan Cox        :       Fragmentation bogosity removed
  39  *                                      (Thanks to Mark.Bush@prg.ox.ac.uk)
  40  *              Dmitry Gorodchanin :    Send of a raw packet crash fix.
  41  *              Alan Cox        :       Silly ip bug when an overlength
  42  *                                      fragment turns up. Now frees the
  43  *                                      queue.
  44  *              Linus Torvalds/ :       Memory leakage on fragmentation
  45  *              Alan Cox        :       handling.
  46  *              Gerhard Koerting:       Forwarding uses IP priority hints
  47  *              Teemu Rantanen  :       Fragment problems.
  48  *              Alan Cox        :       General cleanup, comments and reformat
  49  *              Alan Cox        :       SNMP statistics
  50  *              Alan Cox        :       BSD address rule semantics. Also see
  51  *                                      UDP as there is a nasty checksum issue
  52  *                                      if you do things the wrong way.
  53  *              Alan Cox        :       Always defrag, moved IP_FORWARD to the config.in file
  54  *              Alan Cox        :       IP options adjust sk->priority.
  55  *              Pedro Roque     :       Fix mtu/length error in ip_forward.
  56  *              Alan Cox        :       Avoid ip_chk_addr when possible.
  57  *
  58  * To Fix:
  59  *              IP option processing is mostly not needed. ip_forward needs to know about routing rules
  60  *              and time stamp but that's about all. Use the route mtu field here too
  61  *
  62  *              This program is free software; you can redistribute it and/or
  63  *              modify it under the terms of the GNU General Public License
  64  *              as published by the Free Software Foundation; either version
  65  *              2 of the License, or (at your option) any later version.
  66  */
  67 #include <asm/segment.h>
  68 #include <asm/system.h>
  69 #include <linux/types.h>
  70 #include <linux/kernel.h>
  71 #include <linux/sched.h>
  72 #include <linux/string.h>
  73 #include <linux/errno.h>
  74 #include <linux/socket.h>
  75 #include <linux/sockios.h>
  76 #include <linux/in.h>
  77 #include <linux/inet.h>
  78 #include <linux/netdevice.h>
  79 #include <linux/etherdevice.h>
  80 #include "snmp.h"
  81 #include "ip.h"
  82 #include "protocol.h"
  83 #include "route.h"
  84 #include "tcp.h"
  85 #include <linux/skbuff.h>
  86 #include "sock.h"
  87 #include "arp.h"
  88 #include "icmp.h"
  89 
  90 #define CONFIG_IP_DEFRAG
  91 
     /* Declared here, defined elsewhere; neither is referenced in the part of the file shown here. */
  92 extern int last_retran;
  93 extern void sort_send(struct sock *sk);
  94 
     /* Smaller of two values. Each argument may be evaluated twice, so avoid side effects. */
  95 #define min(a,b)        ((a)<(b)?(a):(b))
     /* True when the top octet of a network-byte-order IPv4 address is 127 (0x7f). */
  96 #define LOOPBACK(x)     (((x) & htonl(0xff000000)) == htonl(0x7f000000))
  97 
  98 /*
  99  *      SNMP management statistics
 100  */
 101 
     /* Only the first two members are initialised explicitly; the rest start at zero. */
 102 struct ip_mib ip_statistics={1,64,};    /* Forwarding=Yes, Default TTL=64 */
 103 
 /*
  *      ioctl() entry point for the ip device.  No request code is
  *      implemented: every command is rejected with -EINVAL, and
  *      neither 'sk' nor 'arg' is examined.  This hook is scheduled
  *      to disappear.
  */

 int ip_ioctl(struct sock *sk, int cmd, unsigned long arg)
 {
         /* There are no recognised commands, so there is nothing to dispatch on. */
         return -EINVAL;
 }
 118 
 119 
 120 /* these two routines will do routing. */
 121 
     /*
      * Placeholder for strict source route handling.  The body is empty,
      * so calling it has no effect; both arguments are ignored.
      */
 122 static void
 123 strict_route(struct iphdr *iph, struct options *opt)
     /* [previous][next][first][last][top][bottom][index][help] */
 124 {
 125 }
 126 
 127 
     /*
      * Placeholder for loose source route handling.  The body is empty,
      * so calling it has no effect; both arguments are ignored.
      */
 128 static void
 129 loose_route(struct iphdr *iph, struct options *opt)
     /* [previous][next][first][last][top][bottom][index][help] */
 130 {
 131 }
 132 
 133 
 134 
 135 
 136 /* This routine will check to see if we have lost a gateway. */
     /* Not implemented yet: the body is empty and 'daddr' is ignored. */
 137 void
 138 ip_route_check(unsigned long daddr)
     /* [previous][next][first][last][top][bottom][index][help] */
 139 {
 140 }
 141 
 142 
 143 #if 0
 144 /* this routine puts the options at the end of an ip header. */
 145 static int
 146 build_options(struct iphdr *iph, struct options *opt)
     /* [previous][next][first][last][top][bottom][index][help] */
 147 {
 148   unsigned char *ptr;
 149   /* currently we don't support any options. */
 150   ptr = (unsigned char *)(iph+1);
 151   *ptr = 0;
 152   return (4);
 153 }
 154 #endif
 155 
 156 
 157 /*
 158  *      Take an skb, and fill in the MAC header.
 159  */
 160 
 161 static int ip_send(struct sk_buff *skb, unsigned long daddr, int len, struct device *dev, unsigned long saddr)
     /* [previous][next][first][last][top][bottom][index][help] */
 162 {
 163         int mac = 0;
 164 
 165         skb->dev = dev;
 166         skb->arp = 1;
 167         if (dev->hard_header)
 168         {
 169                 /*
 170                  *      Build a hardware header. Source address is our mac, destination unknown
 171                  *      (rebuild header will sort this out)
 172                  */
 173                 mac = dev->hard_header(skb->data, dev, ETH_P_IP, NULL, NULL, len, skb);
 174                 if (mac < 0)
 175                 {
 176                         mac = -mac;
 177                         skb->arp = 0;
 178                         skb->raddr = daddr;     /* next routing address */
 179                 }
 180         }
 181         return mac;
 182 }
 183 
     /* Global counter starting at 0; presumably the source of IP datagram ids - its users are outside this part of the file (TODO confirm). */
 184 int ip_id_count = 0;
 185 
 186 /*
 187  * This routine builds the appropriate hardware/IP headers for
 188  * the routine.  It assumes that if *dev != NULL then the
 189  * protocol knows what it's doing, otherwise it uses the
 190  * routing/ARP tables to select a device struct.
 191  */
 192 int ip_build_header(struct sk_buff *skb, unsigned long saddr, unsigned long daddr,
     /* [previous][next][first][last][top][bottom][index][help] */
 193                 struct device **dev, int type, struct options *opt, int len, int tos, int ttl)
 194 {
 195         static struct options optmem;
 196         struct iphdr *iph;
 197         struct rtable *rt;
 198         unsigned char *buff;
 199         unsigned long raddr;
 200         int tmp;
 201         unsigned long src;
 202 
 203         /*
 204          *      If there is no 'from' address as yet, then make it our loopback
 205          */
 206 
 207         if (saddr == 0)
 208                 saddr = ip_my_addr();
 209 
 210         buff = skb->data;
 211 
 212         /*
 213          *      See if we need to look up the device.
 214          */
 215 
 216         if (*dev == NULL)
 217         {
 218                 if(skb->localroute)
 219                         rt = ip_rt_local(daddr, &optmem, &src);
 220                 else
 221                         rt = ip_rt_route(daddr, &optmem, &src);
 222                 if (rt == NULL)
 223                 {
 224                         ip_statistics.IpOutNoRoutes++;
 225                         return(-ENETUNREACH);
 226                 }
 227 
 228                 *dev = rt->rt_dev;
 229                 /*
 230                  *      If the frame is from us and going off machine it MUST MUST MUST
 231                  *      have the output device ip address and never the loopback
 232                  */
 233                 if (LOOPBACK(saddr) && !LOOPBACK(daddr))
 234                         saddr = src;/*rt->rt_dev->pa_addr;*/
 235                 raddr = rt->rt_gateway;
 236 
 237                 opt = &optmem;
 238         }
 239         else
 240         {
 241                 /*
 242                  *      We still need the address of the first hop.
 243                  */
 244                 if(skb->localroute)
 245                         rt = ip_rt_local(daddr, &optmem, &src);
 246                 else
 247                         rt = ip_rt_route(daddr, &optmem, &src);
 248                 /*
 249                  *      If the frame is from us and going off machine it MUST MUST MUST
 250                  *      have the output device ip address and never the loopback
 251                  */
 252                 if (LOOPBACK(saddr) && !LOOPBACK(daddr))
 253                         saddr = src;/*rt->rt_dev->pa_addr;*/
 254 
 255                 raddr = (rt == NULL) ? 0 : rt->rt_gateway;
 256         }
 257 
 258         /*
 259          *      No gateway so aim at the real destination
 260          */
 261         if (raddr == 0)
 262                 raddr = daddr;
 263 
 264         /*
 265          *      Now build the MAC header.
 266          */
 267 
 268         tmp = ip_send(skb, raddr, len, *dev, saddr);
 269         buff += tmp;
 270         len -= tmp;
 271 
 272         /*
 273          *      Book keeping
 274          */
 275 
 276         skb->dev = *dev;
 277         skb->saddr = saddr;
 278         if (skb->sk)
 279                 skb->sk->saddr = saddr;
 280 
 281         /*
 282          *      Now build the IP header.
 283          */
 284 
 285         /*
 286          *      If we are using IPPROTO_RAW, then we don't need an IP header, since
 287          *      one is being supplied to us by the user
 288          */
 289 
 290         if(type == IPPROTO_RAW)
 291                 return (tmp);
 292 
 293         iph = (struct iphdr *)buff;
 294         iph->version  = 4;
 295         iph->tos      = tos;
 296         iph->frag_off = 0;
 297         iph->ttl      = ttl;
 298         iph->daddr    = daddr;
 299         iph->saddr    = saddr;
 300         iph->protocol = type;
 301         iph->ihl      = 5;
 302 
 303         /* Setup the IP options. */
 304 #ifdef Not_Yet_Avail
 305         build_options(iph, opt);
 306 #endif
 307 
 308         return(20 + tmp);       /* IP header plus MAC header size */
 309 }
 310 
 311 
     /*
      * Reset the parsed-option state in *opt and return 0.
      *
      * The function returns unconditionally right after clearing the
      * fields, so the option parser that follows is never executed and
      * the header at 'iph' is never examined.  The parser is unreachable
      * code; the "return 1 on a malformed option" paths inside it can
      * therefore never be taken.
      */
 312 static int
 313 do_options(struct iphdr *iph, struct options *opt)
     /* [previous][next][first][last][top][bottom][index][help] */
 314 {
 315   unsigned char *buff;
 316   int done = 0;
 317   int i, len = sizeof(struct iphdr);
 318 
 319   /* Zero out the options. */
 320   opt->record_route.route_size = 0;
 321   opt->loose_route.route_size  = 0;
 322   opt->strict_route.route_size = 0;
 323   opt->tstamp.ptr              = 0;
 324   opt->security                = 0;
 325   opt->compartment             = 0;
 326   opt->handling                = 0;
 327   opt->stream                  = 0;
 328   opt->tcc                     = 0;
     /* Unconditional exit: nothing below this line can run. */
 329   return(0);
 330 
 331   /* Advance the pointer to start at the options. */
 332   buff = (unsigned char *)(iph + 1);
 333 
 334   /* Now start the processing. */
     /* (unreachable) 'len' counts header octets consumed so far; stop at ihl*4 or at IPOPT_END. */
 335   while (!done && len < iph->ihl*4) switch(*buff) {
 336         case IPOPT_END:
 337                 done = 1;
 338                 break;
 339         case IPOPT_NOOP:
 340                 buff++;
 341                 len++;
 342                 break;
 343         case IPOPT_SEC:
 344                 buff++;
 345                 if (*buff != 11) return(1);
 346                 buff++;
 347                 opt->security = ntohs(*(unsigned short *)buff);
 348                 buff += 2;
 349                 opt->compartment = ntohs(*(unsigned short *)buff);
 350                 buff += 2;
 351                 opt->handling = ntohs(*(unsigned short *)buff);
 352                 buff += 2;
 353                 opt->tcc = ((*buff) << 16) + ntohs(*(unsigned short *)(buff+1));
 354                 buff += 3;
 355                 len += 11;
 356                 break;
 357         case IPOPT_LSRR:
 358                 buff++;
 359                 if ((*buff - 3)% 4 != 0) return(1);
 360                 len += *buff;
 361                 opt->loose_route.route_size = (*buff -3)/4;
 362                 buff++;
 363                 if (*buff % 4 != 0) return(1);
 364                 opt->loose_route.pointer = *buff/4 - 1;
 365                 buff++;
 366                 buff++;
 367                 for (i = 0; i < opt->loose_route.route_size; i++) {
     /* More than MAX_ROUTE entries: reject rather than overrun route[]. */
 368                         if(i>=MAX_ROUTE)
 369                                 return(1);
 370                         opt->loose_route.route[i] = *(unsigned long *)buff;
 371                         buff += 4;
 372                 }
 373                 break;
 374         case IPOPT_SSRR:
 375                 buff++;
 376                 if ((*buff - 3)% 4 != 0) return(1);
 377                 len += *buff;
 378                 opt->strict_route.route_size = (*buff -3)/4;
 379                 buff++;
 380                 if (*buff % 4 != 0) return(1);
 381                 opt->strict_route.pointer = *buff/4 - 1;
 382                 buff++;
 383                 buff++;
 384                 for (i = 0; i < opt->strict_route.route_size; i++) {
 385                         if(i>=MAX_ROUTE)
 386                                 return(1);
 387                         opt->strict_route.route[i] = *(unsigned long *)buff;
 388                         buff += 4;
 389                 }
 390                 break;
 391         case IPOPT_RR:
 392                 buff++;
 393                 if ((*buff - 3)% 4 != 0) return(1);
 394                 len += *buff;
 395                 opt->record_route.route_size = (*buff -3)/4;
 396                 buff++;
 397                 if (*buff % 4 != 0) return(1);
 398                 opt->record_route.pointer = *buff/4 - 1;
 399                 buff++;
 400                 buff++;
 401                 for (i = 0; i < opt->record_route.route_size; i++) {
 402                         if(i>=MAX_ROUTE)
 403                                 return 1;
 404                         opt->record_route.route[i] = *(unsigned long *)buff;
 405                         buff += 4;
 406                 }
 407                 break;
 408         case IPOPT_SID:
 409                 len += 4;
 410                 buff +=2;
 411                 opt->stream = *(unsigned short *)buff;
 412                 buff += 2;
 413                 break;
 414         case IPOPT_TIMESTAMP:
 415                 buff++;
 416                 len += *buff;
 417                 if (*buff % 4 != 0) return(1);
 418                 opt->tstamp.len = *buff / 4 - 1;
 419                 buff++;
 420                 if ((*buff - 1) % 4 != 0) return(1);
 421                 opt->tstamp.ptr = (*buff-1)/4;
 422                 buff++;
 423                 opt->tstamp.x.full_char = *buff;
 424                 buff++;
     /* NOTE(review): unlike the route cases, this loop has no bound check against the size of data[]. */
 425                 for (i = 0; i < opt->tstamp.len; i++) {
 426                         opt->tstamp.data[i] = *(unsigned long *)buff;
 427                         buff += 4;
 428                 }
 429                 break;
 430         default:
 431                 return(1);
 432   }
 433 
     /* (unreachable) With no record route present, seed it from the strict or loose route. */
 434   if (opt->record_route.route_size == 0) {
 435         if (opt->strict_route.route_size != 0) {
 436                 memcpy(&(opt->record_route), &(opt->strict_route),
 437                                              sizeof(opt->record_route));
 438         } else if (opt->loose_route.route_size != 0) {
 439                 memcpy(&(opt->record_route), &(opt->loose_route),
 440                                              sizeof(opt->record_route));
 441         }
 442   }
 443 
     /* (unreachable) A source route whose pointer has not reached its size is handed to the routing stub. */
 444   if (opt->strict_route.route_size != 0 &&
 445       opt->strict_route.route_size != opt->strict_route.pointer) {
 446         strict_route(iph, opt);
 447         return(0);
 448   }
 449 
 450   if (opt->loose_route.route_size != 0 &&
 451       opt->loose_route.route_size != opt->loose_route.pointer) {
 452         loose_route(iph, opt);
 453         return(0);
 454   }
 455 
 456   return(0);
 457 }
 458 
 459 /*
 460  *      This is a version of ip_compute_csum() optimized for IP headers, which
 461  *      always checksum on 4 octet boundaries.
 462  */
     /*
      *      buff    start of the data to sum
      *      wlen    number of 32 bit words to sum (callers in this file pass iph->ihl)
      *
      *      Returns the complement of the 16 bit folded sum.  When wlen is 0
      *      the asm is skipped, sum stays 0 and the result is 0xffff.
      *      The body is x86 inline assembly.
      */
 463 
 464 static inline unsigned short ip_fast_csum(unsigned char * buff, int wlen)
     /* [previous][next][first][last][top][bottom][index][help] */
 465 {
 466         unsigned long sum = 0;
 467 
 468         if (wlen)
 469         {
     /* 'bogus' only exists to reserve %eax, which lodsl loads each word into. */
 470         unsigned long bogus;
     /*
      * Loop: load a 32 bit word (lodsl) and add it with carry; decl leaves the
      * carry flag alone, so the carry chains from one word to the next.
      * After the loop: add the last carry back in, then fold the upper
      * 16 bits into the lower 16 bits, again with end-around carry.
      */
 471          __asm__("clc\n"
 472                 "1:\t"
 473                 "lodsl\n\t"
 474                 "adcl %3, %0\n\t"
 475                 "decl %2\n\t"
 476                 "jne 1b\n\t"
 477                 "adcl $0, %0\n\t"
 478                 "movl %0, %3\n\t"
 479                 "shrl $16, %3\n\t"
 480                 "addw %w3, %w0\n\t"
 481                 "adcw $0, %w0"
 482             : "=r" (sum), "=S" (buff), "=r" (wlen), "=a" (bogus)
 483             : "0"  (sum),  "1" (buff),  "2" (wlen));
 484         }
     /* Only the low 16 bits hold the folded sum; complement and mask them. */
 485         return (~sum) & 0xffff;
 486 }
 487 
 /*
  * This routine does all the checksum computations that don't
  * require anything special (like copying or special headers).
  *
  *      buff    start of the data to checksum (any alignment)
  *      len     number of octets; an odd trailing octet is summed as if
  *              followed by a zero octet
  *
  * Returns the complement of the 16 bit ones' complement sum of the data,
  * in host byte order: storing the result as an unsigned short at the
  * checksum position puts the correct octets on the wire.  A length of
  * zero (or less) yields 0xffff.
  *
  * Written in portable C.  The previous inline assembly named its operand
  * registers in the clobber list as well, which current compilers reject,
  * and it only built on x86.  The result is the same, 16 bit word for
  * 16 bit word.
  */

 unsigned short ip_compute_csum(unsigned char * buff, int len)
 {
         unsigned long sum = 0;
         unsigned short word;

         /* Sum the data as native 16 bit words; memcpy avoids unaligned loads. */
         while (len > 1)
         {
                 memcpy(&word, buff, sizeof(word));
                 sum += word;
                 /* Fold early so the accumulator cannot wrap on very long buffers. */
                 if (sum & 0x80000000UL)
                         sum = (sum & 0xffff) + (sum >> 16);
                 buff += 2;
                 len -= 2;
         }

         /* Odd trailing octet: the first octet of a word whose second octet is zero. */
         if (len > 0)
         {
                 word = 0;
                 memcpy(&word, buff, 1);
                 sum += word;
         }

         /* End-around carry: fold everything above bit 15 back into the low 16 bits. */
         while (sum >> 16)
                 sum = (sum & 0xffff) + (sum >> 16);

         return (unsigned short)(~sum & 0xffff);
 }
 536 
 537 /*
 538  *      Check the header of an incoming IP datagram.  This version is still used in slhc.c.
 539  */
 540 
 541 int ip_csum(struct iphdr *iph)
     /* [previous][next][first][last][top][bottom][index][help] */
 542 {
 543         return ip_fast_csum((unsigned char *)iph, iph->ihl);
 544 }
 545 
 546 /*
 547  *      Generate a checksum for an outgoing IP datagram.
 548  */
 549 
 550 static void ip_send_check(struct iphdr *iph)
     /* [previous][next][first][last][top][bottom][index][help] */
 551 {
 552         iph->check = 0;
 553         iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
 554 }
 555 
 556 /************************ Fragment Handlers From NET2E not yet with tweaks to beat 4K **********************************/
 557 
 558 
 559 /*
 560  *      This fragment handler is a bit of a heap. On the other hand it works quite
 561  *      happily and handles things quite well.
 562  */
 563 
     /* Head of the doubly linked list of datagrams being reassembled: ip_create() inserts at the head, ip_free() unlinks; NULL when empty. */
 564 static struct ipq *ipqueue = NULL;              /* IP fragment queue    */
 565 
 566 /*
 567  *      Create a new fragment entry.
 568  */
 569 
 570 static struct ipfrag *ip_frag_create(int offset, int end, struct sk_buff *skb, unsigned char *ptr)
     /* [previous][next][first][last][top][bottom][index][help] */
 571 {
 572         struct ipfrag *fp;
 573 
 574         fp = (struct ipfrag *) kmalloc(sizeof(struct ipfrag), GFP_ATOMIC);
 575         if (fp == NULL)
 576         {
 577                 printk("IP: frag_create: no memory left !\n");
 578                 return(NULL);
 579         }
 580         memset(fp, 0, sizeof(struct ipfrag));
 581 
 582         /* Fill in the structure. */
 583         fp->offset = offset;
 584         fp->end = end;
 585         fp->len = end - offset;
 586         fp->skb = skb;
 587         fp->ptr = ptr;
 588 
 589         return(fp);
 590 }
 591 
 592 
 593 /*
 594  *      Find the correct entry in the "incomplete datagrams" queue for
 595  *      this IP datagram, and return the queue entry address if found.
 596  */
 597 
 598 static struct ipq *ip_find(struct iphdr *iph)
     /* [previous][next][first][last][top][bottom][index][help] */
 599 {
 600         struct ipq *qp;
 601         struct ipq *qplast;
 602 
 603         cli();
 604         qplast = NULL;
 605         for(qp = ipqueue; qp != NULL; qplast = qp, qp = qp->next)
 606         {
 607                 if (iph->id== qp->iph->id && iph->saddr == qp->iph->saddr &&
 608                         iph->daddr == qp->iph->daddr && iph->protocol == qp->iph->protocol)
 609                 {
 610                         del_timer(&qp->timer);  /* So it doesn't vanish on us. The timer will be reset anyway */
 611                         sti();
 612                         return(qp);
 613                 }
 614         }
 615         sti();
 616         return(NULL);
 617 }
 618 
 619 
 620 /*
 621  *      Remove an entry from the "incomplete datagrams" queue, either
 622  *      because we completed, reassembled and processed it, or because
 623  *      it timed out.
 624  */
 625 
 626 static void ip_free(struct ipq *qp)
     /* [previous][next][first][last][top][bottom][index][help] */
 627 {
 628         struct ipfrag *fp;
 629         struct ipfrag *xp;
 630 
 631         /*
 632          * Stop the timer for this entry.
 633          */
 634 
 635         del_timer(&qp->timer);
 636 
 637         /* Remove this entry from the "incomplete datagrams" queue. */
 638         cli();
 639         if (qp->prev == NULL)
 640         {
 641                 ipqueue = qp->next;
 642                 if (ipqueue != NULL)
 643                         ipqueue->prev = NULL;
 644         }
 645         else
 646         {
 647                 qp->prev->next = qp->next;
 648                 if (qp->next != NULL)
 649                         qp->next->prev = qp->prev;
 650         }
 651 
 652         /* Release all fragment data. */
 653 
 654         fp = qp->fragments;
 655         while (fp != NULL)
 656         {
 657                 xp = fp->next;
 658                 IS_SKB(fp->skb);
 659                 kfree_skb(fp->skb,FREE_READ);
 660                 kfree_s(fp, sizeof(struct ipfrag));
 661                 fp = xp;
 662         }
 663 
 664         /* Release the MAC header. */
 665         kfree_s(qp->mac, qp->maclen);
 666 
 667         /* Release the IP header. */
 668         kfree_s(qp->iph, qp->ihlen + 8);
 669 
 670         /* Finally, release the queue descriptor itself. */
 671         kfree_s(qp, sizeof(struct ipq));
 672         sti();
 673 }
 674 
 675 
 676 /*
 677  *      Oops- a fragment queue timed out.  Kill it and send an ICMP reply.
 678  */
 679 
 680 static void ip_expire(unsigned long arg)
     /* [previous][next][first][last][top][bottom][index][help] */
 681 {
 682         struct ipq *qp;
 683 
 684         qp = (struct ipq *)arg;
 685 
 686         /*
 687          *      Send an ICMP "Fragment Reassembly Timeout" message.
 688          */
 689 
 690         ip_statistics.IpReasmTimeout++;
 691         ip_statistics.IpReasmFails++;   
 692         /* This if is always true... shrug */
 693         if(qp->fragments!=NULL)
 694                 icmp_send(qp->fragments->skb,ICMP_TIME_EXCEEDED,
 695                                 ICMP_EXC_FRAGTIME, qp->dev);
 696 
 697         /*
 698          *      Nuke the fragment queue.
 699          */
 700         ip_free(qp);
 701 }
 702 
 703 
 704 /*
 705  *      Add an entry to the 'ipq' queue for a newly received IP datagram.
 706  *      We will (hopefully :-) receive all other fragments of this datagram
 707  *      in time, so we just create a queue for this datagram, in which we
 708  *      will insert the received fragments at their respective positions.
 709  */
 710 
 711 static struct ipq *ip_create(struct sk_buff *skb, struct iphdr *iph, struct device *dev)
     /* [previous][next][first][last][top][bottom][index][help] */
 712 {
 713         struct ipq *qp;
 714         int maclen;
 715         int ihlen;
 716 
 717         qp = (struct ipq *) kmalloc(sizeof(struct ipq), GFP_ATOMIC);
 718         if (qp == NULL)
 719         {
 720                 printk("IP: create: no memory left !\n");
 721                 return(NULL);
 722                 skb->dev = qp->dev;
 723         }
 724         memset(qp, 0, sizeof(struct ipq));
 725 
 726         /*
 727          *      Allocate memory for the MAC header.
 728          *
 729          *      FIXME: We have a maximum MAC address size limit and define
 730          *      elsewhere. We should use it here and avoid the 3 kmalloc() calls
 731          */
 732 
 733         maclen = ((unsigned long) iph) - ((unsigned long) skb->data);
 734         qp->mac = (unsigned char *) kmalloc(maclen, GFP_ATOMIC);
 735         if (qp->mac == NULL)
 736         {
 737                 printk("IP: create: no memory left !\n");
 738                 kfree_s(qp, sizeof(struct ipq));
 739                 return(NULL);
 740         }
 741 
 742         /*
 743          *      Allocate memory for the IP header (plus 8 octets for ICMP).
 744          */
 745 
 746         ihlen = (iph->ihl * sizeof(unsigned long));
 747         qp->iph = (struct iphdr *) kmalloc(ihlen + 8, GFP_ATOMIC);
 748         if (qp->iph == NULL)
 749         {
 750                 printk("IP: create: no memory left !\n");
 751                 kfree_s(qp->mac, maclen);
 752                 kfree_s(qp, sizeof(struct ipq));
 753                 return(NULL);
 754         }
 755 
 756         /* Fill in the structure. */
 757         memcpy(qp->mac, skb->data, maclen);
 758         memcpy(qp->iph, iph, ihlen + 8);
 759         qp->len = 0;
 760         qp->ihlen = ihlen;
 761         qp->maclen = maclen;
 762         qp->fragments = NULL;
 763         qp->dev = dev;
 764 
 765         /* Start a timer for this entry. */
 766         qp->timer.expires = IP_FRAG_TIME;               /* about 30 seconds     */
 767         qp->timer.data = (unsigned long) qp;            /* pointer to queue     */
 768         qp->timer.function = ip_expire;                 /* expire function      */
 769         add_timer(&qp->timer);
 770 
 771         /* Add this entry to the queue. */
 772         qp->prev = NULL;
 773         cli();
 774         qp->next = ipqueue;
 775         if (qp->next != NULL)
 776                 qp->next->prev = qp;
 777         ipqueue = qp;
 778         sti();
 779         return(qp);
 780 }
 781 
 782 
 783 /*
 784  *      See if a fragment queue is complete.
 785  */
 786 
 787 static int ip_done(struct ipq *qp)
     /* [previous][next][first][last][top][bottom][index][help] */
 788 {
 789         struct ipfrag *fp;
 790         int offset;
 791 
 792         /* Only possible if we received the final fragment. */
 793         if (qp->len == 0)
 794                 return(0);
 795 
 796         /* Check all fragment offsets to see if they connect. */
 797         fp = qp->fragments;
 798         offset = 0;
 799         while (fp != NULL)
 800         {
 801                 if (fp->offset > offset)
 802                         return(0);      /* fragment(s) missing */
 803                 offset = fp->end;
 804                 fp = fp->next;
 805         }
 806 
 807         /* All fragments are present. */
 808         return(1);
 809 }
 810 
 811 
 812 /*
 813  *      Build a new IP datagram from all its fragments.
 814  *
 815  *      FIXME: We copy here because we lack an effective way of handling lists
 816  *      of bits on input. Until the new skb data handling is in I'm not going
 817  *      to touch this with a bargepole. This also causes a 4Kish limit on
 818  *      packet sizes.
 819  */
 820 
 821 static struct sk_buff *ip_glue(struct ipq *qp)
     /* [previous][next][first][last][top][bottom][index][help] */
 822 {
 823         struct sk_buff *skb;
 824         struct iphdr *iph;
 825         struct ipfrag *fp;
 826         unsigned char *ptr;
 827         int count, len;
 828 
 829         /*
 830          *      Allocate a new buffer for the datagram.
 831          */
 832 
 833         len = qp->maclen + qp->ihlen + qp->len;
 834 
 835         if ((skb = alloc_skb(len,GFP_ATOMIC)) == NULL)
 836         {
 837                 ip_statistics.IpReasmFails++;
 838                 printk("IP: queue_glue: no memory for gluing queue 0x%X\n", (int) qp);
 839                 ip_free(qp);
 840                 return(NULL);
 841         }
 842 
 843         /* Fill in the basic details. */
 844         skb->len = (len - qp->maclen);
 845         skb->h.raw = skb->data;
 846         skb->free = 1;
 847 
 848         /* Copy the original MAC and IP headers into the new buffer. */
 849         ptr = (unsigned char *) skb->h.raw;
 850         memcpy(ptr, ((unsigned char *) qp->mac), qp->maclen);
 851         ptr += qp->maclen;
 852         memcpy(ptr, ((unsigned char *) qp->iph), qp->ihlen);
 853         ptr += qp->ihlen;
 854         skb->h.raw += qp->maclen;
 855 
 856         count = 0;
 857 
 858         /* Copy the data portions of all fragments into the new buffer. */
 859         fp = qp->fragments;
 860         while(fp != NULL)
 861         {
 862                 if(count+fp->len>skb->len)
 863                 {
 864                         printk("Invalid fragment list: Fragment over size.\n");
 865                         ip_free(qp);
 866                         kfree_skb(skb,FREE_WRITE);
 867                         ip_statistics.IpReasmFails++;
 868                         return NULL;
 869                 }
 870                 memcpy((ptr + fp->offset), fp->ptr, fp->len);
 871                 count += fp->len;
 872                 fp = fp->next;
 873         }
 874 
 875         /* We glued together all fragments, so remove the queue entry. */
 876         ip_free(qp);
 877 
 878         /* Done with all fragments. Fixup the new IP header. */
 879         iph = skb->h.iph;
 880         iph->frag_off = 0;
 881         iph->tot_len = htons((iph->ihl * sizeof(unsigned long)) + count);
 882         skb->ip_hdr = iph;
 883 
 884         ip_statistics.IpReasmOKs++;
 885         return(skb);
 886 }
 887 
 888 
 889 /*
 890  *      Process an incoming IP datagram fragment.
 891  */
 892 
 893 static struct sk_buff *ip_defrag(struct iphdr *iph, struct sk_buff *skb, struct device *dev)
     /* [previous][next][first][last][top][bottom][index][help] */
 894 {
 895         struct ipfrag *prev, *next;
 896         struct ipfrag *tfp;
 897         struct ipq *qp;
 898         struct sk_buff *skb2;
 899         unsigned char *ptr;
 900         int flags, offset;
 901         int i, ihl, end;
 902 
 903         ip_statistics.IpReasmReqds++;
 904 
 905         /* Find the entry of this IP datagram in the "incomplete datagrams" queue. */
 906         qp = ip_find(iph);
 907 
 908         /* Is this a non-fragmented datagram? */
 909         offset = ntohs(iph->frag_off);
 910         flags = offset & ~IP_OFFSET;
 911         offset &= IP_OFFSET;
 912         if (((flags & IP_MF) == 0) && (offset == 0))
 913         {
 914                 if (qp != NULL)
 915                         ip_free(qp);    /* Huh? How could this exist?? */
 916                 return(skb);
 917         }
 918 
 919         offset <<= 3;           /* offset is in 8-byte chunks */
 920 
 921         /*
 922          * If the queue already existed, keep restarting its timer as long
 923          * as we still are receiving fragments.  Otherwise, create a fresh
 924          * queue entry.
 925          */
 926 
 927         if (qp != NULL)
 928         {
 929                 del_timer(&qp->timer);
 930                 qp->timer.expires = IP_FRAG_TIME;       /* about 30 seconds */
 931                 qp->timer.data = (unsigned long) qp;    /* pointer to queue */
 932                 qp->timer.function = ip_expire;         /* expire function */
 933                 add_timer(&qp->timer);
 934         }
 935         else
 936         {
 937                 /*
 938                  *      If we failed to create it, then discard the frame
 939                  */
 940                 if ((qp = ip_create(skb, iph, dev)) == NULL)
 941                 {
 942                         skb->sk = NULL;
 943                         kfree_skb(skb, FREE_READ);
 944                         ip_statistics.IpReasmFails++;
 945                         return NULL;
 946                 }
 947         }
 948 
 949         /*
 950          *      Determine the position of this fragment.
 951          */
 952 
 953         ihl = (iph->ihl * sizeof(unsigned long));
 954         end = offset + ntohs(iph->tot_len) - ihl;
 955 
 956         /*
 957          *      Point into the IP datagram 'data' part.
 958          */
 959 
 960         ptr = skb->data + dev->hard_header_len + ihl;
 961 
 962         /*
 963          *      Is this the final fragment?
 964          */
 965 
 966         if ((flags & IP_MF) == 0)
 967                 qp->len = end;
 968 
 969         /*
 970          *      Find out which fragments are in front and at the back of us
 971          *      in the chain of fragments so far.  We must know where to put
 972          *      this fragment, right?
 973          */
 974 
 975         prev = NULL;
 976         for(next = qp->fragments; next != NULL; next = next->next)
 977         {
 978                 if (next->offset > offset)
 979                         break;  /* bingo! */
 980                 prev = next;
 981         }
 982 
 983         /*
 984          *      We found where to put this one.
 985          *      Check for overlap with preceding fragment, and, if needed,
 986          *      align things so that any overlaps are eliminated.
 987          */
 988         if (prev != NULL && offset < prev->end)
 989         {
 990                 i = prev->end - offset;
 991                 offset += i;    /* ptr into datagram */
 992                 ptr += i;       /* ptr into fragment data */
 993         }
 994 
 995         /*
 996          * Look for overlap with succeeding segments.
 997          * If we can merge fragments, do it.
 998          */
 999 
1000         for(; next != NULL; next = tfp)
1001         {
1002                 tfp = next->next;
1003                 if (next->offset >= end)
1004                         break;          /* no overlaps at all */
1005 
1006                 i = end - next->offset;                 /* overlap is 'i' bytes */
1007                 next->len -= i;                         /* so reduce size of    */
1008                 next->offset += i;                      /* next fragment        */
1009                 next->ptr += i;
1010 
1011                 /*
1012                  *      If we get a frag size of <= 0, remove it and the packet
1013                  *      that it goes with.
1014                  */
1015                 if (next->len <= 0)
1016                 {
1017                         if (next->prev != NULL)
1018                                 next->prev->next = next->next;
1019                         else
1020                                 qp->fragments = next->next;
1021 
1022                         if (tfp->next != NULL)
1023                                 next->next->prev = next->prev;
1024 
1025                         kfree_skb(next->skb,FREE_READ);
1026                         kfree_s(next, sizeof(struct ipfrag));
1027                 }
1028         }
1029 
1030         /*
1031          *      Insert this fragment in the chain of fragments.
1032          */
1033 
1034         tfp = NULL;
1035         tfp = ip_frag_create(offset, end, skb, ptr);
1036 
1037         /*
1038          *      No memory to save the fragment - so throw the lot
1039          */
1040 
1041         if (!tfp)
1042         {
1043                 skb->sk = NULL;
1044                 kfree_skb(skb, FREE_READ);
1045                 return NULL;
1046         }
1047         tfp->prev = prev;
1048         tfp->next = next;
1049         if (prev != NULL)
1050                 prev->next = tfp;
1051         else
1052                 qp->fragments = tfp;
1053 
1054         if (next != NULL)
1055                 next->prev = tfp;
1056 
1057         /*
1058          *      OK, so we inserted this new fragment into the chain.
1059          *      Check if we now have a full IP datagram which we can
1060          *      bump up to the IP layer...
1061          */
1062 
1063         if (ip_done(qp))
1064         {
1065                 skb2 = ip_glue(qp);             /* glue together the fragments */
1066                 return(skb2);
1067         }
1068         return(NULL);
1069 }
1070 
1071 
1072 /*
1073  *      This IP datagram is too large to be sent in one piece.  Break it up into
1074  *      smaller pieces (each of size equal to the MAC header plus IP header plus
1075  *      a block of the data of the original IP data part) that will yet fit in a
1076  *      single device frame, and queue such a frame for sending by calling the
1077  *      ip_queue_xmit().  Note that this is recursion, and bad things will happen
1078  *      if this function causes a loop...
1079  *
1080  *      Yes this is inefficient, feel free to submit a quicker one.
1081  *
1082  *      **Protocol Violation**
1083  *      We copy all the options to each fragment. !FIXME!
1084  */
1085 void ip_fragment(struct sock *sk, struct sk_buff *skb, struct device *dev, int is_frag)
     /* [previous][next][first][last][top][bottom][index][help] */
1086 {
1087         struct iphdr *iph;
1088         unsigned char *raw;
1089         unsigned char *ptr;
1090         struct sk_buff *skb2;
1091         int left, mtu, hlen, len;
1092         int offset;
1093         unsigned long flags;
1094 
1095         /*
1096          *      Point into the IP datagram header.
1097          */
1098 
1099         raw = skb->data;
1100         iph = (struct iphdr *) (raw + dev->hard_header_len);
1101 
1102         skb->ip_hdr = iph;
1103 
1104         /*
1105          *      Setup starting values.
1106          */
1107 
1108         hlen = (iph->ihl * sizeof(unsigned long));
1109         left = ntohs(iph->tot_len) - hlen;      /* Space per frame */
1110         hlen += dev->hard_header_len;           /* Total header size */
1111         mtu = (dev->mtu - hlen);                /* Size of data space */
1112         ptr = (raw + hlen);                     /* Where to start from */
1113 
1114         /*
1115          *      Check for any "DF" flag. [DF means do not fragment]
1116          */
1117 
1118         if (ntohs(iph->frag_off) & IP_DF)
1119         {
1120                 ip_statistics.IpFragFails++;
1121                 icmp_send(skb,ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, dev);
1122                 return;
1123         }
1124 
1125         /*
1126          *      The protocol doesn't seem to say what to do in the case that the
1127          *      frame + options doesn't fit the mtu. As it used to fall down dead
1128          *      in this case we were fortunate it didn't happen
1129          */
1130 
1131         if(mtu<8)
1132         {
1133                 /* It's wrong but its better than nothing */
1134                 icmp_send(skb,ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED,dev);
1135                 ip_statistics.IpFragFails++;
1136                 return;
1137         }
1138 
1139         /*
1140          *      Fragment the datagram.
1141          */
1142 
1143         /*
1144          *      The initial offset is 0 for a complete frame. When
1145          *      fragmenting fragments its wherever this one starts.
1146          */
1147 
1148         if (is_frag & 2)
1149                 offset = (ntohs(iph->frag_off) & 0x1fff) << 3;
1150         else
1151                 offset = 0;
1152 
1153 
1154         /*
1155          *      Keep copying data until we run out.
1156          */
1157 
1158         while(left > 0)
1159         {
1160                 len = left;
1161                 /* IF: it doesn't fit, use 'mtu' - the data space left */
1162                 if (len > mtu)
1163                         len = mtu;
1164                 /* IF: we are not sending upto and including the packet end
1165                    then align the next start on an eight byte boundary */
1166                 if (len < left)
1167                 {
1168                         len/=8;
1169                         len*=8;
1170                 }
1171                 /*
1172                  *      Allocate buffer.
1173                  */
1174 
1175                 if ((skb2 = alloc_skb(len + hlen,GFP_ATOMIC)) == NULL)
1176                 {
1177                         printk("IP: frag: no memory for new fragment!\n");
1178                         ip_statistics.IpFragFails++;
1179                         return;
1180                 }
1181 
1182                 /*
1183                  *      Set up data on packet
1184                  */
1185 
1186                 skb2->arp = skb->arp;
1187                 if(skb->free==0)
1188                         printk("IP fragmenter: BUG free!=1 in fragmenter\n");
1189                 skb2->free = 1;
1190                 skb2->len = len + hlen;
1191                 skb2->h.raw=(char *) skb2->data;
1192                 /*
1193                  *      Charge the memory for the fragment to any owner
1194                  *      it might possess
1195                  */
1196 
1197                 save_flags(flags);
1198                 if (sk)
1199                 {
1200                         cli();
1201                         sk->wmem_alloc += skb2->mem_len;
1202                         skb2->sk=sk;
1203                 }
1204                 restore_flags(flags);
1205                 skb2->raddr = skb->raddr;       /* For rebuild_header - must be here */
1206 
1207                 /*
1208                  *      Copy the packet header into the new buffer.
1209                  */
1210 
1211                 memcpy(skb2->h.raw, raw, hlen);
1212 
1213                 /*
1214                  *      Copy a block of the IP datagram.
1215                  */
1216                 memcpy(skb2->h.raw + hlen, ptr, len);
1217                 left -= len;
1218 
1219                 skb2->h.raw+=dev->hard_header_len;
1220 
1221                 /*
1222                  *      Fill in the new header fields.
1223                  */
1224                 iph = (struct iphdr *)(skb2->h.raw/*+dev->hard_header_len*/);
1225                 iph->frag_off = htons((offset >> 3));
1226                 /*
1227                  *      Added AC : If we are fragmenting a fragment thats not the
1228                  *                 last fragment then keep MF on each bit
1229                  */
1230                 if (left > 0 || (is_frag & 1))
1231                         iph->frag_off |= htons(IP_MF);
1232                 ptr += len;
1233                 offset += len;
1234 
1235                 /*
1236                  *      Put this fragment into the sending queue.
1237                  */
1238 
1239                 ip_statistics.IpFragCreates++;
1240 
1241                 ip_queue_xmit(sk, dev, skb2, 2);
1242         }
1243         ip_statistics.IpFragOKs++;
1244 }
1245 
1246 
1247 
1248 #ifdef CONFIG_IP_FORWARD
1249 
1250 /*
1251  *      Forward an IP datagram to its next destination.
1252  */
1253 
1254 static void ip_forward(struct sk_buff *skb, struct device *dev, int is_frag)
     /* [previous][next][first][last][top][bottom][index][help] */
1255 {
1256         struct device *dev2;    /* Output device */
1257         struct iphdr *iph;      /* Our header */
1258         struct sk_buff *skb2;   /* Output packet */
1259         struct rtable *rt;      /* Route we use */
1260         unsigned char *ptr;     /* Data pointer */
1261         unsigned long raddr;    /* Router IP address */
1262 
1263         /*
1264          *      According to the RFC, we must first decrease the TTL field. If
1265          *      that reaches zero, we must reply an ICMP control message telling
1266          *      that the packet's lifetime expired.
1267          *
1268          *      Exception:
1269          *      We may not generate an ICMP for an ICMP. icmp_send does the
1270          *      enforcement of this so we can forget it here. It is however
1271          *      sometimes VERY important.
1272          */
1273 
1274         iph = skb->h.iph;
1275         iph->ttl--;
1276         if (iph->ttl <= 0)
1277         {
1278                 /* Tell the sender its packet died... */
1279                 icmp_send(skb, ICMP_TIME_EXCEEDED, ICMP_EXC_TTL, dev);
1280                 return;
1281         }
1282 
1283         /*
1284          *      Re-compute the IP header checksum.
1285          *      This is inefficient. We know what has happened to the header
1286          *      and could thus adjust the checksum as Phil Karn does in KA9Q
1287          */
1288 
1289         ip_send_check(iph);
1290 
1291         /*
1292          * OK, the packet is still valid.  Fetch its destination address,
1293          * and give it to the IP sender for further processing.
1294          */
1295 
1296         rt = ip_rt_route(iph->daddr, NULL, NULL);
1297         if (rt == NULL)
1298         {
1299                 /*
1300                  *      Tell the sender its packet cannot be delivered. Again
1301                  *      ICMP is screened later.
1302                  */
1303                 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_NET_UNREACH, dev);
1304                 return;
1305         }
1306 
1307 
1308         /*
1309          * Gosh.  Not only is the packet valid; we even know how to
1310          * forward it onto its final destination.  Can we say this
1311          * is being plain lucky?
1312          * If the router told us that there is no GW, use the dest.
1313          * IP address itself- we seem to be connected directly...
1314          */
1315 
1316         raddr = rt->rt_gateway;
1317 
1318         if (raddr != 0)
1319         {
1320                 /*
1321                  *      There is a gateway so find the correct route for it.
1322                  *      Gateways cannot in turn be gatewayed.
1323                  */
1324                 rt = ip_rt_route(raddr, NULL, NULL);
1325                 if (rt == NULL)
1326                 {
1327                         /*
1328                          *      Tell the sender its packet cannot be delivered...
1329                          */
1330                         icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, dev);
1331                         return;
1332                 }
1333                 if (rt->rt_gateway != 0)
1334                         raddr = rt->rt_gateway;
1335         }
1336         else
1337                 raddr = iph->daddr;
1338 
1339         /*
1340          *      Having picked a route we can now send the frame out.
1341          */
1342 
1343         dev2 = rt->rt_dev;
1344 
1345         /*
1346          *      In IP you never forward a frame on the interface that it arrived
1347          *      upon. We should generate an ICMP HOST REDIRECT giving the route
1348          *      we calculated.
1349          *      For now just dropping the packet is an acceptable compromise.
1350          */
1351 
1352         if (dev == dev2)
1353                 return;
1354 
1355         /*
1356          * We now allocate a new buffer, and copy the datagram into it.
1357          * If the indicated interface is up and running, kick it.
1358          */
1359 
1360         if (dev2->flags & IFF_UP)
1361         {
1362 
1363                 /*
1364                  *      Current design decrees we copy the packet. For identical header
1365                  *      lengths we could avoid it. The new skb code will let us push
1366                  *      data so the problem goes away then.
1367                  */
1368 
1369                 skb2 = alloc_skb(dev2->hard_header_len + skb->len, GFP_ATOMIC);
1370                 /*
1371                  *      This is rare and since IP is tolerant of network failures
1372                  *      quite harmless.
1373                  */
1374                 if (skb2 == NULL)
1375                 {
1376                         printk("\nIP: No memory available for IP forward\n");
1377                         return;
1378                 }
1379                 ptr = skb2->data;
1380                 skb2->free = 1;
1381                 skb2->len = skb->len + dev2->hard_header_len;
1382                 skb2->h.raw = ptr;
1383 
1384                 /*
1385                  *      Copy the packet data into the new buffer.
1386                  */
1387                 memcpy(ptr + dev2->hard_header_len, skb->h.raw, skb->len);
1388 
1389                 /* Now build the MAC header. */
1390                 (void) ip_send(skb2, raddr, skb->len, dev2, dev2->pa_addr);
1391 
1392                 ip_statistics.IpForwDatagrams++;
1393 
1394                 /*
1395                  *      See if it needs fragmenting. Note in ip_rcv we tagged
1396                  *      the fragment type. This must be right so that
1397                  *      the fragmenter does the right thing.
1398                  */
1399 
1400                 if(skb2->len > dev2->mtu + dev2->hard_header_len)
1401                 {
1402                         ip_fragment(NULL,skb2,dev2, is_frag);
1403                         kfree_skb(skb2,FREE_WRITE);
1404                 }
1405                 else
1406                 {
1407                         /*
1408                          *      Map service types to priority. We lie about
1409                          *      throughput being low priority, but its a good
1410                          *      choice to help improve general usage.
1411                          */
1412                         if(iph->tos & IPTOS_LOWDELAY)
1413                                 dev_queue_xmit(skb2, dev2, SOPRI_INTERACTIVE);
1414                         else if(iph->tos & IPTOS_THROUGHPUT)
1415                                 dev_queue_xmit(skb2, dev2, SOPRI_BACKGROUND);
1416                         else
1417                                 dev_queue_xmit(skb2, dev2, SOPRI_NORMAL);
1418                 }
1419         }
1420 }
1421 
1422 
1423 #endif
1424 
1425 /*
1426  *      This function receives all incoming IP datagrams.
1427  */
1428 
1429 int ip_rcv(struct sk_buff *skb, struct device *dev, struct packet_type *pt)
     /* [previous][next][first][last][top][bottom][index][help] */
1430 {
1431         struct iphdr *iph = skb->h.iph;
1432         unsigned char hash;
1433         unsigned char flag = 0;
1434         unsigned char opts_p = 0;       /* Set iff the packet has options. */
1435         struct inet_protocol *ipprot;
1436         static struct options opt; /* since we don't use these yet, and they
1437                                 take up stack space. */
1438         int brd=IS_MYADDR;
1439         int is_frag=0;
1440 
1441 
1442         ip_statistics.IpInReceives++;
1443 
1444         /*
1445          *      Tag the ip header of this packet so we can find it
1446          */
1447 
1448         skb->ip_hdr = iph;
1449 
1450         /*
1451          *      Is the datagram acceptable?
1452          *
1453          *      1.      Length at least the size of an ip header
1454          *      2.      Version of 4
1455          *      3.      Checksums correctly. [Speed optimisation for later, skip loopback checksums]
1456          *      (4.     We ought to check for IP multicast addresses and undefined types.. does this matter ?)
1457          */
1458 
1459         if (skb->len<sizeof(struct iphdr) || iph->ihl<5 || iph->version != 4 || ip_fast_csum((unsigned char *)iph, iph->ihl) !=0)
1460         {
1461                 ip_statistics.IpInHdrErrors++;
1462                 kfree_skb(skb, FREE_WRITE);
1463                 return(0);
1464         }
1465 
1466         /*
1467          *      Our transport medium may have padded the buffer out. Now we know it
1468          *      is IP we can trim to the true length of the frame.
1469          */
1470 
1471         skb->len=ntohs(iph->tot_len);
1472 
1473         /*
1474          *      Next analyse the packet for options. Studies show under one packet in
1475          *      a thousand have options....
1476          */
1477 
1478         if (iph->ihl != 5)
1479         {       /* Fast path for the typical optionless IP packet. */
1480                 memset((char *) &opt, 0, sizeof(opt));
1481                 if (do_options(iph, &opt) != 0)
1482                         return 0;
1483                 opts_p = 1;
1484         }
1485 
1486         /*
1487          *      Remember if the frame is fragmented.
1488          */
1489 
1490         if (iph->frag_off & 0x0020)
1491                 is_frag|=1;
1492 
1493         /*
1494          *      Last fragment ?
1495          */
1496 
1497         if (ntohs(iph->frag_off) & 0x1fff)
1498                 is_frag|=2;
1499 
1500         /*
1501          *      Do any IP forwarding required.  chk_addr() is expensive -- avoid it someday.
1502          *
1503          *      This is inefficient. While finding out if it is for us we could also compute
1504          *      the routing table entry. This is where the great unified cache theory comes
1505          *      in as and when someone implements it
1506          *
1507          *      For most hosts over 99% of packets match the first conditional
1508          *      and don't go via ip_chk_addr. Note: brd is set to IS_MYADDR at
1509          *      function entry.
1510          */
1511 
1512         if ( iph->daddr != skb->dev->pa_addr && (brd = ip_chk_addr(iph->daddr)) == 0)
1513         {
1514                 /*
1515                  *      Don't forward multicast or broadcast frames.
1516                  */
1517 
1518                 if(skb->pkt_type!=PACKET_HOST)
1519                 {
1520                         kfree_skb(skb,FREE_WRITE);
1521                         return 0;
1522                 }
1523 
1524                 /*
1525                  *      The packet is for another target. Forward the frame
1526                  */
1527 
1528 #ifdef CONFIG_IP_FORWARD
1529                 ip_forward(skb, dev, is_frag);
1530 #else
1531 /*              printk("Machine %lx tried to use us as a forwarder to %lx but we have forwarding disabled!\n",
1532                         iph->saddr,iph->daddr);*/
1533                 ip_statistics.IpInAddrErrors++;
1534 #endif
1535                 /*
1536                  *      The forwarder is inefficient and copies the packet. We
1537                  *      free the original now.
1538                  */
1539 
1540                 kfree_skb(skb, FREE_WRITE);
1541                 return(0);
1542         }
1543 
1544         /*
1545          * Reassemble IP fragments.
1546          */
1547 
1548         if(is_frag)
1549         {
1550                 /* Defragment. Obtain the complete packet if there is one */
1551                 skb=ip_defrag(iph,skb,dev);
1552                 if(skb==NULL)
1553                         return 0;
1554                 iph=skb->h.iph;
1555         }
1556 
1557         /*
1558          *      Point into the IP datagram, just past the header.
1559          */
1560 
1561         skb->ip_hdr = iph;
1562         skb->h.raw += iph->ihl*4;
1563 
1564         /*
1565          *      skb->h.raw now points at the protocol beyond the IP header.
1566          */
1567 
1568         hash = iph->protocol & (MAX_INET_PROTOS -1);
1569         for (ipprot = (struct inet_protocol *)inet_protos[hash];ipprot != NULL;ipprot=(struct inet_protocol *)ipprot->next)
1570         {
1571                 struct sk_buff *skb2;
1572 
1573                 if (ipprot->protocol != iph->protocol)
1574                         continue;
1575        /*
1576         *       See if we need to make a copy of it.  This will
1577         *       only be set if more than one protocol wants it.
1578         *       and then not for the last one.
1579         *
1580         *       This is an artifact of poor upper protocol design.
1581         *       Because the upper protocols damage the actual packet
1582         *       we must do copying. In actual fact it's even worse
1583         *       than this as TCP may hold on to the buffer.
1584         */
1585                 if (ipprot->copy)
1586                 {
1587                         skb2 = skb_clone(skb, GFP_ATOMIC);
1588                         if(skb2==NULL)
1589                                 continue;
1590                 }
1591                 else
1592                 {
1593                         skb2 = skb;
1594                 }
1595                 flag = 1;
1596 
1597                /*
1598                 * Pass on the datagram to each protocol that wants it,
1599                 * based on the datagram protocol.  We should really
1600                 * check the protocol handler's return values here...
1601                 */
1602                 ipprot->handler(skb2, dev, opts_p ? &opt : 0, iph->daddr,
1603                                 (ntohs(iph->tot_len) - (iph->ihl * 4)),
1604                                 iph->saddr, 0, ipprot);
1605 
1606         }
1607 
1608         /*
1609          * All protocols checked.
1610          * If this packet was a broadcast, we may *not* reply to it, since that
1611          * causes (proven, grin) ARP storms and a leakage of memory (i.e. all
1612          * ICMP reply messages get queued up for transmission...)
1613          */
1614 
1615         if (!flag)
1616         {
1617                 if (brd != IS_BROADCAST)
1618                         icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PROT_UNREACH, dev);
1619                 kfree_skb(skb, FREE_WRITE);
1620         }
1621 
1622         return(0);
1623 }
1624 
1625 
1626 /*
1627  * Queues a packet to be sent, and starts the transmitter
1628  * if necessary.  if free = 1 then we free the block after
1629  * transmit, otherwise we don't. If free==2 we not only
1630  * free the block but also don't assign a new ip seq number.
1631  * This routine also needs to put in the total length,
1632  * and compute the checksum
1633  */
1634 
1635 void ip_queue_xmit(struct sock *sk, struct device *dev,
     /* [previous][next][first][last][top][bottom][index][help] */
1636               struct sk_buff *skb, int free)
1637 {
1638         struct iphdr *iph;
1639         unsigned char *ptr;
1640 
1641         /* Sanity check */
1642         if (dev == NULL)
1643         {
1644                 printk("IP: ip_queue_xmit dev = NULL\n");
1645                 return;
1646         }
1647 
1648         IS_SKB(skb);
1649 
1650         /*
1651          *      Do some book-keeping in the packet for later
1652          */
1653 
1654 
1655         skb->dev = dev;
1656         skb->when = jiffies;
1657 
1658         /*
1659          *      Find the IP header and set the length. This is bad
1660          *      but once we get the skb data handling code in the
1661          *      hardware will push its header sensibly and we will
1662          *      set skb->ip_hdr to avoid this mess and the fixed
1663          *      header length problem
1664          */
1665 
1666         ptr = skb->data;
1667         ptr += dev->hard_header_len;
1668         iph = (struct iphdr *)ptr;
1669         skb->ip_hdr = iph;
1670         iph->tot_len = ntohs(skb->len-dev->hard_header_len);
1671 
1672         /*
1673          *      No reassigning numbers to fragments...
1674          */
1675 
1676         if(free!=2)
1677                 iph->id      = htons(ip_id_count++);
1678         else
1679                 free=1;
1680 
1681         /* All buffers without an owner socket get freed */
1682         if (sk == NULL)
1683                 free = 1;
1684 
1685         skb->free = free;
1686 
1687         /*
1688          *      Do we need to fragment. Again this is inefficient.
1689          *      We need to somehow lock the original buffer and use
1690          *      bits of it.
1691          */
1692 
1693         if(skb->len > dev->mtu + dev->hard_header_len)
1694         {
1695                 ip_fragment(sk,skb,dev,0);
1696                 IS_SKB(skb);
1697                 kfree_skb(skb,FREE_WRITE);
1698                 return;
1699         }
1700 
1701         /*
1702          *      Add an IP checksum
1703          */
1704 
1705         ip_send_check(iph);
1706 
1707         /*
1708          *      Print the frame when debugging
1709          */
1710 
1711         /*
1712          *      More debugging. You cannot queue a packet already on a list
1713          *      Spot this and moan loudly.
1714          */
1715         if (skb->next != NULL)
1716         {
1717                 printk("ip_queue_xmit: next != NULL\n");
1718                 skb_unlink(skb);
1719         }
1720 
1721         /*
1722          *      If a sender wishes the packet to remain unfreed
1723          *      we add it to his send queue. This arguably belongs
1724          *      in the TCP level since nobody else uses it. BUT
1725          *      remember IPng might change all the rules.
1726          */
1727 
1728         if (!free)
1729         {
1730                 unsigned long flags;
1731                 /* The socket now has more outstanding blocks */
1732 
1733                 sk->packets_out++;
1734 
1735                 /* Protect the list for a moment */
1736                 save_flags(flags);
1737                 cli();
1738 
1739                 if (skb->link3 != NULL)
1740                 {
1741                         printk("ip.c: link3 != NULL\n");
1742                         skb->link3 = NULL;
1743                 }
1744                 if (sk->send_head == NULL)
1745                 {
1746                         sk->send_tail = skb;
1747                         sk->send_head = skb;
1748                 }
1749                 else
1750                 {
1751                         sk->send_tail->link3 = skb;
1752                         sk->send_tail = skb;
1753                 }
1754                 /* skb->link3 is NULL */
1755 
1756                 /* Interrupt restore */
1757                 restore_flags(flags);
1758                 /* Set the IP write timeout to the round trip time for the packet.
1759                    If an acknowledge has not arrived by then we may wish to act */
1760                 reset_timer(sk, TIME_WRITE, sk->rto);
1761         }
1762         else
1763                 /* Remember who owns the buffer */
1764                 skb->sk = sk;
1765 
1766         /*
1767          *      If the indicated interface is up and running, send the packet.
1768          */
1769         ip_statistics.IpOutRequests++;
1770 
1771         if (dev->flags & IFF_UP)
1772         {
1773                 /*
1774                  *      If we have an owner use its priority setting,
1775                  *      otherwise use NORMAL
1776                  */
1777 
1778                 if (sk != NULL)
1779                 {
1780                         dev_queue_xmit(skb, dev, sk->priority);
1781                 }
1782                 else
1783                 {
1784                         dev_queue_xmit(skb, dev, SOPRI_NORMAL);
1785                 }
1786         }
1787         else
1788         {
1789                 ip_statistics.IpOutDiscards++;
1790                 if (free)
1791                         kfree_skb(skb, FREE_WRITE);
1792         }
1793 }
1794 
1795 
1796 /*
1797  *      A socket has timed out on its send queue and wants to do a
1798  *      little retransmitting. Currently this means TCP.
1799  */
1800 
1801 void ip_do_retransmit(struct sock *sk, int all)
     /* [previous][next][first][last][top][bottom][index][help] */
1802 {
1803         struct sk_buff * skb;
1804         struct proto *prot;
1805         struct device *dev;
1806         int retransmits;
1807 
1808         prot = sk->prot;
1809         skb = sk->send_head;
1810         retransmits = sk->retransmits;
1811 
1812         while (skb != NULL)
1813         {
1814                 dev = skb->dev;
1815                 IS_SKB(skb);
1816                 skb->when = jiffies;
1817 
1818                 /*
1819                  * In general it's OK just to use the old packet.  However we
1820                  * need to use the current ack and window fields.  Urg and
1821                  * urg_ptr could possibly stand to be updated as well, but we
1822                  * don't keep the necessary data.  That shouldn't be a problem,
1823                  * if the other end is doing the right thing.  Since we're
1824                  * changing the packet, we have to issue a new IP identifier.
1825                  */
1826 
1827                 /* this check may be unnecessary - retransmit only for TCP */
1828                 if (sk->protocol == IPPROTO_TCP) {
1829                   struct tcphdr *th;
1830                   struct iphdr *iph;
1831                   int size;
1832 
1833                   iph = (struct iphdr *)(skb->data + dev->hard_header_len);
1834                   th = (struct tcphdr *)(((char *)iph) + (iph->ihl << 2));
1835                   size = skb->len - (((unsigned char *) th) - skb->data);
1836 
1837                   iph->id = htons(ip_id_count++);
1838                   ip_send_check(iph);
1839 
1840                   th->ack_seq = ntohl(sk->acked_seq);
1841                   th->window = ntohs(tcp_select_window(sk));
1842                   tcp_send_check(th, sk->saddr, sk->daddr, size, sk);
1843                 }
1844 
1845                 /*
1846                  *      If the interface is (still) up and running, kick it.
1847                  */
1848 
1849                 if (dev->flags & IFF_UP)
1850                 {
1851                         /*
1852                          *      If the packet is still being sent by the device/protocol
1853                          *      below then don't retransmit. This is both needed, and good -
1854                          *      especially with connected mode AX.25 where it stops resends
1855                          *      occuring of an as yet unsent anyway frame!
1856                          *      We still add up the counts as the round trip time wants
1857                          *      adjusting.
1858                          */
1859                         if (sk && !skb_device_locked(skb))
1860                         {
1861                                 /* Remove it from any existing driver queue first! */
1862                                 skb_unlink(skb);
1863                                 /* Now queue it */
1864                                 ip_statistics.IpOutRequests++;
1865                                 dev_queue_xmit(skb, dev, sk->priority);
1866                         }
1867                 }
1868 
1869                 /*
1870                  *      Count retransmissions
1871                  */
1872                 retransmits++;
1873                 sk->prot->retransmits ++;
1874 
1875                 /*
1876                  *      Only one retransmit requested.
1877                  */
1878                 if (!all)
1879                         break;
1880 
1881                 /*
1882                  *      This should cut it off before we send too many packets.
1883                  */
1884                 if (sk->retransmits > sk->cong_window)
1885                         break;
1886                 skb = skb->link3;
1887         }
1888 }
1889 
1890 /*
1891  *      This is the normal code called for timeouts.  It does the retransmission
1892  *      and then does backoff.  ip_do_retransmit is separated out because
1893  *      tcp_ack needs to send stuff from the retransmit queue without
1894  *      initiating a backoff.
1895  */
1896 
1897 void ip_retransmit(struct sock *sk, int all)
     /* [previous][next][first][last][top][bottom][index][help] */
1898 {
1899         ip_do_retransmit(sk, all);
1900 
1901         /*
1902          * Increase the timeout each time we retransmit.  Note that
1903          * we do not increase the rtt estimate.  rto is initialized
1904          * from rtt, but increases here.  Jacobson (SIGCOMM 88) suggests
1905          * that doubling rto each time is the least we can get away with.
1906          * In KA9Q, Karn uses this for the first few times, and then
1907          * goes to quadratic.  netBSD doubles, but only goes up to *64,
1908          * and clamps at 1 to 64 sec afterwards.  Note that 120 sec is
1909          * defined in the protocol as the maximum possible RTT.  I guess
1910          * we'll have to use something other than TCP to talk to the
1911          * University of Mars.
1912          */
1913 
1914         sk->retransmits++;
1915         sk->backoff++;
1916         sk->rto = min(sk->rto << 1, 120*HZ);
1917         reset_timer(sk, TIME_WRITE, sk->rto);
1918 }
1919 
1920 /*
1921  *      Socket option code for IP. This is the end of the line after any TCP,UDP etc options on
1922  *      an IP socket.
1923  *
1924  *      We implement IP_TOS (type of service), IP_TTL (time to live).
1925  *
1926  *      Next release we will sort out IP_OPTIONS since for some people they are kind of important.
1927  */
1928 
1929 int ip_setsockopt(struct sock *sk, int level, int optname, char *optval, int optlen)
     /* [previous][next][first][last][top][bottom][index][help] */
1930 {
1931         int val,err;
1932 
1933         if (optval == NULL)
1934                 return(-EINVAL);
1935 
1936         err=verify_area(VERIFY_READ, optval, sizeof(int));
1937         if(err)
1938                 return err;
1939 
1940         val = get_fs_long((unsigned long *)optval);
1941 
1942         if(level!=SOL_IP)
1943                 return -EOPNOTSUPP;
1944 
1945         switch(optname)
1946         {
1947                 case IP_TOS:
1948                         if(val<0||val>255)
1949                                 return -EINVAL;
1950                         sk->ip_tos=val;
1951                         if(val==IPTOS_LOWDELAY)
1952                                 sk->priority=SOPRI_INTERACTIVE;
1953                         if(val==IPTOS_THROUGHPUT)
1954                                 sk->priority=SOPRI_BACKGROUND;
1955                         return 0;
1956                 case IP_TTL:
1957                         if(val<1||val>255)
1958                                 return -EINVAL;
1959                         sk->ip_ttl=val;
1960                         return 0;
1961                 /* IP_OPTIONS and friends go here eventually */
1962                 default:
1963                         return(-ENOPROTOOPT);
1964         }
1965 }
1966 
1967 /*
1968  *      Get the options. Note for future reference. The GET of IP options gets the
1969  *      _received_ ones. The set sets the _sent_ ones.
1970  */
1971 
1972 int ip_getsockopt(struct sock *sk, int level, int optname, char *optval, int *optlen)
     /* [previous][next][first][last][top][bottom][index][help] */
1973 {
1974         int val,err;
1975 
1976         if(level!=SOL_IP)
1977                 return -EOPNOTSUPP;
1978 
1979         switch(optname)
1980         {
1981                 case IP_TOS:
1982                         val=sk->ip_tos;
1983                         break;
1984                 case IP_TTL:
1985                         val=sk->ip_ttl;
1986                         break;
1987                 default:
1988                         return(-ENOPROTOOPT);
1989         }
1990         err=verify_area(VERIFY_WRITE, optlen, sizeof(int));
1991         if(err)
1992                 return err;
1993         put_fs_long(sizeof(int),(unsigned long *) optlen);
1994 
1995         err=verify_area(VERIFY_WRITE, optval, sizeof(int));
1996         if(err)
1997                 return err;
1998         put_fs_long(val,(unsigned long *)optval);
1999 
2000         return(0);
2001 }
2002 
2003 /*
2004  *      IP protocol layer initialiser
2005  */
2006 
2007 static struct packet_type ip_packet_type =
2008 {
2009         0,      /* MUTTER ntohs(ETH_P_IP),*/
2010         0,              /* copy */
2011         ip_rcv,
2012         NULL,
2013         NULL,
2014 };
2015 
2016 
2017 /*
2018  *      IP registers the packet type. The subprotocol initialisers are not called from here.
2019  */
2020 
2021 void ip_init(void)
     /* [previous][next][first][last][top][bottom][index][help] */
2022 {
2023         ip_packet_type.type=htons(ETH_P_IP);
2024         dev_add_pack(&ip_packet_type);
2025 /*      ip_raw_init();
2026         ip_packet_init();
2027         ip_tcp_init();
2028         ip_udp_init();*/
2029 }

/* [previous][next][first][last][top][bottom][index][help] */