root/net/ipv4/ip_fragment.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. ip_frag_create
  2. ip_find
  3. ip_free
  4. ip_expire
  5. ip_create
  6. ip_done
  7. ip_glue
  8. ip_defrag
  9. ip_fragment

   1 /*
   2  * INET         An implementation of the TCP/IP protocol suite for the LINUX
   3  *              operating system.  INET is implemented using the  BSD Socket
   4  *              interface as the means of communication with the user level.
   5  *
   6  *              The IP fragmentation functionality.
   7  *              
   8  * Authors:     Fred N. van Kempen <waltje@uWalt.NL.Mugnet.ORG>
   9  *              Alan Cox <Alan.Cox@linux.org>
  10  *
  11  * Fixes:
  12  *              Alan Cox        :       Split from ip.c , see ip_input.c for history.
  13  */
  14 
  15 #include <linux/config.h>
  16 #include <linux/types.h>
  17 #include <linux/mm.h>
  18 #include <linux/sched.h>
  19 #include <linux/skbuff.h>
  20 #include <linux/ip.h>
  21 #include <linux/icmp.h>
  22 #include <net/sock.h>
  23 #include <net/ip.h>
  24 #include <net/icmp.h>
  25 #include <linux/tcp.h>
  26 #include <linux/udp.h>
  27 #include <linux/firewall.h>
  28 #include <linux/ip_fw.h>
  29 #include <net/checksum.h>
  30 
  31 /*
  32  *      This fragment handler is a bit of a heap. On the other hand it works quite
  33  *      happily and handles things quite well.
  34  */
  35 
  36 static struct ipq *ipqueue = NULL;              /* IP fragment queue    */
  37 
  38 /*
  39  *      Create a new fragment entry.
  40  */
  41 
  42 static struct ipfrag *ip_frag_create(int offset, int end, struct sk_buff *skb, unsigned char *ptr)
     /* [previous][next][first][last][top][bottom][index][help] */
  43 {
  44         struct ipfrag *fp;
  45 
  46         fp = (struct ipfrag *) kmalloc(sizeof(struct ipfrag), GFP_ATOMIC);
  47         if (fp == NULL)
  48         {
  49                 NETDEBUG(printk("IP: frag_create: no memory left !\n"));
  50                 return(NULL);
  51         }
  52         memset(fp, 0, sizeof(struct ipfrag));
  53 
  54         /* Fill in the structure. */
  55         fp->offset = offset;
  56         fp->end = end;
  57         fp->len = end - offset;
  58         fp->skb = skb;
  59         fp->ptr = ptr;
  60 
  61         return(fp);
  62 }
  63 
  64 
  65 /*
  66  *      Find the correct entry in the "incomplete datagrams" queue for
  67  *      this IP datagram, and return the queue entry address if found.
  68  */
  69 
  70 static struct ipq *ip_find(struct iphdr *iph)
     /* [previous][next][first][last][top][bottom][index][help] */
  71 {
  72         struct ipq *qp;
  73         struct ipq *qplast;
  74 
  75         cli();
  76         qplast = NULL;
  77         for(qp = ipqueue; qp != NULL; qplast = qp, qp = qp->next)
  78         {
  79                 if (iph->id== qp->iph->id && iph->saddr == qp->iph->saddr &&
  80                         iph->daddr == qp->iph->daddr && iph->protocol == qp->iph->protocol)
  81                 {
  82                         del_timer(&qp->timer);  /* So it doesn't vanish on us. The timer will be reset anyway */
  83                         sti();
  84                         return(qp);
  85                 }
  86         }
  87         sti();
  88         return(NULL);
  89 }
  90 
  91 
  92 /*
  93  *      Remove an entry from the "incomplete datagrams" queue, either
  94  *      because we completed, reassembled and processed it, or because
  95  *      it timed out.
  96  */
  97 
  98 static void ip_free(struct ipq *qp)
     /* [previous][next][first][last][top][bottom][index][help] */
  99 {
 100         struct ipfrag *fp;
 101         struct ipfrag *xp;
 102 
 103         /*
 104          * Stop the timer for this entry.
 105          */
 106 
 107         del_timer(&qp->timer);
 108 
 109         /* Remove this entry from the "incomplete datagrams" queue. */
 110         cli();
 111         if (qp->prev == NULL)
 112         {
 113                 ipqueue = qp->next;
 114                 if (ipqueue != NULL)
 115                         ipqueue->prev = NULL;
 116         }
 117         else
 118         {
 119                 qp->prev->next = qp->next;
 120                 if (qp->next != NULL)
 121                         qp->next->prev = qp->prev;
 122         }
 123 
 124         /* Release all fragment data. */
 125 
 126         fp = qp->fragments;
 127         while (fp != NULL)
 128         {
 129                 xp = fp->next;
 130                 IS_SKB(fp->skb);
 131                 kfree_skb(fp->skb,FREE_READ);
 132                 kfree_s(fp, sizeof(struct ipfrag));
 133                 fp = xp;
 134         }
 135 
 136         /* Release the IP header. */
 137         kfree_s(qp->iph, 64 + 8);
 138 
 139         /* Finally, release the queue descriptor itself. */
 140         kfree_s(qp, sizeof(struct ipq));
 141         sti();
 142 }
 143 
 144 
 145 /*
 146  *      Oops- a fragment queue timed out.  Kill it and send an ICMP reply.
 147  */
 148 
 149 static void ip_expire(unsigned long arg)
     /* [previous][next][first][last][top][bottom][index][help] */
 150 {
 151         struct ipq *qp;
 152 
 153         qp = (struct ipq *)arg;
 154 
 155         /*
 156          *      Send an ICMP "Fragment Reassembly Timeout" message.
 157          */
 158 
 159         ip_statistics.IpReasmTimeout++;
 160         ip_statistics.IpReasmFails++;   
 161         /* This if is always true... shrug */
 162         if(qp->fragments!=NULL)
 163                 icmp_send(qp->fragments->skb,ICMP_TIME_EXCEEDED,
 164                                 ICMP_EXC_FRAGTIME, 0, qp->dev);
 165 
 166         /*
 167          *      Nuke the fragment queue.
 168          */
 169         ip_free(qp);
 170 }
 171 
 172 
 173 /*
 174  *      Add an entry to the 'ipq' queue for a newly received IP datagram.
 175  *      We will (hopefully :-) receive all other fragments of this datagram
 176  *      in time, so we just create a queue for this datagram, in which we
 177  *      will insert the received fragments at their respective positions.
 178  */
 179 
 180 static struct ipq *ip_create(struct sk_buff *skb, struct iphdr *iph, struct device *dev)
     /* [previous][next][first][last][top][bottom][index][help] */
 181 {
 182         struct ipq *qp;
 183         int ihlen;
 184 
 185         qp = (struct ipq *) kmalloc(sizeof(struct ipq), GFP_ATOMIC);
 186         if (qp == NULL)
 187         {
 188                 NETDEBUG(printk("IP: create: no memory left !\n"));
 189                 return(NULL);
 190                 skb->dev = qp->dev;
 191         }
 192         memset(qp, 0, sizeof(struct ipq));
 193 
 194         /*
 195          *      Allocate memory for the IP header (plus 8 octets for ICMP).
 196          */
 197 
 198         ihlen = iph->ihl * 4;
 199         qp->iph = (struct iphdr *) kmalloc(64 + 8, GFP_ATOMIC);
 200         if (qp->iph == NULL)
 201         {
 202                 NETDEBUG(printk("IP: create: no memory left !\n"));
 203                 kfree_s(qp, sizeof(struct ipq));
 204                 return(NULL);
 205         }
 206 
 207         memcpy(qp->iph, iph, ihlen + 8);
 208         qp->len = 0;
 209         qp->ihlen = ihlen;
 210         qp->fragments = NULL;
 211         qp->dev = dev;
 212 
 213         /* Start a timer for this entry. */
 214         qp->timer.expires = jiffies + IP_FRAG_TIME;     /* about 30 seconds     */
 215         qp->timer.data = (unsigned long) qp;            /* pointer to queue     */
 216         qp->timer.function = ip_expire;                 /* expire function      */
 217         add_timer(&qp->timer);
 218 
 219         /* Add this entry to the queue. */
 220         qp->prev = NULL;
 221         cli();
 222         qp->next = ipqueue;
 223         if (qp->next != NULL)
 224                 qp->next->prev = qp;
 225         ipqueue = qp;
 226         sti();
 227         return(qp);
 228 }
 229 
 230 
 231 /*
 232  *      See if a fragment queue is complete.
 233  */
 234 
 235 static int ip_done(struct ipq *qp)
     /* [previous][next][first][last][top][bottom][index][help] */
 236 {
 237         struct ipfrag *fp;
 238         int offset;
 239 
 240         /* Only possible if we received the final fragment. */
 241         if (qp->len == 0)
 242                 return(0);
 243 
 244         /* Check all fragment offsets to see if they connect. */
 245         fp = qp->fragments;
 246         offset = 0;
 247         while (fp != NULL)
 248         {
 249                 if (fp->offset > offset)
 250                         return(0);      /* fragment(s) missing */
 251                 offset = fp->end;
 252                 fp = fp->next;
 253         }
 254 
 255         /* All fragments are present. */
 256         return(1);
 257 }
 258 
 259 
 260 /*
 261  *      Build a new IP datagram from all its fragments.
 262  *
 263  *      FIXME: We copy here because we lack an effective way of handling lists
 264  *      of bits on input. Until the new skb data handling is in I'm not going
 265  *      to touch this with a bargepole. 
 266  */
 267 
 268 static struct sk_buff *ip_glue(struct ipq *qp)
     /* [previous][next][first][last][top][bottom][index][help] */
 269 {
 270         struct sk_buff *skb;
 271         struct iphdr *iph;
 272         struct ipfrag *fp;
 273         unsigned char *ptr;
 274         int count, len;
 275 
 276         /*
 277          *      Allocate a new buffer for the datagram.
 278          */
 279         len = qp->ihlen + qp->len;
 280 
 281         if ((skb = dev_alloc_skb(len)) == NULL)
 282         {
 283                 ip_statistics.IpReasmFails++;
 284                 NETDEBUG(printk("IP: queue_glue: no memory for gluing queue %p\n", qp));
 285                 ip_free(qp);
 286                 return(NULL);
 287         }
 288 
 289         /* Fill in the basic details. */
 290         skb_put(skb,len);
 291         skb->h.raw = skb->data;
 292         skb->free = 1;
 293 
 294         /* Copy the original IP headers into the new buffer. */
 295         ptr = (unsigned char *) skb->h.raw;
 296         memcpy(ptr, ((unsigned char *) qp->iph), qp->ihlen);
 297         ptr += qp->ihlen;
 298 
 299         count = 0;
 300 
 301         /* Copy the data portions of all fragments into the new buffer. */
 302         fp = qp->fragments;
 303         while(fp != NULL)
 304         {
 305                 if(count+fp->len > skb->len)
 306                 {
 307                         NETDEBUG(printk("Invalid fragment list: Fragment over size.\n"));
 308                         ip_free(qp);
 309                         kfree_skb(skb,FREE_WRITE);
 310                         ip_statistics.IpReasmFails++;
 311                         return NULL;
 312                 }
 313                 memcpy((ptr + fp->offset), fp->ptr, fp->len);
 314                 count += fp->len;
 315                 fp = fp->next;
 316         }
 317 
 318         /* We glued together all fragments, so remove the queue entry. */
 319         ip_free(qp);
 320 
 321         /* Done with all fragments. Fixup the new IP header. */
 322         iph = skb->h.iph;
 323         iph->frag_off = 0;
 324         iph->tot_len = htons((iph->ihl * 4) + count);
 325         skb->ip_hdr = iph;
 326 
 327         ip_statistics.IpReasmOKs++;
 328         return(skb);
 329 }
 330 
 331 
 332 /*
 333  *      Process an incoming IP datagram fragment.
 334  */
 335 
 336 struct sk_buff *ip_defrag(struct iphdr *iph, struct sk_buff *skb, struct device *dev)
     /* [previous][next][first][last][top][bottom][index][help] */
 337 {
 338         struct ipfrag *prev, *next, *tmp;
 339         struct ipfrag *tfp;
 340         struct ipq *qp;
 341         struct sk_buff *skb2;
 342         unsigned char *ptr;
 343         int flags, offset;
 344         int i, ihl, end;
 345 
 346         ip_statistics.IpReasmReqds++;
 347 
 348         /* Find the entry of this IP datagram in the "incomplete datagrams" queue. */
 349         qp = ip_find(iph);
 350 
 351         /* Is this a non-fragmented datagram? */
 352         offset = ntohs(iph->frag_off);
 353         flags = offset & ~IP_OFFSET;
 354         offset &= IP_OFFSET;
 355         if (((flags & IP_MF) == 0) && (offset == 0))
 356         {
 357                 if (qp != NULL)
 358                         ip_free(qp);    /* Huh? How could this exist?? */
 359                 return(skb);
 360         }
 361 
 362         offset <<= 3;           /* offset is in 8-byte chunks */
 363         ihl = iph->ihl * 4;
 364 
 365         /*
 366          * If the queue already existed, keep restarting its timer as long
 367          * as we still are receiving fragments.  Otherwise, create a fresh
 368          * queue entry.
 369          */
 370 
 371         if (qp != NULL)
 372         {
 373                 /* ANK. If the first fragment is received,
 374                  * we should remember the correct IP header (with options)
 375                  */
 376                 if (offset == 0)
 377                 {
 378                         qp->ihlen = ihl;
 379                         memcpy(qp->iph, iph, ihl+8);
 380                 }
 381                 del_timer(&qp->timer);
 382                 qp->timer.expires = jiffies + IP_FRAG_TIME;     /* about 30 seconds */
 383                 qp->timer.data = (unsigned long) qp;    /* pointer to queue */
 384                 qp->timer.function = ip_expire;         /* expire function */
 385                 add_timer(&qp->timer);
 386         }
 387         else
 388         {
 389                 /*
 390                  *      If we failed to create it, then discard the frame
 391                  */
 392                 if ((qp = ip_create(skb, iph, dev)) == NULL)
 393                 {
 394                         skb->sk = NULL;
 395                         kfree_skb(skb, FREE_READ);
 396                         ip_statistics.IpReasmFails++;
 397                         return NULL;
 398                 }
 399         }
 400 
 401         /*
 402          *      Determine the position of this fragment.
 403          */
 404 
 405         end = offset + ntohs(iph->tot_len) - ihl;
 406 
 407         /*
 408          *      Point into the IP datagram 'data' part.
 409          */
 410 
 411         ptr = skb->data + ihl;
 412 
 413         /*
 414          *      Is this the final fragment?
 415          */
 416 
 417         if ((flags & IP_MF) == 0)
 418                 qp->len = end;
 419 
 420         /*
 421          *      Find out which fragments are in front and at the back of us
 422          *      in the chain of fragments so far.  We must know where to put
 423          *      this fragment, right?
 424          */
 425 
 426         prev = NULL;
 427         for(next = qp->fragments; next != NULL; next = next->next)
 428         {
 429                 if (next->offset > offset)
 430                         break;  /* bingo! */
 431                 prev = next;
 432         }
 433 
 434         /*
 435          *      We found where to put this one.
 436          *      Check for overlap with preceding fragment, and, if needed,
 437          *      align things so that any overlaps are eliminated.
 438          */
 439         if (prev != NULL && offset < prev->end)
 440         {
 441                 i = prev->end - offset;
 442                 offset += i;    /* ptr into datagram */
 443                 ptr += i;       /* ptr into fragment data */
 444         }
 445 
 446         /*
 447          * Look for overlap with succeeding segments.
 448          * If we can merge fragments, do it.
 449          */
 450 
 451         for(tmp=next; tmp != NULL; tmp = tfp)
 452         {
 453                 tfp = tmp->next;
 454                 if (tmp->offset >= end)
 455                         break;          /* no overlaps at all */
 456 
 457                 i = end - next->offset;                 /* overlap is 'i' bytes */
 458                 tmp->len -= i;                          /* so reduce size of    */
 459                 tmp->offset += i;                       /* next fragment        */
 460                 tmp->ptr += i;
 461                 /*
 462                  *      If we get a frag size of <= 0, remove it and the packet
 463                  *      that it goes with.
 464                  */
 465                 if (tmp->len <= 0)
 466                 {
 467                         if (tmp->prev != NULL)
 468                                 tmp->prev->next = tmp->next;
 469                         else
 470                                 qp->fragments = tmp->next;
 471 
 472                         if (tfp->next != NULL)
 473                                 tmp->next->prev = tmp->prev;
 474                         
 475                         next=tfp;       /* We have killed the original next frame */
 476 
 477                         kfree_skb(tmp->skb,FREE_READ);
 478                         kfree_s(tmp, sizeof(struct ipfrag));
 479                 }
 480         }
 481 
 482         /*
 483          *      Insert this fragment in the chain of fragments.
 484          */
 485 
 486         tfp = NULL;
 487         tfp = ip_frag_create(offset, end, skb, ptr);
 488 
 489         /*
 490          *      No memory to save the fragment - so throw the lot
 491          */
 492 
 493         if (!tfp)
 494         {
 495                 skb->sk = NULL;
 496                 kfree_skb(skb, FREE_READ);
 497                 return NULL;
 498         }
 499         tfp->prev = prev;
 500         tfp->next = next;
 501         if (prev != NULL)
 502                 prev->next = tfp;
 503         else
 504                 qp->fragments = tfp;
 505 
 506         if (next != NULL)
 507                 next->prev = tfp;
 508 
 509         /*
 510          *      OK, so we inserted this new fragment into the chain.
 511          *      Check if we now have a full IP datagram which we can
 512          *      bump up to the IP layer...
 513          */
 514 
 515         if (ip_done(qp))
 516         {
 517                 skb2 = ip_glue(qp);             /* glue together the fragments */
 518                 return(skb2);
 519         }
 520         return(NULL);
 521 }
 522 
 523 
 524 /*
 525  *      This IP datagram is too large to be sent in one piece.  Break it up into
 526  *      smaller pieces (each of size equal to the MAC header plus IP header plus
 527  *      a block of the data of the original IP data part) that will yet fit in a
 528  *      single device frame, and queue such a frame for sending by calling the
 529  *      ip_queue_xmit().  Note that this is recursion, and bad things will happen
 530  *      if this function causes a loop...
 531  *
 532  *      Yes this is inefficient, feel free to submit a quicker one.
 533  *
 534  */
 535  
 536 void ip_fragment(struct sock *sk, struct sk_buff *skb, struct device *dev, int is_frag)
     /* [previous][next][first][last][top][bottom][index][help] */
 537 {
 538         struct iphdr *iph;
 539         unsigned char *raw;
 540         unsigned char *ptr;
 541         struct sk_buff *skb2;
 542         int left, mtu, hlen, len;
 543         int offset;
 544         unsigned long flags;
 545 
 546         /*
 547          *      Point into the IP datagram header.
 548          */
 549 
 550         raw = skb->data;
 551 #if 0
 552         iph = (struct iphdr *) (raw + dev->hard_header_len);    
 553         skb->ip_hdr = iph;
 554 #else
 555         iph = skb->ip_hdr;
 556 #endif
 557 
 558         /*
 559          *      Setup starting values.
 560          */
 561 
 562         hlen = iph->ihl * 4;
 563         left = ntohs(iph->tot_len) - hlen;      /* Space per frame */
 564         hlen += dev->hard_header_len;           /* Total header size */
 565         mtu = (dev->mtu - hlen);                /* Size of data space */
 566         ptr = (raw + hlen);                     /* Where to start from */
 567 
 568         /*
 569          *      Check for any "DF" flag. [DF means do not fragment]
 570          */
 571 
 572         if (ntohs(iph->frag_off) & IP_DF)
 573         {
 574                 ip_statistics.IpFragFails++;
 575                 printk("ip_queue_xmit: frag needed\n");
 576                 return;
 577         }
 578 
 579         /*
 580          *      The protocol doesn't seem to say what to do in the case that the
 581          *      frame + options doesn't fit the mtu. As it used to fall down dead
 582          *      in this case we were fortunate it didn't happen
 583          */
 584 
 585         if(mtu<8)
 586         {
 587                 /* It's wrong but it's better than nothing */
 588                 icmp_send(skb,ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED,dev->mtu, dev);
 589                 ip_statistics.IpFragFails++;
 590                 return;
 591         }
 592 
 593         /*
 594          *      Fragment the datagram.
 595          */
 596 
 597         /*
 598          *      The initial offset is 0 for a complete frame. When
 599          *      fragmenting fragments it's wherever this one starts.
 600          */
 601 
 602         if (is_frag & 2)
 603                 offset = (ntohs(iph->frag_off) & IP_OFFSET) << 3;
 604         else
 605                 offset = 0;
 606 
 607 
 608         /*
 609          *      Keep copying data until we run out.
 610          */
 611 
 612         while(left > 0)
 613         {
 614                 len = left;
 615                 /* IF: it doesn't fit, use 'mtu' - the data space left */
 616                 if (len > mtu)
 617                         len = mtu;
 618                 /* IF: we are not sending upto and including the packet end
 619                    then align the next start on an eight byte boundary */
 620                 if (len < left)
 621                 {
 622                         len/=8;
 623                         len*=8;
 624                 }
 625                 /*
 626                  *      Allocate buffer.
 627                  */
 628 
 629                 if ((skb2 = alloc_skb(len + hlen+15,GFP_ATOMIC)) == NULL)
 630                 {
 631                         NETDEBUG(printk("IP: frag: no memory for new fragment!\n"));
 632                         ip_statistics.IpFragFails++;
 633                         return;
 634                 }
 635 
 636                 /*
 637                  *      Set up data on packet
 638                  */
 639 
 640                 skb2->arp = skb->arp;
 641                 if(skb->free==0)
 642                         printk("IP fragmenter: BUG free!=1 in fragmenter\n");
 643                 skb2->free = 1;
 644                 skb_put(skb2,len + hlen);
 645                 skb2->h.raw=(char *) skb2->data;
 646                 /*
 647                  *      Charge the memory for the fragment to any owner
 648                  *      it might possess
 649                  */
 650 
 651                 save_flags(flags);
 652                 if (sk)
 653                 {
 654                         cli();
 655                         sk->wmem_alloc += skb2->truesize;
 656                         skb2->sk=sk;
 657                 }
 658                 restore_flags(flags);
 659                 skb2->raddr = skb->raddr;       /* For rebuild_header - must be here */
 660 
 661                 /*
 662                  *      Copy the packet header into the new buffer.
 663                  */
 664 
 665                 memcpy(skb2->h.raw, raw, hlen);
 666 
 667                 /*
 668                  *      Copy a block of the IP datagram.
 669                  */
 670                 memcpy(skb2->h.raw + hlen, ptr, len);
 671                 left -= len;
 672 
 673                 skb2->h.raw+=dev->hard_header_len;
 674 
 675                 /*
 676                  *      Fill in the new header fields.
 677                  */
 678                 iph = (struct iphdr *)(skb2->h.raw/*+dev->hard_header_len*/);
 679                 iph->frag_off = htons((offset >> 3));
 680                 skb2->ip_hdr = iph;
 681 
 682                 /* ANK: dirty, but effective trick. Upgrade options only if
 683                  * the segment to be fragmented was THE FIRST (otherwise,
 684                  * options are already fixed) and make it ONCE
 685                  * on the initial skb, so that all the following fragments
 686                  * will inherit fixed options.
 687                  */
 688                 if (offset == 0)
 689                         ip_options_fragment(skb);
 690 
 691                 /*
 692                  *      Added AC : If we are fragmenting a fragment thats not the
 693                  *                 last fragment then keep MF on each bit
 694                  */
 695                 if (left > 0 || (is_frag & 1))
 696                         iph->frag_off |= htons(IP_MF);
 697                 ptr += len;
 698                 offset += len;
 699 
 700                 /*
 701                  *      Put this fragment into the sending queue.
 702                  */
 703 
 704                 ip_statistics.IpFragCreates++;
 705 
 706                 ip_queue_xmit(sk, dev, skb2, 2);
 707         }
 708         ip_statistics.IpFragOKs++;
 709 }
 710 
 711 

/* [previous][next][first][last][top][bottom][index][help] */