root/net/inet/ip.c


DEFINITIONS

This source file includes the following definitions.
  1. ip_ioctl
  2. strict_route
  3. loose_route
  4. ip_route_check
  5. build_options
  6. ip_send
  7. ip_build_header
  8. do_options
  9. ip_fast_csum
  10. ip_compute_csum
  11. ip_csum
  12. ip_send_check
  13. ip_frag_create
  14. ip_find
  15. ip_free
  16. ip_expire
  17. ip_create
  18. ip_done
  19. ip_glue
  20. ip_defrag
  21. ip_fragment
  22. ip_forward
  23. ip_rcv
  24. ip_queue_xmit
  25. ip_do_retransmit
  26. ip_retransmit
  27. ip_setsockopt
  28. ip_getsockopt
  29. ip_init

   1 /*
   2  * INET         An implementation of the TCP/IP protocol suite for the LINUX
   3  *              operating system.  INET is implemented using the  BSD Socket
   4  *              interface as the means of communication with the user level.
   5  *
   6  *              The Internet Protocol (IP) module.
   7  *
   8  * Version:     @(#)ip.c        1.0.16b 9/1/93
   9  *
  10  * Authors:     Ross Biro, <bir7@leland.Stanford.Edu>
  11  *              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
  12  *              Donald Becker, <becker@super.org>
  13  *              Alan Cox, <gw4pts@gw4pts.ampr.org>
  14  *
  15  * Fixes:
  16  *              Alan Cox        :       Commented a couple of minor bits of surplus code
  17  *              Alan Cox        :       Undefining IP_FORWARD doesn't include the code
  18  *                                      (just stops a compiler warning).
  19  *              Alan Cox        :       Frames with >=MAX_ROUTE record routes, strict routes or loose routes
  20  *                                      are junked rather than corrupting things.
  21  *              Alan Cox        :       Frames to bad broadcast subnets are dumped
  22  *                                      We used to process them non broadcast and
  23  *                                      boy could that cause havoc.
  24  *              Alan Cox        :       ip_forward sets the free flag on the 
  25  *                                      new frame it queues. Still crap because
  26  *                                      it copies the frame but at least it 
  27  *                                      doesn't eat memory too.
  28  *              Alan Cox        :       Generic queue code and memory fixes.
  29  *              Fred Van Kempen :       IP fragment support (borrowed from NET2E)
  30  *              Gerhard Koerting:       Forward fragmented frames correctly.
  31  *              Gerhard Koerting:       Fixes to my fix of the above 8-).
  32  *              Gerhard Koerting:       IP interface addressing fix.
  33  *              Linus Torvalds  :       More robustness checks
  34  *              Alan Cox        :       Even more checks: Still not as robust as it ought to be
  35  *              Alan Cox        :       Save IP header pointer for later
  36  *              Alan Cox        :       ip option setting
  37  *              Alan Cox        :       Use ip_tos/ip_ttl settings
  38  *              Alan Cox        :       Fragmentation bogosity removed
  39  *                                      (Thanks to Mark.Bush@prg.ox.ac.uk)
  40  *              Dmitry Gorodchanin :    Send of a raw packet crash fix.
  41  *              Alan Cox        :       Silly ip bug when an overlength
  42  *                                      fragment turns up. Now frees the
  43  *                                      queue.
  44  *              Linus Torvalds/ :       Memory leakage on fragmentation 
  45  *              Alan Cox        :       handling.
  46  *              Gerhard Koerting:       Forwarding uses IP priority hints
  47  *              Teemu Rantanen  :       Fragment problems.
  48  *              Alan Cox        :       General cleanup, comments and reformat
  49  *              Alan Cox        :       SNMP statistics
  50  *              Alan Cox        :       BSD address rule semantics. Also see
  51  *                                      UDP as there is a nasty checksum issue
  52  *                                      if you do things the wrong way.
  53  *              Alan Cox        :       Always defrag, moved IP_FORWARD to the config.in file
  54  *
  55  * To Fix:
  56  *              IP option processing is mostly not needed. ip_forward needs to know about routing rules
  57  *              and time stamp but that's about all. Use the route mtu field here too
  58  *
  59  *              This program is free software; you can redistribute it and/or
  60  *              modify it under the terms of the GNU General Public License
  61  *              as published by the Free Software Foundation; either version
  62  *              2 of the License, or (at your option) any later version.
  63  */
  64 #include <asm/segment.h>
  65 #include <asm/system.h>
  66 #include <linux/types.h>
  67 #include <linux/kernel.h>
  68 #include <linux/sched.h>
  69 #include <linux/string.h>
  70 #include <linux/errno.h>
  71 #include <linux/socket.h>
  72 #include <linux/sockios.h>
  73 #include <linux/in.h>
  74 #include <linux/inet.h>
  75 #include <linux/netdevice.h>
  76 #include <linux/etherdevice.h>
  77 #include "snmp.h"
  78 #include "ip.h"
  79 #include "protocol.h"
  80 #include "route.h"
  81 #include "tcp.h"
  82 #include <linux/skbuff.h>
  83 #include "sock.h"
  84 #include "arp.h"
  85 #include "icmp.h"
  86 
  87 #define CONFIG_IP_DEFRAG
  88 
  89 extern int last_retran;
  90 extern void sort_send(struct sock *sk);
  91 
  92 #define min(a,b)        ((a)<(b)?(a):(b))
  93 
  94 /*
  95  *      SNMP management statistics
  96  */
  97  
  98 struct ip_mib ip_statistics={1,64,};    /* Forwarding=Yes, Default TTL=64 */
  99  
 100 /*
 101  *      Handle the issuing of an ioctl() request 
 102  *      for the ip device. This is scheduled to
 103  *      disappear
 104  */
 105 
 106 int ip_ioctl(struct sock *sk, int cmd, unsigned long arg)
 107 {
 108         switch(cmd) 
 109         {
 110                 default:
 111                         return(-EINVAL);
 112         }
 113 }
 114 
 115 
 116 /* these two routines will do routing. */
 117 
 118 static void
 119 strict_route(struct iphdr *iph, struct options *opt)
 120 {
 121 }
 122 
 123 
 124 static void
 125 loose_route(struct iphdr *iph, struct options *opt)
 126 {
 127 }
 128 
 129 
 130 
 131 
 132 /* This routine will check to see if we have lost a gateway. */
 133 void
 134 ip_route_check(unsigned long daddr)
 135 {
 136 }
 137 
 138 
 139 #if 0
 140 /* this routine puts the options at the end of an ip header. */
 141 static int
 142 build_options(struct iphdr *iph, struct options *opt)
 143 {
 144   unsigned char *ptr;
 145   /* currently we don't support any options. */
 146   ptr = (unsigned char *)(iph+1);
 147   *ptr = 0;
 148   return (4);
 149 }
 150 #endif
 151 
 152 
 153 /*
 154  *      Take an skb, and fill in the MAC header. 
 155  */
 156  
 157 static int ip_send(struct sk_buff *skb, unsigned long daddr, int len, struct device *dev, unsigned long saddr)
 158 {
 159         int mac = 0;
 160 
 161         skb->dev = dev;
 162         skb->arp = 1;
 163         if (dev->hard_header) 
 164         {
 165                 /*
 166                  *      Build a hardware header. Source address is our mac, destination unknown
 167                  *      (rebuild header will sort this out) 
 168                  */
 169                 mac = dev->hard_header(skb->data, dev, ETH_P_IP, NULL, NULL, len, skb);
 170                 if (mac < 0) 
 171                 {
 172                         mac = -mac;
 173                         skb->arp = 0;
 174                         skb->raddr = daddr;     /* next routing address */
 175                 }       
 176         }
 177         return mac;
 178 }
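      
      /*
       *	Convention assumed above (it is not defined in this file): a device's
       *	hard_header() returns the number of octets it wrote, negated when it
       *	could not yet fill in the destination hardware address (ARP still
       *	pending). In that case skb->arp is cleared and skb->raddr holds the
       *	next hop, so the header can be rebuilt later as the comment above
       *	notes.
       */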
 179 
 180 int ip_id_count = 0;
 181 
 182 /*
 183  * This routine builds the appropriate hardware/IP headers for
 184  * the routine.  It assumes that if *dev != NULL then the
 185  * protocol knows what it's doing, otherwise it uses the
 186  * routing/ARP tables to select a device struct.
 187  */
 188 int ip_build_header(struct sk_buff *skb, unsigned long saddr, unsigned long daddr,
 189                 struct device **dev, int type, struct options *opt, int len, int tos, int ttl)
 190 {
 191         static struct options optmem;
 192         struct iphdr *iph;
 193         struct rtable *rt;
 194         unsigned char *buff;
 195         unsigned long raddr;
 196         int tmp;
 197         unsigned long src;
 198 
 199         /*
 200          *      If there is no 'from' address as yet, then make it our loopback
 201          */
 202          
 203         if (saddr == 0) 
 204                 saddr = ip_my_addr();
 205         
 206         buff = skb->data;
 207 
 208         /* 
 209          *      See if we need to look up the device. 
 210          */
 211          
 212         if (*dev == NULL) 
 213         {
 214                 if(skb->localroute)
 215                         rt = ip_rt_local(daddr, &optmem, &src);
 216                 else
 217                         rt = ip_rt_route(daddr, &optmem, &src);
 218                 if (rt == NULL) 
 219                 {
 220                         ip_statistics.IpOutNoRoutes++;
 221                         return(-ENETUNREACH);
 222                 }
 223         
 224                 *dev = rt->rt_dev;
 225                 /*
 226                  *      If the frame is from us and going off machine it MUST MUST MUST
 227                  *      have the output device ip address and never the loopback
 228                  */
 229                 if (saddr == 0x0100007FL && daddr != 0x0100007FL) 
 230                         saddr = src;/*rt->rt_dev->pa_addr;*/
 231                 raddr = rt->rt_gateway;
 232 
 233                 opt = &optmem;
 234         } 
 235         else 
 236         {
 237                 /* 
 238                  *      We still need the address of the first hop. 
 239                  */
 240                 if(skb->localroute)
 241                         rt = ip_rt_local(daddr, &optmem, &src);
 242                 else
 243                         rt = ip_rt_route(daddr, &optmem, &src);
 244                 /*
 245                  *      If the frame is from us and going off machine it MUST MUST MUST
 246                  *      have the output device ip address and never the loopback
 247                  */
 248                 if (saddr == 0x0100007FL && daddr != 0x0100007FL) 
 249                         saddr = src;/*rt->rt_dev->pa_addr;*/
 250 
 251                 raddr = (rt == NULL) ? 0 : rt->rt_gateway;
 252         }
 253   
 254           /*
 255            *    No gateway so aim at the real destination
 256            */
 257         if (raddr == 0)
 258                 raddr = daddr;
 259 
 260         /* 
 261          *      Now build the MAC header. 
 262          */
 263          
 264         tmp = ip_send(skb, raddr, len, *dev, saddr);
 265         buff += tmp;
 266         len -= tmp;
 267 
 268         /*
 269          *      Book keeping
 270          */
 271 
 272         skb->dev = *dev;
 273         skb->saddr = saddr;
 274         if (skb->sk) 
 275                 skb->sk->saddr = saddr;
 276 
 277         /*
 278          *      Now build the IP header. 
 279          */
 280 
 281         /* 
 282          *      If we are using IPPROTO_RAW, then we don't need an IP header, since
 283          *      one is being supplied to us by the user 
 284          */
 285 
 286         if(type == IPPROTO_RAW) 
 287                 return (tmp);
 288 
 289         iph = (struct iphdr *)buff;
 290         iph->version  = 4;
 291         iph->tos      = tos;
 292         iph->frag_off = 0;
 293         iph->ttl      = ttl;
 294         iph->daddr    = daddr;
 295         iph->saddr    = saddr;
 296         iph->protocol = type;
 297         iph->ihl      = 5;
 298   
 299         /* Setup the IP options. */
 300 #ifdef Not_Yet_Avail
 301         build_options(iph, opt);
 302 #endif
 303 
 304         return(20 + tmp);       /* IP header plus MAC header size */
 305 }
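      
      /*
       *	Illustrative use only: a hypothetical transport caller (skb, sk and
       *	daddr below are assumed to come from that caller, they are not
       *	defined here) would do roughly the following and then build its own
       *	header starting at skb->data + tmp:
       *
       *		struct device *dev = NULL;
       *		int tmp;
       *
       *		tmp = ip_build_header(skb, sk->saddr, daddr, &dev,
       *				      IPPROTO_UDP, NULL, skb->mem_len,
       *				      sk->ip_tos, sk->ip_ttl);
       *		if (tmp < 0)
       *			return(tmp);		(no route to the destination)
       *
       *	On success tmp is the number of MAC plus IP header octets written.
       */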
 306 
 307 
 308 static int
 309 do_options(struct iphdr *iph, struct options *opt)
 310 {
 311   unsigned char *buff;
 312   int done = 0;
 313   int i, len = sizeof(struct iphdr);
 314 
 315   /* Zero out the options. */
 316   opt->record_route.route_size = 0;
 317   opt->loose_route.route_size  = 0;
 318   opt->strict_route.route_size = 0;
 319   opt->tstamp.ptr              = 0;
 320   opt->security                = 0;
 321   opt->compartment             = 0;
 322   opt->handling                = 0;
 323   opt->stream                  = 0;
 324   opt->tcc                     = 0;
  325   return(0);            /* NB: the option parsing below is currently unreachable */
 326 
 327   /* Advance the pointer to start at the options. */
 328   buff = (unsigned char *)(iph + 1);
 329 
 330   /* Now start the processing. */
 331   while (!done && len < iph->ihl*4) switch(*buff) {
 332         case IPOPT_END:
 333                 done = 1;
 334                 break;
 335         case IPOPT_NOOP:
 336                 buff++;
 337                 len++;
 338                 break;
 339         case IPOPT_SEC:
 340                 buff++;
 341                 if (*buff != 11) return(1);
 342                 buff++;
 343                 opt->security = ntohs(*(unsigned short *)buff);
 344                 buff += 2;
 345                 opt->compartment = ntohs(*(unsigned short *)buff);
 346                 buff += 2;
 347                 opt->handling = ntohs(*(unsigned short *)buff);
 348                 buff += 2;
 349                 opt->tcc = ((*buff) << 16) + ntohs(*(unsigned short *)(buff+1));
 350                 buff += 3;
 351                 len += 11;
 352                 break;
 353         case IPOPT_LSRR:
 354                 buff++;
 355                 if ((*buff - 3)% 4 != 0) return(1);
 356                 len += *buff;
 357                 opt->loose_route.route_size = (*buff -3)/4;
 358                 buff++;
 359                 if (*buff % 4 != 0) return(1);
 360                 opt->loose_route.pointer = *buff/4 - 1;
 361                 buff++;
 362                 buff++;
 363                 for (i = 0; i < opt->loose_route.route_size; i++) {
 364                         if(i>=MAX_ROUTE)
 365                                 return(1);
 366                         opt->loose_route.route[i] = *(unsigned long *)buff;
 367                         buff += 4;
 368                 }
 369                 break;
 370         case IPOPT_SSRR:
 371                 buff++;
 372                 if ((*buff - 3)% 4 != 0) return(1);
 373                 len += *buff;
 374                 opt->strict_route.route_size = (*buff -3)/4;
 375                 buff++;
 376                 if (*buff % 4 != 0) return(1);
 377                 opt->strict_route.pointer = *buff/4 - 1;
 378                 buff++;
 379                 buff++;
 380                 for (i = 0; i < opt->strict_route.route_size; i++) {
 381                         if(i>=MAX_ROUTE)
 382                                 return(1);
 383                         opt->strict_route.route[i] = *(unsigned long *)buff;
 384                         buff += 4;
 385                 }
 386                 break;
 387         case IPOPT_RR:
 388                 buff++;
 389                 if ((*buff - 3)% 4 != 0) return(1);
 390                 len += *buff;
 391                 opt->record_route.route_size = (*buff -3)/4;
 392                 buff++;
 393                 if (*buff % 4 != 0) return(1);
 394                 opt->record_route.pointer = *buff/4 - 1;
 395                 buff++;
 396                 buff++;
 397                 for (i = 0; i < opt->record_route.route_size; i++) {
 398                         if(i>=MAX_ROUTE)
 399                                 return 1;
 400                         opt->record_route.route[i] = *(unsigned long *)buff;
 401                         buff += 4;
 402                 }
 403                 break;
 404         case IPOPT_SID:
 405                 len += 4;
 406                 buff +=2;
 407                 opt->stream = *(unsigned short *)buff;
 408                 buff += 2;
 409                 break;
 410         case IPOPT_TIMESTAMP:
 411                 buff++;
 412                 len += *buff;
 413                 if (*buff % 4 != 0) return(1);
 414                 opt->tstamp.len = *buff / 4 - 1;
 415                 buff++;
 416                 if ((*buff - 1) % 4 != 0) return(1);
 417                 opt->tstamp.ptr = (*buff-1)/4;
 418                 buff++;
 419                 opt->tstamp.x.full_char = *buff;
 420                 buff++;
 421                 for (i = 0; i < opt->tstamp.len; i++) {
 422                         opt->tstamp.data[i] = *(unsigned long *)buff;
 423                         buff += 4;
 424                 }
 425                 break;
 426         default:
 427                 return(1);
 428   }
 429 
 430   if (opt->record_route.route_size == 0) {
 431         if (opt->strict_route.route_size != 0) {
 432                 memcpy(&(opt->record_route), &(opt->strict_route),
 433                                              sizeof(opt->record_route));
 434         } else if (opt->loose_route.route_size != 0) {
 435                 memcpy(&(opt->record_route), &(opt->loose_route),
 436                                              sizeof(opt->record_route));
 437         }
 438   }
 439 
 440   if (opt->strict_route.route_size != 0 &&
 441       opt->strict_route.route_size != opt->strict_route.pointer) {
 442         strict_route(iph, opt);
 443         return(0);
 444   }
 445 
 446   if (opt->loose_route.route_size != 0 &&
 447       opt->loose_route.route_size != opt->loose_route.pointer) {
 448         loose_route(iph, opt);
 449         return(0);
 450   }
 451 
 452   return(0);
 453 }
 454 
 455 /* 
 456  *      This is a version of ip_compute_csum() optimized for IP headers, which
 457  *      always checksum on 4 octet boundaries. 
 458  */
 459  
 460 static inline unsigned short ip_fast_csum(unsigned char * buff, int wlen)
 461 {
 462         unsigned long sum = 0;
 463 
 464         if (wlen) 
 465         {
 466         unsigned long bogus;
 467          __asm__("clc\n"
 468                 "1:\t"
 469                 "lodsl\n\t"
 470                 "adcl %3, %0\n\t"
 471                 "decl %2\n\t"
 472                 "jne 1b\n\t"
 473                 "adcl $0, %0\n\t"
 474                 "movl %0, %3\n\t"
 475                 "shrl $16, %3\n\t"
 476                 "addw %w3, %w0\n\t"
 477                 "adcw $0, %w0"
 478             : "=r" (sum), "=S" (buff), "=r" (wlen), "=a" (bogus)
 479             : "0"  (sum),  "1" (buff),  "2" (wlen));
 480         }
 481         return (~sum) & 0xffff;
 482 }
 483 
 484 /*
 485  * This routine does all the checksum computations that don't
 486  * require anything special (like copying or special headers).
 487  */
 488 
 489 unsigned short ip_compute_csum(unsigned char * buff, int len)
 490 {
 491         unsigned long sum = 0;
 492 
 493         /* Do the first multiple of 4 bytes and convert to 16 bits. */
 494         if (len > 3) 
 495         {
 496                 __asm__("clc\n"
 497                 "1:\t"
 498                 "lodsl\n\t"
 499                 "adcl %%eax, %%ebx\n\t"
 500                 "loop 1b\n\t"
 501                 "adcl $0, %%ebx\n\t"
 502                 "movl %%ebx, %%eax\n\t"
 503                 "shrl $16, %%eax\n\t"
 504                 "addw %%ax, %%bx\n\t"
 505                 "adcw $0, %%bx"
 506                 : "=b" (sum) , "=S" (buff)
 507                 : "0" (sum), "c" (len >> 2) ,"1" (buff)
 508                 : "ax", "cx", "si", "bx" );
 509         }
 510         if (len & 2) 
 511         {
 512                 __asm__("lodsw\n\t"
 513                 "addw %%ax, %%bx\n\t"
 514                 "adcw $0, %%bx"
 515                 : "=b" (sum), "=S" (buff)
 516                 : "0" (sum), "1" (buff)
 517                 : "bx", "ax", "si");
 518         }
 519         if (len & 1) 
 520         {
 521                 __asm__("lodsb\n\t"
 522                 "movb $0, %%ah\n\t"
 523                 "addw %%ax, %%bx\n\t"
 524                 "adcw $0, %%bx"
 525                 : "=b" (sum), "=S" (buff)
 526                 : "0" (sum), "1" (buff)
 527                 : "bx", "ax", "si");
 528         }
 529         sum =~sum;
 530         return(sum & 0xffff);
 531 }
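      
      /*
       *	For reference: both routines above compute the standard 16 bit ones
       *	complement sum and return its complement. A portable (but slower)
       *	equivalent, assuming the little-endian CPU the asm targets, would
       *	look roughly like the sketch below. The name is made up and the
       *	code is kept out of the build.
       */
      #if 0
      static unsigned short ip_slow_csum(unsigned char *buff, int len)
      {
      	unsigned long sum = 0;
      
      	while (len > 1)			/* sum the 16 bit words */
      	{
      		sum += *(unsigned short *) buff;
      		buff += 2;
      		len -= 2;
      	}
      	if (len)			/* odd trailing byte */
      		sum += *buff;
      	while (sum >> 16)		/* fold the carries back into the low 16 bits */
      		sum = (sum & 0xffff) + (sum >> 16);
      	return((~sum) & 0xffff);
      }
      #endif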
 532 
 533 /* 
 534  *      Check the header of an incoming IP datagram.  This version is still used in slhc.c. 
 535  */
 536  
 537 int ip_csum(struct iphdr *iph)
 538 {
 539         return ip_fast_csum((unsigned char *)iph, iph->ihl);
 540 }
 541 
 542 /* 
  543  *      Generate a checksum for an outgoing IP datagram. 
 544  */
 545 
 546 static void ip_send_check(struct iphdr *iph)
 547 {
 548         iph->check = 0;
 549         iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
 550 }
 551 
 552 /************************ Fragment Handlers From NET2E not yet with tweaks to beat 4K **********************************/
 553 
 554 
 555 /*
 556  *      This fragment handler is a bit of a heap. On the other hand it works quite
 557  *      happily and handles things quite well.
 558  */
 559  
 560 static struct ipq *ipqueue = NULL;              /* IP fragment queue    */
 561 
 562 /*
 563  *      Create a new fragment entry. 
 564  */
 565  
 566 static struct ipfrag *ip_frag_create(int offset, int end, struct sk_buff *skb, unsigned char *ptr)
 567 {
 568         struct ipfrag *fp;
 569  
 570         fp = (struct ipfrag *) kmalloc(sizeof(struct ipfrag), GFP_ATOMIC);
 571         if (fp == NULL) 
 572         {
 573                 printk("IP: frag_create: no memory left !\n");
 574                 return(NULL);
 575         }
 576         memset(fp, 0, sizeof(struct ipfrag));
 577 
 578         /* Fill in the structure. */
 579         fp->offset = offset;
 580         fp->end = end;
 581         fp->len = end - offset;
 582         fp->skb = skb;
 583         fp->ptr = ptr;
 584  
 585         return(fp);
 586 }
 587  
 588  
 589 /*
 590  *      Find the correct entry in the "incomplete datagrams" queue for
 591  *      this IP datagram, and return the queue entry address if found.
 592  */
 593  
 594 static struct ipq *ip_find(struct iphdr *iph)
 595 {
 596         struct ipq *qp;
 597         struct ipq *qplast;
 598  
 599         cli();
 600         qplast = NULL;
 601         for(qp = ipqueue; qp != NULL; qplast = qp, qp = qp->next) 
 602         {
 603                 if (iph->id== qp->iph->id && iph->saddr == qp->iph->saddr &&
 604                         iph->daddr == qp->iph->daddr && iph->protocol == qp->iph->protocol) 
 605                 {
 606                         del_timer(&qp->timer);  /* So it doesn't vanish on us. The timer will be reset anyway */
 607                         sti();
 608                         return(qp);
 609                 }
 610         }
 611         sti();
 612         return(NULL);
 613 }
 614  
 615  
 616 /*
 617  *      Remove an entry from the "incomplete datagrams" queue, either
 618  *      because we completed, reassembled and processed it, or because
 619  *      it timed out.
 620  */
 621 
 622 static void ip_free(struct ipq *qp)
 623 {
 624         struct ipfrag *fp;
 625         struct ipfrag *xp;
 626 
 627         /*
 628          * Stop the timer for this entry. 
 629          */
 630          
 631         del_timer(&qp->timer);
 632 
 633         /* Remove this entry from the "incomplete datagrams" queue. */
 634         cli();
 635         if (qp->prev == NULL) 
 636         {
 637                 ipqueue = qp->next;
 638                 if (ipqueue != NULL) 
 639                         ipqueue->prev = NULL;
 640         } 
 641         else 
 642         {
 643                 qp->prev->next = qp->next;
 644                 if (qp->next != NULL) 
 645                         qp->next->prev = qp->prev;
 646         }
 647  
 648         /* Release all fragment data. */
 649 
 650         fp = qp->fragments;
 651         while (fp != NULL) 
 652         {
 653                 xp = fp->next;
 654                 IS_SKB(fp->skb);
 655                 kfree_skb(fp->skb,FREE_READ);
 656                 kfree_s(fp, sizeof(struct ipfrag));
 657                 fp = xp;
 658         }
 659         
 660         /* Release the MAC header. */
 661         kfree_s(qp->mac, qp->maclen);
 662  
 663         /* Release the IP header. */
 664         kfree_s(qp->iph, qp->ihlen + 8);
 665  
 666         /* Finally, release the queue descriptor itself. */
 667         kfree_s(qp, sizeof(struct ipq));
 668 /*      printk("ip_free:done\n");*/
 669         sti();
 670  }
 671  
 672  
 673 /*
 674  *      Oops- a fragment queue timed out.  Kill it and send an ICMP reply. 
 675  */
 676  
 677 static void ip_expire(unsigned long arg)
 678 {
 679         struct ipq *qp;
 680  
 681         qp = (struct ipq *)arg;
 682 
 683         /*
 684          *      Send an ICMP "Fragment Reassembly Timeout" message. 
 685          */
 686 
 687         ip_statistics.IpReasmTimeout++;
 688         ip_statistics.IpReasmFails++;            
 689         /* This if is always true... shrug */
 690         if(qp->fragments!=NULL)
 691                 icmp_send(qp->fragments->skb,ICMP_TIME_EXCEEDED,
 692                                 ICMP_EXC_FRAGTIME, qp->dev);
 693  
 694         /* 
 695          *      Nuke the fragment queue. 
 696          */
 697         ip_free(qp);
 698 }
 699  
 700  
 701 /*
 702  *      Add an entry to the 'ipq' queue for a newly received IP datagram.
 703  *      We will (hopefully :-) receive all other fragments of this datagram
 704  *      in time, so we just create a queue for this datagram, in which we
 705  *      will insert the received fragments at their respective positions.
 706  */
 707 
 708 static struct ipq *ip_create(struct sk_buff *skb, struct iphdr *iph, struct device *dev)
 709 {
 710         struct ipq *qp;
 711         int maclen;
 712         int ihlen;
 713 
 714         qp = (struct ipq *) kmalloc(sizeof(struct ipq), GFP_ATOMIC);
 715         if (qp == NULL) 
 716         {
 717                 printk("IP: create: no memory left !\n");
 718                 return(NULL);
  720         }
 721         memset(qp, 0, sizeof(struct ipq));
 722 
 723         /*
 724          *      Allocate memory for the MAC header. 
 725          *
 726          *      FIXME: We have a maximum MAC address size limit and define 
 727          *      elsewhere. We should use it here and avoid the 3 kmalloc() calls
 728          */
 729          
 730         maclen = ((unsigned long) iph) - ((unsigned long) skb->data);
 731         qp->mac = (unsigned char *) kmalloc(maclen, GFP_ATOMIC);
 732         if (qp->mac == NULL) 
 733         {
 734                 printk("IP: create: no memory left !\n");
 735                 kfree_s(qp, sizeof(struct ipq));
 736                 return(NULL);
 737         }
 738 
 739         /* 
  740  *      Allocate memory for the IP header (plus 8 octets for ICMP). 
 741          */
 742          
 743         ihlen = (iph->ihl * sizeof(unsigned long));
 744         qp->iph = (struct iphdr *) kmalloc(ihlen + 8, GFP_ATOMIC);
 745         if (qp->iph == NULL) 
 746         {
 747                 printk("IP: create: no memory left !\n");
 748                 kfree_s(qp->mac, maclen);
 749                 kfree_s(qp, sizeof(struct ipq));
 750                 return(NULL);
 751         }
 752 
 753         /* Fill in the structure. */
 754         memcpy(qp->mac, skb->data, maclen);
 755         memcpy(qp->iph, iph, ihlen + 8);
 756         qp->len = 0;
 757         qp->ihlen = ihlen;
 758         qp->maclen = maclen;
 759         qp->fragments = NULL;
 760         qp->dev = dev;
 761         
 762         /* Start a timer for this entry. */
 763         qp->timer.expires = IP_FRAG_TIME;               /* about 30 seconds     */
 764         qp->timer.data = (unsigned long) qp;            /* pointer to queue     */
 765         qp->timer.function = ip_expire;                 /* expire function      */
 766         add_timer(&qp->timer);
 767 
 768         /* Add this entry to the queue. */
 769         qp->prev = NULL;
 770         cli();
 771         qp->next = ipqueue;
 772         if (qp->next != NULL) 
 773                 qp->next->prev = qp;
 774         ipqueue = qp;
 775         sti();
 776         return(qp);
 777 }
 778  
 779  
 780 /*
 781  *      See if a fragment queue is complete. 
 782  */
 783  
 784 static int ip_done(struct ipq *qp)
 785 {
 786         struct ipfrag *fp;
 787         int offset;
 788  
 789         /* Only possible if we received the final fragment. */
 790         if (qp->len == 0) 
 791                 return(0);
 792  
 793         /* Check all fragment offsets to see if they connect. */
 794         fp = qp->fragments;
 795         offset = 0;
 796         while (fp != NULL) 
 797         {
 798                 if (fp->offset > offset) 
 799                         return(0);      /* fragment(s) missing */
 800                 offset = fp->end;
 801                 fp = fp->next;
 802         }
 803  
 804         /* All fragments are present. */
 805         return(1);
 806  }
 807  
 808  
 809 /* 
 810  *      Build a new IP datagram from all its fragments. 
 811  *
 812  *      FIXME: We copy here because we lack an effective way of handling lists
 813  *      of bits on input. Until the new skb data handling is in I'm not going
 814  *      to touch this with a bargepole. This also causes a 4Kish limit on
 815  *      packet sizes.
 816  */
 817  
 818 static struct sk_buff *ip_glue(struct ipq *qp)
 819 {
 820         struct sk_buff *skb;
 821         struct iphdr *iph;
 822         struct ipfrag *fp;
 823         unsigned char *ptr;
 824         int count, len;
 825  
 826         /*
 827          *      Allocate a new buffer for the datagram. 
 828          */
 829          
 830         len = qp->maclen + qp->ihlen + qp->len;
 831 
 832         if ((skb = alloc_skb(len,GFP_ATOMIC)) == NULL) 
 833         {
 834                 ip_statistics.IpReasmFails++;
 835                 printk("IP: queue_glue: no memory for glueing queue 0x%X\n", (int) qp);
 836                 ip_free(qp);
 837                 return(NULL);
 838         }
 839  
 840         /* Fill in the basic details. */
 841         skb->len = (len - qp->maclen);
 842         skb->h.raw = skb->data;
 843         skb->free = 1;
 844  
 845         /* Copy the original MAC and IP headers into the new buffer. */
 846         ptr = (unsigned char *) skb->h.raw;
 847         memcpy(ptr, ((unsigned char *) qp->mac), qp->maclen);
 848         ptr += qp->maclen;
 849         memcpy(ptr, ((unsigned char *) qp->iph), qp->ihlen);
 850         ptr += qp->ihlen;
 851         skb->h.raw += qp->maclen;
 852         
 853         count = 0;
 854  
 855         /* Copy the data portions of all fragments into the new buffer. */
 856         fp = qp->fragments;
 857         while(fp != NULL) 
 858         {
 859                 if(count+fp->len>skb->len)
 860                 {
 861                         printk("Invalid fragment list: Fragment over size.\n");
 862                         ip_free(qp);
 863                         kfree_skb(skb,FREE_WRITE);
 864                         ip_statistics.IpReasmFails++;
 865                         return NULL;
 866                 }
 867                 memcpy((ptr + fp->offset), fp->ptr, fp->len);
 868                 count += fp->len;
 869                 fp = fp->next;
 870         }
 871  
 872         /* We glued together all fragments, so remove the queue entry. */
 873         ip_free(qp);
 874  
 875         /* Done with all fragments. Fixup the new IP header. */
 876         iph = skb->h.iph;
 877         iph->frag_off = 0;
 878         iph->tot_len = htons((iph->ihl * sizeof(unsigned long)) + count);
 879         skb->ip_hdr = iph;
 880         
 881         ip_statistics.IpReasmOKs++;
 882         return(skb);
 883 }
 884  
 885 
 886 /*
 887  *      Process an incoming IP datagram fragment. 
 888  */
 889  
 890 static struct sk_buff *ip_defrag(struct iphdr *iph, struct sk_buff *skb, struct device *dev)
 891 {
 892         struct ipfrag *prev, *next;
 893         struct ipfrag *tfp;
 894         struct ipq *qp;
 895         struct sk_buff *skb2;
 896         unsigned char *ptr;
 897         int flags, offset;
 898         int i, ihl, end;
 899 
 900         ip_statistics.IpReasmReqds++;
 901         
 902         /* Find the entry of this IP datagram in the "incomplete datagrams" queue. */
 903         qp = ip_find(iph);
 904  
 905         /* Is this a non-fragmented datagram? */
 906         offset = ntohs(iph->frag_off);
 907         flags = offset & ~IP_OFFSET;
 908         offset &= IP_OFFSET;
 909         if (((flags & IP_MF) == 0) && (offset == 0)) 
 910         {
 911                 if (qp != NULL)
 912                         ip_free(qp);    /* Huh? How could this exist?? */
 913                 return(skb);
 914         }
 915 
 916         offset <<= 3;           /* offset is in 8-byte chunks */
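      
      	/*
      	 *	Worked example: a middle fragment starting at byte 1480 arrives
      	 *	with frag_off = IP_MF | 185 in host order (185 * 8 = 1480); the
      	 *	lines above then leave IP_MF set in flags and offset = 1480.
      	 */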
 917  
 918         /*
 919          * If the queue already existed, keep restarting its timer as long
 920          * as we still are receiving fragments.  Otherwise, create a fresh
 921          * queue entry.
 922          */
 923 
 924         if (qp != NULL) 
 925         {
 926                 del_timer(&qp->timer);
 927                 qp->timer.expires = IP_FRAG_TIME;       /* about 30 seconds */
 928                 qp->timer.data = (unsigned long) qp;    /* pointer to queue */
 929                 qp->timer.function = ip_expire;         /* expire function */
 930                 add_timer(&qp->timer);
 931         } 
 932         else 
 933         {
 934                 /*
 935                  *      If we failed to create it, then discard the frame
 936                  */
 937                 if ((qp = ip_create(skb, iph, dev)) == NULL) 
 938                 {
 939                         skb->sk = NULL;
 940                         kfree_skb(skb, FREE_READ);
 941                         ip_statistics.IpReasmFails++;
 942                         return NULL;
 943                 }
 944         }
 945 
 946         /*
 947          *      Determine the position of this fragment. 
 948          */
 949          
 950         ihl = (iph->ihl * sizeof(unsigned long));
 951         end = offset + ntohs(iph->tot_len) - ihl;
 952  
 953         /*
 954          *      Point into the IP datagram 'data' part. 
 955          */
 956 
 957         ptr = skb->data + dev->hard_header_len + ihl;
 958  
 959         /* 
 960          *      Is this the final fragment? 
 961          */
 962 
 963         if ((flags & IP_MF) == 0) 
 964                 qp->len = end;
 965  
 966         /*
 967          *      Find out which fragments are in front and at the back of us
 968          *      in the chain of fragments so far.  We must know where to put
 969          *      this fragment, right?
 970          */
 971          
 972         prev = NULL;
 973         for(next = qp->fragments; next != NULL; next = next->next) 
 974         {
 975                 if (next->offset > offset) 
 976                         break;  /* bingo! */
 977                 prev = next;
 978         }       
 979  
 980         /*
 981          *      We found where to put this one.
  982          *      Check for overlap with the preceding fragment, and, if needed,
 983          *      align things so that any overlaps are eliminated.
 984          */
 985         if (prev != NULL && offset < prev->end) 
 986         {
 987                 i = prev->end - offset;
 988                 offset += i;    /* ptr into datagram */
 989                 ptr += i;       /* ptr into fragment data */
 990         }       
 991  
 992         /*
 993          * Look for overlap with succeeding segments.
 994          * If we can merge fragments, do it.
 995          */
 996    
 997         for(; next != NULL; next = tfp) 
 998         {
 999                 tfp = next->next;
1000                 if (next->offset >= end) 
1001                         break;          /* no overlaps at all */
1002  
1003                 i = end - next->offset;                 /* overlap is 'i' bytes */
1004                 next->len -= i;                         /* so reduce size of    */
1005                 next->offset += i;                      /* next fragment        */
1006                 next->ptr += i;
1007                 
1008                 /* 
1009                  *      If we get a frag size of <= 0, remove it and the packet
1010                  *      that it goes with.
1011                  */
1012                 if (next->len <= 0) 
1013                 {
1014                         if (next->prev != NULL) 
1015                                 next->prev->next = next->next;
1016                         else 
1017                                 qp->fragments = next->next;
1018                 
 1019                         if (next->next != NULL) 
1020                                 next->next->prev = next->prev;
1021                                 
1022                         kfree_skb(next->skb,FREE_READ);                         
1023                         kfree_s(next, sizeof(struct ipfrag));
1024                 }
1025         }
1026  
1027         /* 
1028          *      Insert this fragment in the chain of fragments. 
1029          */
1030          
1031         tfp = NULL;
1032         tfp = ip_frag_create(offset, end, skb, ptr);
1033         
1034         /*
1035          *      No memory to save the fragment - so throw the lot
1036          */
1037         
1038         if (!tfp) 
1039         {
1040                 skb->sk = NULL;
1041                 kfree_skb(skb, FREE_READ);
1042                 return NULL;
1043         }
1044         tfp->prev = prev;
1045         tfp->next = next;
1046         if (prev != NULL) 
1047                 prev->next = tfp;
1048         else 
1049                 qp->fragments = tfp;
1050    
1051         if (next != NULL) 
1052                 next->prev = tfp;
1053  
1054         /*
1055          *      OK, so we inserted this new fragment into the chain.
1056          *      Check if we now have a full IP datagram which we can
1057          *      bump up to the IP layer...
1058          */
1059    
1060         if (ip_done(qp)) 
1061         {
1062                 skb2 = ip_glue(qp);             /* glue together the fragments */
1063                 return(skb2);
1064         }
1065         return(NULL);
1066  }
1067  
1068  
1069  /*
1070   *     This IP datagram is too large to be sent in one piece.  Break it up into
1071   *     smaller pieces (each of size equal to the MAC header plus IP header plus
1072   *     a block of the data of the original IP data part) that will yet fit in a
1073   *     single device frame, and queue such a frame for sending by calling the
1074   *     ip_queue_xmit().  Note that this is recursion, and bad things will happen
1075   *     if this function causes a loop...
1076   *
1077   *     Yes this is inefficient, feel free to submit a quicker one.
1078   *
1079   *     **Protocol Violation**
1080   *     We copy all the options to each fragment. !FIXME!
1081   */
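      
       /*
       *     A worked example of the arithmetic below: on an Ethernet device
       *     (dev->mtu = 1500, dev->hard_header_len = 14), a datagram with a 20
       *     byte IP header gives hlen = 34 and mtu = 1466 octets of data space
       *     per frame, which the copy loop then rounds down to 1464 (a multiple
       *     of 8) for every fragment except the last.
       */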
1082   
1083  void ip_fragment(struct sock *sk, struct sk_buff *skb, struct device *dev, int is_frag)
1084  {
1085         struct iphdr *iph;
1086         unsigned char *raw;
1087         unsigned char *ptr;
1088         struct sk_buff *skb2;
1089         int left, mtu, hlen, len;
1090         int offset;
1091  
1092         /* 
1093          *      Point into the IP datagram header. 
1094          */
1095          
1096         raw = skb->data;
1097         iph = (struct iphdr *) (raw + dev->hard_header_len);
1098 
1099         skb->ip_hdr = iph;
1100                 
1101         /* 
1102          *      Setup starting values. 
1103          */
1104          
1105         hlen = (iph->ihl * sizeof(unsigned long));
1106         left = ntohs(iph->tot_len) - hlen;      /* Space per frame */
1107         hlen += dev->hard_header_len;           /* Total header size */
1108         mtu = (dev->mtu - hlen);                /* Size of data space */
1109         ptr = (raw + hlen);                     /* Where to start from */
1110         
1111         /*
1112          *      Check for any "DF" flag. [DF means do not fragment]
1113          */
1114          
1115         if (ntohs(iph->frag_off) & IP_DF) 
1116         {
1117                 ip_statistics.IpFragFails++;
1118                 icmp_send(skb,ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, dev); 
1119                 return;
1120         }
1121  
1122         /*
 1123          *      The protocol doesn't seem to say what to do in the case where the
 1124          *      frame plus options doesn't fit the mtu. As the old code used to fall
 1125          *      down dead in this case, we were fortunate it didn't happen.
1126          */
1127          
1128         if(mtu<8)
1129         {
1130                 /* It's wrong but its better than nothing */
1131                 icmp_send(skb,ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED,dev);
1132                 ip_statistics.IpFragFails++;
1133                 return;
1134         }
1135         
1136         /* 
1137          *      Fragment the datagram. 
1138          */
1139          
1140         /*
1141          *      The initial offset is 0 for a complete frame. When
 1142          *      fragmenting fragments it's wherever this one starts.
1143          */
1144 
1145         if (is_frag & 2)
1146                 offset = (ntohs(iph->frag_off) & 0x1fff) << 3;
1147         else
1148                 offset = 0;
1149 
1150 
1151         /*
1152          *      Keep copying data until we run out.
1153          */
1154                         
1155         while(left > 0) 
1156         {
1157                 len = left;
1158                 /* IF: it doesn't fit, use 'mtu' - the data space left */
1159                 if (len > mtu)
1160                         len = mtu;
 1161                 /* IF: we are not sending up to and including the packet end
1162                    then align the next start on an eight byte boundary */
1163                 if (len < left)
1164                 {
1165                         len/=8;
1166                         len*=8;
1167                 }
1168                 /*
1169                  *      Allocate buffer. 
1170                  */
1171                  
1172                 if ((skb2 = alloc_skb(len + hlen,GFP_ATOMIC)) == NULL) 
1173                 {
1174                         printk("IP: frag: no memory for new fragment!\n");
1175                         ip_statistics.IpFragFails++;
1176                         return;
1177                 }
1178                 
1179                 /*
1180                  *      Set up data on packet
1181                  */
1182 
1183                 skb2->arp = 0;/*skb->arp;*/
1184                 skb2->free = skb->free;
1185                 skb2->len = len + hlen;
1186                 skb2->h.raw=(char *) skb2->data;
1187                 skb2->raddr = skb->raddr;       /* For rebuild_header */
1188                 /*
1189                  *      Charge the memory for the fragment to any owner
 1190                  *      it might possess
1191                  */
1192                  
1193                 if (sk) 
1194                         sk->wmem_alloc += skb2->mem_len;
1195  
1196                 /* 
1197                  *      Copy the packet header into the new buffer. 
1198                  */
1199                  
1200                 memcpy(skb2->h.raw, raw, hlen);
1201  
1202                 /*
1203                  *      Copy a block of the IP datagram. 
1204                  */
1205                 memcpy(skb2->h.raw + hlen, ptr, len);
1206                 left -= len;
1207 
1208                 skb2->h.raw+=dev->hard_header_len; 
1209 
1210                 /*
1211                  *      Fill in the new header fields. 
1212                  */
1213                 iph = (struct iphdr *)(skb2->h.raw/*+dev->hard_header_len*/);
1214                 iph->frag_off = htons((offset >> 3));
1215                 /* 
 1216                  *      Added AC : If we are fragmenting a fragment that's not the
 1217                  *                 last fragment then keep the MF bit set on each piece 
1218                  */
1219                 if (left > 0 || (is_frag & 1)) 
1220                         iph->frag_off |= htons(IP_MF);
1221                 ptr += len;
1222                 offset += len;
1223  
1224                 /* 
1225                  *      Put this fragment into the sending queue. 
1226                  */
1227                  
1228                 ip_statistics.IpFragCreates++;
1229                 
1230                 ip_queue_xmit(sk, dev, skb2, 1);
1231         }
1232         ip_statistics.IpFragOKs++;
1233 }
1234  
1235 
1236 
1237 #ifdef CONFIG_IP_FORWARD
1238 
1239 /* 
1240  *      Forward an IP datagram to its next destination. 
1241  */
1242 
1243 static void ip_forward(struct sk_buff *skb, struct device *dev, int is_frag)
1244 {
1245         struct device *dev2;    /* Output device */
1246         struct iphdr *iph;      /* Our header */
1247         struct sk_buff *skb2;   /* Output packet */
1248         struct rtable *rt;      /* Route we use */
1249         unsigned char *ptr;     /* Data pointer */
1250         unsigned long raddr;    /* Router IP address */
1251 
1252         /*
1253          *      According to the RFC, we must first decrease the TTL field. If
 1254          *      that reaches zero, we must reply with an ICMP control message
 1255          *      telling the sender that the packet's lifetime expired.
1256          *
1257          *      Exception:
1258          *      We may not generate an ICMP for an ICMP. icmp_send does the
1259          *      enforcement of this so we can forget it here. It is however
1260          *      sometimes VERY important.
1261          */
1262 
1263         iph = skb->h.iph;
1264         iph->ttl--;
1265         if (iph->ttl <= 0) 
1266         {
1267                 /* Tell the sender its packet died... */
1268                 icmp_send(skb, ICMP_TIME_EXCEEDED, ICMP_EXC_TTL, dev);
1269                 return;
1270         }
1271 
1272         /* 
1273          *      Re-compute the IP header checksum. 
1274          *      This is inefficient. We know what has happened to the header
1275          *      and could thus adjust the checksum as Phil Karn does in KA9Q
1276          */
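      
      	/*
      	 *	For reference, that incremental adjustment (the RFC 1141 style
      	 *	update) would look roughly like this sketch instead of the full
      	 *	recompute below; it is not used here:
      	 *
      	 *		unsigned long check = iph->check;
      	 *		check += htons(0x0100);
      	 *		iph->check = check + (check >= 0xFFFF);
      	 *
      	 *	Decrementing the TTL lowers the TTL/protocol word by 0x0100, so
      	 *	the complemented checksum rises by the same amount once the
      	 *	carry is folded back in.
      	 */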
1277          
1278         ip_send_check(iph);
1279 
1280         /*
1281          * OK, the packet is still valid.  Fetch its destination address,
1282          * and give it to the IP sender for further processing.
1283          */
1284 
1285         rt = ip_rt_route(iph->daddr, NULL, NULL);
1286         if (rt == NULL) 
1287         {
1288                 /*
1289                  *      Tell the sender its packet cannot be delivered. Again
1290                  *      ICMP is screened later.
1291                  */
1292                 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_NET_UNREACH, dev);
1293                 return;
1294         }
1295 
1296 
1297         /*
1298          * Gosh.  Not only is the packet valid; we even know how to
1299          * forward it onto its final destination.  Can we say this
1300          * is being plain lucky?
1301          * If the router told us that there is no GW, use the dest.
1302          * IP address itself- we seem to be connected directly...
1303          */
1304 
1305         raddr = rt->rt_gateway;
1306 
1307         if (raddr != 0) 
1308         {
1309                 /*
1310                  *      There is a gateway so find the correct route for it.
1311                  *      Gateways cannot in turn be gatewayed.
1312                  */
1313                 rt = ip_rt_route(raddr, NULL, NULL);
1314                 if (rt == NULL) 
1315                 {
1316                         /* 
1317                          *      Tell the sender its packet cannot be delivered... 
1318                          */
1319                         icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, dev);
1320                         return;
1321                 }
1322                 if (rt->rt_gateway != 0) 
1323                         raddr = rt->rt_gateway;
1324         } 
1325         else 
1326                 raddr = iph->daddr;
1327                 
1328         /*
1329          *      Having picked a route we can now send the frame out.
1330          */
1331 
1332         dev2 = rt->rt_dev;
1333 
1334         /*
1335          *      In IP you never forward a frame on the interface that it arrived
1336          *      upon. We should generate an ICMP HOST REDIRECT giving the route
1337          *      we calculated.
1338          *      For now just dropping the packet is an acceptable compromise.
1339          */
1340 
1341         if (dev == dev2)
1342                 return;
1343 
1344         /*
1345          * We now allocate a new buffer, and copy the datagram into it.
1346          * If the indicated interface is up and running, kick it.
1347          */
1348 
1349         if (dev2->flags & IFF_UP) 
1350         {
1351         
1352                 /*
1353                  *      Current design decrees we copy the packet. For identical header
1354                  *      lengths we could avoid it. The new skb code will let us push
1355                  *      data so the problem goes away then.
1356                  */
1357                  
1358                 skb2 = alloc_skb(dev2->hard_header_len + skb->len, GFP_ATOMIC);
1359                 /*
1360                  *      This is rare and since IP is tolerant of network failures
1361                  *      quite harmless.
1362                  */
1363                 if (skb2 == NULL) 
1364                 {
1365                         printk("\nIP: No memory available for IP forward\n");
1366                         return;
1367                 }
1368                 ptr = skb2->data;
1369                 skb2->free = 1;
1370                 skb2->len = skb->len + dev2->hard_header_len;
1371                 skb2->h.raw = ptr;
1372 
1373                 /* 
1374                  *      Copy the packet data into the new buffer. 
1375                  */
1376                 memcpy(ptr + dev2->hard_header_len, skb->h.raw, skb->len);
1377                 
1378                 /* Now build the MAC header. */
1379                 (void) ip_send(skb2, raddr, skb->len, dev2, dev2->pa_addr);
1380 
1381                 ip_statistics.IpForwDatagrams++;
1382 
1383                 /*
1384                  *      See if it needs fragmenting. Note in ip_rcv we tagged
1385                  *      the fragment type. This must be right so that
1386                  *      the fragmenter does the right thing.
1387                  */
1388                  
1389                 if(skb2->len > dev2->mtu)
1390                 {
1391                         ip_fragment(NULL,skb2,dev2, is_frag);
1392                         kfree_skb(skb2,FREE_WRITE);
1393                 }
1394                 else
1395                 {
1396                         /*
1397                          *      Map service types to priority. We lie about
 1398                          *      throughput being low priority, but it's a good
1399                          *      choice to help improve general usage.
1400                          */
1401                         if(iph->tos & IPTOS_LOWDELAY)
1402                                 dev_queue_xmit(skb2, dev2, SOPRI_INTERACTIVE);
1403                         else if(iph->tos & IPTOS_THROUGHPUT)
1404                                 dev_queue_xmit(skb2, dev2, SOPRI_BACKGROUND);
1405                         else
1406                                 dev_queue_xmit(skb2, dev2, SOPRI_NORMAL);
1407                 }
1408         }
1409 }
1410 
1411 
1412 #endif
1413 
1414 /*
1415  *      This function receives all incoming IP datagrams. 
1416  */
1417  
1418 int ip_rcv(struct sk_buff *skb, struct device *dev, struct packet_type *pt)
1419 {
1420         struct iphdr *iph = skb->h.iph;
1421         unsigned char hash;
1422         unsigned char flag = 0;
1423         unsigned char opts_p = 0;       /* Set iff the packet has options. */
1424         struct inet_protocol *ipprot;
1425         static struct options opt; /* since we don't use these yet, and they
1426                                 take up stack space. */
1427         int brd;
1428         int is_frag=0;
1429 
1430 
1431         ip_statistics.IpInReceives++;
1432         
1433         /*
1434          *      Tag the ip header of this packet so we can find it
1435          */
1436          
1437         skb->ip_hdr = iph;
1438 
1439         /*
1440          *      Is the datagram acceptable? 
1441          *
1442          *      1.      Length at least the size of an ip header
1443          *      2.      Version of 4
1444          *      3.      Checksums correctly. [Speed optimisation for later, skip loopback checksums]
1445          *      (4.     We ought to check for IP multicast addresses and undefined types.. does this matter ?)
1446          */
1447 
1448         if (skb->len<sizeof(struct iphdr) || iph->ihl<5 || iph->version != 4 || ip_fast_csum((unsigned char *)iph, iph->ihl) !=0) 
1449         {
1450                 ip_statistics.IpInHdrErrors++;
1451                 kfree_skb(skb, FREE_WRITE);
1452                 return(0);
1453         }
1454         
1455         /*
1456          *      Our transport medium may have padded the buffer out. Now we know it
1457          *      is IP we can trim to the true length of the frame.
1458          */
1459          
1460         skb->len=ntohs(iph->tot_len);
1461 
1462         /*
 1463          *      Next analyse the packet for options. Studies show fewer than one packet
 1464          *      in a thousand has options....
1465          */
1466            
1467         if (iph->ihl != 5) 
 1468         {       /* The typical optionless packet (ihl == 5) skips all of this. */
1469                 memset((char *) &opt, 0, sizeof(opt));
1470                 if (do_options(iph, &opt) != 0)
1471                         return 0;
1472                 opts_p = 1;
1473         }
1474 
1475         /*
1476          *      Remember if the frame is fragmented.
1477          */
1478 
 1479         if (iph->frag_off & 0x0020)     /* IP_MF: 0x2000 in host order, here tested on the raw network-order field */
1480                 is_frag|=1;
1481         
1482         /*
 1483          *      A later fragment (non-zero offset), i.e. not the first piece?
1484          */
1485          
1486         if (ntohs(iph->frag_off) & 0x1fff)
1487                 is_frag|=2;
1488         
1489         /* 
1490          *      Do any IP forwarding required.  ip_chk_addr() is expensive -- avoid it someday. 
1491          *
1492          *      This is inefficient. While finding out if it is for us we could also compute
1493          *      the routing table entry. This is where the great unified cache theory comes
1494          *      in, as and when someone implements it.
1495          */
1496 
1497         if ((brd = ip_chk_addr(iph->daddr)) == 0) 
1498         {
1499                 /*
1500                  *      Don't forward multicast or broadcast frames.
1501                  */
1502         
1503                 if(skb->pkt_type!=PACKET_HOST)
1504                 {
1505                         kfree_skb(skb,FREE_WRITE);
1506                         return 0;
1507                 }
1508                 
1509                 /*
1510                  *      The packet is for another target. Forward the frame
1511                  */
1512                  
1513 #ifdef CONFIG_IP_FORWARD
1514                 ip_forward(skb, dev, is_frag);
1515 #else
1516                 printk("Machine %lx tried to use us as a forwarder to %lx but we have forwarding disabled!\n",
1517                         iph->saddr,iph->daddr);
1518                 ip_statistics.IpInAddrErrors++;
1519 #endif                  
1520                 /*
1521                  *      The forwarder is inefficient and copies the packet. We 
1522                  *      free the original now.
1523                  */
1524                  
1525                 kfree_skb(skb, FREE_WRITE);
1526                 return(0);
1527         }
1528 
1529         /*
1530          * Reassemble IP fragments. 
1531          */
1532 
1533         if(is_frag)
1534         {
1535                 /* Defragment. Obtain the complete packet if there is one */
1536                 skb=ip_defrag(iph,skb,dev);
1537                 if(skb==NULL)
1538                         return 0;
1539                 iph=skb->h.iph;
1540         }
1541 
1542         /*
1543          *      Point into the IP datagram, just past the header. 
1544          */
1545 
1546         skb->ip_hdr = iph;
1547         skb->h.raw += iph->ihl*4;
1548         
1549         /*
1550          *      skb->h.raw now points at the protocol beyond the IP header.
1551          */
1552          
1553         hash = iph->protocol & (MAX_INET_PROTOS -1);
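             /*
              *      inet_protos[] is a small hash table of protocol handlers,
              *      indexed by the low bits of the IP protocol number and chained
              *      through ->next. The loop below therefore walks one hash chain
              *      and hands the datagram to every handler whose protocol field
              *      really matches.
              */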
1554         for (ipprot = (struct inet_protocol *)inet_protos[hash];ipprot != NULL;ipprot=(struct inet_protocol *)ipprot->next)
1555         {
1556                 struct sk_buff *skb2;
1557 
1558                 if (ipprot->protocol != iph->protocol) 
1559                         continue;
1560                 /*
1561                  *      See if we need to make a copy of it.  The copy flag is
1562                  *      only set if more than one protocol wants the packet, and
1563                  *      then not for the last one.
1564                  *
1565                  *      This is an artifact of poor upper protocol design:
1566                  *      because the upper protocols damage the actual packet
1567                  *      we must do copying. In actual fact it's even worse
1568                  *      than this, as TCP may hold on to the buffer.
1569                  */
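                     /*
                      *      The skb_clone() call below replaces the open-coded
                      *      duplicate kept in the #if 0 block: it hands back a
                      *      second sk_buff carrying the same datagram, so each
                      *      additional handler can consume its own buffer while
                      *      the original continues round the loop.
                      */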
1570                 if (ipprot->copy) 
1571                 {
1572 #if 0           
1573                         skb2 = alloc_skb(skb->mem_len-sizeof(struct sk_buff), GFP_ATOMIC);
1574                         if (skb2 == NULL) 
1575                                 continue;
1576                         memcpy(skb2, skb, skb2->mem_len);
1577                         skb2->ip_hdr = (struct iphdr *)(
1578                                         (unsigned long)skb2 +
1579                                         (unsigned long) skb->ip_hdr -
1580                                         (unsigned long)skb);
1581                         skb2->h.raw = (unsigned char *)(
1582                                         (unsigned long)skb2 +
1583                                         (unsigned long) skb->h.raw -
1584                                         (unsigned long)skb);
1585                         skb2->free=1;
1586 #else
1587                         skb2 = skb_clone(skb, GFP_ATOMIC);
1588                         if(skb2==NULL)
1589                                 continue;
1590 #endif                                                  
1591                 } 
1592                 else 
1593                 {
1594                         skb2 = skb;
1595                 }
1596                 flag = 1;
1597 
1598                /*
1599                 * Pass on the datagram to each protocol that wants it,
1600                 * based on the datagram protocol.  We should really
1601                 * check the protocol handler's return values here...
1602                 */
1603                 ipprot->handler(skb2, dev, opts_p ? &opt : 0, iph->daddr,
1604                                 (ntohs(iph->tot_len) - (iph->ihl * 4)),
1605                                 iph->saddr, 0, ipprot);
1606 
1607         }
1608 
1609         /*
1610          * All protocols checked.
1611          * If this packet was a broadcast, we may *not* reply to it, since that
1612          * causes (proven, grin) ARP storms and a leakage of memory (i.e. all
1613          * ICMP reply messages get queued up for transmission...)
1614          */
1615 
1616         if (!flag) 
1617         {
1618                 if (brd != IS_BROADCAST)
1619                         icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PROT_UNREACH, dev);
1620                 kfree_skb(skb, FREE_WRITE);
1621         }
1622 
1623         return(0);
1624 }
1625 
1626 
1627 /*
1628  * Queues a packet to be sent, and starts the transmitter
1629  * if necessary.  if free = 1 then we free the block after
1630  * transmit, otherwise we don't.
1631  * This routine also needs to put in the total length,
1632  * and compute the checksum
1633  */
1634  
1635 void ip_queue_xmit(struct sock *sk, struct device *dev, 
1636               struct sk_buff *skb, int free)
1637 {
1638         struct iphdr *iph;
1639         unsigned char *ptr;
1640 
1641         /* All buffers without an owner socket get freed */
1642         if (sk == NULL) 
1643                 free = 1;
1644         
1645         /* Sanity check */
1646         if (dev == NULL) 
1647         {
1648                 printk("IP: ip_queue_xmit dev = NULL\n");
1649                 return;
1650         }
1651   
1652         IS_SKB(skb);
1653         
1654         /*
1655          *      Do some book-keeping in the packet for later
1656          */
1657 
1658         skb->free = free;
1659         skb->dev = dev;
1660         skb->when = jiffies;
1661   
1662         /*
1663          *      Find the IP header and set the length. This is bad,
1664          *      but once we get the skb data handling code in, the
1665          *      hardware will push its header sensibly and we will
1666          *      set skb->ip_hdr to avoid this mess and the fixed
1667          *      header length problem.
1668          */
1669          
1670         ptr = skb->data;
1671         ptr += dev->hard_header_len;
1672         iph = (struct iphdr *)ptr;
1673         skb->ip_hdr = iph;
1674         iph->tot_len = htons(skb->len-dev->hard_header_len);
1675         iph->id      = htons(ip_id_count++);
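             /*
              *      At this point the buffer is laid out as
              *      [ link level header | IP header | payload ], so stepping over
              *      dev->hard_header_len bytes lands on the IP header, and tot_len
              *      (IP header plus payload) is simply the frame length minus that
              *      link level header.
              */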
1676 
1677         /*
1678          *      Do we need to fragment? Again, this is inefficient. 
1679          *      We need to somehow lock the original buffer and use
1680          *      bits of it.
1681          */
1682          
1683         if(skb->len > dev->mtu)
1684         {
1685                 ip_fragment(sk,skb,dev,0);
1686                 IS_SKB(skb);
1687                 kfree_skb(skb,FREE_WRITE);
1688                 return;
1689         }
1690   
1691         /*
1692          *      Add an IP checksum
1693          */
1694          
1695         ip_send_check(iph);
1696         
1697         /*
1698          *      Print the frame when debugging
1699          */
1700 
1701         /*
1702          *      More debugging. You cannot queue a packet that is already on a list;
1703          *      spot this and moan loudly.
1704          */
1705         if (skb->next != NULL) 
1706         {
1707                 printk("ip_queue_xmit: next != NULL\n");
1708                 skb_unlink(skb);
1709         }
1710 
1711         /*
1712          *      If a sender wishes the packet to remain unfreed
1713          *      we add it to his send queue. This arguably belongs
1714          *      in the TCP level since nobody else uses it. BUT
1715          *      remember IPng might change all the rules.
1716          */
1717          
1718         if (!free) 
1719         {
1720                 unsigned long flags;
1721                 /* The socket now has more outstanding blocks */
1722                 
1723                 sk->packets_out++;
1724                 
1725                 /* Protect the list for a moment */
1726                 save_flags(flags);
1727                 cli();
1728                 
1729                 if (skb->link3 != NULL) 
1730                 {
1731                         printk("ip.c: link3 != NULL\n");
1732                         skb->link3 = NULL;
1733                 }
1734                 if (sk->send_head == NULL) 
1735                 {
1736                         sk->send_tail = skb;
1737                         sk->send_head = skb;
1738                 }
1739                 else 
1740                 {
1741                         sk->send_tail->link3 = skb;
1742                         sk->send_tail = skb;
1743                 }
1744                 /* skb->link3 is NULL */
1745                 
1746                 /* Interrupt restore */
1747                 restore_flags(flags);
1748                 /* Set the IP write timeout to the round trip time for the packet.
1749                    If an acknowledgement has not arrived by then we may wish to act. */
1750                 reset_timer(sk, TIME_WRITE, sk->rto);
1751         } 
1752         else 
1753                 /* Remember who owns the buffer */
1754                 skb->sk = sk;
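             /*
              *      Net effect of the two branches above: packets the caller wants
              *      kept (free == 0) now sit on the socket's singly linked send
              *      queue (send_head..send_tail, chained through link3) so that
              *      ip_do_retransmit() can walk them later, and the TIME_WRITE
              *      timer is armed with the current rto; throwaway packets are
              *      merely stamped with their owning socket.
              */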
1755 
1756         /*
1757          *      If the indicated interface is up and running, send the packet. 
1758          */
1759         ip_statistics.IpOutRequests++;
1760          
1761         if (dev->flags & IFF_UP) 
1762         {
1763                 /* 
1764                  *      If we have an owner use its priority setting,
1765                  *      otherwise use NORMAL
1766                  */
1767                 
1768                 if (sk != NULL) 
1769                 {
1770                         dev_queue_xmit(skb, dev, sk->priority);
1771                 }
1772                 else 
1773                 {
1774                         dev_queue_xmit(skb, dev, SOPRI_NORMAL);
1775                 }
1776         } 
1777         else 
1778         {
1779                 ip_statistics.IpOutDiscards++;
1780                 if (free) 
1781                         kfree_skb(skb, FREE_WRITE);
1782         }
1783 }
1784 
1785 
1786 /*
1787  *      A socket has timed out on its send queue and wants to do a
1788  *      little retransmitting. Currently this means TCP.
1789  */
1790 
1791 void ip_do_retransmit(struct sock *sk, int all)
1792 {
1793         struct sk_buff * skb;
1794         struct proto *prot;
1795         struct device *dev;
1796         int retransmits;
1797 
1798         prot = sk->prot;
1799         skb = sk->send_head;
1800         retransmits = sk->retransmits;
1801         
1802         while (skb != NULL) 
1803         {
1804                 dev = skb->dev;
1805                 IS_SKB(skb);
1806                 skb->when = jiffies;
1807 
1808                 /* 
1809                  * In general it's OK just to use the old packet.  However we
1810                  * need to use the current ack and window fields.  Urg and 
1811                  * urg_ptr could possibly stand to be updated as well, but we 
1812                  * don't keep the necessary data.  That shouldn't be a problem,
1813                  * if the other end is doing the right thing.  Since we're 
1814                  * changing the packet, we have to issue a new IP identifier.
1815                  */
1816 
1817                 /* this check may be unnecessary - retransmit only for TCP */
1818                 if (sk->protocol == IPPROTO_TCP) {
1819                   struct tcphdr *th;
1820                   struct iphdr *iph;
1821                   int size;
1822 
1823                   iph = (struct iphdr *)(skb->data + dev->hard_header_len);
1824                   th = (struct tcphdr *)(((char *)iph) + (iph->ihl << 2));
1825                   size = skb->len - (((unsigned char *) th) - skb->data);
1826 
1827                   iph->id = htons(ip_id_count++);
1828                   ip_send_check(iph);
1829 
1830                   th->ack_seq = ntohl(sk->acked_seq);
1831                   th->window = ntohs(tcp_select_window(sk));
1832                   tcp_send_check(th, sk->saddr, sk->daddr, size, sk);
1833                 }
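                     /*
                      *      Both checksums above have to be redone: the IP header
                      *      sum because the identification field changed, and the
                      *      TCP checksum (tcp_send_check covers the pseudo header
                      *      too) because ack_seq and window were refreshed.
                      */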
1834 
1835                 /* 
1836                  *      If the interface is (still) up and running, kick it. 
1837                  */
1838                 
1839                 if (dev->flags & IFF_UP) 
1840                 {
1841                         /*
1842                          *      If the packet is still being sent by the device/protocol
1843                          *      below then don't retransmit. This is both needed, and good -
1844                          *      especially with connected mode AX.25, where it stops resends
1845                          *      of frames that have not even been sent yet!
1846                          *      We still add up the counts as the round trip time wants
1847                          *      adjusting.
1848                          */
1849                         if (sk && !skb_device_locked(skb))
1850                         {
1851                                 /* Remove it from any existing driver queue first! */
1852                                 skb_unlink(skb);
1853                                 /* Now queue it */
1854                                 ip_statistics.IpOutRequests++;
1855                                 dev_queue_xmit(skb, dev, sk->priority);
1856                         }
1857                 }
1858                 
1859                 /*
1860                  *      Count retransmissions
1861                  */
1862                 retransmits++;
1863                 sk->prot->retransmits++;
1864                 
1865                 /*
1866                  *      Only one retransmit requested.
1867                  */
1868                 if (!all) 
1869                         break;
1870 
1871                 /*
1872                  *      This should cut it off before we send too many packets. 
1873                  */
1874                 if (sk->retransmits > sk->cong_window) 
1875                         break;
1876                 skb = skb->link3;
1877         }
1878 }
1879 
1880 /*
1881  *      This is the normal code called for timeouts.  It does the retransmission
1882  *      and then does backoff.  ip_do_retransmit is separated out because
1883  *      tcp_ack needs to send stuff from the retransmit queue without
1884  *      initiating a backoff.
1885  */
1886 
1887 void ip_retransmit(struct sock *sk, int all)
1888 {
1889         ip_do_retransmit(sk, all);
1890 
1891         /*
1892          * Increase the timeout each time we retransmit.  Note that
1893          * we do not increase the rtt estimate.  rto is initialized
1894          * from rtt, but increases here.  Jacobson (SIGCOMM 88) suggests
1895          * that doubling rto each time is the least we can get away with.
1896          * In KA9Q, Karn uses this for the first few times, and then
1897          * goes to quadratic.  netBSD doubles, but only goes up to *64,
1898          * and clamps at 1 to 64 sec afterwards.  Note that 120 sec is
1899          * defined in the protocol as the maximum possible RTT.  I guess
1900          * we'll have to use something other than TCP to talk to the
1901          * University of Mars.
1902          */
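             /*
              *      A rough worked example (the starting figure is assumed, not
              *      taken from this file): with HZ at 100 and an rto that has
              *      settled at 50 jiffies (half a second), successive timeouts
              *      give 100, 200, 400, 800, ... jiffies until the min() below
              *      pins it at 120*HZ = 12000 jiffies, the two minute ceiling
              *      mentioned above.
              */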
1903 
1904         sk->retransmits++;
1905         sk->backoff++;
1906         sk->rto = min(sk->rto << 1, 120*HZ);
1907         reset_timer(sk, TIME_WRITE, sk->rto);
1908 }
1909 
1910 /*
1911  *      Socket option code for IP. This is the end of the line after any TCP, UDP etc. options on
1912  *      an IP socket.
1913  *
1914  *      We implement IP_TOS (type of service) and IP_TTL (time to live).
1915  *
1916  *      Next release we will sort out IP_OPTIONS, since for some people they are kind of important.
1917  */
1918  
1919 int ip_setsockopt(struct sock *sk, int level, int optname, char *optval, int optlen)
1920 {
1921         int val,err;
1922         
1923         if (optval == NULL) 
1924                 return(-EINVAL);
1925 
1926         err=verify_area(VERIFY_READ, optval, sizeof(int));
1927         if(err)
1928                 return err;
1929         
1930         val = get_fs_long((unsigned long *)optval);
1931 
1932         if(level!=SOL_IP)
1933                 return -EOPNOTSUPP;
1934 
1935         switch(optname)
1936         {
1937                 case IP_TOS:
1938                         if(val<0||val>255)
1939                                 return -EINVAL;
1940                         sk->ip_tos=val;
1941                         return 0;
1942                 case IP_TTL:
1943                         if(val<1||val>255)
1944                                 return -EINVAL;
1945                         sk->ip_ttl=val;
1946                         return 0;
1947                 /* IP_OPTIONS and friends go here eventually */
1948                 default:
1949                         return(-ENOPROTOOPT);
1950         }
1951 }
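     /*
      *      A minimal user level sketch of driving the two options handled above.
      *      The descriptor and the values are illustrative only, not taken from
      *      this file:
      *
      *              int fd  = socket(AF_INET, SOCK_DGRAM, 0);
      *              int tos = IPTOS_LOWDELAY;
      *              int ttl = 64;
      *              setsockopt(fd, SOL_IP, IP_TOS, (char *)&tos, sizeof(tos));
      *              setsockopt(fd, SOL_IP, IP_TTL, (char *)&ttl, sizeof(ttl));
      *
      *      A TOS outside 0-255 or a TTL outside 1-255 comes straight back as
      *      -EINVAL from the checks above.
      */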
1952 
1953 /*
1954  *      Get the options. Note for future reference: the GET of IP options gets the
1955  *      _received_ ones, while the SET sets the _sent_ ones.
1956  */
1957  
1958 int ip_getsockopt(struct sock *sk, int level, int optname, char *optval, int *optlen)
1959 {
1960         int val,err;
1961         
1962         if(level!=SOL_IP)
1963                 return -EOPNOTSUPP;
1964                 
1965         switch(optname)
1966         {
1967                 case IP_TOS:
1968                         val=sk->ip_tos;
1969                         break;
1970                 case IP_TTL:
1971                         val=sk->ip_ttl;
1972                         break;
1973                 default:
1974                         return(-ENOPROTOOPT);
1975         }
1976         err=verify_area(VERIFY_WRITE, optlen, sizeof(int));
1977         if(err)
1978                 return err;
1979         put_fs_long(sizeof(int),(unsigned long *) optlen);
1980 
1981         err=verify_area(VERIFY_WRITE, optval, sizeof(int));
1982         if(err)
1983                 return err;
1984         put_fs_long(val,(unsigned long *)optval);
1985 
1986         return(0);
1987 }
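     /*
      *      Reading a value back follows the same shape; again only a sketch with
      *      illustrative names:
      *
      *              int val, len = sizeof(val);
      *              getsockopt(fd, SOL_IP, IP_TOS, (char *)&val, &len);
      *
      *      ip_getsockopt() above always writes sizeof(int) into *optlen before
      *      storing the value itself.
      */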
1988 
1989 /*
1990  *      IP protocol layer initialiser
1991  */
1992  
1993 static struct packet_type ip_packet_type = 
1994 {
1995         0,      /* MUTTER ntohs(ETH_P_IP),*/
1996         0,              /* copy */
1997         ip_rcv,
1998         NULL,
1999         NULL,
2000 };
2001  
2002  
2003 /*
2004  *      IP registers the packet type and then calls the subprotocol initialisers
2005  */
2006  
2007 void ip_init(void)
2008 {
2009         ip_packet_type.type=htons(ETH_P_IP);
2010         dev_add_pack(&ip_packet_type);
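             /*
              *      dev_add_pack() links ip_packet_type into the chain the device
              *      layer consults for every received frame, so from here on any
              *      frame tagged with protocol ETH_P_IP is fed to ip_rcv() above.
              */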
2011 /*      ip_raw_init();
2012         ip_packet_init();
2013         ip_tcp_init();
2014         ip_udp_init();*/
2015 }
