root/net/inet/ip.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. ip_print
  2. ip_ioctl
  3. strict_route
  4. loose_route
  5. print_ipprot
  6. ip_route_check
  7. build_options
  8. ip_send
  9. ip_build_header
  10. do_options
  11. ip_fast_csum
  12. ip_compute_csum
  13. ip_csum
  14. ip_send_check
  15. ip_frag_create
  16. ip_find
  17. ip_free
  18. ip_expire
  19. ip_create
  20. ip_done
  21. ip_glue
  22. ip_defrag
  23. ip_fragment
  24. ip_forward
  25. ip_rcv
  26. ip_queue_xmit
  27. ip_retransmit
  28. backoff
  29. ip_setsockopt
  30. ip_getsockopt

   1 /*
   2  * INET         An implementation of the TCP/IP protocol suite for the LINUX
   3  *              operating system.  INET is implemented using the  BSD Socket
   4  *              interface as the means of communication with the user level.
   5  *
   6  *              The Internet Protocol (IP) module.
   7  *
   8  * Version:     @(#)ip.c        1.28    20/12/93
   9  *
  10  * Authors:     Ross Biro, <bir7@leland.Stanford.Edu>
  11  *              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
  12  *              Donald Becker, <becker@super.org>
  13  *
  14  * Fixes:
  15  *              Alan Cox        :       Commented a couple of minor bits of surplus code
  16  *              Alan Cox        :       Undefining IP_FORWARD doesn't include the code
  17  *                                      (just stops a compiler warning).
  18  *              Alan Cox        :       Frames with >=MAX_ROUTE record routes, strict routes or loose routes
  19  *                                      are junked rather than corrupting things.
  20  *              Alan Cox        :       Frames to bad broadcast subnets are dumped
  21  *                                      We used to process them non broadcast and
  22  *                                      boy could that cause havoc.
  23  *              Alan Cox        :       ip_forward sets the free flag on the 
  24  *                                      new frame it queues. Still crap because
  25  *                                      it copies the frame but at least it 
  26  *                                      doesn't eat memory too.
  27  *              Alan Cox        :       Generic queue code and memory fixes.
  28  *              Fred Van Kempen :       IP fragment support (borrowed from NET2E)
  29  *              Gerhard Koerting:       Forward fragmented frames correctly.
  30  *              Gerhard Koerting:       Fixes to my fix of the above 8-).
  31  *              Gerhard Koerting:       IP interface addressing fix.
  32  *              Linus Torvalds  :       More robustness checks
  33  *              Alan Cox        :       Even more checks: Still not as robust as it ought to be
  34  *              Alan Cox        :       Reformatted for neatness and final release.
  35  *              Alan Cox        :       Tags ip header for RAW sockets, and for accept(). Old
  36  *                                      method wasn't suitable for AX.25
  37  *              Alan Cox        :       Most of the ip_options processing logic added.
  38  *
  39  * To Fix:
  40  *              RFC791 states that options are a 'required' feature of an
  41  *              IP implementation. We don't do options at all.
  42  *
  43  *              This program is free software; you can redistribute it and/or
  44  *              modify it under the terms of the GNU General Public License
  45  *              as published by the Free Software Foundation; either version
  46  *              2 of the License, or (at your option) any later version.
  47  */
  48 #include <asm/segment.h>
  49 #include <asm/system.h>
  50 #include <linux/types.h>
  51 #include <linux/kernel.h>
  52 #include <linux/sched.h>
  53 #include <linux/string.h>
  54 #include <linux/errno.h>
  55 #include <linux/socket.h>
  56 #include <linux/sockios.h>
  57 #include <linux/in.h>
  58 #include "inet.h"
  59 #include "devinet.h"
  60 #include "eth.h"
  61 #include "ip.h"
  62 #include "protocol.h"
  63 #include "route.h"
  64 #include "tcp.h"
  65 #include "skbuff.h"
  66 #include "sockinet.h"
  67 #include "arp.h"
  68 #include "icmp.h"
  69 
  70 /*
  71  *      These two can normally be left. In olden times the numerous bugs used to
  72  *      make forwarding go crazy on some nets and fragmentation fragment your
  73  *      computer 8-)
  74  */
  75  
  76 #define CONFIG_IP_FORWARD               /* Compile in IP forwarding; per the changelog above, undefining it leaves the forwarding code out */
  77 #define CONFIG_IP_DEFRAG                /* Fragmentation switch; NOTE(review): its #ifdef users are outside the part of the file shown here */
  78 
  79 
  80 extern int last_retran;                 /* External variable; NOTE(review): definition and users are not in the part of the file shown here */
  81 extern void sort_send(struct sock *sk); /* External helper; NOTE(review): definition and callers are not in the part of the file shown here */
  82 
  83 #ifdef IP_DEBUG
  84 
  85 void ip_print(struct iphdr *ip)
     /* [previous][next][first][last][top][bottom][index][help] */
  86 {
  87         unsigned char buff[32];
  88         unsigned char *ptr;
  89         int addr, len, i;
  90 
  91         if (inet_debug != DBG_IP) 
  92                 return;
  93 
  94           /* Dump the IP header. */
  95         printk("IP: ihl=%d, version=%d, tos=%d, tot_len=%d\n",
  96                 ip->ihl, ip->version, ip->tos, ntohs(ip->tot_len));
  97         printk("    id=%X, ttl=%d, prot=%d, check=%X\n",
  98                 ip->id, ip->ttl, ip->protocol, ip->check);
  99         printk("    frag_off=%d\n", ip->frag_off);
 100         printk("    soucre=%s ", in_ntoa(ip->saddr));
 101         printk("dest=%s\n", in_ntoa(ip->daddr));
 102         printk("    ----\n");
 103 
 104         /* Dump the data. */
 105         ptr = (unsigned char *)(ip + 1);
 106         addr = 0;
 107         len = ntohs(ip->tot_len) - (4 * ip->ihl);
 108         while (len > 0) 
 109         {
 110                 printk("    %04X: ", addr);
 111                 for(i = 0; i < 16; i++) 
 112                 {
 113                         if (len > 0) 
 114                         {
 115                                 printk("%02X ", (*ptr & 0xFF));
 116                                 buff[i] = *ptr++;
 117                                 if (buff[i] < 32 || buff[i] > 126) buff[i] = '.';
 118                         } 
 119                         else 
 120                         {
 121                                 printk("   ");
 122                                 buff[i] = ' ';
 123                         }
 124                         addr++;
 125                         len--;
 126                 };
 127                 buff[i] = '\0';
 128                 printk("  \"%s\"\n", buff);
 129         }
 130         printk("    ----\n\n");
 131 }
 132 
 133 #endif
 134 
 135 /*
 136  *      Low level user requests to the IP device. NOT that same as IP layer
 137  *      socket requests (which also do nothing useful at the moment)
 138  */
 139  
 140 int ip_ioctl(struct sock *sk, int cmd, unsigned long arg)
     /* [previous][next][first][last][top][bottom][index][help] */
 141 {
 142         switch(cmd) 
 143         {
 144                 case DDIOCSDBG:
 145                         return(dbg_ioctl((void *) arg, DBG_IP));
 146                 default:
 147                         return(-EINVAL);
 148         }
 149 }
 150 
 151 
 152 /*
 153  *      These two routines will do routing when we have ip options support 
 154  *      (RFC 791 page 18,19)
 155  */
 156 
 157 static void strict_route(struct iphdr *iph, struct options *opt)
     /* [previous][next][first][last][top][bottom][index][help] */
 158 {
 159 }
 160 
 161 
 162 static void loose_route(struct iphdr *iph, struct options *opt)
     /* [previous][next][first][last][top][bottom][index][help] */
 163 {
 164 }
 165 
 166 
 167 static void print_ipprot(struct inet_protocol *ipprot)
     /* [previous][next][first][last][top][bottom][index][help] */
 168 {
 169         DPRINTF((DBG_IP, "handler = %X, protocol = %d, copy=%d \n",
 170                 ipprot->handler, ipprot->protocol, ipprot->copy));
 171 }
 172 
 173 
 174 /*
 175  *      This routine will check to see if we have lost a gateway. 
 176  */
 177  
 178 void ip_route_check(unsigned long daddr)
     /* [previous][next][first][last][top][bottom][index][help] */
 179 {
 180 }
 181 
 182 
 183 /* 
 184  *      This routine puts the options at the end of an ip header. 
 185  */
 186  
 187 static int build_options(struct iphdr *iph, struct options *opt)
     /* [previous][next][first][last][top][bottom][index][help] */
 188 {
 189         unsigned char *ptr;
 190         ptr = (unsigned char *)(iph+1);         
 191         /* currently we don't support any options. */
 192         if(opt==NULL)
 193         {
 194                 *ptr = 0;
 195                 return (1);
 196         }
 197         else
 198         {
 199                 memcpy(ptr,opt->option_data,opt->option_length);
 200                 return((opt->option_length+3)/4);
 201         }
 202 }
 203 
 204 
 205 /*
 206  *      Take an skb, and fill in the MAC header. 
 207  */
 208  
 209 static int ip_send(struct sk_buff *skb, unsigned long daddr, int len, struct device *dev,
     /* [previous][next][first][last][top][bottom][index][help] */
 210         unsigned long saddr)
 211 {
 212         unsigned char *ptr;
 213         int mac;
 214 
 215         ptr = (unsigned char *)(skb + 1);
 216         mac = 0;
 217         skb->arp = 1;
 218         if (dev->hard_header) 
 219         {
 220                 mac = dev->hard_header(ptr, dev, ETH_P_IP, daddr, saddr, len);
 221         }       
 222         if (mac < 0) 
 223         {
 224                 mac = -mac;
 225                 skb->arp = 0;
 226         }
 227         skb->dev = dev;
 228         return(mac);
 229 }
 230 
 231 
 232 /*
 233  * This routine builds the appropriate hardware/IP headers for
 234  * the routine.  It assumes that if *dev != NULL then the
 235  * protocol knows what it's doing, otherwise it uses the
 236  * routing/ARP tables to select a device struct.
 237  */
 238 int ip_build_header(struct sk_buff *skb, unsigned long saddr, unsigned long daddr,
     /* [previous][next][first][last][top][bottom][index][help] */
 239                 struct device **dev, int type, struct options *opt, int len, int ttl,int tos)
 240 {
 241         static struct options optmem;
 242         struct iphdr *iph;
 243         struct rtable *rt;
 244         unsigned char *buff;
 245         unsigned long raddr;
 246         static int count = 0;
 247         int tmp;
 248         int optlen;
 249         
 250         if (saddr == 0) 
 251                 saddr = my_addr();
 252         
 253         DPRINTF((DBG_IP, "ip_build_header (skb=%X, saddr=%X, daddr=%X, *dev=%X,\n"
 254                 "                 type=%d, opt=%X, len = %d)\n",
 255                 skb, saddr, daddr, *dev, type, opt, len));
 256            
 257         buff = (unsigned char *)(skb + 1);
 258 
 259         /* See if we need to look up the device. */
 260         if (*dev == NULL) 
 261         {
 262                 rt = rt_route(daddr, &optmem);
 263                 if (rt == NULL) 
 264                         return(-ENETUNREACH);
 265 
 266                 *dev = rt->rt_dev;
 267                 if (saddr == 0x0100007FL && daddr != 0x0100007FL) 
 268                         saddr = rt->rt_dev->pa_addr;
 269                 raddr = rt->rt_gateway;
 270 
 271                 DPRINTF((DBG_IP, "ip_build_header: saddr set to %s\n", in_ntoa(saddr)));
 272                 opt = &optmem;
 273         } 
 274         else 
 275         {
 276                 /* We still need the address of the first hop. */
 277                 rt = rt_route(daddr, &optmem);
 278                 raddr = (rt == NULL) ? 0 : rt->rt_gateway;
 279         }
 280         if (raddr == 0)
 281                 raddr = daddr;
 282 
 283         /* Now build the MAC header. */
 284         tmp = ip_send(skb, raddr, len, *dev, saddr);
 285         buff += tmp;
 286         len -= tmp;
 287 
 288         skb->dev = *dev;
 289         skb->saddr = saddr;
 290         if (skb->sk) 
 291                 skb->sk->saddr = saddr;
 292 
 293         /* Now build the IP header. */
 294 
 295         /* If we are using IPPROTO_RAW, then we don't need an IP header, since
 296            one is being supplied to us by the user */
 297 
 298         if(type == IPPROTO_RAW) 
 299                 return (tmp);
 300 
 301         iph = (struct iphdr *)buff;
 302         iph->version  = 4;
 303         iph->tos      = tos;
 304         iph->frag_off = 0;
 305         iph->ttl      = ttl;
 306         iph->daddr    = daddr;
 307         iph->saddr    = saddr;
 308         iph->protocol = type;
 309         iph->ihl      = 5;
 310         iph->id       = htons(count++);
 311         
 312         /* Setup the IP options. Length is in longs.*/
 313 
 314         optlen=build_options(iph, opt);
 315         iph->ihl+=optlen;
 316 
 317         return(20 + tmp + 4*optlen);    /* IP header plus MAC header size */
 318 }
 319 
 320 
 321 /*
 322  *      Interpret the incoming options
 323  */
 324 
 325 static int do_options(struct iphdr *iph, struct options **opt_ptr, struct device *dev)
     /* [previous][next][first][last][top][bottom][index][help] */
 326 {
 327         unsigned char *buff;
 328         int done = 0;
 329         int i, len = sizeof(struct iphdr);
 330         unsigned char *outbuf;
 331         struct options *opt;
 332         int ol;
 333         int optsiz;
 334         
 335         if(iph->ihl==5)
 336         {
 337                 *opt_ptr=NULL;
 338                 return(0);
 339         }
 340 
 341         /* Allocate a buffer to stuff the options (decoded) and the raw option data into */
 342         
 343         ol=(iph->ihl*4)-sizeof(struct iphdr);
 344         
 345         opt=(struct options *)kmalloc(ol+sizeof(*opt),GFP_ATOMIC);
 346         *opt_ptr=opt;
 347         
 348         if(opt==NULL)
 349                 return(1);
 350 
 351         opt->option_length=ol;  
 352         outbuf=(unsigned char *)(opt+1);
 353         opt->option_data=outbuf;
 354         
 355         /* Zero out the options. */
 356         opt->record_route.route_size = 0;
 357         opt->loose_route.route_size  = 0;
 358         opt->strict_route.route_size = 0;
 359         opt->tstamp.ptr              = 0;
 360         opt->security                = 0;
 361         opt->compartment             = 0;
 362         opt->handling                = 0;
 363         opt->stream                  = 0;
 364         opt->tcc                     = 0;
 365         
 366                 
 367         /* Advance the pointer to start at the options. */
 368         buff = (unsigned char *)(iph + 1);
 369         /* Copy the data */
 370         memcpy(outbuf,buff,opt->option_length);
 371         buff = outbuf;
 372         
 373         /* Now start the processing. */
 374         while (!done && len < iph->ihl*4) switch(*buff) 
 375         {
 376                 case IPOPT_END:
 377                         done = 1;
 378                         break;
 379                 case IPOPT_NOOP:
 380                         buff++;
 381                         len++;
 382                         break;
 383                 case IPOPT_SEC:
 384                         buff++;
 385                         if (*buff != 11) 
 386                                 return(1);
 387                         buff++;
 388                         opt->security = ntohs(*(unsigned short *)buff);
 389                         buff += 2;
 390                         opt->compartment = ntohs(*(unsigned short *)buff);
 391                         buff += 2;
 392                         opt->handling = ntohs(*(unsigned short *)buff);
 393                         buff += 2;
 394                         opt->tcc = ((*buff) << 16) + ntohs(*(unsigned short *)(buff+1));
 395                         buff += 3;
 396                         len += 11;
 397                         break;
 398                 case IPOPT_LSRR:
 399                         buff++;
 400                         if ((*buff - 3)% 4 != 0) 
 401                                 return(1);
 402                         if(*buff<2)
 403                                 return(1);
 404                         len += (optsiz= *buff);
 405                         opt->loose_route.route_size = (*buff -3)/4;
 406                         buff++;
 407                         if (*buff % 4 != 0) 
 408                                 return(1);
 409                         opt->loose_route.pointer = *buff/4 - 1;
 410                         if(*buff<=optsiz)
 411                                 *buff+=4;       /* Move on a route */
 412                         buff++;
 413                         buff++;
 414                         for (i = 0; i < opt->loose_route.route_size; i++) 
 415                         {
 416                                 if(i>=MAX_ROUTE)
 417                                         return(1);
 418                                 if(i==opt->strict_route.pointer)
 419                                         *(unsigned long *)buff=dev->pa_addr;
 420                                 opt->loose_route.route[i] = *(unsigned long *)buff;
 421                                 buff += 4;
 422                         }
 423                         break;
 424                 case IPOPT_SSRR:
 425                         buff++;
 426                         if ((*buff - 3)% 4 != 0) 
 427                                 return(1);
 428                         if(*buff<2)
 429                                 return(1);
 430                         len += (optsiz= *buff);
 431                         opt->strict_route.route_size = (*buff -3)/4;
 432                         buff++;
 433                         if (*buff % 4 != 0) 
 434                                 return(1);
 435                         opt->strict_route.pointer = *buff/4 - 1;
 436                         if(*buff<=optsiz)
 437                                 *buff+=4;
 438                         buff++;
 439                         buff++;
 440                         for (i = 0; i < opt->strict_route.route_size; i++) 
 441                         {
 442                                 if(i>=MAX_ROUTE)
 443                                         return(1);
 444                                 if(i==opt->strict_route.pointer)
 445                                         *(unsigned long *)buff=dev->pa_addr;                                    
 446                                 opt->strict_route.route[i] = *(unsigned long *)buff;
 447                                 buff += 4;
 448                         }       
 449                         break;
 450                 case IPOPT_RR:
 451                         buff++;
 452                         if ((*buff - 3)% 4 != 0) 
 453                                 return(1);
 454                         if(*buff<2)
 455                                 return(1);
 456                         len += (optsiz= *buff);
 457                         opt->record_route.route_size = (*buff -3)/4;
 458                         buff++;
 459                         if (*buff % 4 != 0)
 460                                 return(1);
 461                         opt->record_route.pointer = *buff/4 - 1;
 462                         if(*buff+4<=optsiz)
 463                                 *buff+=4;
 464                         buff++;
 465                         buff++;
 466                         for (i = 0; i < opt->record_route.route_size; i++) 
 467                         {
 468                                 if(i>=MAX_ROUTE)
 469                                         return 1;
 470                                 if(i==opt->record_route.pointer)
 471                                         *(unsigned long *)buff=dev->pa_addr;
 472                                 opt->record_route.route[i] = *(unsigned long *)buff;
 473                                 buff += 4;
 474                         }
 475                         break;
 476                 case IPOPT_SID:
 477                         len += 4;
 478                         buff +=2;
 479                         opt->stream = *(unsigned short *)buff;
 480                         buff += 2;
 481                         break;
 482                 case IPOPT_TIMESTAMP:
 483                         /* FIXME: This one isn't altered correctly yet */
 484                         buff++;
 485                         if(*buff<2)
 486                                 return 1;
 487                         len += *buff;
 488                         if (*buff % 4 != 0) 
 489                                 return(1);
 490                         opt->tstamp.len = *buff / 4 - 1;
 491                         buff++;
 492                         if ((*buff - 1) % 4 != 0) 
 493                                 return(1);
 494                         opt->tstamp.ptr = (*buff-1)/4;
 495                         buff++;
 496                         opt->tstamp.x.full_char = *buff;
 497                         buff++;
 498                         for (i = 0; i < opt->tstamp.len; i++) 
 499                         {
 500                                 opt->tstamp.data[i] = *(unsigned long *)buff;
 501                                 buff += 4;
 502                         }
 503                         break;
 504                 default:
 505                         return(1);
 506         }
 507 
 508         if (opt->record_route.route_size == 0) 
 509         {
 510                 if (opt->strict_route.route_size != 0) 
 511                 {
 512                         memcpy(&(opt->record_route), &(opt->strict_route),
 513                                              sizeof(opt->record_route));
 514                 } 
 515                 else if (opt->loose_route.route_size != 0) 
 516                 {
 517                         memcpy(&(opt->record_route), &(opt->loose_route),
 518                                              sizeof(opt->record_route));
 519                 }
 520         }
 521 
 522         if (opt->strict_route.route_size != 0 &&
 523                 opt->strict_route.route_size != opt->strict_route.pointer) 
 524         {
 525                 strict_route(iph, opt);
 526                 return(0);
 527         }
 528 
 529         if (opt->loose_route.route_size != 0 &&
 530             opt->loose_route.route_size != opt->loose_route.pointer) 
 531         {
 532                 loose_route(iph, opt);
 533                 return(0);
 534         }
 535 
 536         return(0);
 537 }
 538 
 539 /* 
 540  *      This is a version of ip_compute_csum() optimized for IP headers, which
 541  *      always checksum on 4 octet boundaries. 
 542  */
 543  
 544 static inline unsigned short ip_fast_csum(unsigned char * buff, int wlen)
     /* [previous][next][first][last][top][bottom][index][help] */
 545 {
 546         unsigned long sum = 0;
 547 
 548         if (wlen) {
 549                 unsigned long bogus;
 550                  __asm__("clc\n"
 551                         "1:\t"
 552                         "lodsl\n\t"
 553                         "adcl %3, %0\n\t"
 554                         "decl %2\n\t"
 555                         "jne 1b\n\t"
 556                         "adcl $0, %0\n\t"
 557                         "movl %0, %3\n\t"
 558                         "shrl $16, %3\n\t"
 559                         "addw %w3, %w0\n\t"
 560                         "adcw $0, %w0"
 561                         : "=r" (sum), "=S" (buff), "=r" (wlen), "=a" (bogus)
 562                         : "0"  (sum),  "1" (buff),  "2" (wlen));
 563         }
 564         return (~sum) & 0xffff;
 565 }
 566 
 567 /*
 568  * This routine does all the checksum computations that don't
 569  * require anything special (like copying or special headers).
 570  */
 571 
 572 unsigned short ip_compute_csum(unsigned char * buff, int len)
     /* [previous][next][first][last][top][bottom][index][help] */
 573 {
 574         unsigned long sum = 0;
 575 
 576         /* Do the first multiple of 4 bytes and convert to 16 bits. */
 577         if (len > 3) {
 578                 __asm__("clc\n"
 579                         "1:\t"
 580                         "lodsl\n\t"
 581                         "adcl %%eax, %%ebx\n\t"
 582                         "loop 1b\n\t"
 583                         "adcl $0, %%ebx\n\t"
 584                         "movl %%ebx, %%eax\n\t"
 585                         "shrl $16, %%eax\n\t"
 586                         "addw %%ax, %%bx\n\t"
 587                         "adcw $0, %%bx"
 588                         : "=b" (sum) , "=S" (buff)
 589                         : "0" (sum), "c" (len >> 2) ,"1" (buff)
 590                         : "ax", "cx", "si", "bx" );
 591         }
 592         if (len & 2) {
 593                 __asm__("lodsw\n\t"
 594                         "addw %%ax, %%bx\n\t"
 595                         "adcw $0, %%bx"
 596                         : "=b" (sum), "=S" (buff)
 597                         : "0" (sum), "1" (buff)
 598                         : "bx", "ax", "si");
 599         }
 600         if (len & 1) {
 601                 __asm__("lodsb\n\t"
 602                         "movb $0, %%ah\n\t"
 603                         "addw %%ax, %%bx\n\t"
 604                         "adcw $0, %%bx"
 605                         : "=b" (sum), "=S" (buff)
 606                         : "0" (sum), "1" (buff)
 607                         : "bx", "ax", "si");
 608         }
 609         sum =~sum;
 610         return(sum & 0xffff);
 611 }
 612 
 613 /*
 614  *      Check the header of an incoming IP datagram.  This version is still used in slhc.c. 
 615  */
 616 
 617 int ip_csum(struct iphdr *iph)
     /* [previous][next][first][last][top][bottom][index][help] */
 618 {
 619         return ip_fast_csum((unsigned char *)iph, iph->ihl);
 620 }
 621 
 622 /* 
 623  *      Generate a checksym for an outgoing IP datagram.  (RFC791, Page 14)
 624  */
 625 
 626 static void ip_send_check(struct iphdr *iph)
     /* [previous][next][first][last][top][bottom][index][help] */
 627 {
 628         iph->check = 0;
 629         iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
 630 }
 631 
 632 /************************ Fragment Handlers From NET2E not yet with tweaks to beat 4K **********************************/
 633 
 634 static struct ipq *ipqueue = NULL;              /* Head of the doubly linked list of incomplete datagrams; ip_create() inserts at the head, ip_free() unlinks */
 635 
 636 /* 
 637  *      Create a new fragment entry. 
 638  */
 639  
 640 static struct ipfrag *ip_frag_create(int offset, int end, struct sk_buff *skb, unsigned char *ptr)
     /* [previous][next][first][last][top][bottom][index][help] */
 641 {
 642         struct ipfrag *fp;
 643  
 644         fp = (struct ipfrag *) kmalloc(sizeof(struct ipfrag), GFP_ATOMIC);
 645         if (fp == NULL) 
 646         {
 647                 printk("IP: frag_create: no memory left !\n");
 648                 return(NULL);
 649         }
 650         memset(fp, 0, sizeof(struct ipfrag));
 651 
 652         /* Fill in the structure. */
 653         fp->offset = offset;
 654         fp->end = end;
 655         fp->len = end - offset;
 656         fp->skb = skb;
 657         fp->ptr = ptr;
 658  
 659         return(fp);
 660 }
 661  
 662  
 663 /*
 664  * Find the correct entry in the "incomplete datagrams" queue for
 665  * this IP datagram, and return the queue entry address if found.
 666  */
 667 
 668 static struct ipq *ip_find(struct iphdr *iph)
     /* [previous][next][first][last][top][bottom][index][help] */
 669 {
 670         struct ipq *qp;
 671         struct ipq *qplast;
 672  
 673         cli();
 674         qplast = NULL;
 675         for(qp = ipqueue; qp != NULL; qplast = qp, qp = qp->next) 
 676         {
 677                 if (iph->id== qp->iph->id && iph->saddr == qp->iph->saddr &&
 678                         iph->daddr == qp->iph->daddr && iph->protocol == qp->iph->protocol) 
 679                 {
 680                         del_timer(&qp->timer);  /* So it doesnt vanish on us. The timer will be reset anyway */
 681                         sti();
 682                         return(qp);
 683                 }
 684         }
 685         sti();
 686         return(NULL);
 687 }
 688  
 689  
 690 /*
 691  * Remove an entry from the "incomplete datagrams" queue, either
 692  * because we completed, reassembled and processed it, or because
 693  * it timed out.
 694  */
 695 
 696 static void ip_free(struct ipq *qp)
     /* [previous][next][first][last][top][bottom][index][help] */
 697 {
 698         struct ipfrag *fp;
 699         struct ipfrag *xp;
 700 
 701         /* Stop the timer for this entry. */
 702 /*      printk("ip_free\n");*/
 703         del_timer(&qp->timer);
 704 
 705         /* Remove this entry from the "incomplete datagrams" queue. */
 706         cli();
 707         if (qp->prev == NULL) 
 708         {
 709                 ipqueue = qp->next;
 710                 if (ipqueue != NULL) 
 711                         ipqueue->prev = NULL;
 712         } 
 713         else 
 714         {
 715                 qp->prev->next = qp->next;
 716                 if (qp->next != NULL) 
 717                         qp->next->prev = qp->prev;
 718         }
 719  
 720         /* Release all fragment data. */
 721 /*      printk("ip_free: kill frag data\n");*/
 722         fp = qp->fragments;
 723         while (fp != NULL) 
 724         {
 725                 xp = fp->next;
 726                 IS_SKB(fp->skb);
 727                 kfree_skb(fp->skb,FREE_READ);
 728                 kfree_s(fp, sizeof(struct ipfrag));
 729                 fp = xp;
 730         }
 731         
 732 /*      printk("ip_free: cleanup\n");*/
 733  
 734         /* Release the MAC header. */
 735         kfree_s(qp->mac, qp->maclen);
 736  
 737         /* Release the IP header. */
 738         kfree_s(qp->iph, qp->ihlen + 8);
 739  
 740         /* Finally, release the queue descriptor itself. */
 741         kfree_s(qp, sizeof(struct ipq));
 742 /*      printk("ip_free:done\n");*/
 743         sti();
 744  }
 745  
 746  
 747 /*
 748  *      Oops- a fragment queue timed out.  Kill it and send an ICMP reply. 
 749  */
 750  
 751 static void ip_expire(unsigned long arg)
     /* [previous][next][first][last][top][bottom][index][help] */
 752 {
 753         struct ipq *qp;
 754  
 755         qp = (struct ipq *)arg;
 756         DPRINTF((DBG_IP, "IP: queue_expire: fragment queue 0x%X timed out!\n", qp));
 757  
 758         /* Send an ICMP "Fragment Reassembly Timeout" message. */
 759 #if 0           
 760         icmp_send(qp->iph->ip_src.s_addr, ICMP_TIME_EXCEEDED,
 761                     ICMP_EXC_FRAGTIME, qp->iph);
 762 #endif           
 763         if(qp->fragments!=NULL)
 764                 icmp_send(qp->fragments->skb,ICMP_TIME_EXCEEDED,
 765                                 ICMP_EXC_FRAGTIME, qp->dev);
 766  
 767         /* Nuke the fragment queue. */
 768         ip_free(qp);
 769 }
 770  
 771  
 772 /*
 773  * Add an entry to the 'ipq' queue for a newly received IP datagram.
 774  * We will (hopefully :-) receive all other fragments of this datagram
 775  * in time, so we just create a queue for this datagram, in which we
 776  * will insert the received fragments at their respective positions.
 777  */
 778 
 779 static struct ipq *ip_create(struct sk_buff *skb, struct iphdr *iph, struct device *dev)
     /* [previous][next][first][last][top][bottom][index][help] */
 780 {
 781         struct ipq *qp;
 782         int maclen;
 783         int ihlen;
 784 
 785         qp = (struct ipq *) kmalloc(sizeof(struct ipq), GFP_ATOMIC);
 786         if (qp == NULL) 
 787         {
 788                 printk("IP: create: no memory left !\n");
 789                 return(NULL);
 790         }
 791         memset(qp, 0, sizeof(struct ipq));
 792 
 793         /* Allocate memory for the MAC header. */
 794         maclen = ((unsigned long) iph) - ((unsigned long) (skb + 1));
 795         qp->mac = (unsigned char *) kmalloc(maclen, GFP_ATOMIC);
 796         if (qp->mac == NULL) 
 797         {
 798                 printk("IP: create: no memory left !\n");
 799                 kfree_s(qp, sizeof(struct ipq));
 800                 return(NULL);
 801         }
 802 
 803         /* Allocate memory for the IP header (plus 8 octects for ICMP). */
 804         ihlen = (iph->ihl * sizeof(unsigned long));
 805         qp->iph = (struct iphdr *) kmalloc(ihlen + 8, GFP_ATOMIC);
 806         if (qp->iph == NULL) 
 807         {
 808                 printk("IP: create: no memory left !\n");
 809                 kfree_s(qp->mac, maclen);
 810                 kfree_s(qp, sizeof(struct ipq));
 811                 return(NULL);
 812         }
 813 
 814         /* Fill in the structure. */
 815         memcpy(qp->mac, (skb + 1), maclen);
 816         memcpy(qp->iph, iph, ihlen + 8);
 817         qp->len = 0;
 818         qp->ihlen = ihlen;
 819         qp->maclen = maclen;
 820         qp->fragments = NULL;
 821         qp->dev = dev;
 822 /*      printk("Protocol = %d\n",qp->iph->protocol);*/
 823         
 824         /* Start a timer for this entry. */
 825         qp->timer.expires = IP_FRAG_TIME;               /* about 30 seconds     */
 826         qp->timer.data = (unsigned long) qp;            /* pointer to queue     */
 827         qp->timer.function = ip_expire;                 /* expire function      */
 828         add_timer(&qp->timer);
 829 
 830         /* Add this entry to the queue. */
 831         qp->prev = NULL;
 832         cli();
 833         qp->next = ipqueue;
 834         if (qp->next != NULL) 
 835                 qp->next->prev = qp;
 836         ipqueue = qp;
 837         sti();
 838         return(qp);
 839 }
 840  
 841  
 842 /*
 843  *      See if a fragment queue is complete. 
 844  */
 845  
 846 static int ip_done(struct ipq *qp)
     /* [previous][next][first][last][top][bottom][index][help] */
 847 {
 848         struct ipfrag *fp;
 849         int offset;
 850  
 851         /* Only possible if we received the final fragment. */
 852         if (qp->len == 0) 
 853                 return(0);
 854  
 855         /* Check all fragment offsets to see if they connect. */
 856         fp = qp->fragments;
 857         offset = 0;
 858         while (fp != NULL) 
 859         {
 860                 if (fp->offset > offset) 
 861                         return(0);      /* fragment(s) missing */
 862                 offset = fp->end;
 863                 fp = fp->next;
 864         }
 865  
 866         /* All fragments are present. */
 867         return(1);
 868  }
 869  
 870  
 871 /*
 872  *      Build a new IP datagram from all its fragments. 
 873  */
 874  
 875 static struct sk_buff *ip_glue(struct ipq *qp)
     /* [previous][next][first][last][top][bottom][index][help] */
 876 {
 877         struct sk_buff *skb;
 878         struct iphdr *iph;
 879         struct ipfrag *fp;
 880         unsigned char *ptr;
 881         int count, len;
 882  
 883         /* Allocate a new buffer for the datagram. */
 884         len = sizeof(struct sk_buff)+qp->maclen + qp->ihlen + qp->len;
 885         if ((skb = alloc_skb(len,GFP_ATOMIC)) == NULL) 
 886         {
 887                 printk("IP: queue_glue: no memory for glueing queue 0x%X\n", (int) qp);
 888                 ip_free(qp);
 889                 return(NULL);
 890         }
 891  
 892         /* Fill in the basic details. */
 893         skb->len = (len - qp->maclen);
 894         skb->h.raw = (unsigned char *) (skb + 1);
 895         skb->free = 1;
 896         skb->lock = 1;
 897  
 898         /* Copy the original MAC and IP headers into the new buffer. */
 899         ptr = (unsigned char *) skb->h.raw;
 900         memcpy(ptr, ((unsigned char *) qp->mac), qp->maclen);
 901 /*      printk("Copied %d bytes of mac header.\n",qp->maclen);*/
 902         ptr += qp->maclen;
 903         memcpy(ptr, ((unsigned char *) qp->iph), qp->ihlen);
 904 /*      printk("Copied %d byte of ip header.\n",qp->ihlen);*/
 905         ptr += qp->ihlen;
 906         skb->h.raw += qp->maclen;
 907         
 908 /*      printk("Protocol = %d\n",skb->h.iph->protocol);*/
 909         count = 0;
 910  
 911         /* Copy the data portions of all fragments into the new buffer. */
 912         fp = qp->fragments;
 913         while(fp != NULL) 
 914         {
 915                 if(count+fp->len>skb->len)
 916                 {
 917                         /* In case some fool sends us a silly fragment. */
 918                         printk("Invalid fragment list: Fragment over size.\n");
 919                         kfree_skb(skb,FREE_WRITE);
 920                         return NULL;
 921                 }
 922 /*              printk("Fragment %d size %d\n",fp->offset,fp->len);*/
 923                 memcpy((ptr + fp->offset), fp->ptr, fp->len);
 924                 count += fp->len;
 925                 fp = fp->next;
 926         }
 927  
 928         /* We glued together all fragments, so remove the queue entry. */
 929         ip_free(qp);
 930  
 931         /* Done with all fragments. Fixup the new IP header. */
 932         iph = skb->h.iph;
 933         iph->frag_off = 0;
 934         iph->tot_len = htons((iph->ihl * sizeof(unsigned long)) + count);
 935         return(skb);
 936 }
 937  
 938 
 939 /* 
 940  *      Process an incoming IP datagram fragment. 
 941  */
 942  
 943 static struct sk_buff *ip_defrag(struct iphdr *iph, struct sk_buff *skb, struct device *dev)
     /* [previous][next][first][last][top][bottom][index][help] */
 944 {
 945         struct ipfrag *prev, *next;
 946         struct ipfrag *tfp;
 947         struct ipq *qp;
 948         struct sk_buff *skb2;
 949         unsigned char *ptr;
 950         int flags, offset;
 951         int i, ihl, end;
 952 
 953         /* Find the entry of this IP datagram in the "incomplete datagrams" queue. */
 954         qp = ip_find(iph);
 955  
 956         /* Is this a non-fragmented datagram? */
 957         offset = ntohs(iph->frag_off);
 958         flags = offset & ~IP_OFFSET;
 959         offset &= IP_OFFSET;
 960         if (((flags & IP_MF) == 0) && (offset == 0)) 
 961         {
 962                 if (qp != NULL) 
 963                         ip_free(qp);    /* Huh? How could this exist?? */
 964                 return(skb);
 965         }
 966         offset <<= 3;           /* offset is in 8-byte chunks */
 967  
 968         /*
 969          * If the queue already existed, keep restarting its timer as long
 970          * as we still are receiving fragments.  Otherwise, create a fresh
 971          * queue entry.
 972          */
 973         if (qp != NULL) 
 974         {
 975                 del_timer(&qp->timer);
 976                 qp->timer.expires = IP_FRAG_TIME;       /* about 30 seconds     */
 977                 qp->timer.data = (unsigned long) qp;    /* pointer to queue     */
 978                 qp->timer.function = ip_expire;         /* expire function      */
 979                 add_timer(&qp->timer);
 980         } 
 981         else 
 982         {
 983                 if ((qp = ip_create(skb, iph, dev)) == NULL) 
 984                         return(NULL);
 985         }
 986  
 987         /* Determine the position of this fragment. */
 988         ihl = (iph->ihl * sizeof(unsigned long));
 989         end = offset + ntohs(iph->tot_len) - ihl;
 990  
 991         /* Point into the IP datagram 'data' part. */
 992         ptr = ((unsigned char *) (skb + 1)) + dev->hard_header_len + ihl;
 993  
 994         /* Is this the final fragment? */
 995         if ((flags & IP_MF) == 0) 
 996                 qp->len = end;
 997  
 998         /*
 999          * Find out which fragments are in front and at the back of us
1000          * in the chain of fragments so far.  We must know where to put
1001          * this fragment, right?
1002          */
1003         prev = NULL;
1004         for(next = qp->fragments; next != NULL; next = next->next) 
1005         {
1006                 if (next->offset > offset) 
1007                         break;  /* bingo! */
1008                 prev = next;
1009         }       
1010  
1011         /*
1012          * We found where to put this one.
1013          * Check for overlap with preceeding fragment, and, if needed,
1014          * align things so that any overlaps are eliminated.
1015          */
1016         if (prev != NULL && offset < prev->end) 
1017         {
1018                 i = prev->end - offset;
1019                 offset += i;    /* ptr into datagram */
1020                 ptr += i;       /* ptr into fragment data */
1021                 DPRINTF((DBG_IP, "IP: defrag: fixed low overlap %d bytes\n", i));
1022         }       
1023  
1024         /*
1025          * Look for overlap with succeeding segments.
1026          * If we can merge fragments, do it.
1027          */
1028    
1029         for(; next != NULL; next = tfp) 
1030         {
1031                 tfp = next->next;
1032                 if (next->offset >= end) 
1033                         break;          /* no overlaps at all */
1034  
1035                 i = end - next->offset;                 /* overlap is 'i' bytes */
1036                 next->len -= i;                         /* so reduce size of    */
1037                 next->offset += i;                      /* next fragment        */
1038                 next->ptr += i;
1039                 
1040                 /* If we get a frag size of <= 0, remove it. */
1041                 if (next->len <= 0) 
1042                 {
1043                         DPRINTF((DBG_IP, "IP: defrag: removing frag 0x%X (len %d)\n",
1044                                                         next, next->len));
1045                         if (next->prev != NULL) 
1046                                 next->prev->next = next->next;
1047                         else 
1048                                 qp->fragments = next->next;
1049                 
1050                         if (tfp->next != NULL) 
1051                                 next->next->prev = next->prev;
1052                         
1053                         kfree_s(next, sizeof(struct ipfrag));
1054                 }
1055                 DPRINTF((DBG_IP, "IP: defrag: fixed high overlap %d bytes\n", i));
1056         }
1057  
1058         /* Insert this fragment in the chain of fragments. */
1059         tfp = NULL;
1060         tfp = ip_frag_create(offset, end, skb, ptr);
1061         tfp->prev = prev;
1062         tfp->next = next;
1063         if (prev != NULL) 
1064                 prev->next = tfp;
1065         else 
1066                 qp->fragments = tfp;
1067    
1068         if (next != NULL) 
1069                 next->prev = tfp;
1070  
1071         /*
1072          * OK, so we inserted this new fragment into the chain.
1073          * Check if we now have a full IP datagram which we can
1074          * bump up to the IP layer...
1075          */
1076    
1077         if (ip_done(qp)) 
1078         {
1079                 skb2 = ip_glue(qp);             /* glue together the fragments */
1080                 return(skb2);
1081         }
1082         return(NULL);
1083  }
1084  
1085  
1086  /*
1087   * This IP datagram is too large to be sent in one piece.  Break it up into
1088   * smaller pieces (each of size equal to the MAC header plus IP header plus
1089   * a block of the data of the original IP data part) that will yet fit in a
1090   * single device frame, and queue such a frame for sending by calling the
1091   * ip_queue_xmit().  Note that this is recursion, and bad things will happen
1092   * if this function causes a loop...
1093   */
1094  
1095  void ip_fragment(struct sock *sk, struct sk_buff *skb, struct device *dev, int is_frag)
     /* [previous][next][first][last][top][bottom][index][help] */
1096  {
1097         struct iphdr *iph;
1098         unsigned char *raw;
1099         unsigned char *ptr;
1100         struct sk_buff *skb2;
1101         int left, mtu, hlen, len;
1102         int offset;
1103  
1104         /* Point into the IP datagram header. */
1105         raw = (unsigned char *) (skb + 1);
1106         iph = (struct iphdr *) (raw + dev->hard_header_len);
1107         
1108         /* Setup starting values. */
1109         hlen = (iph->ihl * sizeof(unsigned long));
1110         left = ntohs(iph->tot_len) - hlen;
1111         hlen += dev->hard_header_len;
1112         mtu = (dev->mtu - hlen);
1113         ptr = (raw + hlen);
1114         
1115         DPRINTF((DBG_IP, "IP: Fragmentation Desired\n"));
1116         DPRINTF((DBG_IP, "    DEV=%s, MTU=%d, LEN=%d SRC=%s",
1117                 dev->name, dev->mtu, left, in_ntoa(iph->saddr)));
1118         DPRINTF((DBG_IP, " DST=%s\n", in_ntoa(iph->daddr)));
1119  
1120         /* Check for any "DF" flag. */
1121         if (ntohs(iph->frag_off) & IP_DF) 
1122         {
1123                 DPRINTF((DBG_IP, "IP: Fragmentation Desired, but DF set !\n"));
1124                 DPRINTF((DBG_IP, "    DEV=%s, MTU=%d, LEN=%d SRC=%s",
1125                         dev->name, dev->mtu, left, in_ntoa(iph->saddr)));
1126                 DPRINTF((DBG_IP, " DST=%s\n", in_ntoa(iph->daddr)));
1127  
1128                 /*
1129                  * FIXME:
1130                  * We should send an ICMP warning message here!
1131                  */
1132                  
1133                 icmp_send(skb,ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, dev); 
1134                 return;
1135         }
1136         
1137         /*
1138          *      If it won't fit then error it. 
1139          *      NOTE: We don't send a ICMP here. Suppose the ICMP didn't fit.... 
1140          */
1141            
1142         if(mtu<8)
1143         {
1144                 return;
1145         }
1146  
1147         /* Fragment the datagram. */
1148         if (is_frag & 2)
1149           offset = (ntohs(iph->frag_off) & 0x1fff) << 3;
1150         else
1151           offset = 0;
1152         while(left > 0) 
1153         {
1154                 len = left;
1155                 if (len+8 > mtu) 
1156                         len = (dev->mtu - hlen - 8);
1157                 if ((left - len) >= 8) 
1158                 {
1159                         len /= 8;
1160                         len *= 8;
1161                 }
1162                 DPRINTF((DBG_IP,"IP: frag: creating fragment of %d bytes (%d total)\n",
1163                                                         len, len + hlen));
1164  
1165                 /* Allocate buffer. */
1166                 if ((skb2 = alloc_skb(sizeof(struct sk_buff) + len + hlen,GFP_KERNEL)) == NULL) 
1167                 {
1168                         printk("IP: frag: no memory for new fragment!\n");
1169                         return;
1170                 }
1171                 skb2->arp = skb->arp;
1172                 skb2->free = skb->free;
1173                 skb2->len = len + hlen;
1174                 skb2->h.raw=(char *)(skb2+1);
1175  
1176                 if (sk) 
1177                         sk->wmem_alloc += skb2->mem_len;
1178  
1179                 /* Copy the packet header into the new buffer. */
1180                 memcpy(skb2->h.raw, raw, hlen);
1181  
1182                 /* Copy a block of the IP datagram. */
1183                 memcpy(skb2->h.raw + hlen, ptr, len);
1184                 left -= len;
1185 
1186                 skb2->h.raw+=dev->hard_header_len; 
1187                 /* Fill in the new header fields. */
1188                 iph = (struct iphdr *)(skb2->h.raw/*+dev->hard_header_len*/);
1189                 iph->frag_off = htons((offset >> 3));
1190                 /* Added AC : If we are fragmenting a fragment thats not the
1191                    last fragment then keep MF on each bit */
1192                 if (left > 0 || (is_frag & 1)) 
1193                         iph->frag_off |= htons(IP_MF);
1194                 ptr += len;
1195                 offset += len;
1196 /*              printk("Queue frag\n");*/
1197  
1198                 /* Put this fragment into the sending queue. */
1199                 ip_queue_xmit(sk, dev, skb2, 1);
1200 /*              printk("Queued\n");*/
1201         }
1202  }
1203  
1204 
1205 
1206 #ifdef CONFIG_IP_FORWARD
1207 
1208 /* 
1209  *      Forward an IP datagram to its next destination. 
1210  */
1211  
1212 static void ip_forward(struct sk_buff *skb, struct device *dev, int is_frag)
     /* [previous][next][first][last][top][bottom][index][help] */
1213 {
1214         struct device *dev2;
1215         struct iphdr *iph;
1216         struct sk_buff *skb2;
1217         struct rtable *rt;
1218         unsigned char *ptr;
1219         unsigned long raddr;
1220 
1221         /*
1222          * Only forward packets that were fired at us when we are in promiscuous
1223          * mode. In standard mode we rely on the driver to filter for us.
1224          */
1225    
1226         if(dev->flags&IFF_PROMISC)
1227         {
1228                 if(memcmp((char *)&skb[1],dev->dev_addr,dev->addr_len))
1229                         return;
1230         }
1231   
1232         /*
1233          * According to the RFC, we must first decrease the TTL field. If
1234          * that reaches zero, we must reply an ICMP control message telling
1235          * that the packet's lifetime expired. RFC791 page 30.
1236          */
1237         iph = skb->h.iph;
1238         iph->ttl--;
1239         if (iph->ttl <= 0) 
1240         {
1241                 DPRINTF((DBG_IP, "\nIP: *** datagram expired: TTL=0 (ignored) ***\n"));
1242                 DPRINTF((DBG_IP, "    SRC = %s   ", in_ntoa(iph->saddr)));
1243                 DPRINTF((DBG_IP, "    DST = %s (ignored)\n", in_ntoa(iph->daddr)));
1244         
1245                 /* Tell the sender its packet died... */
1246                 icmp_send(skb, ICMP_TIME_EXCEEDED, ICMP_EXC_TTL, dev);
1247                 return;
1248         }
1249 
1250         /* Re-compute the IP header checksum. */
1251         ip_send_check(iph);
1252 
1253         /*
1254          * OK, the packet is still valid.  Fetch its destination address,
1255          * and give it to the IP sender for further processing.
1256          */
1257         rt = rt_route(iph->daddr, NULL);
1258         if (rt == NULL) 
1259         {
1260                 DPRINTF((DBG_IP, "\nIP: *** routing (phase I) failed ***\n"));
1261 
1262                 /* Tell the sender its packet cannot be delivered... */
1263                 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_NET_UNREACH, dev);
1264                 return;
1265         }
1266 
1267 
1268         /*
1269          * Gosh.  Not only is the packet valid; we even know how to
1270          * forward it onto its final destination.  Can we say this
1271          * is being plain lucky?
1272          * If the router told us that there is no GW, use the dest.
1273          * IP address itself- we seem to be connected directly...
1274          */
1275         raddr = rt->rt_gateway;
1276         if (raddr != 0) 
1277         {
1278                 rt = rt_route(raddr, NULL);
1279                 if (rt == NULL) 
1280                 {
1281                         DPRINTF((DBG_IP, "\nIP: *** routing (phase II) failed ***\n"));
1282 
1283                         /* Tell the sender its packet cannot be delivered... */
1284                         icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, dev);
1285                         return;
1286                 }
1287                 if (rt->rt_gateway != 0) 
1288                         raddr = rt->rt_gateway;
1289         } 
1290         else 
1291                 raddr = iph->daddr;
1292         dev2 = rt->rt_dev;
1293 
1294         /*
1295          * Never forward out on the same interface, its not allowed, its often not pretty either (except for on
1296          * source routing)
1297          */
1298         if (dev == dev2)
1299                 return;
1300         /*
1301          * We now allocate a new buffer, and copy the datagram into it.
1302          * If the indicated interface is up and running, kick it.
1303          */
1304         DPRINTF((DBG_IP, "\nIP: *** fwd %s -> ", in_ntoa(iph->saddr)));
1305         DPRINTF((DBG_IP, "%s (via %s), LEN=%d\n",
1306                         in_ntoa(raddr), dev2->name, skb->len));
1307 
1308         if (dev2->flags & IFF_UP) 
1309         {
1310                 skb2 = (struct sk_buff *) alloc_skb(sizeof(struct sk_buff) +
1311                        dev2->hard_header_len + skb->len, GFP_ATOMIC);
1312                 if (skb2 == NULL) 
1313                 {
1314                         printk("\nIP: No memory available for IP forward\n");
1315                         return;
1316                 }
1317                 ptr = (unsigned char *)(skb2 + 1);
1318                 skb2->sk = NULL;
1319                 skb2->free = 1;
1320                 skb2->len = skb->len + dev2->hard_header_len;
1321                 skb2->next = NULL;
1322                 skb2->h.raw = ptr;
1323 
1324                 /* Copy the packet data into the new buffer. */
1325                 memcpy(ptr + dev2->hard_header_len, skb->h.raw, skb->len);
1326                 
1327                 /* Now build the MAC header. */
1328                 (void) ip_send(skb2, raddr, skb->len, dev2, dev2->pa_addr);
1329 
1330                 if(skb2->len > dev2->mtu)
1331                 {
1332                         ip_fragment(NULL,skb2,dev2, is_frag);
1333                         kfree_skb(skb2,FREE_WRITE);
1334                 }
1335                 else
1336                         dev2->queue_xmit(skb2, dev2, SOPRI_NORMAL);
1337         }
1338 }
1339 
1340 
1341 #endif
1342 
1343 
1344 /*
1345  *      This function receives all incoming IP datagrams. 
1346  */
1347  
1348 int ip_rcv(struct sk_buff *skb, struct device *dev, struct packet_type *pt)
     /* [previous][next][first][last][top][bottom][index][help] */
1349 {
1350         struct iphdr *iph = skb->h.iph;
1351         unsigned char hash;
1352         unsigned char flag = 0;
1353         unsigned char opts_p = 0;       /* Set iff the packet has options. */
1354         struct inet_protocol *ipprot;
1355         struct options *opt=NULL;
1356 
1357         int brd;
1358         int is_frag=0;
1359 
1360         DPRINTF((DBG_IP, "<<\n"));
1361 
1362         /* Is the datagram acceptable? */
1363         if (skb->len<sizeof(struct iphdr) || iph->ihl<5 || iph->version != 4 || ip_fast_csum((unsigned char *)iph, iph->ihl) !=0) 
1364         {
1365                 DPRINTF((DBG_IP, "\nIP: *** datagram error ***\n"));
1366                 DPRINTF((DBG_IP, "    SRC = %s   ", in_ntoa(iph->saddr)));
1367                 DPRINTF((DBG_IP, "    DST = %s (ignored)\n", in_ntoa(iph->daddr)));
1368                 skb->sk = NULL;
1369                 kfree_skb(skb, FREE_WRITE);
1370                 return(0);
1371         }
1372 
1373         if (iph->ihl != 5) 
1374         {       /* Fast path for the typical optionless IP packet. */
1375 #ifdef IP_DEBUG         
1376                 ip_print(iph);          /* Bogus, only for debugging. */
1377 #endif
1378                 if (do_options(iph, &opt,dev) != 0)
1379                 {
1380                         if(opt)
1381                                 kfree(opt);
1382                         return 0;
1383                 }
1384 /*              skb->ip_options=opt_ptr;*/
1385                 kfree(opt);
1386                 opt = NULL;
1387                 opts_p = 0/*1*/;
1388         }
1389 
1390         if (iph->frag_off & 0x0020)
1391                 is_frag|=1;
1392         if (ntohs(iph->frag_off) & 0x1fff)
1393                 is_frag|=2;
1394         
1395         /* Do any IP forwarding required.  chk_addr() is expensive -- avoid it someday. */
1396         if ((brd = chk_addr(iph->daddr)) == 0) 
1397         {
1398 #ifdef CONFIG_IP_FORWARD
1399                 ip_forward(skb, dev, is_frag);
1400 #else
1401                 printk("Machine %x tried to use us as a forwarder to %x but we have forwarding disabled!\n",
1402                         iph->saddr,iph->daddr);
1403 #endif                  
1404                 skb->sk = NULL;
1405                 kfree_skb(skb, FREE_WRITE);
1406                 return(0);
1407         }
1408 
1409   /*
1410    * Reassemble IP fragments. 
1411    */
1412 
1413         if(is_frag)
1414         {
1415 #ifdef CONFIG_IP_DEFRAG
1416                 skb=ip_defrag(iph,skb,dev);
1417                 if(skb==NULL)
1418                 {
1419                         return 0;
1420                 }
1421                 iph=skb->h.iph;
1422 #else
1423                 printk("\nIP: *** datagram fragmentation not yet implemented ***\n");
1424                 printk("    SRC = %s   ", in_ntoa(iph->saddr));
1425                 printk("    DST = %s (ignored)\n", in_ntoa(iph->daddr));
1426                 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PROT_UNREACH, dev);
1427                 skb->sk = NULL;
1428                 kfree_skb(skb, FREE_WRITE);
1429                 return(0);
1430 #endif
1431         }
1432 
1433 
1434         /* Point into the IP datagram, just past the header. */
1435         skb->ip_hdr = iph;
1436         skb->h.raw += iph->ihl*4;
1437         hash = iph->protocol & (MAX_INET_PROTOS -1);
1438         
1439         /* Find someone to deliver it too */
1440         
1441         for (ipprot = (struct inet_protocol *)inet_protos[hash];
1442                 ipprot != NULL;
1443                 ipprot=(struct inet_protocol *)ipprot->next)
1444         {
1445                 struct sk_buff *skb2;
1446 
1447                 if (ipprot->protocol != iph->protocol) 
1448                         continue;
1449                 DPRINTF((DBG_IP, "Using protocol = %X:\n", ipprot));
1450                 print_ipprot(ipprot);
1451 
1452        /*
1453         * See if we need to make a copy of it.  This will
1454         * only be set if more than one protocol wants it. 
1455         * and then not for the last one.
1456         */
1457                 if (ipprot->copy) 
1458                 {
1459                         skb2 = alloc_skb(skb->mem_len, GFP_ATOMIC);
1460                         if (skb2 == NULL) 
1461                                 continue;
1462                         memcpy(skb2, skb, skb->mem_len);
1463                         skb2->mem_addr = skb2;
1464                         skb2->h.raw = (unsigned char *)(
1465                                 (unsigned long)skb2 +
1466                                 (unsigned long) skb->h.raw -
1467                                 (unsigned long)skb);
1468                         skb2->free=1;
1469                 } 
1470                 else 
1471                 {
1472                         skb2 = skb;
1473                 }
1474                 flag = 1;
1475 
1476         /*
1477          * Pass on the datagram to each protocol that wants it,
1478          * based on the datagram protocol.  We should really
1479          * check the protocol handler's return values here...
1480          */
1481                 ipprot->handler(skb2, dev, opts_p ? opt : 0, iph->daddr,
1482                         (ntohs(iph->tot_len) - (iph->ihl * 4)),
1483                         iph->saddr, 0, ipprot);
1484 
1485         }
1486 
1487   /*
1488    * All protocols checked.
1489    * If this packet was a broadcast, we may *not* reply to it, since that
1490    * causes (proven, grin) ARP storms and a leakage of memory (i.e. all
1491    * ICMP reply messages get queued up for transmission...)
1492    */
1493         if (!flag) 
1494         {
1495                 if (brd != IS_BROADCAST)
1496                         icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PROT_UNREACH, dev);
1497                 skb->sk = NULL;
1498                 kfree_skb(skb, FREE_WRITE);
1499         }
1500 
1501         return(0);
1502 }
1503 
1504 
1505 /*
1506  * Queues a packet to be sent, and starts the transmitter
1507  * if necessary.  if free = 1 then we free the block after
1508  * transmit, otherwise we don't.
1509  * This routine also needs to put in the total length, and
1510  * compute the checksum.
1511  */
1512 void ip_queue_xmit(struct sock *sk, struct device *dev, 
     /* [previous][next][first][last][top][bottom][index][help] */
1513               struct sk_buff *skb, int free)
1514 {
1515         struct iphdr *iph;
1516         unsigned char *ptr;
1517 
1518         if (sk == NULL)
1519                 free = 1;
1520         if (dev == NULL) 
1521         {
1522                 printk("IP: ip_queue_xmit dev = NULL\n");
1523                 return;
1524         }
1525         IS_SKB(skb);
1526         skb->free = free;
1527         skb->dev = dev;
1528         skb->when = jiffies;
1529   
1530         DPRINTF((DBG_IP, ">>\n"));
1531         ptr = (unsigned char *)(skb + 1);
1532         ptr += dev->hard_header_len;
1533         iph = (struct iphdr *)ptr;
1534         iph->tot_len = ntohs(skb->len-dev->hard_header_len);
1535         
1536         if(skb->len > dev->mtu)
1537         {
1538 /*              printk("Fragment!\n");*/
1539                 ip_fragment(sk,skb,dev,0);
1540                 IS_SKB(skb);
1541                 kfree_skb(skb,FREE_WRITE);
1542                 return;
1543         }
1544   
1545         ip_send_check(iph);
1546 #ifdef IP_DEBUG
1547         ip_print(iph);
1548 #endif
1549         skb->next = NULL;
1550 
1551         /* See if this is the one trashing our queue. Ross? */
1552         skb->magic = 1;
1553         if (!free) 
1554         {
1555                 skb->link3 = NULL;
1556                 sk->packets_out++;
1557                 cli();
1558                 if (sk->send_head == NULL) 
1559                 {
1560                         sk->send_tail = skb;
1561                         sk->send_head = skb;
1562                 } 
1563                 else 
1564                 {
1565                         /* See if we've got a problem. */
1566                         if (sk->send_tail == NULL) 
1567                         {
1568                                 printk("IP: ***bug sk->send_tail == NULL != sk->send_head\n");
1569                                 sort_send(sk);
1570                         } 
1571                         else 
1572                         {
1573                                 sk->send_tail->link3 = skb;
1574                                 sk->send_tail = skb;
1575                         }
1576                 }
1577                 sti();
1578                 reset_timer(sk, TIME_WRITE,
1579                 backoff(sk->backoff) * (2 * sk->mdev + sk->rtt));
1580         } 
1581         else 
1582         {
1583                 skb->sk = sk;
1584         }
1585 
1586         /* If the indicated interface is up and running, kick it. */
1587         if (dev->flags & IFF_UP) 
1588         {
1589                 if (sk != NULL) 
1590                 {
1591                         dev->queue_xmit(skb, dev, sk->priority);
1592                 } 
1593                 else 
1594                 {
1595                         dev->queue_xmit(skb, dev, SOPRI_NORMAL);
1596                 }
1597         } 
1598         else 
1599         {
1600                 if (free) kfree_skb(skb, FREE_WRITE);
1601         }
1602 }
1603 
1604 
1605 void ip_retransmit(struct sock *sk, int all)
     /* [previous][next][first][last][top][bottom][index][help] */
1606 {
1607         struct sk_buff * skb;
1608         struct proto *prot;
1609         struct device *dev;
1610 
1611         prot = sk->prot;
1612         skb = sk->send_head;
1613         while (skb != NULL) 
1614         {
1615                 dev = skb->dev;
1616                 /* I know this can't happen but as it does.. */
1617                 if(dev==NULL)
1618                 {
1619                         printk("ip_retransmit: NULL device bug!\n");
1620                         goto oops;
1621                 }
1622         
1623                 IS_SKB(skb);
1624         
1625                 /*
1626                  * The rebuild_header function sees if the ARP is done.
1627                  * If not it sends a new ARP request, and if so it builds
1628                  * the header.
1629                  */
1630                 cli();  /* We might get interrupted by an arp reply here and fill
1631                            the frame in twice. Because of the technique used this
1632                            would be a little sad */
1633                 if (!skb->arp) 
1634                 {
1635                         if (dev->rebuild_header(skb+1, dev)) 
1636                         {
1637                                 sti();  /* Failed to rebuild - next */
1638                                 if (!all) 
1639                                         break;
1640                                 skb = (struct sk_buff *)skb->link3;
1641                                 continue;
1642                         }
1643                 }
1644                 skb->arp = 1;
1645                 sti();
1646                 skb->when = jiffies;
1647         
1648                 /* If the interface is (still) up and running, kick it. */
1649                 if (dev->flags & IFF_UP) 
1650                 {
1651                         if (sk) 
1652                                 dev->queue_xmit(skb, dev, sk->priority);
1653                 }
1654 
1655 oops:           sk->retransmits++;
1656                 sk->prot->retransmits ++;
1657                 if (!all) 
1658                         break;
1659 
1660                 /* This should cut it off before we send too many packets. */
1661                 if (sk->retransmits > sk->cong_window) 
1662                         break;
1663                 skb = (struct sk_buff *)skb->link3;
1664         }
1665 
1666   /*
1667    * Increase the RTT time every time we retransmit. 
1668    * This will cause exponential back off on how hard we try to
1669    * get through again.  Once we get through, the rtt will settle
1670    * back down reasonably quickly.
1671    */
1672         sk->backoff++;
1673         reset_timer(sk, TIME_WRITE, backoff(sk->backoff) * (2 * sk->mdev + sk->rtt));
1674 }       
1675 
/*
 *	Backoff function - the subject of much research
 *
 *	Maps a retry count onto a timer multiplier.  Counts 0..4 give a
 *	binary exponential (1 << n); counts 5..254 give the square of the
 *	count.  Together that yields
 *	1, 2, 4, 8, 16, 25, 36, 49, 64, 81, 100 ...
 *	Counts of 255 and above are pinned at 255*255, and a negative
 *	count is answered with the made-up value 16.  Both of those cases
 *	are logged.
 */

int backoff(int n)
{
	/* Work down from the top of the range. */
	if (n >= 255)
	{
		printk("Overloaded backoff!\n");
		return 255*255;
	}

	if (n > 4)
		return n * n;	/* Quadratic back off */

	if (n >= 0)
		return 1 << n;	/* Binary exponential back off */

	printk("Backoff < 0!\n");
	return 16;	/* Make up a value */
}
1706 
1707 
1708 /*
1709  *      Socket option code for IP. This is the end of the line after any TCP,UDP etc options on
1710  *      an IP socket.
1711  */
1712  
1713 int ip_setsockopt(struct sock *sk, int level, int optname, char *optval, int optlen)
     /* [previous][next][first][last][top][bottom][index][help] */
1714 {
1715         int val,err;
1716         
1717         if (optval == NULL) 
1718                 return(-EINVAL);
1719 
1720         err=verify_area(VERIFY_READ, optval, sizeof(int));
1721         if(err)
1722                 return err;
1723         
1724         val = get_fs_long((unsigned long *)optval);
1725 
1726         if(level!=SOL_IP)
1727                 return -EOPNOTSUPP;
1728 
1729         switch(optname)
1730         {
1731                 case IP_TOS:
1732                         if(val<0||val>255)
1733                                 return -EINVAL;
1734                         sk->ip_tos=val;
1735                         return 0;
1736                 case IP_TTL:
1737                         if(val<1||val<255)
1738                                 return -EINVAL;
1739                         sk->ip_ttl=val;
1740                         return 0;
1741                 /* IP_OPTIONS and friends go here eventually */
1742                 default:
1743                         return(-ENOPROTOOPT);
1744         }
1745 }
1746 
1747 int ip_getsockopt(struct sock *sk, int level, int optname, char *optval, int *optlen)
     /* [previous][next][first][last][top][bottom][index][help] */
1748 {
1749         int val,err;
1750         
1751         if(level!=SOL_IP)
1752                 return -EOPNOTSUPP;
1753                 
1754         switch(optname)
1755         {
1756                 case IP_TOS:
1757                         val=sk->ip_tos;
1758                         break;
1759                 case IP_TTL:
1760                         val=sk->ip_ttl;
1761                         break;
1762                 default:
1763                         return(-ENOPROTOOPT);
1764         }
1765         err=verify_area(VERIFY_WRITE, optlen, sizeof(int));
1766         if(err)
1767                 return err;
1768         put_fs_long(sizeof(int),(unsigned long *) optlen);
1769 
1770         err=verify_area(VERIFY_WRITE, optval, sizeof(int));
1771         if(err)
1772                 return err;
1773         put_fs_long(val,(unsigned long *)optval);
1774 
1775         return(0);
1776 }       

/* [previous][next][first][last][top][bottom][index][help] */