root/net/ipv4/ip.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. ip_ioctl
  2. ip_send
  3. ip_build_header
  4. ip_send_check
  5. ip_frag_create
  6. ip_find
  7. ip_free
  8. ip_expire
  9. ip_create
  10. ip_done
  11. ip_glue
  12. ip_defrag
  13. ip_fragment
  14. ip_forward
  15. ip_rcv
  16. ip_loopback
  17. ip_queue_xmit
  18. ip_mc_procinfo
  19. ip_mc_find_devfor
  20. ip_setsockopt
  21. ip_getsockopt
  22. ip_build_xmit
  23. ip_rt_event
  24. ip_init

   1 /*
   2  * INET         An implementation of the TCP/IP protocol suite for the LINUX
   3  *              operating system.  INET is implemented using the  BSD Socket
   4  *              interface as the means of communication with the user level.
   5  *
   6  *              The Internet Protocol (IP) module.
   7  *
   8  * Version:     @(#)ip.c        1.0.16b 9/1/93
   9  *
  10  * Authors:     Ross Biro, <bir7@leland.Stanford.Edu>
  11  *              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
  12  *              Donald Becker, <becker@super.org>
  13  *              Alan Cox, <gw4pts@gw4pts.ampr.org>
  14  *              Richard Underwood
  15  *              Stefan Becker, <stefanb@yello.ping.de>
  16  *              Jorge Cwik, <jorge@laser.satlink.net>
  17  *              Arnt Gulbrandsen, <agulbra@nvg.unit.no>
  18  *              
  19  *
  20  * Fixes:
  21  *              Alan Cox        :       Commented a couple of minor bits of surplus code
  22  *              Alan Cox        :       Undefining IP_FORWARD doesn't include the code
  23  *                                      (just stops a compiler warning).
  24  *              Alan Cox        :       Frames with >=MAX_ROUTE record routes, strict routes or loose routes
  25  *                                      are junked rather than corrupting things.
  26  *              Alan Cox        :       Frames to bad broadcast subnets are dumped
  27  *                                      We used to process them non broadcast and
  28  *                                      boy could that cause havoc.
  29  *              Alan Cox        :       ip_forward sets the free flag on the
  30  *                                      new frame it queues. Still crap because
  31  *                                      it copies the frame but at least it
  32  *                                      doesn't eat memory too.
  33  *              Alan Cox        :       Generic queue code and memory fixes.
  34  *              Fred Van Kempen :       IP fragment support (borrowed from NET2E)
  35  *              Gerhard Koerting:       Forward fragmented frames correctly.
  36  *              Gerhard Koerting:       Fixes to my fix of the above 8-).
  37  *              Gerhard Koerting:       IP interface addressing fix.
  38  *              Linus Torvalds  :       More robustness checks
  39  *              Alan Cox        :       Even more checks: Still not as robust as it ought to be
  40  *              Alan Cox        :       Save IP header pointer for later
  41  *              Alan Cox        :       ip option setting
  42  *              Alan Cox        :       Use ip_tos/ip_ttl settings
  43  *              Alan Cox        :       Fragmentation bogosity removed
  44  *                                      (Thanks to Mark.Bush@prg.ox.ac.uk)
  45  *              Dmitry Gorodchanin :    Send of a raw packet crash fix.
  46  *              Alan Cox        :       Silly ip bug when an overlength
  47  *                                      fragment turns up. Now frees the
  48  *                                      queue.
  49  *              Linus Torvalds/ :       Memory leakage on fragmentation
  50  *              Alan Cox        :       handling.
  51  *              Gerhard Koerting:       Forwarding uses IP priority hints
  52  *              Teemu Rantanen  :       Fragment problems.
  53  *              Alan Cox        :       General cleanup, comments and reformat
  54  *              Alan Cox        :       SNMP statistics
  55  *              Alan Cox        :       BSD address rule semantics. Also see
  56  *                                      UDP as there is a nasty checksum issue
  57  *                                      if you do things the wrong way.
  58  *              Alan Cox        :       Always defrag, moved IP_FORWARD to the config.in file
  59  *              Alan Cox        :       IP options adjust sk->priority.
  60  *              Pedro Roque     :       Fix mtu/length error in ip_forward.
  61  *              Alan Cox        :       Avoid ip_chk_addr when possible.
  62  *      Richard Underwood       :       IP multicasting.
  63  *              Alan Cox        :       Cleaned up multicast handlers.
  64  *              Alan Cox        :       RAW sockets demultiplex in the BSD style.
  65  *              Gunther Mayer   :       Fix the SNMP reporting typo
  66  *              Alan Cox        :       Always in group 224.0.0.1
  67  *      Pauline Middelink       :       Fast ip_checksum update when forwarding
  68  *                                      Masquerading support.
  69  *              Alan Cox        :       Multicast loopback error for 224.0.0.1
  70  *              Alan Cox        :       IP_MULTICAST_LOOP option.
  71  *              Alan Cox        :       Use notifiers.
  72  *              Bjorn Ekwall    :       Removed ip_csum (from slhc.c too)
  73  *              Bjorn Ekwall    :       Moved ip_fast_csum to ip.h (inline!)
  74  *              Stefan Becker   :       Send out ICMP HOST REDIRECT
  75  *      Arnt Gulbrandsen        :       ip_build_xmit
  76  *              Alan Cox        :       Per socket routing cache
  77  *              Alan Cox        :       Fixed routing cache, added header cache.
  78  *              Alan Cox        :       Loopback didn't work right in the original ip_build_xmit - fixed it.
  79  *              Alan Cox        :       Only send ICMP_REDIRECT if src/dest are the same net.
  80  *              Alan Cox        :       Incoming IP option handling.
  81  *              Alan Cox        :       Set saddr on raw output frames as per BSD.
  82  *              Alan Cox        :       Stopped broadcast source route explosions.
  83  *              Alan Cox        :       Can disable source routing
  84  *              Takeshi Sone    :       Masquerading didn't work.
  85  *      Dave Bonn,Alan Cox      :       Faster IP forwarding whenever possible.
  86  *              Alan Cox        :       Memory leaks, tramples, misc debugging.
  87  *              Alan Cox        :       Fixed multicast (by popular demand 8))
  88  *              Alan Cox        :       Fixed forwarding (by even more popular demand 8))
  89  *              Alan Cox        :       Fixed SNMP statistics [I think]
  90  *
  91  *  
  92  *
  93  * To Fix:
  94  *              IP option processing is mostly not needed. ip_forward needs to know about routing rules
  95  *              and time stamp but that's about all. Use the route mtu field here too
  96  *              IP fragmentation wants rewriting cleanly. The RFC815 algorithm is much more efficient
  97  *              and could be made very efficient with the addition of some virtual memory hacks to permit
  98  *              the allocation of a buffer that can then be 'grown' by twiddling page tables.
  99  *              Output fragmentation wants updating along with the buffer management to use a single 
 100  *              interleaved copy algorithm so that fragmenting has a one copy overhead. Actual packet
 101  *              output should probably do its own fragmentation at the UDP/RAW layer. TCP shouldn't cause
 102  *              fragmentation anyway.
 103  *
 104  *              FIXME: copy frag 0 iph to qp->iph
 105  *
 106  *              This program is free software; you can redistribute it and/or
 107  *              modify it under the terms of the GNU General Public License
 108  *              as published by the Free Software Foundation; either version
 109  *              2 of the License, or (at your option) any later version.
 110  */
 111 
 112 #include <asm/segment.h>
 113 #include <asm/system.h>
 114 #include <linux/types.h>
 115 #include <linux/kernel.h>
 116 #include <linux/sched.h>
 117 #include <linux/mm.h>
 118 #include <linux/string.h>
 119 #include <linux/errno.h>
 120 #include <linux/config.h>
 121 
 122 #include <linux/socket.h>
 123 #include <linux/sockios.h>
 124 #include <linux/in.h>
 125 #include <linux/inet.h>
 126 #include <linux/netdevice.h>
 127 #include <linux/etherdevice.h>
 128 #include <linux/proc_fs.h>
 129 
 130 #include <net/snmp.h>
 131 #include <net/ip.h>
 132 #include <net/protocol.h>
 133 #include <net/route.h>
 134 #include <net/tcp.h>
 135 #include <net/udp.h>
 136 #include <linux/skbuff.h>
 137 #include <net/sock.h>
 138 #include <net/arp.h>
 139 #include <net/icmp.h>
 140 #include <net/raw.h>
 141 #include <net/checksum.h>
 142 #include <linux/igmp.h>
 143 #include <linux/ip_fw.h>
 144 
 145 #define CONFIG_IP_DEFRAG
 146 
 147 extern int last_retran;
 148 extern void sort_send(struct sock *sk);
 149 
 150 #define min(a,b)        ((a)<(b)?(a):(b))
 151 
 152 /*
 153  *      SNMP management statistics
 154  */
 155 
 156 #ifdef CONFIG_IP_FORWARD
 157 struct ip_mib ip_statistics={1,64,};    /* First two members: forwarding flag = 1 (yes), default TTL = 64; all remaining members start at zero */
 158 #else
 159 struct ip_mib ip_statistics={2,64,};    /* First two members: forwarding flag = 2 (no), default TTL = 64; all remaining members start at zero */
 160 #endif
 161 
 162 /*
 163  *      Handle the issuing of an ioctl() request
 164  *      for the ip device. This is scheduled to
 165  *      disappear
 166  */
 167 
 168 int ip_ioctl(struct sock *sk, int cmd, unsigned long arg)
     /* [previous][next][first][last][top][bottom][index][help] */
 169 {
 170         switch(cmd)
 171         {
 172                 default:
 173                         return(-EINVAL);
 174         }
 175 }
 176 
 177 
 178 /*
 179  *      Take an skb, and fill in the MAC header.
 180  */
 181 
 182 static int ip_send(struct sk_buff *skb, unsigned long daddr, int len, struct device *dev, unsigned long saddr)
     /* [previous][next][first][last][top][bottom][index][help] */
 183 {
 184         int mac = 0;
 185 
 186         skb->dev = dev;
 187         skb->arp = 1;
 188         if (dev->hard_header)
 189         {
 190                 /*
 191                  *      Build a hardware header. Source address is our mac, destination unknown
 192                  *      (rebuild header will sort this out)
 193                  */
 194                 skb_reserve(skb,(dev->hard_header_len+15)&~15); /* 16 byte aligned IP headers are good */
 195                 mac = dev->hard_header(skb, dev, ETH_P_IP, NULL, NULL, len);
 196                 if (mac < 0)
 197                 {
 198                         mac = -mac;
 199                         skb->arp = 0;
 200                         skb->raddr = daddr;     /* next routing address */
 201                 }
 202         }
 203         return mac;
 204 }
 205 
 206 int ip_id_count = 0;        /* Not referenced in this part of the file; presumably the running IP identification counter for outgoing datagrams - TODO confirm against the transmit routines */
 207 
 208 /*
 209  * This routine builds the appropriate hardware/IP headers for
 210  * the routine.  It assumes that if *dev != NULL then the
 211  * protocol knows what it's doing, otherwise it uses the
 212  * routing/ARP tables to select a device struct.
 213  */
 214 int ip_build_header(struct sk_buff *skb, unsigned long saddr, unsigned long daddr,
     /* [previous][next][first][last][top][bottom][index][help] */
 215                 struct device **dev, int type, struct options *opt, int len, int tos, int ttl)
 216 {
 217         struct rtable *rt;
 218         unsigned long raddr;
 219         int tmp;
 220         unsigned long src;
 221         struct iphdr *iph;
 222 
 223         /*
 224          *      See if we need to look up the device.
 225          */
 226 
 227 #ifdef CONFIG_IP_MULTICAST      
 228         if(MULTICAST(daddr) && *dev==NULL && skb->sk && *skb->sk->ip_mc_name)
 229                 *dev=dev_get(skb->sk->ip_mc_name);
 230 #endif
 231         if (*dev == NULL)
 232         {
 233                 if(skb->localroute)
 234                         rt = ip_rt_local(daddr, NULL, &src);
 235                 else
 236                         rt = ip_rt_route(daddr, NULL, &src);
 237                 if (rt == NULL)
 238                 {
 239                         ip_statistics.IpOutNoRoutes++;
 240                         return(-ENETUNREACH);
 241                 }
 242 
 243                 *dev = rt->rt_dev;
 244                 /*
 245                  *      If the frame is from us and going off machine it MUST MUST MUST
 246                  *      have the output device ip address and never the loopback
 247                  */
 248                 if (LOOPBACK(saddr) && !LOOPBACK(daddr))
 249                         saddr = src;/*rt->rt_dev->pa_addr;*/
 250                 raddr = rt->rt_gateway;
 251 
 252         }
 253         else
 254         {
 255                 /*
 256                  *      We still need the address of the first hop.
 257                  */
 258                 if(skb->localroute)
 259                         rt = ip_rt_local(daddr, NULL, &src);
 260                 else
 261                         rt = ip_rt_route(daddr, NULL, &src);
 262                 /*
 263                  *      If the frame is from us and going off machine it MUST MUST MUST
 264                  *      have the output device ip address and never the loopback
 265                  */
 266                 if (LOOPBACK(saddr) && !LOOPBACK(daddr))
 267                         saddr = src;/*rt->rt_dev->pa_addr;*/
 268 
 269                 raddr = (rt == NULL) ? 0 : rt->rt_gateway;
 270         }
 271 
 272         /*
 273          *      No source addr so make it our addr
 274          */
 275         if (saddr == 0)
 276                 saddr = src;
 277 
 278         /*
 279          *      No gateway so aim at the real destination
 280          */
 281         if (raddr == 0)
 282                 raddr = daddr;
 283 
 284         /*
 285          *      Now build the MAC header.
 286          */
 287 
 288         tmp = ip_send(skb, raddr, len, *dev, saddr);
 289 
 290         /*
 291          *      Book keeping
 292          */
 293 
 294         skb->dev = *dev;
 295         skb->saddr = saddr;
 296         if (skb->sk)
 297                 skb->sk->saddr = saddr;
 298 
 299         /*
 300          *      Now build the IP header.
 301          */
 302 
 303         /*
 304          *      If we are using IPPROTO_RAW, then we don't need an IP header, since
 305          *      one is being supplied to us by the user
 306          */
 307 
 308         if(type == IPPROTO_RAW)
 309                 return (tmp);
 310 
 311         /*
 312          *      Build the IP addresses
 313          */
 314          
 315         iph=(struct iphdr *)skb_put(skb,sizeof(struct iphdr));
 316 
 317         iph->version  = 4;
 318         iph->ihl      = 5;
 319         iph->tos      = tos;
 320         iph->frag_off = 0;
 321         iph->ttl      = ttl;
 322         iph->daddr    = daddr;
 323         iph->saddr    = saddr;
 324         iph->protocol = type;
 325         skb->ip_hdr   = iph;
 326 
 327         return(20 + tmp);       /* IP header plus MAC header size */
 328 }
 329 
 330 
 331 /*
 332  *      Generate a checksum for an outgoing IP datagram.
 333  */
 334 
 335 void ip_send_check(struct iphdr *iph)
     /* [previous][next][first][last][top][bottom][index][help] */
 336 {
 337         iph->check = 0;
 338         iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
 339 }
 340 
 341 /************************ Fragment Handlers From NET2E **********************************/
 342 
 343 
 344 /*
 345  *      This fragment handler is a bit of a heap. On the other hand it works quite
 346  *      happily and handles things quite well.
 347  */
 348 
 349 static struct ipq *ipqueue = NULL;              /* Head of the doubly linked list of datagrams awaiting reassembly; walked and changed only between cli() and sti() */
 350 
 351 /*
 352  *      Create a new fragment entry.
 353  */
 354 
 355 static struct ipfrag *ip_frag_create(int offset, int end, struct sk_buff *skb, unsigned char *ptr)
     /* [previous][next][first][last][top][bottom][index][help] */
 356 {
 357         struct ipfrag *fp;
 358 
 359         fp = (struct ipfrag *) kmalloc(sizeof(struct ipfrag), GFP_ATOMIC);
 360         if (fp == NULL)
 361         {
 362                 NETDEBUG(printk("IP: frag_create: no memory left !\n"));
 363                 return(NULL);
 364         }
 365         memset(fp, 0, sizeof(struct ipfrag));
 366 
 367         /* Fill in the structure. */
 368         fp->offset = offset;
 369         fp->end = end;
 370         fp->len = end - offset;
 371         fp->skb = skb;
 372         fp->ptr = ptr;
 373 
 374         return(fp);
 375 }
 376 
 377 
 378 /*
 379  *      Find the correct entry in the "incomplete datagrams" queue for
 380  *      this IP datagram, and return the queue entry address if found.
 381  */
 382 
 383 static struct ipq *ip_find(struct iphdr *iph)
     /* [previous][next][first][last][top][bottom][index][help] */
 384 {
 385         struct ipq *qp;
 386         struct ipq *qplast;
 387 
 388         cli();
 389         qplast = NULL;
 390         for(qp = ipqueue; qp != NULL; qplast = qp, qp = qp->next)
 391         {
 392                 if (iph->id== qp->iph->id && iph->saddr == qp->iph->saddr &&
 393                         iph->daddr == qp->iph->daddr && iph->protocol == qp->iph->protocol)
 394                 {
 395                         del_timer(&qp->timer);  /* So it doesn't vanish on us. The timer will be reset anyway */
 396                         sti();
 397                         return(qp);
 398                 }
 399         }
 400         sti();
 401         return(NULL);
 402 }
 403 
 404 
 405 /*
 406  *      Remove an entry from the "incomplete datagrams" queue, either
 407  *      because we completed, reassembled and processed it, or because
 408  *      it timed out.
 409  */
 410 
 411 static void ip_free(struct ipq *qp)
     /* [previous][next][first][last][top][bottom][index][help] */
 412 {
 413         struct ipfrag *fp;
 414         struct ipfrag *xp;
 415 
 416         /*
 417          * Stop the timer for this entry.
 418          */
 419 
 420         del_timer(&qp->timer);
 421 
 422         /* Remove this entry from the "incomplete datagrams" queue. */
 423         cli();
 424         if (qp->prev == NULL)
 425         {
 426                 ipqueue = qp->next;
 427                 if (ipqueue != NULL)
 428                         ipqueue->prev = NULL;
 429         }
 430         else
 431         {
 432                 qp->prev->next = qp->next;
 433                 if (qp->next != NULL)
 434                         qp->next->prev = qp->prev;
 435         }
 436 
 437         /* Release all fragment data. */
 438 
 439         fp = qp->fragments;
 440         while (fp != NULL)
 441         {
 442                 xp = fp->next;
 443                 IS_SKB(fp->skb);
 444                 kfree_skb(fp->skb,FREE_READ);
 445                 kfree_s(fp, sizeof(struct ipfrag));
 446                 fp = xp;
 447         }
 448 
 449         /* Release the IP header. */
 450         kfree_s(qp->iph, 64 + 8);
 451 
 452         /* Finally, release the queue descriptor itself. */
 453         kfree_s(qp, sizeof(struct ipq));
 454         sti();
 455 }
 456 
 457 
 458 /*
 459  *      Oops- a fragment queue timed out.  Kill it and send an ICMP reply.
 460  */
 461 
 462 static void ip_expire(unsigned long arg)
     /* [previous][next][first][last][top][bottom][index][help] */
 463 {
 464         struct ipq *qp;
 465 
 466         qp = (struct ipq *)arg;
 467 
 468         /*
 469          *      Send an ICMP "Fragment Reassembly Timeout" message.
 470          */
 471 
 472         ip_statistics.IpReasmTimeout++;
 473         ip_statistics.IpReasmFails++;   
 474         /* This if is always true... shrug */
 475         if(qp->fragments!=NULL)
 476                 icmp_send(qp->fragments->skb,ICMP_TIME_EXCEEDED,
 477                                 ICMP_EXC_FRAGTIME, 0, qp->dev);
 478 
 479         /*
 480          *      Nuke the fragment queue.
 481          */
 482         ip_free(qp);
 483 }
 484 
 485 
 486 /*
 487  *      Add an entry to the 'ipq' queue for a newly received IP datagram.
 488  *      We will (hopefully :-) receive all other fragments of this datagram
 489  *      in time, so we just create a queue for this datagram, in which we
 490  *      will insert the received fragments at their respective positions.
 491  */
 492 
 493 static struct ipq *ip_create(struct sk_buff *skb, struct iphdr *iph, struct device *dev)
     /* [previous][next][first][last][top][bottom][index][help] */
 494 {
 495         struct ipq *qp;
 496         int ihlen;
 497 
 498         qp = (struct ipq *) kmalloc(sizeof(struct ipq), GFP_ATOMIC);
 499         if (qp == NULL)
 500         {
 501                 NETDEBUG(printk("IP: create: no memory left !\n"));
 502                 return(NULL);
 503                 skb->dev = qp->dev;
 504         }
 505         memset(qp, 0, sizeof(struct ipq));
 506 
 507         /*
 508          *      Allocate memory for the IP header (plus 8 octets for ICMP).
 509          */
 510 
 511         ihlen = iph->ihl * 4;
 512         qp->iph = (struct iphdr *) kmalloc(64 + 8, GFP_ATOMIC);
 513         if (qp->iph == NULL)
 514         {
 515                 NETDEBUG(printk("IP: create: no memory left !\n"));
 516                 kfree_s(qp, sizeof(struct ipq));
 517                 return(NULL);
 518         }
 519 
 520         memcpy(qp->iph, iph, ihlen + 8);
 521         qp->len = 0;
 522         qp->ihlen = ihlen;
 523         qp->fragments = NULL;
 524         qp->dev = dev;
 525 
 526         /* Start a timer for this entry. */
 527         qp->timer.expires = jiffies + IP_FRAG_TIME;     /* about 30 seconds     */
 528         qp->timer.data = (unsigned long) qp;            /* pointer to queue     */
 529         qp->timer.function = ip_expire;                 /* expire function      */
 530         add_timer(&qp->timer);
 531 
 532         /* Add this entry to the queue. */
 533         qp->prev = NULL;
 534         cli();
 535         qp->next = ipqueue;
 536         if (qp->next != NULL)
 537                 qp->next->prev = qp;
 538         ipqueue = qp;
 539         sti();
 540         return(qp);
 541 }
 542 
 543 
 544 /*
 545  *      See if a fragment queue is complete.
 546  */
 547 
 548 static int ip_done(struct ipq *qp)
     /* [previous][next][first][last][top][bottom][index][help] */
 549 {
 550         struct ipfrag *fp;
 551         int offset;
 552 
 553         /* Only possible if we received the final fragment. */
 554         if (qp->len == 0)
 555                 return(0);
 556 
 557         /* Check all fragment offsets to see if they connect. */
 558         fp = qp->fragments;
 559         offset = 0;
 560         while (fp != NULL)
 561         {
 562                 if (fp->offset > offset)
 563                         return(0);      /* fragment(s) missing */
 564                 offset = fp->end;
 565                 fp = fp->next;
 566         }
 567 
 568         /* All fragments are present. */
 569         return(1);
 570 }
 571 
 572 
 573 /*
 574  *      Build a new IP datagram from all its fragments.
 575  *
 576  *      FIXME: We copy here because we lack an effective way of handling lists
 577  *      of bits on input. Until the new skb data handling is in I'm not going
 578  *      to touch this with a bargepole. 
 579  */
 580 
 581 static struct sk_buff *ip_glue(struct ipq *qp)
     /* [previous][next][first][last][top][bottom][index][help] */
 582 {
 583         struct sk_buff *skb;
 584         struct iphdr *iph;
 585         struct ipfrag *fp;
 586         unsigned char *ptr;
 587         int count, len;
 588 
 589         /*
 590          *      Allocate a new buffer for the datagram.
 591          */
 592         len = qp->ihlen + qp->len;
 593 
 594         if ((skb = dev_alloc_skb(len)) == NULL)
 595         {
 596                 ip_statistics.IpReasmFails++;
 597                 NETDEBUG(printk("IP: queue_glue: no memory for gluing queue %p\n", qp));
 598                 ip_free(qp);
 599                 return(NULL);
 600         }
 601 
 602         /* Fill in the basic details. */
 603         skb_put(skb,len);
 604         skb->h.raw = skb->data;
 605         skb->free = 1;
 606 
 607         /* Copy the original IP headers into the new buffer. */
 608         ptr = (unsigned char *) skb->h.raw;
 609         memcpy(ptr, ((unsigned char *) qp->iph), qp->ihlen);
 610         ptr += qp->ihlen;
 611 
 612         count = 0;
 613 
 614         /* Copy the data portions of all fragments into the new buffer. */
 615         fp = qp->fragments;
 616         while(fp != NULL)
 617         {
 618                 if(count+fp->len > skb->len)
 619                 {
 620                         NETDEBUG(printk("Invalid fragment list: Fragment over size.\n"));
 621                         ip_free(qp);
 622                         kfree_skb(skb,FREE_WRITE);
 623                         ip_statistics.IpReasmFails++;
 624                         return NULL;
 625                 }
 626                 memcpy((ptr + fp->offset), fp->ptr, fp->len);
 627                 count += fp->len;
 628                 fp = fp->next;
 629         }
 630 
 631         /* We glued together all fragments, so remove the queue entry. */
 632         ip_free(qp);
 633 
 634         /* Done with all fragments. Fixup the new IP header. */
 635         iph = skb->h.iph;
 636         iph->frag_off = 0;
 637         iph->tot_len = htons((iph->ihl * 4) + count);
 638         skb->ip_hdr = iph;
 639 
 640         ip_statistics.IpReasmOKs++;
 641         return(skb);
 642 }
 643 
 644 
 645 /*
 646  *      Process an incoming IP datagram fragment.
 647  */
 648 
 649 static struct sk_buff *ip_defrag(struct iphdr *iph, struct sk_buff *skb, struct device *dev)
     /* [previous][next][first][last][top][bottom][index][help] */
 650 {
 651         struct ipfrag *prev, *next, *tmp;
 652         struct ipfrag *tfp;
 653         struct ipq *qp;
 654         struct sk_buff *skb2;
 655         unsigned char *ptr;
 656         int flags, offset;
 657         int i, ihl, end;
 658 
 659         ip_statistics.IpReasmReqds++;
 660 
 661         /* Find the entry of this IP datagram in the "incomplete datagrams" queue. */
 662         qp = ip_find(iph);
 663 
 664         /* Is this a non-fragmented datagram? */
 665         offset = ntohs(iph->frag_off);
 666         flags = offset & ~IP_OFFSET;
 667         offset &= IP_OFFSET;
 668         if (((flags & IP_MF) == 0) && (offset == 0))
 669         {
 670                 if (qp != NULL)
 671                         ip_free(qp);    /* Huh? How could this exist?? */
 672                 return(skb);
 673         }
 674 
 675         offset <<= 3;           /* offset is in 8-byte chunks */
 676 
 677         /*
 678          * If the queue already existed, keep restarting its timer as long
 679          * as we still are receiving fragments.  Otherwise, create a fresh
 680          * queue entry.
 681          */
 682 
 683         if (qp != NULL)
 684         {
 685                 del_timer(&qp->timer);
 686                 qp->timer.expires = jiffies + IP_FRAG_TIME;     /* about 30 seconds */
 687                 qp->timer.data = (unsigned long) qp;    /* pointer to queue */
 688                 qp->timer.function = ip_expire;         /* expire function */
 689                 add_timer(&qp->timer);
 690         }
 691         else
 692         {
 693                 /*
 694                  *      If we failed to create it, then discard the frame
 695                  */
 696                 if ((qp = ip_create(skb, iph, dev)) == NULL)
 697                 {
 698                         skb->sk = NULL;
 699                         kfree_skb(skb, FREE_READ);
 700                         ip_statistics.IpReasmFails++;
 701                         return NULL;
 702                 }
 703         }
 704 
 705         /*
 706          *      Determine the position of this fragment.
 707          */
 708 
 709         ihl = iph->ihl * 4;
 710         end = offset + ntohs(iph->tot_len) - ihl;
 711 
 712         /*
 713          *      Point into the IP datagram 'data' part.
 714          */
 715 
 716         ptr = skb->data + ihl;
 717 
 718         /*
 719          *      Is this the final fragment?
 720          */
 721 
 722         if ((flags & IP_MF) == 0)
 723                 qp->len = end;
 724 
 725         /*
 726          *      Find out which fragments are in front and at the back of us
 727          *      in the chain of fragments so far.  We must know where to put
 728          *      this fragment, right?
 729          */
 730 
 731         prev = NULL;
 732         for(next = qp->fragments; next != NULL; next = next->next)
 733         {
 734                 if (next->offset > offset)
 735                         break;  /* bingo! */
 736                 prev = next;
 737         }
 738 
 739         /*
 740          *      We found where to put this one.
 741          *      Check for overlap with preceding fragment, and, if needed,
 742          *      align things so that any overlaps are eliminated.
 743          */
 744         if (prev != NULL && offset < prev->end)
 745         {
 746                 i = prev->end - offset;
 747                 offset += i;    /* ptr into datagram */
 748                 ptr += i;       /* ptr into fragment data */
 749         }
 750 
 751         /*
 752          * Look for overlap with succeeding segments.
 753          * If we can merge fragments, do it.
 754          */
 755 
 756         for(tmp=next; tmp != NULL; tmp = tfp)
 757         {
 758                 tfp = tmp->next;
 759                 if (tmp->offset >= end)
 760                         break;          /* no overlaps at all */
 761 
 762                 i = end - next->offset;                 /* overlap is 'i' bytes */
 763                 tmp->len -= i;                          /* so reduce size of    */
 764                 tmp->offset += i;                       /* next fragment        */
 765                 tmp->ptr += i;
 766                 /*
 767                  *      If we get a frag size of <= 0, remove it and the packet
 768                  *      that it goes with.
 769                  */
 770                 if (tmp->len <= 0)
 771                 {
 772                         if (tmp->prev != NULL)
 773                                 tmp->prev->next = tmp->next;
 774                         else
 775                                 qp->fragments = tmp->next;
 776 
 777                         if (tfp->next != NULL)
 778                                 tmp->next->prev = tmp->prev;
 779                         
 780                         next=tfp;       /* We have killed the original next frame */
 781 
 782                         kfree_skb(tmp->skb,FREE_READ);
 783                         kfree_s(tmp, sizeof(struct ipfrag));
 784                 }
 785         }
 786 
 787         /*
 788          *      Insert this fragment in the chain of fragments.
 789          */
 790 
 791         tfp = NULL;
 792         tfp = ip_frag_create(offset, end, skb, ptr);
 793 
 794         /*
 795          *      No memory to save the fragment - so throw the lot
 796          */
 797 
 798         if (!tfp)
 799         {
 800                 skb->sk = NULL;
 801                 kfree_skb(skb, FREE_READ);
 802                 return NULL;
 803         }
 804         tfp->prev = prev;
 805         tfp->next = next;
 806         if (prev != NULL)
 807                 prev->next = tfp;
 808         else
 809                 qp->fragments = tfp;
 810 
 811         if (next != NULL)
 812                 next->prev = tfp;
 813 
 814         /*
 815          *      OK, so we inserted this new fragment into the chain.
 816          *      Check if we now have a full IP datagram which we can
 817          *      bump up to the IP layer...
 818          */
 819 
 820         if (ip_done(qp))
 821         {
 822                 skb2 = ip_glue(qp);             /* glue together the fragments */
 823                 return(skb2);
 824         }
 825         return(NULL);
 826 }
 827 
 828 
 829 /*
 830  *      This IP datagram is too large to be sent in one piece.  Break it up into
 831  *      smaller pieces (each of size equal to the MAC header plus IP header plus
 832  *      a block of the data of the original IP data part) that will yet fit in a
 833  *      single device frame, and queue such a frame for sending by calling the
 834  *      ip_queue_xmit().  Note that this is recursion, and bad things will happen
 835  *      if this function causes a loop...
 836  *
 837  *      Yes this is inefficient, feel free to submit a quicker one.
 838  *
 839  *      **Protocol Violation**
 840  *      We copy all the options to each fragment. !FIXME!
      *
      *      Arguments:
      *        sk       Socket charged for each fragment buffer (wmem_alloc is
      *                 increased by the fragment's truesize).  May be NULL, in
      *                 which case nothing is charged.
      *        skb      The datagram to split.  skb->ip_hdr must point at its IP
      *                 header and skb->data at the start of the bytes that get
      *                 copied as the per-fragment header.  This function does
      *                 not itself free skb.
      *        dev      Output device; dev->mtu and dev->hard_header_len size the
      *                 fragments.
      *        is_frag  Bit value 1: keep MF set on every piece, including the
      *                 last.  Bit value 2: start the offsets from the offset
      *                 already stored in the datagram's header instead of 0.
      *
      *      Nothing is returned.  Outcomes are visible only through counters:
      *      IpFragFails is bumped when DF is set, when fewer than 8 data bytes
      *      fit per frame, or when a buffer cannot be allocated; IpFragCreates
      *      is bumped per fragment queued; IpFragOKs once the loop completes.
      *      An allocation failure returns from inside the loop, so fragments
      *      queued before the failure stay queued.
 841  */
 842  
 843 void ip_fragment(struct sock *sk, struct sk_buff *skb, struct device *dev, int is_frag)
     /* [previous][next][first][last][top][bottom][index][help] */
 844 {
 845         struct iphdr *iph;
 846         unsigned char *raw;
 847         unsigned char *ptr;
 848         struct sk_buff *skb2;
 849         int left, mtu, hlen, len;
 850         int offset;
 851         unsigned long flags;
 852 
 853         /*
 854          *      Point into the IP datagram header.
              *
              *      raw is where the header bytes of every fragment are copied
              *      from.  iph is taken from skb->ip_hdr; the "#if 0" arm that
              *      derived it from raw + hard_header_len is compiled out.
 855          */
 856 
 857         raw = skb->data;
 858 #if 0
 859         iph = (struct iphdr *) (raw + dev->hard_header_len);    
 860         skb->ip_hdr = iph;
 861 #else
 862         iph = skb->ip_hdr;
 863 #endif
 864 
 865         /*
 866          *      Setup starting values.
 867          */
 868 
 869         hlen = iph->ihl * 4;                    /* IP header length in bytes */
 870         left = ntohs(iph->tot_len) - hlen;      /* IP payload bytes still to send */
 871         hlen += dev->hard_header_len;           /* Total header size */
 872         mtu = (dev->mtu - hlen);                /* Payload room per frame: dev->mtu less both headers */
 873         ptr = (raw + hlen);                     /* Where to start from */
 874 
 875         /*
 876          *      Check for any "DF" flag. [DF means do not fragment]
 877          */
 878 
 879         if (ntohs(iph->frag_off) & IP_DF)
 880         {
 881                 /*
 882                  *      Reply giving the MTU of the failed hop.
 883                  */
 884                 ip_statistics.IpFragFails++;
 885                 icmp_send(skb,ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, dev->mtu, dev);
 886                 return;
 887         }
 888 
 889         /*
 890          *      The protocol doesn't seem to say what to do in the case that the
 891          *      frame + options doesn't fit the mtu. As it used to fall down dead
 892          *      in this case we were fortunate it didn't happen
              *
              *      With fewer than 8 bytes of room, the rounding of non-final
              *      fragments to a multiple of 8 in the loop below would give a
              *      length of 0 and "left" would never shrink.
 893          */
 894 
 895         if(mtu<8)
 896         {
 897                 /* It's wrong but it's better than nothing */
 898                 icmp_send(skb,ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED,dev->mtu, dev);
 899                 ip_statistics.IpFragFails++;
 900                 return;
 901         }
 902 
 903         /*
 904          *      Fragment the datagram.
 905          */
 906 
 907         /*
 908          *      The initial offset is 0 for a complete frame. When
 909          *      fragmenting fragments it's wherever this one starts.
              *      The header field counts 8-byte units, hence the shift.
 910          */
 911 
 912         if (is_frag & 2)
 913                 offset = (ntohs(iph->frag_off) & IP_OFFSET) << 3;
 914         else
 915                 offset = 0;
 916 
 917 
 918         /*
 919          *      Keep copying data until we run out.
 920          */
 921 
 922         while(left > 0)
 923         {
 924                 len = left;
 925                 /* IF: it doesn't fit, use 'mtu' - the data space left */
 926                 if (len > mtu)
 927                         len = mtu;
 928                 /* IF: we are not sending up to and including the packet end
 929                    then align the next start on an eight byte boundary */
 930                 if (len < left)
 931                 {
 932                         len/=8;         /* round down to a multiple of 8 */
 933                         len*=8;
 934                 }
 935                 /*
 936                  *      Allocate buffer.
                      *
                      *      The request is headers + data + 15 spare bytes; the
                      *      reason for the extra 15 is not visible here
                      *      (presumably alignment slack - TODO confirm).
 937                  */
 938 
 939                 if ((skb2 = alloc_skb(len + hlen+15,GFP_ATOMIC)) == NULL)
 940                 {
 941                         NETDEBUG(printk("IP: frag: no memory for new fragment!\n"));
 942                         ip_statistics.IpFragFails++;
 943                         return;
 944                 }
 945 
 946                 /*
 947                  *      Set up data on packet
                      *
                      *      A source buffer with free==0 is only reported, not
                      *      rejected.  h.raw is first set to the start of the new
                      *      buffer and is advanced past the hardware header after
                      *      the two copies below.
 948                  */
 949 
 950                 skb2->arp = skb->arp;
 951                 if(skb->free==0)
 952                         printk("IP fragmenter: BUG free!=1 in fragmenter\n");
 953                 skb2->free = 1;
 954                 skb_put(skb2,len + hlen);
 955                 skb2->h.raw=(char *) skb2->data;
 956                 /*
 957                  *      Charge the memory for the fragment to any owner
 958                  *      it might possess
                      *
                      *      Interrupts are disabled only when there is a socket to
                      *      update; the saved flags are restored either way.
 959                  */
 960 
 961                 save_flags(flags);
 962                 if (sk)
 963                 {
 964                         cli();
 965                         sk->wmem_alloc += skb2->truesize;
 966                         skb2->sk=sk;
 967                 }
 968                 restore_flags(flags);
 969                 skb2->raddr = skb->raddr;       /* For rebuild_header - must be here */
 970 
 971                 /*
 972                  *      Copy the packet header into the new buffer.
                      *      hlen covers the hardware header plus the whole IP
                      *      header, options included (see the FIXME above).
 973                  */
 974 
 975                 memcpy(skb2->h.raw, raw, hlen);
 976 
 977                 /*
 978                  *      Copy a block of the IP datagram.
 979                  */
 980                 memcpy(skb2->h.raw + hlen, ptr, len);
 981                 left -= len;
 982 
 983                 skb2->h.raw+=dev->hard_header_len;      /* now at the copied IP header */
 984 
 985                 /*
 986                  *      Fill in the new header fields.
                      *
                      *      frag_off is assigned outright, so any flag bits copied
                      *      from the original header are replaced.
                      *      NOTE(review): tot_len and check in the copied header
                      *      are not touched here; presumably ip_queue_xmit() sets
                      *      them - confirm.
 987                  */
 988                 iph = (struct iphdr *)(skb2->h.raw/*+dev->hard_header_len*/);
 989                 iph->frag_off = htons((offset >> 3));
 990                 /*
 991                  *      Added AC : If we are fragmenting a fragment that's not the
 992                  *                 last fragment then keep MF on each bit
                      *      ("left" was already reduced above, so left > 0 means
                      *      more pieces of this datagram follow.)
 993                  */
 994                 if (left > 0 || (is_frag & 1))
 995                         iph->frag_off |= htons(IP_MF);
 996                 ptr += len;
 997                 offset += len;
 998 
 999                 /*
1000                  *      Put this fragment into the sending queue.
1001                  */
1002 
1003                 ip_statistics.IpFragCreates++;
1004 
1005                 ip_queue_xmit(sk, dev, skb2, 2);
1006         }
1007         ip_statistics.IpFragOKs++;
1008 }
1009 
1010 
1011 
1012 #ifdef CONFIG_IP_FORWARD
1013 
1014 /*
1015  *      Forward an IP datagram to its next destination.
      *
      *      Arguments:
      *        skb            Datagram to forward; skb->h.iph must point at its
      *                       IP header.
      *        dev            Device the datagram arrived on.  It is handed to
      *                       icmp_send() and compared with the output device
      *                       for the redirect test.
      *        is_frag        Fragment flags passed through to ip_fragment().
      *                       Bit value 4 additionally skips the forwarding
      *                       firewall check and the masquerading step.
      *        target_addr    Address used for the route lookup.
      *        target_strict  Nonzero: fail with ICMP_SR_FAILED if the route
      *                       found goes via a gateway.
      *
      *      Returns:
      *        -1  not forwarded: firewall refusal, TTL expired, no route,
      *            strict-route failure, output device not up, or no memory
      *            for a copy.
      *         0  skb itself was used as the output buffer (queued to the
      *            device, or fragmented and then freed here).
      *         1  a new buffer was allocated and sent; skb was only copied.
      *      The call site visible in ip_rcv() frees skb on any nonzero result.
1016  */
1017 
1018 int ip_forward(struct sk_buff *skb, struct device *dev, int is_frag, unsigned long target_addr, int target_strict)
     /* [previous][next][first][last][top][bottom][index][help] */
1019 {
1020         struct device *dev2;    /* Output device */
1021         struct iphdr *iph;      /* Our header */
1022         struct sk_buff *skb2;   /* Output packet */
1023         struct rtable *rt;      /* Route we use */
1024         unsigned char *ptr;     /* Data pointer */
1025         unsigned long raddr;    /* Router IP address */
1026 #ifdef CONFIG_IP_FIREWALL
1027         int fw_res = 0;         /* Forwarding result */ 
1028         
1029         /* 
1030          *      See if we are allowed to forward this.
1031          *      Note: demasqueraded fragments are always 'back'warded.
              *
              *      Result 1 (and 2 when masquerading is configured) lets the
              *      packet through.  -1 sends an ICMP host-unreachable and then
              *      shares the default arm; every other result returns -1
              *      without sending anything.
1032          */
1033 
1034         
1035         if(!(is_frag&4))
1036         {
1037                 fw_res=ip_fw_chk(skb->h.iph, dev, ip_fw_fwd_chain, ip_fw_fwd_policy, 0);
1038                 switch (fw_res) {
1039                 case 1:
1040 #ifdef CONFIG_IP_MASQUERADE
1041                 case 2:
1042 #endif
1043                         break;
1044                 case -1:
1045                         icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0, dev);
1046                         /* fall thru */
1047                 default:
1048                         return -1;
1049                 }
1050         }
1051 #endif
1052         /*
1053          *      According to the RFC, we must first decrease the TTL field. If
1054          *      that reaches zero, we must reply an ICMP control message telling
1055          *      that the packet's lifetime expired.
1056          *
1057          *      Exception:
1058          *      We may not generate an ICMP for an ICMP. icmp_send does the
1059          *      enforcement of this so we can forget it here. It is however
1060          *      sometimes VERY important.
1061          */
1062 
1063         iph = skb->h.iph;
1064         iph->ttl--;
1065 
1066         /*
1067          *      Patch the IP header checksum in place rather than recomputing
1068          *      it over the whole header: add 0x0100 to the host-order value
1069          *      to compensate for the TTL decrement, and if the top byte
              *      wrapped round to zero add the carry back in.
1070          */
1071 
1072         iph->check = ntohs(iph->check) + 0x0100;
1073         if ((iph->check & 0xFF00) == 0)
1074                 iph->check++;           /* carry overflow */
1075         iph->check = htons(iph->check);
1076 
             /*
              *      NOTE(review): this test runs after the decrement above.  If
              *      iph->ttl is an unsigned 8-bit field, a datagram arriving with
              *      TTL 0 would wrap instead of matching "<= 0" - confirm against
              *      the struct iphdr declaration.  The header handed to
              *      icmp_send() here has already been modified.
              */
1077         if (iph->ttl <= 0)
1078         {
1079                 /* Tell the sender its packet died... */
1080                 icmp_send(skb, ICMP_TIME_EXCEEDED, ICMP_EXC_TTL, 0, dev);
1081                 return -1;
1082         }
1083 
1084         /*
1085          * OK, the packet is still valid.  Fetch its destination address,
1086          * and give it to the IP sender for further processing.
1087          */
1088 
1089         rt = ip_rt_route(target_addr, NULL, NULL);
1090         if (rt == NULL)
1091         {
1092                 /*
1093                  *      Tell the sender its packet cannot be delivered. Again
1094                  *      ICMP is screened later.
1095                  */
1096                 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_NET_UNREACH, 0, dev);
1097                 return -1;
1098         }
1099 
1100 
1101         /*
1102          * Gosh.  Not only is the packet valid; we even know how to
1103          * forward it onto its final destination.  Can we say this
1104          * is being plain lucky?
1105          * If the router told us that there is no GW, use the dest.
1106          * IP address itself- we seem to be connected directly...
1107          */
1108 
1109         raddr = rt->rt_gateway;
1110 
1111         if (raddr != 0)
1112         {
1113                 /*
1114                  *      Strict routing permits no gatewaying
1115                  */
1116                 
1117                 if(target_strict)
1118                 {
1119                         icmp_send(skb, ICMP_DEST_UNREACH, ICMP_SR_FAILED, 0, dev);
1120                         return -1;
1121                 }
1122         
1123                 /*
1124                  *      There is a gateway so find the correct route for it.
1125                  *      Gateways cannot in turn be gatewayed.
                      *
                      *      rt is replaced by the route to the gateway.  If that
                      *      route names a gateway of its own, raddr becomes that
                      *      address; no further lookup is made.
1126                  */
1127 
1128                 rt = ip_rt_route(raddr, NULL, NULL);
1129                 if (rt == NULL)
1130                 {
1131                         /*
1132                          *      Tell the sender its packet cannot be delivered...
1133                          */
1134                         icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0, dev);
1135                         return -1;
1136                 }
1137                 if (rt->rt_gateway != 0)
1138                         raddr = rt->rt_gateway;
1139         }
1140         else
1141                 raddr = target_addr;
1142 
1143         /*
1144          *      Having picked a route we can now send the frame out.
1145          */
1146 
1147         dev2 = rt->rt_dev;
1148         
1149         /*
1150          *      In IP you never have to forward a frame on the interface that it 
1151          *      arrived upon. We now generate an ICMP HOST REDIRECT giving the route
1152          *      we calculated.
              *
              *      The redirect is sent only when the output device is the input
              *      device, source and destination addresses agree in every bit
              *      of dev->pa_mask, and the route carries RTF_MODIFIED.  The
              *      datagram is still forwarded afterwards.
1153          */
1154 #ifndef CONFIG_IP_NO_ICMP_REDIRECT
1155         if (dev == dev2 && !((iph->saddr^iph->daddr)&dev->pa_mask) && (rt->rt_flags&RTF_MODIFIED))
1156                 icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, raddr, dev);
1157 #endif          
1158 
1159         /*
1160          * We now may allocate a new buffer, and copy the datagram into it.
1161          * If the indicated interface is up and running, kick it.
1162          */
1163 
1164         if (dev2->flags & IFF_UP)
1165         {
1166 #ifdef CONFIG_IP_MASQUERADE
1167                 /*
1168                  * If this fragment needs masquerading, make it so...
1169                  * (Don't masquerade de-masqueraded fragments)
                      *
                      * NOTE(review): ip_fw_masquerade() is given &skb, so it may
                      * substitute a different buffer, yet iph (captured earlier) is
                      * still read further down - confirm it remains valid.  fw_res
                      * is declared only under CONFIG_IP_FIREWALL.
1170                  */
1171                 if (!(is_frag&4) && fw_res==2)
1172                         ip_fw_masquerade(&skb, dev2);
1173 #endif
1174                 IS_SKB(skb);
1175 
                     /* Not enough room in front of the data for dev2's hardware header: copy. */
1176                 if(skb_headroom(skb)<dev2->hard_header_len)
1177                 {
1178                         skb2 = alloc_skb(dev2->hard_header_len + skb->len + 15, GFP_ATOMIC);
                             /* NOTE(review): IS_SKB() runs before the NULL test below -
                                confirm the macro tolerates a NULL pointer. */
1179                         IS_SKB(skb2);
1180                 
1181                         /*
1182                          *      This is rare and since IP is tolerant of network failures
1183                          *      quite harmless.
1184                          */
1185                 
1186                         if (skb2 == NULL)
1187                         {
1188                                 NETDEBUG(printk("\nIP: No memory available for IP forward\n"));
1189                                 return -1;
1190                         }
1191                 
1192                         /*
1193                          *      Add the physical headers.
1194                          */
1195 
1196                         ip_send(skb2,raddr,skb->len,dev2,dev2->pa_addr);
1197 
1198                         /*
1199                          *      We have to copy the bytes over as the new header wouldn't fit
1200                          *      the old buffer. This should be very rare.
1201                          */              
1202                         
1203                         ptr = skb_put(skb2,skb->len);
1204                         skb2->free = 1;
1205                         skb2->h.raw = ptr;
1206 
1207                         /*
1208                          *      Copy the packet data into the new buffer.
1209                          */
1210                         memcpy(ptr, skb->h.raw, skb->len);
1211                 }
1212                 else
1213                 {
1214                         /* 
1215                          *      Build a new MAC header. 
                              *
                              *      The received buffer is reused as the output
                              *      buffer.  arp starts at 1 and is cleared when the
                              *      device's hard_header() returns a negative value.
                              *      NOTE(review): IpForwDatagrams is counted only on
                              *      this branch, not on the copy branch above -
                              *      confirm that is intended.
1216                          */
1217 
1218                         skb2 = skb;             
1219                         skb2->dev=dev2;
1220                         skb->arp=1;
1221                         skb->raddr=raddr;
1222                         if(dev2->hard_header)
1223                         {
1224                                 if(dev2->hard_header(skb, dev2, ETH_P_IP, NULL, NULL, skb->len)<0)
1225                                         skb->arp=0;
1226                         }
1227                         ip_statistics.IpForwDatagrams++;
1228                 }
1229                 /*
1230                  *      See if it needs fragmenting. Note in ip_rcv we tagged
1231                  *      the fragment type. This must be right so that
1232                  *      the fragmenter does the right thing.
                      *
                      *      ip_fragment() is called with a NULL socket, and the
                      *      unfragmented output buffer is freed here afterwards.
1233                  */
1234 
1235                 if(skb2->len > dev2->mtu + dev2->hard_header_len)
1236                 {
1237                         ip_fragment(NULL,skb2,dev2, is_frag);
1238                         kfree_skb(skb2,FREE_WRITE);
1239                 }
1240                 else
1241                 {
1242 #ifdef CONFIG_IP_ACCT           
1243                         /*
1244                          *      Count mapping we shortcut
1245                          */
1246                          
1247                         ip_fw_chk(iph,dev,ip_acct_chain,IP_FW_F_ACCEPT,1);
1248 #endif                  
1249                         
1250                         /*
1251                          *      Map service types to priority. We lie about
1252                          *      throughput being low priority, but it's a good
1253                          *      choice to help improve general usage.
                              *      The low-delay bit is tested first, so it wins
                              *      when both bits are set.
1254                          */
1255                         if(iph->tos & IPTOS_LOWDELAY)
1256                                 dev_queue_xmit(skb2, dev2, SOPRI_INTERACTIVE);
1257                         else if(iph->tos & IPTOS_THROUGHPUT)
1258                                 dev_queue_xmit(skb2, dev2, SOPRI_BACKGROUND);
1259                         else
1260                                 dev_queue_xmit(skb2, dev2, SOPRI_NORMAL);
1261                 }
1262         }
1263         else
1264                 return -1;      /* output device is not up */
1265         
1266         /*
1267          *      Tell the caller if their buffer is free.
              *      0: skb itself was consumed as the output buffer.
              *      1: a separate copy was sent.
1268          */
1269          
1270         if(skb==skb2)
1271                 return 0;
1272         return 1;
1273 }
1274 
1275 
1276 #endif
1277 
1278 /*
1279  *      This function receives all incoming IP datagrams.
1280  *
1281  *      On entry skb->data points to the start of the IP header and
1282  *      the MAC header has been removed.
1283  */
1284 
1285 int ip_rcv(struct sk_buff *skb, struct device *dev, struct packet_type *pt)
     /* [previous][next][first][last][top][bottom][index][help] */
1286 {
1287         struct iphdr *iph = skb->h.iph;
1288         struct sock *raw_sk=NULL;
1289         unsigned char hash;
1290         unsigned char flag = 0;
1291         struct inet_protocol *ipprot;
1292         int brd=IS_MYADDR;
1293         unsigned long target_addr;
1294         int target_strict=0;
1295         int is_frag=0;
1296 #ifdef CONFIG_IP_FIREWALL
1297         int err;
1298 #endif  
1299 
1300         ip_statistics.IpInReceives++;
1301 
1302         /*
1303          *      Tag the ip header of this packet so we can find it
1304          */
1305 
1306         skb->ip_hdr = iph;
1307 
1308         /*
1309          *      RFC1122: 3.1.2.2 MUST silently discard any IP frame that fails the checksum.
1310          *      RFC1122: 3.1.2.3 MUST discard a frame with invalid source address [NEEDS FIXING].
1311          *
1312          *      Is the datagram acceptable?
1313          *
1314          *      1.      Length at least the size of an ip header
1315          *      2.      Version of 4
1316          *      3.      Checksums correctly. [Speed optimisation for later, skip loopback checksums]
1317          *      4.      Doesn't have a bogus length
1318          *      (5.     We ought to check for IP multicast addresses and undefined types.. does this matter ?)
1319          */
1320 
1321         if (skb->len<sizeof(struct iphdr) || iph->ihl<5 || iph->version != 4 || ip_fast_csum((unsigned char *)iph, iph->ihl) !=0
1322                 || skb->len < ntohs(iph->tot_len))
1323         {
1324                 ip_statistics.IpInHdrErrors++;
1325                 kfree_skb(skb, FREE_WRITE);
1326                 return(0);
1327         }
1328 
1329         /*
1330          *      Our transport medium may have padded the buffer out. Now we know it
1331          *      is IP we can trim to the true length of the frame.
1332          *      Note this now means skb->len holds ntohs(iph->tot_len).
1333          */
1334 
1335         skb_trim(skb,ntohs(iph->tot_len));
1336         
1337         /*
1338          *      See if the firewall wants to dispose of the packet. 
1339          */
1340 
1341 #ifdef  CONFIG_IP_FIREWALL
1342         
1343         if ((err=ip_fw_chk(iph,dev,ip_fw_blk_chain,ip_fw_blk_policy, 0))<1)
1344         {
1345                 if(err==-1)
1346                         icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0, dev);
1347                 kfree_skb(skb, FREE_WRITE);
1348                 return 0;       
1349         }
1350 
1351 #endif
1352         
1353 
1354         /*
1355          *      Next analyse the packet for options. Studies show under one packet in
1356          *      a thousand have options....
1357          */
1358          
1359         target_addr = iph->daddr;
1360 
1361         if (iph->ihl != 5)
1362         { 
1363                 /* Humph.. options. Lots of annoying fiddly bits */
1364                 
1365                 /*
1366                  *      This is straight from the RFC. It might even be right ;)
1367                  *
1368                  *      RFC 1122: 3.2.1.8 STREAMID option is obsolete and MUST be ignored.
1369                  *      RFC 1122: 3.2.1.8 MUST NOT crash on a zero length option.
1370                  *      RFC 1122: 3.2.1.8 MUST support acting as final destination of a source route.
1371                  */
1372                  
1373                 int opt_space=4*(iph->ihl-5);
1374                 int opt_size;
1375                 unsigned char *opt_ptr=skb->h.raw+sizeof(struct iphdr);
1376         
1377                 skb->ip_summed=0;               /* Our free checksum is bogus for this case */
1378                         
1379                 while(opt_space>0)
1380                 {
1381                         if(*opt_ptr==IPOPT_NOOP)
1382                         {
1383                                 opt_ptr++;
1384                                 opt_space--;
1385                                 continue;
1386                         }
1387                         if(*opt_ptr==IPOPT_END)
1388                                 break;  /* Done */
1389                         if(opt_space<2 || (opt_size=opt_ptr[1])<2 || opt_ptr[1]>opt_space)
1390                         {
1391                                 /*
1392                                  *      RFC 1122: 3.2.2.5  SHOULD send parameter problem reports.
1393                                  */
1394                                 icmp_send(skb, ICMP_PARAMETERPROB, 0, 0, skb->dev);
1395                                 kfree_skb(skb, FREE_READ);
1396                                 return -EINVAL;
1397                         }
1398                         switch(opt_ptr[0])
1399                         {
1400                                 case IPOPT_SEC:
1401                                         /* Should we drop this ?? */
1402                                         break;
1403                                 case IPOPT_SSRR:        /* These work almost the same way */
1404                                         target_strict=1;
1405                                         /* Fall through */
1406                                 case IPOPT_LSRR:
1407 #ifdef CONFIG_IP_NOSR
1408                                         kfree_skb(skb, FREE_READ);
1409                                         return -EINVAL;
1410 #endif                                  
1411                                 case IPOPT_RR:
1412                                 /*
1413                                  *      RFC 1122: 3.2.1.8 Support for RR is OPTIONAL.
1414                                  */
1415                                         if (iph->daddr!=skb->dev->pa_addr && (brd = ip_chk_addr(iph->daddr)) == 0) 
1416                                                 break;
1417                                         if((opt_size<3) || ( opt_ptr[0]==IPOPT_RR && opt_ptr[2] > opt_size-4 ))
1418                                         {
1419                                                 if(ip_chk_addr(iph->daddr))
1420                                                         icmp_send(skb, ICMP_PARAMETERPROB, 0, 0, skb->dev);
1421                                                 kfree_skb(skb, FREE_READ);
1422                                                 return -EINVAL;
1423                                         }
1424                                         if(opt_ptr[2] > opt_size-4 )
1425                                                 break;
1426                                         /* Bytes are [IPOPT_xxRR][Length][EntryPointer][Entry0][Entry1].... */
1427                                         /* This isn't going to be too portable - FIXME */
1428                                         if(opt_ptr[0]!=IPOPT_RR)
1429                                         {
1430                                                 int t;
1431                                                 target_addr=*(u32 *)(&opt_ptr[opt_ptr[2]]);     /* Get hop */
1432                                                 t=ip_chk_addr(target_addr);
1433                                                 if(t==IS_MULTICAST||t==IS_BROADCAST)
1434                                                 {
1435                                                         if(ip_chk_addr(iph->daddr))
1436                                                                 icmp_send(skb, ICMP_PARAMETERPROB, 0, 0, skb->dev);
1437                                                         kfree_skb(skb,FREE_READ);
1438                                                         return -EINVAL;                                         
1439                                                 }
1440                                         }
1441                                         *(u32 *)(&opt_ptr[opt_ptr[2]])=skb->dev->pa_addr;       /* Record hop */
1442                                         break;
1443                                 case IPOPT_TIMESTAMP:
1444                                 /*
1445                                  *      RFC 1122: 3.2.1.8 The timestamp option is OPTIONAL but if implemented
1446                                  *      MUST meet various rules (read the spec).
1447                                  */
1448                                         NETDEBUG(printk("ICMP: Someone finish the timestamp routine ;)\n"));
1449                                         break;
1450                                 default:
1451                                         break;
1452                         }
1453                         opt_ptr+=opt_size;
1454                         opt_space-=opt_size;
1455                 }
1456                                         
1457         }
1458 
1459 
1460         /*
1461          *      Remember if the frame is fragmented.
1462          */
1463          
1464         if(iph->frag_off)
1465         {
1466                 if (iph->frag_off & htons(IP_MF))
1467                         is_frag|=1;
1468                 /*
1469                  *      Last fragment ?
1470                  */
1471         
1472                 if (iph->frag_off & htons(IP_OFFSET))
1473                         is_frag|=2;
1474         }
1475         
1476         /*
1477          *      Do any IP forwarding required.  chk_addr() is expensive -- avoid it someday.
1478          *
1479          *      This is inefficient. While finding out if it is for us we could also compute
1480          *      the routing table entry. This is where the great unified cache theory comes
1481          *      in as and when someone implements it
1482          *
1483          *      For most hosts over 99% of packets match the first conditional
1484          *      and don't go via ip_chk_addr. Note: brd is set to IS_MYADDR at
1485          *      function entry.
1486          */
1487 
1488         if ( iph->daddr == skb->dev->pa_addr || (brd = ip_chk_addr(iph->daddr)) != 0)
1489         {
1490 #ifdef CONFIG_IP_MULTICAST      
1491 
1492                 if(brd==IS_MULTICAST && iph->daddr!=IGMP_ALL_HOSTS && !(dev->flags&IFF_LOOPBACK))
1493                 {
1494                         /*
1495                          *      Check it is for one of our groups
1496                          */
1497                         struct ip_mc_list *ip_mc=dev->ip_mc_list;
1498                         do
1499                         {
1500                                 if(ip_mc==NULL)
1501                                 {       
1502                                         kfree_skb(skb, FREE_WRITE);
1503                                         return 0;
1504                                 }
1505                                 if(ip_mc->multiaddr==iph->daddr)
1506                                         break;
1507                                 ip_mc=ip_mc->next;
1508                         }
1509                         while(1);
1510                 }
1511 #endif
1512 
1513 #ifdef CONFIG_IP_MASQUERADE
1514                 /*
1515                  * Do we need to de-masquerade this fragment?
1516                  */
1517                 if (ip_fw_demasquerade(skb)) 
1518                 {
1519                         struct iphdr *iph=skb->h.iph;
1520                         if(ip_forward(skb, dev, is_frag|4, iph->daddr, 0))
1521                                 kfree_skb(skb, FREE_WRITE);
1522                         return(0);
1523                 }
1524 #endif
1525 
1526                 /*
1527                  *      Account for the packet
1528                  */
1529  
1530 #ifdef CONFIG_IP_ACCT
1531                 ip_fw_chk(iph,dev,ip_acct_chain,IP_FW_F_ACCEPT,1);
1532 #endif  
1533 
1534                 /*
1535                  *      Reassemble IP fragments.
1536                  */
1537 
1538                 if(is_frag)
1539                 {
1540                         /* Defragment. Obtain the complete packet if there is one */
1541                         skb=ip_defrag(iph,skb,dev);
1542                         if(skb==NULL)
1543                                 return 0;
1544                         skb->dev = dev;
1545                         iph=skb->h.iph;
1546                 }
1547 
1548                 /*
1549                  *      Point into the IP datagram, just past the header.
1550                  */
1551 
1552                 skb->ip_hdr = iph;
1553                 skb->h.raw += iph->ihl*4;
1554 
1555                 /*
1556                  *      Deliver to raw sockets. This is fun as to avoid copies we want to make no surplus copies.
1557                  *
1558                  *      RFC 1122: SHOULD pass TOS value up to the transport layer.
1559                  */
1560  
1561                 hash = iph->protocol & (SOCK_ARRAY_SIZE-1);
1562 
1563                 /* 
1564                  *      If there maybe a raw socket we must check - if not we don't care less 
1565                  */
1566                  
1567                 if((raw_sk=raw_prot.sock_array[hash])!=NULL)
1568                 {
1569                         struct sock *sknext=NULL;
1570                         struct sk_buff *skb1;
1571                         raw_sk=get_sock_raw(raw_sk, hash,  iph->saddr, iph->daddr);
1572                         if(raw_sk)      /* Any raw sockets */
1573                         {
1574                                 do
1575                                 {
1576                                         /* Find the next */
1577                                         sknext=get_sock_raw(raw_sk->next, hash, iph->saddr, iph->daddr);
1578                                         if(sknext)
1579                                                 skb1=skb_clone(skb, GFP_ATOMIC);
1580                                         else
1581                                                 break;  /* One pending raw socket left */
1582                                         if(skb1)
1583                                                 raw_rcv(raw_sk, skb1, dev, iph->saddr,iph->daddr);
1584                                         raw_sk=sknext;
1585                                 }
1586                                 while(raw_sk!=NULL);
1587                                 
1588                                 /*
1589                                  *      Here either raw_sk is the last raw socket, or NULL if none 
1590                                  */
1591                                  
1592                                 /*
1593                                  *      We deliver to the last raw socket AFTER the protocol checks as it avoids a surplus copy 
1594                                  */
1595                         }
1596                 }
1597         
1598                 /*
1599                  *      skb->h.raw now points at the protocol beyond the IP header.
1600                  */
1601         
1602                 hash = iph->protocol & (MAX_INET_PROTOS -1);
1603                 for (ipprot = (struct inet_protocol *)inet_protos[hash];ipprot != NULL;ipprot=(struct inet_protocol *)ipprot->next)
1604                 {
1605                         struct sk_buff *skb2;
1606         
1607                         if (ipprot->protocol != iph->protocol)
1608                                 continue;
1609                        /*
1610                         *       See if we need to make a copy of it.  This will
1611                         *       only be set if more than one protocol wants it.
1612                         *       and then not for the last one. If there is a pending
1613                         *       raw delivery wait for that
1614                         */
1615         
1616                         if (ipprot->copy || raw_sk)
1617                         {
1618                                 skb2 = skb_clone(skb, GFP_ATOMIC);
1619                                 if(skb2==NULL)
1620                                         continue;
1621                         }
1622                         else
1623                         {
1624                                 skb2 = skb;
1625                         }
1626                         flag = 1;
1627 
1628                        /*
1629                         *       Pass on the datagram to each protocol that wants it,
1630                         *       based on the datagram protocol.  We should really
1631                         *       check the protocol handler's return values here...
1632                         */
1633 
1634                         ipprot->handler(skb2, dev, NULL, iph->daddr,
1635                                 (ntohs(iph->tot_len) - (iph->ihl * 4)),
1636                                 iph->saddr, 0, ipprot);
1637 
1638                 }
1639 
1640                 /*
1641                  *      All protocols checked.
1642                  *      If this packet was a broadcast, we may *not* reply to it, since that
1643                  *      causes (proven, grin) ARP storms and a leakage of memory (i.e. all
1644                  *      ICMP reply messages get queued up for transmission...)
1645                  */
1646 
1647                 if(raw_sk!=NULL)        /* Shift to last raw user */
1648                         raw_rcv(raw_sk, skb, dev, iph->saddr, iph->daddr);
1649                 else if (!flag)         /* Free and report errors */
1650                 {
1651                         if (brd != IS_BROADCAST && brd!=IS_MULTICAST)
1652                                 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PROT_UNREACH, 0, dev);   
1653                         kfree_skb(skb, FREE_WRITE);
1654                 }
1655 
1656                 return(0);
1657         }
1658 
1659         /*
1660          *      Do any IP forwarding required.
1661          */
1662         
1663         /*
1664          *      Don't forward multicast or broadcast frames.
1665          */
1666 
1667         if(skb->pkt_type!=PACKET_HOST || brd==IS_BROADCAST)
1668         {
1669                 kfree_skb(skb,FREE_WRITE);
1670                 return 0;
1671         }
1672 
1673         /*
1674          *      The packet is for another target. Forward the frame
1675          */
1676 
1677 #ifdef CONFIG_IP_FORWARD
1678         if(ip_forward(skb, dev, is_frag, target_addr, target_strict))
1679                 kfree_skb(skb, FREE_WRITE);
1680 #else
1681 /*      printk("Machine %lx tried to use us as a forwarder to %lx but we have forwarding disabled!\n",
1682                         iph->saddr,iph->daddr);*/
1683         ip_statistics.IpInAddrErrors++;
1684         kfree_skb(skb, FREE_WRITE);
1685 #endif
1686         return(0);
1687 }
1688         
1689 
1690 /*
1691  *      Loop a packet back to the sender.
1692  */
1693  
1694 static void ip_loopback(struct device *old_dev, struct sk_buff *skb)
     /* [previous][next][first][last][top][bottom][index][help] */
1695 {
1696         extern struct device loopback_dev;
1697         struct device *dev=&loopback_dev;
1698         int len=skb->len-old_dev->hard_header_len;
1699         struct sk_buff *newskb=dev_alloc_skb(len+dev->hard_header_len+15);
1700         
1701         if(newskb==NULL)
1702                 return;
1703                 
1704         newskb->link3=NULL;
1705         newskb->sk=NULL;
1706         newskb->dev=dev;
1707         newskb->saddr=skb->saddr;
1708         newskb->daddr=skb->daddr;
1709         newskb->raddr=skb->raddr;
1710         newskb->free=1;
1711         newskb->lock=0;
1712         newskb->users=0;
1713         newskb->pkt_type=skb->pkt_type;
1714         
1715         /*
1716          *      Put a MAC header on the packet
1717          */
1718         ip_send(newskb, skb->ip_hdr->daddr, len, dev, skb->ip_hdr->saddr);
1719         /*
1720          *      Add the rest of the data space. 
1721          */
1722         newskb->ip_hdr=(struct iphdr *)skb_put(newskb, len);
1723         /*
1724          *      Copy the data
1725          */
1726         memcpy(newskb->ip_hdr,skb->ip_hdr,len);
1727 
1728         /* Recurse. The device check against IFF_LOOPBACK will stop infinite recursion */
1729                 
1730         /*printk("Loopback output queued [%lX to %lX].\n", newskb->ip_hdr->saddr,newskb->ip_hdr->daddr);*/
1731         ip_queue_xmit(NULL, dev, newskb, 1);
1732 }
1733 
1734 
1735 /*
1736  * Queues a packet to be sent, and starts the transmitter
1737  * if necessary.  if free = 1 then we free the block after
1738  * transmit, otherwise we don't. If free==2 we not only
1739  * free the block but also don't assign a new ip seq number.
1740  * This routine also needs to put in the total length,
1741  * and compute the checksum
1742  */
1743 
1744 void ip_queue_xmit(struct sock *sk, struct device *dev,
     /* [previous][next][first][last][top][bottom][index][help] */
1745               struct sk_buff *skb, int free)
1746 {
1747         struct iphdr *iph;
1748 /*      unsigned char *ptr;*/
1749 
1750         /* Sanity check */
1751         if (dev == NULL)
1752         {
1753                 NETDEBUG(printk("IP: ip_queue_xmit dev = NULL\n"));
1754                 return;
1755         }
1756 
1757         IS_SKB(skb);
1758 
1759         /*
1760          *      Do some book-keeping in the packet for later
1761          */
1762 
1763 
1764         skb->dev = dev;
1765         skb->when = jiffies;
1766 
1767         /*
1768          *      Find the IP header and set the length. This is bad
1769          *      but once we get the skb data handling code in the
1770          *      hardware will push its header sensibly and we will
1771          *      set skb->ip_hdr to avoid this mess and the fixed
1772          *      header length problem
1773          */
1774 
1775 #if 0
1776         ptr = skb->data;
1777         ptr += dev->hard_header_len;
1778         iph = (struct iphdr *)ptr;      
1779         skb->ip_hdr = iph;
1780 #else
1781         iph = skb->ip_hdr;
1782 #endif
1783         iph->tot_len = ntohs(skb->len-(((unsigned char *)iph)-skb->data));
1784 
1785 #ifdef CONFIG_IP_FIREWALL
1786         if(ip_fw_chk(iph, dev, ip_fw_blk_chain, ip_fw_blk_policy, 0) != 1)
1787                 /* just don't send this packet */
1788                 return;
1789 #endif  
1790 
1791         /*
1792          *      No reassigning numbers to fragments...
1793          */
1794 
1795         if(free!=2)
1796                 iph->id      = htons(ip_id_count++);
1797         else
1798                 free=1;
1799 
1800         /* All buffers without an owner socket get freed */
1801         if (sk == NULL)
1802                 free = 1;
1803 
1804         skb->free = free;
1805 
1806         /*
1807          *      Do we need to fragment. Again this is inefficient.
1808          *      We need to somehow lock the original buffer and use
1809          *      bits of it.
1810          */
1811 
1812         if(ntohs(iph->tot_len)> dev->mtu)
1813         {
1814                 ip_fragment(sk,skb,dev,0);
1815                 IS_SKB(skb);
1816                 kfree_skb(skb,FREE_WRITE);
1817                 return;
1818         }
1819 
1820         /*
1821          *      Add an IP checksum
1822          */
1823 
1824         ip_send_check(iph);
1825 
1826         /*
1827          *      Print the frame when debugging
1828          */
1829 
1830         /*
1831          *      More debugging. You cannot queue a packet already on a list
1832          *      Spot this and moan loudly.
1833          */
1834         if (skb->next != NULL)
1835         {
1836                 NETDEBUG(printk("ip_queue_xmit: next != NULL\n"));
1837                 skb_unlink(skb);
1838         }
1839 
1840         /*
1841          *      If a sender wishes the packet to remain unfreed
1842          *      we add it to his send queue. This arguably belongs
1843          *      in the TCP level since nobody else uses it. BUT
1844          *      remember IPng might change all the rules.
1845          */
1846 
1847         if (!free)
1848         {
1849                 unsigned long flags;
1850                 /* The socket now has more outstanding blocks */
1851 
1852                 sk->packets_out++;
1853 
1854                 /* Protect the list for a moment */
1855                 save_flags(flags);
1856                 cli();
1857 
1858                 if (skb->link3 != NULL)
1859                 {
1860                         NETDEBUG(printk("ip.c: link3 != NULL\n"));
1861                         skb->link3 = NULL;
1862                 }
1863                 if (sk->send_head == NULL)
1864                 {
1865                         sk->send_tail = skb;
1866                         sk->send_head = skb;
1867                 }
1868                 else
1869                 {
1870                         sk->send_tail->link3 = skb;
1871                         sk->send_tail = skb;
1872                 }
1873                 /* skb->link3 is NULL */
1874 
1875                 /* Interrupt restore */
1876                 restore_flags(flags);
1877         }
1878         else
1879                 /* Remember who owns the buffer */
1880                 skb->sk = sk;
1881 
1882         /*
1883          *      If the indicated interface is up and running, send the packet.
1884          */
1885          
1886         ip_statistics.IpOutRequests++;
1887 #ifdef CONFIG_IP_ACCT
1888         ip_fw_chk(iph,dev,ip_acct_chain,IP_FW_F_ACCEPT,1);
1889 #endif  
1890         
1891 #ifdef CONFIG_IP_MULTICAST      
1892 
1893         /*
1894          *      Multicasts are looped back for other local users
1895          */
1896          
1897         if (MULTICAST(iph->daddr) && !(dev->flags&IFF_LOOPBACK))
1898         {
1899                 if(sk==NULL || sk->ip_mc_loop)
1900                 {
1901                         if(iph->daddr==IGMP_ALL_HOSTS)
1902                                 ip_loopback(dev,skb);
1903                         else
1904                         {
1905                                 struct ip_mc_list *imc=dev->ip_mc_list;
1906                                 while(imc!=NULL)
1907                                 {
1908                                         if(imc->multiaddr==iph->daddr)
1909                                         {
1910                                                 ip_loopback(dev,skb);
1911                                                 break;
1912                                         }
1913                                         imc=imc->next;
1914                                 }
1915                         }
1916                 }
1917                 /* Multicasts with ttl 0 must not go beyond the host */
1918                 
1919                 if(skb->ip_hdr->ttl==0)
1920                 {
1921                         kfree_skb(skb, FREE_READ);
1922                         return;
1923                 }
1924         }
1925 #endif
1926         if((dev->flags&IFF_BROADCAST) && (iph->daddr==dev->pa_brdaddr||iph->daddr==0xFFFFFFFF) && !(dev->flags&IFF_LOOPBACK))
1927                 ip_loopback(dev,skb);
1928                 
1929         if (dev->flags & IFF_UP)
1930         {
1931                 /*
1932                  *      If we have an owner use its priority setting,
1933                  *      otherwise use NORMAL
1934                  */
1935 
1936                 if (sk != NULL)
1937                 {
1938                         dev_queue_xmit(skb, dev, sk->priority);
1939                 }
1940                 else
1941                 {
1942                         dev_queue_xmit(skb, dev, SOPRI_NORMAL);
1943                 }
1944         }
1945         else
1946         {
1947                 ip_statistics.IpOutDiscards++;
1948                 if (free)
1949                         kfree_skb(skb, FREE_WRITE);
1950         }
1951 }
1952 
1953 
1954 
1955 #ifdef CONFIG_IP_MULTICAST
1956 
1957 /*
1958  *      Write an multicast group list table for the IGMP daemon to
1959  *      read.
1960  */
1961  
1962 int ip_mc_procinfo(char *buffer, char **start, off_t offset, int length, int dummy)
     /* [previous][next][first][last][top][bottom][index][help] */
1963 {
1964         off_t pos=0, begin=0;
1965         struct ip_mc_list *im;
1966         unsigned long flags;
1967         int len=0;
1968         struct device *dev;
1969         
1970         len=sprintf(buffer,"Device    : Count\tGroup    Users Timer\n");  
1971         save_flags(flags);
1972         cli();
1973         
1974         for(dev = dev_base; dev; dev = dev->next)
1975         {
1976                 if((dev->flags&IFF_UP)&&(dev->flags&IFF_MULTICAST))
1977                 {
1978                         len+=sprintf(buffer+len,"%-10s: %5d\n",
1979                                         dev->name, dev->mc_count);
1980                         for(im = dev->ip_mc_list; im; im = im->next)
1981                         {
1982                                 len+=sprintf(buffer+len,
1983                                         "\t\t\t%08lX %5d %d:%08lX\n",
1984                                         im->multiaddr, im->users,
1985                                         im->tm_running, im->timer.expires-jiffies);
1986                                 pos=begin+len;
1987                                 if(pos<offset)
1988                                 {
1989                                         len=0;
1990                                         begin=pos;
1991                                 }
1992                                 if(pos>offset+length)
1993                                         break;
1994                         }
1995                 }
1996         }
1997         restore_flags(flags);
1998         *start=buffer+(offset-begin);
1999         len-=(offset-begin);
2000         if(len>length)
2001                 len=length;     
2002         return len;
2003 }
2004 
2005 
2006 /*
2007  *      Socket option code for IP. This is the end of the line after any TCP,UDP etc options on
2008  *      an IP socket.
2009  *
2010  *      We implement IP_TOS (type of service), IP_TTL (time to live).
2011  *
2012  *      Next release we will sort out IP_OPTIONS since for some people they are kind of important.
2013  */
2014 
2015 static struct device *ip_mc_find_devfor(unsigned long addr)
     /* [previous][next][first][last][top][bottom][index][help] */
2016 {
2017         struct device *dev;
2018         for(dev = dev_base; dev; dev = dev->next)
2019         {
2020                 if((dev->flags&IFF_UP)&&(dev->flags&IFF_MULTICAST)&&
2021                         (dev->pa_addr==addr))
2022                         return dev;
2023         }
2024 
2025         return NULL;
2026 }
2027 
2028 #endif
2029 
2030 int ip_setsockopt(struct sock *sk, int level, int optname, char *optval, int optlen)
     /* [previous][next][first][last][top][bottom][index][help] */
2031 {
2032         int val,err;
2033         unsigned char ucval;
2034 #if defined(CONFIG_IP_FIREWALL) || defined(CONFIG_IP_ACCT)
2035         struct ip_fw tmp_fw;
2036 #endif  
2037         if (optval == NULL)
2038                 return(-EINVAL);
2039 
2040         err=verify_area(VERIFY_READ, optval, sizeof(int));
2041         if(err)
2042                 return err;
2043 
2044         val = get_user((int *) optval);
2045         ucval=get_user((unsigned char *) optval);
2046 
2047         if(level!=SOL_IP)
2048                 return -EOPNOTSUPP;
2049 
2050         switch(optname)
2051         {
2052                 case IP_TOS:
2053                         if(val<0||val>255)
2054                                 return -EINVAL;
2055                         sk->ip_tos=val;
2056                         if(val==IPTOS_LOWDELAY)
2057                                 sk->priority=SOPRI_INTERACTIVE;
2058                         if(val==IPTOS_THROUGHPUT)
2059                                 sk->priority=SOPRI_BACKGROUND;
2060                         return 0;
2061                 case IP_TTL:
2062                         if(val<1||val>255)
2063                                 return -EINVAL;
2064                         sk->ip_ttl=val;
2065                         return 0;
2066 #ifdef CONFIG_IP_MULTICAST
2067                 case IP_MULTICAST_TTL: 
2068                 {
2069                         sk->ip_mc_ttl=(int)ucval;
2070                         return 0;
2071                 }
2072                 case IP_MULTICAST_LOOP: 
2073                 {
2074                         if(ucval!=0 && ucval!=1)
2075                                  return -EINVAL;
2076                         sk->ip_mc_loop=(int)ucval;
2077                         return 0;
2078                 }
2079                 case IP_MULTICAST_IF: 
2080                 {
2081                         struct in_addr addr;
2082                         struct device *dev=NULL;
2083                         
2084                         /*
2085                          *      Check the arguments are allowable
2086                          */
2087 
2088                         err=verify_area(VERIFY_READ, optval, sizeof(addr));
2089                         if(err)
2090                                 return err;
2091                                 
2092                         memcpy_fromfs(&addr,optval,sizeof(addr));
2093                         
2094                         
2095                         /*
2096                          *      What address has been requested
2097                          */
2098                         
2099                         if(addr.s_addr==INADDR_ANY)     /* Default */
2100                         {
2101                                 sk->ip_mc_name[0]=0;
2102                                 return 0;
2103                         }
2104                         
2105                         /*
2106                          *      Find the device
2107                          */
2108                          
2109                         dev=ip_mc_find_devfor(addr.s_addr);
2110                                                 
2111                         /*
2112                          *      Did we find one
2113                          */
2114                          
2115                         if(dev) 
2116                         {
2117                                 strcpy(sk->ip_mc_name,dev->name);
2118                                 return 0;
2119                         }
2120                         return -EADDRNOTAVAIL;
2121                 }
2122                 
2123                 case IP_ADD_MEMBERSHIP: 
2124                 {
2125                 
2126 /*
2127  *      FIXME: Add/Del membership should have a semaphore protecting them from re-entry
2128  */
2129                         struct ip_mreq mreq;
2130                         unsigned long route_src;
2131                         struct rtable *rt;
2132                         struct device *dev=NULL;
2133                         
2134                         /*
2135                          *      Check the arguments.
2136                          */
2137 
2138                         err=verify_area(VERIFY_READ, optval, sizeof(mreq));
2139                         if(err)
2140                                 return err;
2141 
2142                         memcpy_fromfs(&mreq,optval,sizeof(mreq));
2143 
2144                         /* 
2145                          *      Get device for use later
2146                          */
2147 
2148                         if(mreq.imr_interface.s_addr==INADDR_ANY) 
2149                         {
2150                                 /*
2151                                  *      Not set so scan.
2152                                  */
2153                                 if((rt=ip_rt_route(mreq.imr_multiaddr.s_addr,NULL, &route_src))!=NULL)
2154                                 {
2155                                         dev=rt->rt_dev;
2156                                         rt->rt_use--;
2157                                 }
2158                         }
2159                         else
2160                         {
2161                                 /*
2162                                  *      Find a suitable device.
2163                                  */
2164                                 
2165                                 dev=ip_mc_find_devfor(mreq.imr_interface.s_addr);
2166                         }
2167                         
2168                         /*
2169                          *      No device, no cookies.
2170                          */
2171                          
2172                         if(!dev)
2173                                 return -ENODEV;
2174                                 
2175                         /*
2176                          *      Join group.
2177                          */
2178                          
2179                         return ip_mc_join_group(sk,dev,mreq.imr_multiaddr.s_addr);
2180                 }
2181                 
2182                 case IP_DROP_MEMBERSHIP: 
2183                 {
2184                         struct ip_mreq mreq;
2185                         struct rtable *rt;
2186                         unsigned long route_src;
2187                         struct device *dev=NULL;
2188 
2189                         /*
2190                          *      Check the arguments
2191                          */
2192                          
2193                         err=verify_area(VERIFY_READ, optval, sizeof(mreq));
2194                         if(err)
2195                                 return err;
2196 
2197                         memcpy_fromfs(&mreq,optval,sizeof(mreq));
2198 
2199                         /*
2200                          *      Get device for use later 
2201                          */
2202  
2203                         if(mreq.imr_interface.s_addr==INADDR_ANY) 
2204                         {
2205                                 if((rt=ip_rt_route(mreq.imr_multiaddr.s_addr,NULL, &route_src))!=NULL)
2206                                 {
2207                                         dev=rt->rt_dev;
2208                                         rt->rt_use--;
2209                                 }
2210                         }
2211                         else 
2212                         {
2213                         
2214                                 dev=ip_mc_find_devfor(mreq.imr_interface.s_addr);
2215                         }
2216                         
2217                         /*
2218                          *      Did we find a suitable device.
2219                          */
2220                          
2221                         if(!dev)
2222                                 return -ENODEV;
2223                                 
2224                         /*
2225                          *      Leave group
2226                          */
2227                          
2228                         return ip_mc_leave_group(sk,dev,mreq.imr_multiaddr.s_addr);
2229                 }
2230 #endif                  
2231 #ifdef CONFIG_IP_FIREWALL
2232                 case IP_FW_ADD_BLK:
2233                 case IP_FW_DEL_BLK:
2234                 case IP_FW_ADD_FWD:
2235                 case IP_FW_DEL_FWD:
2236                 case IP_FW_CHK_BLK:
2237                 case IP_FW_CHK_FWD:
2238                 case IP_FW_FLUSH_BLK:
2239                 case IP_FW_FLUSH_FWD:
2240                 case IP_FW_ZERO_BLK:
2241                 case IP_FW_ZERO_FWD:
2242                 case IP_FW_POLICY_BLK:
2243                 case IP_FW_POLICY_FWD:
2244                         if(!suser())
2245                                 return -EPERM;
2246                         if(optlen>sizeof(tmp_fw) || optlen<1)
2247                                 return -EINVAL;
2248                         err=verify_area(VERIFY_READ,optval,optlen);
2249                         if(err)
2250                                 return err;
2251                         memcpy_fromfs(&tmp_fw,optval,optlen);
2252                         err=ip_fw_ctl(optname, &tmp_fw,optlen);
2253                         return -err;    /* -0 is 0 after all */
2254                         
2255 #endif
2256 #ifdef CONFIG_IP_ACCT
2257                 case IP_ACCT_DEL:
2258                 case IP_ACCT_ADD:
2259                 case IP_ACCT_FLUSH:
2260                 case IP_ACCT_ZERO:
2261                         if(!suser())
2262                                 return -EPERM;
2263                         if(optlen>sizeof(tmp_fw) || optlen<1)
2264                                 return -EINVAL;
2265                         err=verify_area(VERIFY_READ,optval,optlen);
2266                         if(err)
2267                                 return err;
2268                         memcpy_fromfs(&tmp_fw, optval,optlen);
2269                         err=ip_acct_ctl(optname, &tmp_fw,optlen);
2270                         return -err;    /* -0 is 0 after all */
2271 #endif
2272                 /* IP_OPTIONS and friends go here eventually */
2273                 default:
2274                         return(-ENOPROTOOPT);
2275         }
2276 }
2277 
2278 /*
2279  *      Get the options. Note for future reference. The GET of IP options gets the
2280  *      _received_ ones. The set sets the _sent_ ones.
2281  */
2282 
2283 int ip_getsockopt(struct sock *sk, int level, int optname, char *optval, int *optlen)
     /* [previous][next][first][last][top][bottom][index][help] */
2284 {
2285         int val,err;
2286 #ifdef CONFIG_IP_MULTICAST
2287         int len;
2288 #endif
2289         
2290         if(level!=SOL_IP)
2291                 return -EOPNOTSUPP;
2292 
2293         switch(optname)
2294         {
2295                 case IP_TOS:
2296                         val=sk->ip_tos;
2297                         break;
2298                 case IP_TTL:
2299                         val=sk->ip_ttl;
2300                         break;
2301 #ifdef CONFIG_IP_MULTICAST                      
2302                 case IP_MULTICAST_TTL:
2303                         val=sk->ip_mc_ttl;
2304                         break;
2305                 case IP_MULTICAST_LOOP:
2306                         val=sk->ip_mc_loop;
2307                         break;
2308                 case IP_MULTICAST_IF:
2309                         err=verify_area(VERIFY_WRITE, optlen, sizeof(int));
2310                         if(err)
2311                                 return err;
2312                         len=strlen(sk->ip_mc_name);
2313                         err=verify_area(VERIFY_WRITE, optval, len);
2314                         if(err)
2315                                 return err;
2316                         put_user(len,(int *) optlen);
2317                         memcpy_tofs((void *)optval,sk->ip_mc_name, len);
2318                         return 0;
2319 #endif
2320                 default:
2321                         return(-ENOPROTOOPT);
2322         }
2323         err=verify_area(VERIFY_WRITE, optlen, sizeof(int));
2324         if(err)
2325                 return err;
2326         put_user(sizeof(int),(int *) optlen);
2327 
2328         err=verify_area(VERIFY_WRITE, optval, sizeof(int));
2329         if(err)
2330                 return err;
2331         put_user(val,(int *) optval);
2332 
2333         return(0);
2334 }
2335 
2336 /*
2337  *      Build and send a packet, with as little as one copy
2338  *
2339  *      Doesn't care much about ip options... option length can be
2340  *      different for fragment at 0 and other fragments.
2341  *
2342  *      Note that the fragment at the highest offset is sent first,
2343  *      so the getfrag routine can fill in the TCP/UDP checksum header
2344  *      field in the last fragment it sends... actually it also helps
2345  *      the reassemblers, they can put most packets in at the head of
2346  *      the fragment queue, and they know the total size in advance. This
2347  *      last feature will measurably improve the Linux fragment handler.
2348  *
2349  *      The callback has five args, an arbitrary pointer (copy of frag),
2350  *      the source IP address (may depend on the routing table), the 
2351  *      destination address (char *), the offset to copy from, and the
2352  *      length to be copied.
2353  * 
2354  */
2355 
2356 int ip_build_xmit(struct sock *sk,
     /* [previous][next][first][last][top][bottom][index][help] */
2357                    void getfrag (const void *,
2358                                  int,
2359                                  char *,
2360                                  unsigned int,
2361                                  unsigned int),
2362                    const void *frag,
2363                    unsigned short int length,
2364                    int daddr,
2365                    int flags,
2366                    int type) 
2367 {
2368         struct rtable *rt;
2369         unsigned int fraglen, maxfraglen, fragheaderlen;
2370         int offset, mf;
2371         unsigned long saddr;
2372         unsigned short id;
2373         struct iphdr *iph;
2374         int local=0;
2375         struct device *dev;
2376         int nfrags=0;
2377         
2378         ip_statistics.IpOutRequests++;
2379 
2380 
2381 #ifdef CONFIG_IP_MULTICAST      
2382         if(sk && MULTICAST(daddr) && *sk->ip_mc_name)
2383         {
2384                 dev=dev_get(sk->ip_mc_name);
2385                 if(!dev)
2386                         return -ENODEV;
2387                 rt=NULL;
2388                 if (sk->saddr && (!LOOPBACK(sk->saddr) || LOOPBACK(daddr)))
2389                         saddr = sk->saddr;
2390                 else
2391                         saddr = dev->pa_addr;
2392         }
2393         else
2394         {
2395 #endif  
2396                 /*
2397                  *      Perform the IP routing decisions
2398                  */
2399          
2400                 if(sk->localroute || flags&MSG_DONTROUTE)
2401                         local=1;
2402         
2403                 rt = sk->ip_route_cache;
2404                 
2405                 /*
2406                  *      See if the routing cache is outdated. We need to clean this up once we are happy it is reliable
2407                  *      by doing the invalidation actively in the route change and header change.
2408                  */
2409         
2410                 saddr=sk->ip_route_saddr;        
2411                 if(!rt || sk->ip_route_stamp != rt_stamp || daddr!=sk->ip_route_daddr || sk->ip_route_local!=local || sk->saddr!=sk->ip_route_saddr)
2412                 {
2413                         if(local)
2414                                 rt = ip_rt_local(daddr, NULL, &saddr);
2415                         else
2416                                 rt = ip_rt_route(daddr, NULL, &saddr);
2417                         sk->ip_route_local=local;
2418                         sk->ip_route_daddr=daddr;
2419                         sk->ip_route_saddr=saddr;
2420                         sk->ip_route_stamp=rt_stamp;
2421                         sk->ip_route_cache=rt;
2422                         sk->ip_hcache_ver=NULL;
2423                         sk->ip_hcache_state= 0;
2424                 }
2425                 else if(rt)
2426                 {
2427                         /*
2428                          *      Attempt header caches only if the cached route is being reused. Header cache
2429                          *      is not ultra cheap to set up. This means we only set it up on the second packet,
2430                          *      so one shot communications are not slowed. We assume (seems reasonable) that 2 is
2431                          *      probably going to be a stream of data.
2432                          */
2433                         if(rt->rt_dev->header_cache && sk->ip_hcache_state!= -1)
2434                         {
2435                                 if(sk->ip_hcache_ver==NULL || sk->ip_hcache_stamp!=*sk->ip_hcache_ver)
2436                                         rt->rt_dev->header_cache(rt->rt_dev,sk,saddr,daddr);
2437                                 else
2438                                         /* Can't cache. Remember this */
2439                                         sk->ip_hcache_state= -1;
2440                         }
2441                 }
2442                 
2443                 if (rt == NULL) 
2444                 {
2445                         ip_statistics.IpOutNoRoutes++;
2446                         return(-ENETUNREACH);
2447                 }
2448         
2449                 if (sk->saddr && (!LOOPBACK(sk->saddr) || LOOPBACK(daddr)))
2450                         saddr = sk->saddr;
2451                         
2452                 dev=rt->rt_dev;
2453 #ifdef CONFIG_IP_MULTICAST
2454         }
2455 #endif          
2456 
2457         /*
2458          *      Now compute the buffer space we require
2459          */ 
2460          
2461         /*
2462          *      Try the simple case first. This leaves broadcast, multicast, fragmented frames, and by
2463          *      choice RAW frames within 20 bytes of maximum size(rare) to the long path
2464          */
2465          
2466         if(length+20 <= dev->mtu && !MULTICAST(daddr) && daddr!=0xFFFFFFFF && daddr!=dev->pa_brdaddr)
2467         {       
2468                 int error;
2469                 struct sk_buff *skb=sock_alloc_send_skb(sk, length+20+15+dev->hard_header_len,0,&error);
2470                 if(skb==NULL)
2471                 {
2472                         ip_statistics.IpOutDiscards++;
2473                         return error;
2474                 }
2475                 skb->dev=dev;
2476                 skb->free=1;
2477                 skb->when=jiffies;
2478                 skb->sk=sk;
2479                 skb->arp=0;
2480                 skb->saddr=saddr;
2481                 length+=20;     /* We do this twice so the subtract once is quicker */
2482                 skb->raddr=(rt&&rt->rt_gateway)?rt->rt_gateway:daddr;
2483                 skb_reserve(skb,(dev->hard_header_len+15)&~15);
2484                 if(sk->ip_hcache_state>0)
2485                 {
2486                         memcpy(skb_push(skb,dev->hard_header_len),sk->ip_hcache_data,dev->hard_header_len);
2487                         skb->arp=1;
2488                 }
2489                 else if(dev->hard_header)
2490                 {
2491                         if(dev->hard_header(skb,dev,ETH_P_IP,NULL,NULL,0)>0)
2492                                 skb->arp=1;
2493                 }
2494                 skb->ip_hdr=iph=(struct iphdr *)skb_put(skb,length);
2495                 if(type!=IPPROTO_RAW)
2496                 {
2497                         iph->version=4;
2498                         iph->ihl=5;
2499                         iph->tos=sk->ip_tos;
2500                         iph->tot_len = htons(length);
2501                         iph->id=htons(ip_id_count++);
2502                         iph->frag_off = 0;
2503                         iph->ttl=sk->ip_ttl;
2504                         iph->protocol=type;
2505                         iph->saddr=saddr;
2506                         iph->daddr=daddr;
2507                         iph->check=0;
2508                         iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
2509                         getfrag(frag,saddr,(void *)(iph+1),0, length-20);
2510                 }
2511                 else
2512                         getfrag(frag,saddr,(void *)iph,0,length);
2513 #ifdef CONFIG_IP_ACCT
2514                 ip_fw_chk((void *)skb->data,dev,ip_acct_chain, IP_FW_F_ACCEPT,1);
2515 #endif          
2516                 if(dev->flags&IFF_UP)
2517                         dev_queue_xmit(skb,dev,sk->priority);
2518                 else
2519                 {
2520                         ip_statistics.IpOutDiscards++;
2521                         kfree_skb(skb, FREE_WRITE);
2522                 }
2523                 return 0;
2524         }
2525                         
2526                         
2527         fragheaderlen = dev->hard_header_len;
2528         if(type != IPPROTO_RAW)
2529                 fragheaderlen += 20;
2530                 
2531         /*
2532          *      Fragheaderlen is the size of 'overhead' on each buffer. Now work
2533          *      out the size of the frames to send.
2534          */
2535          
2536         maxfraglen = ((dev->mtu-20) & ~7) + fragheaderlen;
2537         
2538         /*
2539          *      Start at the end of the frame by handling the remainder.
2540          */
2541          
2542         offset = length - (length % (maxfraglen - fragheaderlen));
2543         
2544         /*
2545          *      Amount of memory to allocate for final fragment.
2546          */
2547          
2548         fraglen = length - offset + fragheaderlen;
2549         
2550         if(fraglen==0)
2551         {
2552                 fraglen = maxfraglen;
2553                 offset -= maxfraglen-fragheaderlen;
2554         }
2555         
2556         
2557         /*
2558          *      The last fragment will not have MF (more fragments) set.
2559          */
2560          
2561         mf = 0;
2562 
2563         /*
2564          *      Can't fragment raw packets 
2565          */
2566          
2567         if (type == IPPROTO_RAW && offset > 0)
2568                 return(-EMSGSIZE);
2569 
2570         /*
2571          *      Get an identifier
2572          */
2573          
2574         id = htons(ip_id_count++);
2575 
2576         /*
2577          *      Being outputting the bytes.
2578          */
2579          
2580         do 
2581         {
2582                 struct sk_buff * skb;
2583                 int error;
2584                 char *data;
2585 
2586                 /*
2587                  *      Get the memory we require with some space left for alignment.
2588                  */
2589 
2590                 skb = sock_alloc_send_skb(sk, fraglen+15, 0, &error);
2591                 if (skb == NULL)
2592                 {
2593                         ip_statistics.IpOutDiscards++;
2594                         if(nfrags>1)
2595                                 ip_statistics.IpFragCreates++;                  
2596                         return(error);
2597                 }
2598                 
2599                 /*
2600                  *      Fill in the control structures
2601                  */
2602                  
2603                 skb->next = skb->prev = NULL;
2604                 skb->dev = dev;
2605                 skb->when = jiffies;
2606                 skb->free = 1; /* dubious, this one */
2607                 skb->sk = sk;
2608                 skb->arp = 0;
2609                 skb->saddr = saddr;
2610                 skb->raddr = (rt&&rt->rt_gateway) ? rt->rt_gateway : daddr;
2611                 skb_reserve(skb,(dev->hard_header_len+15)&~15);
2612                 data = skb_put(skb, fraglen-dev->hard_header_len);
2613 
2614                 /*
2615                  *      Save us ARP and stuff. In the optimal case we do no route lookup (route cache ok)
2616                  *      no ARP lookup (arp cache ok) and output. The cache checks are still too slow but
2617                  *      this can be fixed later. For gateway routes we ought to have a rt->.. header cache
2618                  *      pointer to speed header cache builds for identical targets.
2619                  */
2620                  
2621                 if(sk->ip_hcache_state>0)
2622                 {
2623                         memcpy(skb_push(skb,dev->hard_header_len),sk->ip_hcache_data, dev->hard_header_len);
2624                         skb->arp=1;
2625                 }
2626                 else if (dev->hard_header)
2627                 {
2628                         if(dev->hard_header(skb, dev, ETH_P_IP, 
2629                                                 NULL, NULL, 0)>0)
2630                                 skb->arp=1;
2631                 }
2632                 
2633                 /*
2634                  *      Find where to start putting bytes.
2635                  */
2636                  
2637                 skb->ip_hdr = iph = (struct iphdr *)data;
2638 
2639                 /*
2640                  *      Only write IP header onto non-raw packets 
2641                  */
2642                  
2643                 if(type != IPPROTO_RAW) 
2644                 {
2645 
2646                         iph->version = 4;
2647                         iph->ihl = 5; /* ugh */
2648                         iph->tos = sk->ip_tos;
2649                         iph->tot_len = htons(fraglen - fragheaderlen + iph->ihl*4);
2650                         iph->id = id;
2651                         iph->frag_off = htons(offset>>3);
2652                         iph->frag_off |= mf;
2653 #ifdef CONFIG_IP_MULTICAST
2654                         if (MULTICAST(daddr))
2655                                 iph->ttl = sk->ip_mc_ttl;
2656                         else
2657 #endif
2658                                 iph->ttl = sk->ip_ttl;
2659                         iph->protocol = type;
2660                         iph->check = 0;
2661                         iph->saddr = saddr;
2662                         iph->daddr = daddr;
2663                         iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
2664                         data += iph->ihl*4;
2665                         
2666                         /*
2667                          *      Any further fragments will have MF set.
2668                          */
2669                          
2670                         mf = htons(IP_MF);
2671                 }
2672                 
2673                 /*
2674                  *      User data callback
2675                  */
2676 
2677                 getfrag(frag, saddr, data, offset, fraglen-fragheaderlen);
2678                 
2679                 /*
2680                  *      Account for the fragment.
2681                  */
2682                  
2683 #ifdef CONFIG_IP_ACCT
2684                 if(!offset)
2685                         ip_fw_chk(iph, dev, ip_acct_chain, IP_FW_F_ACCEPT, 1);
2686 #endif  
2687                 offset -= (maxfraglen-fragheaderlen);
2688                 fraglen = maxfraglen;
2689 
2690 #ifdef CONFIG_IP_MULTICAST
2691 
2692                 /*
2693                  *      Multicasts are looped back for other local users
2694                  */
2695          
2696                 if (MULTICAST(daddr) && !(dev->flags&IFF_LOOPBACK)) 
2697                 {
2698                         /*
2699                          *      Loop back any frames. The check for IGMP_ALL_HOSTS is because
2700                          *      you are always magically a member of this group.
2701                          */
2702                          
2703                         if(sk==NULL || sk->ip_mc_loop) 
2704                         {
2705                                 if(skb->daddr==IGMP_ALL_HOSTS)
2706                                         ip_loopback(rt?rt->rt_dev:dev,skb);
2707                                 else 
2708                                 {
2709                                         struct ip_mc_list *imc=rt?rt->rt_dev->ip_mc_list:dev->ip_mc_list;
2710                                         while(imc!=NULL) 
2711                                         {
2712                                                 if(imc->multiaddr==daddr) 
2713                                                 {
2714                                                         ip_loopback(rt?rt->rt_dev:dev,skb);
2715                                                         break;
2716                                                 }
2717                                                 imc=imc->next;
2718                                         }
2719                                 }
2720                         }
2721 
2722                         /*
2723                          *      Multicasts with ttl 0 must not go beyond the host. Fixme: avoid the
2724                          *      extra clone.
2725                          */
2726 
2727                         if(skb->ip_hdr->ttl==0)
2728                                 kfree_skb(skb, FREE_READ);
2729                 }
2730 #endif
2731 
2732                 nfrags++;
2733                 
2734                 /*
2735                  *      BSD loops broadcasts
2736                  */
2737                  
2738                 if((dev->flags&IFF_BROADCAST) && (daddr==0xFFFFFFFF || daddr==dev->pa_brdaddr) && !(dev->flags&IFF_LOOPBACK))
2739                         ip_loopback(dev,skb);
2740 
2741                 /*
2742                  *      Now queue the bytes into the device.
2743                  */
2744                  
2745                 if (dev->flags & IFF_UP) 
2746                 {
2747                         dev_queue_xmit(skb, dev, sk->priority);
2748                 } 
2749                 else 
2750                 {
2751                         /*
2752                          *      Whoops... 
2753                          *
2754                          *      FIXME:  There is a small nasty here. During the ip_build_xmit we could
2755                          *      page fault between the route lookup and device send, the device might be
2756                          *      removed and unloaded.... We need to add device locks on this.
2757                          */
2758                          
2759                         ip_statistics.IpOutDiscards++;
2760                         if(nfrags>1)
2761                                 ip_statistics.IpFragCreates+=nfrags;
2762                         kfree_skb(skb, FREE_WRITE);
2763                         return(0); /* lose rest of fragments */
2764                 }
2765         } 
2766         while (offset >= 0);
2767         if(nfrags>1)
2768                 ip_statistics.IpFragCreates+=nfrags;
2769         return(0);
2770 }
2771     
2772 
2773 /*
2774  *      Packet type entry for IP. ip_init() stores htons(ETH_P_IP) in its type field and passes it to dev_add_pack().
2775  */
2776 
2777 static struct packet_type ip_packet_type =
2778 {
2779         0,      /* Placeholder: ip_init() sets this to htons(ETH_P_IP) at run time */
2780         NULL,   /* All devices */
2781         ip_rcv,
2782         NULL,
2783         NULL,
2784 };
2785 
2786 /*
2787  *      Device notifier
2788  */
2789  
2790 static int ip_rt_event(unsigned long event, void *ptr)
     /* [previous][next][first][last][top][bottom][index][help] */
2791 {
2792         if(event==NETDEV_DOWN)
2793                 ip_rt_flush(ptr);
2794         return NOTIFY_DONE;
2795 }
2796 
2797 struct notifier_block ip_rt_notifier={       /* Passed to register_netdevice_notifier() by ip_init() */
2798         ip_rt_event,    /* Callback: calls ip_rt_flush() on NETDEV_DOWN */
2799         NULL,
2800         0
2801 };
2802 
2803 /*
2804  *      IP registers the packet type and then calls the subprotocol initialisers
2805  */
2806 
2807 void ip_init(void)
     /* [previous][next][first][last][top][bottom][index][help] */
2808 {
2809         ip_packet_type.type=htons(ETH_P_IP);
2810         dev_add_pack(&ip_packet_type);
2811 
2812         /* So we flush routes when a device is downed */        
2813         register_netdevice_notifier(&ip_rt_notifier);
2814 
2815 /*      ip_raw_init();
2816         ip_packet_init();
2817         ip_tcp_init();
2818         ip_udp_init();*/
2819 
2820 #ifdef CONFIG_IP_MULTICAST
2821         proc_net_register(&(struct proc_dir_entry)
2822                           { PROC_NET_IGMP,  ip_mc_procinfo,  4,  "igmp"});
2823 #endif
2824 }
2825 

/* [previous][next][first][last][top][bottom][index][help] */