root/net/ipv4/ip.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. ip_ioctl
  2. ip_send
  3. ip_send_room
  4. ip_build_header
  5. ip_send_check
  6. ip_frag_create
  7. ip_find
  8. ip_free
  9. ip_expire
  10. ip_create
  11. ip_done
  12. ip_glue
  13. ip_defrag
  14. ip_fragment
  15. ip_forward
  16. ip_rcv
  17. ip_loopback
  18. ip_queue_xmit
  19. ip_mc_procinfo
  20. ip_mc_find_devfor
  21. ip_setsockopt
  22. ip_getsockopt
  23. ip_build_xmit
  24. ip_rt_event
  25. ip_init

   1 /*
   2  * INET         An implementation of the TCP/IP protocol suite for the LINUX
   3  *              operating system.  INET is implemented using the  BSD Socket
   4  *              interface as the means of communication with the user level.
   5  *
   6  *              The Internet Protocol (IP) module.
   7  *
   8  * Version:     @(#)ip.c        1.0.16b 9/1/93
   9  *
  10  * Authors:     Ross Biro, <bir7@leland.Stanford.Edu>
  11  *              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
  12  *              Donald Becker, <becker@super.org>
  13  *              Alan Cox, <gw4pts@gw4pts.ampr.org>
  14  *              Richard Underwood
  15  *              Stefan Becker, <stefanb@yello.ping.de>
  16  *              Jorge Cwik, <jorge@laser.satlink.net>
  17  *              Arnt Gulbrandsen, <agulbra@nvg.unit.no>
  18  *              
  19  *
  20  * Fixes:
  21  *              Alan Cox        :       Commented a couple of minor bits of surplus code
  22  *              Alan Cox        :       Undefining IP_FORWARD doesn't include the code
  23  *                                      (just stops a compiler warning).
  24  *              Alan Cox        :       Frames with >=MAX_ROUTE record routes, strict routes or loose routes
  25  *                                      are junked rather than corrupting things.
  26  *              Alan Cox        :       Frames to bad broadcast subnets are dumped
  27  *                                      We used to process them non broadcast and
  28  *                                      boy could that cause havoc.
  29  *              Alan Cox        :       ip_forward sets the free flag on the
  30  *                                      new frame it queues. Still crap because
  31  *                                      it copies the frame but at least it
  32  *                                      doesn't eat memory too.
  33  *              Alan Cox        :       Generic queue code and memory fixes.
  34  *              Fred Van Kempen :       IP fragment support (borrowed from NET2E)
  35  *              Gerhard Koerting:       Forward fragmented frames correctly.
  36  *              Gerhard Koerting:       Fixes to my fix of the above 8-).
  37  *              Gerhard Koerting:       IP interface addressing fix.
  38  *              Linus Torvalds  :       More robustness checks
  39  *              Alan Cox        :       Even more checks: Still not as robust as it ought to be
  40  *              Alan Cox        :       Save IP header pointer for later
  41  *              Alan Cox        :       ip option setting
  42  *              Alan Cox        :       Use ip_tos/ip_ttl settings
  43  *              Alan Cox        :       Fragmentation bogosity removed
  44  *                                      (Thanks to Mark.Bush@prg.ox.ac.uk)
  45  *              Dmitry Gorodchanin :    Send of a raw packet crash fix.
  46  *              Alan Cox        :       Silly ip bug when an overlength
  47  *                                      fragment turns up. Now frees the
  48  *                                      queue.
  49  *              Linus Torvalds/ :       Memory leakage on fragmentation
  50  *              Alan Cox        :       handling.
  51  *              Gerhard Koerting:       Forwarding uses IP priority hints
  52  *              Teemu Rantanen  :       Fragment problems.
  53  *              Alan Cox        :       General cleanup, comments and reformat
  54  *              Alan Cox        :       SNMP statistics
  55  *              Alan Cox        :       BSD address rule semantics. Also see
  56  *                                      UDP as there is a nasty checksum issue
  57  *                                      if you do things the wrong way.
  58  *              Alan Cox        :       Always defrag, moved IP_FORWARD to the config.in file
  59  *              Alan Cox        :       IP options adjust sk->priority.
  60  *              Pedro Roque     :       Fix mtu/length error in ip_forward.
  61  *              Alan Cox        :       Avoid ip_chk_addr when possible.
  62  *      Richard Underwood       :       IP multicasting.
  63  *              Alan Cox        :       Cleaned up multicast handlers.
  64  *              Alan Cox        :       RAW sockets demultiplex in the BSD style.
  65  *              Gunther Mayer   :       Fix the SNMP reporting typo
  66  *              Alan Cox        :       Always in group 224.0.0.1
  67  *      Pauline Middelink       :       Fast ip_checksum update when forwarding
  68  *                                      Masquerading support.
  69  *              Alan Cox        :       Multicast loopback error for 224.0.0.1
  70  *              Alan Cox        :       IP_MULTICAST_LOOP option.
  71  *              Alan Cox        :       Use notifiers.
  72  *              Bjorn Ekwall    :       Removed ip_csum (from slhc.c too)
  73  *              Bjorn Ekwall    :       Moved ip_fast_csum to ip.h (inline!)
  74  *              Stefan Becker   :       Send out ICMP HOST REDIRECT
  75  *      Arnt Gulbrandsen        :       ip_build_xmit
  76  *              Alan Cox        :       Per socket routing cache
  77  *              Alan Cox        :       Fixed routing cache, added header cache.
  78  *              Alan Cox        :       Loopback didnt work right in original ip_build_xmit - fixed it.
  79  *              Alan Cox        :       Only send ICMP_REDIRECT if src/dest are the same net.
  80  *              Alan Cox        :       Incoming IP option handling.
  81  *              Alan Cox        :       Set saddr on raw output frames as per BSD.
  82  *              Alan Cox        :       Stopped broadcast source route explosions.
  83  *              Alan Cox        :       Can disable source routing
  84  *              Takeshi Sone    :       Masquerading didn't work.
  85  *      Dave Bonn,Alan Cox      :       Faster IP forwarding whenever possible.
  86  *              Alan Cox        :       Memory leaks, tramples, misc debugging.
  87  *              Alan Cox        :       Fixed multicast (by popular demand 8))
  88  *              Alan Cox        :       Fixed forwarding (by even more popular demand 8))
  89  *              Alan Cox        :       Fixed SNMP statistics [I think]
  90  *      Gerhard Koerting        :       IP fragmentation forwarding fix
  91  *              Alan Cox        :       Device lock against page fault.
  92  *              Alan Cox        :       IP_HDRINCL facility.
  93  *      Werner Almesberger      :       Zero fragment bug
  94  *              Alan Cox        :       RAW IP frame length bug
  95  *              Alan Cox        :       Outgoing firewall on build_xmit
  96  *
  97  *  
  98  *
  99  * To Fix:
 100  *              IP option processing is mostly not needed. ip_forward needs to know about routing rules
 101  *              and time stamp but that's about all. Use the route mtu field here too
 102  *              IP fragmentation wants rewriting cleanly. The RFC815 algorithm is much more efficient
 103  *              and could be made very efficient with the addition of some virtual memory hacks to permit
 104  *              the allocation of a buffer that can then be 'grown' by twiddling page tables.
 105  *              Output fragmentation wants updating along with the buffer management to use a single 
 106  *              interleaved copy algorithm so that fragmenting has a one copy overhead. Actual packet
 107  *              output should probably do its own fragmentation at the UDP/RAW layer. TCP shouldn't cause
 108  *              fragmentation anyway.
 109  *
 110  *              FIXME: copy frag 0 iph to qp->iph
 111  *
 112  *              This program is free software; you can redistribute it and/or
 113  *              modify it under the terms of the GNU General Public License
 114  *              as published by the Free Software Foundation; either version
 115  *              2 of the License, or (at your option) any later version.
 116  */
 117 
 118 #include <asm/segment.h>
 119 #include <asm/system.h>
 120 #include <linux/types.h>
 121 #include <linux/kernel.h>
 122 #include <linux/sched.h>
 123 #include <linux/mm.h>
 124 #include <linux/string.h>
 125 #include <linux/errno.h>
 126 #include <linux/config.h>
 127 
 128 #include <linux/socket.h>
 129 #include <linux/sockios.h>
 130 #include <linux/in.h>
 131 #include <linux/inet.h>
 132 #include <linux/netdevice.h>
 133 #include <linux/etherdevice.h>
 134 #include <linux/proc_fs.h>
 135 #include <linux/stat.h>
 136 
 137 #include <net/snmp.h>
 138 #include <net/ip.h>
 139 #include <net/protocol.h>
 140 #include <net/route.h>
 141 #include <net/tcp.h>
 142 #include <net/udp.h>
 143 #include <linux/skbuff.h>
 144 #include <net/sock.h>
 145 #include <net/arp.h>
 146 #include <net/icmp.h>
 147 #include <net/raw.h>
 148 #include <net/checksum.h>
 149 #include <linux/igmp.h>
 150 #include <linux/ip_fw.h>
 151 #include <linux/mroute.h>
 152 
 153 #define CONFIG_IP_DEFRAG
 154 
 155 extern int last_retran;
 156 extern void sort_send(struct sock *sk);
 157 
 158 #define min(a,b)        ((a)<(b)?(a):(b))
 159 
 160 /*
 161  *      SNMP management statistics
 162  */
 163 
 164 #ifdef CONFIG_IP_FORWARD
 165 struct ip_mib ip_statistics={1,64,};    /* Forwarding=Yes, Default TTL=64 */
 166 #else
 167 struct ip_mib ip_statistics={2,64,};    /* Forwarding=No, Default TTL=64 */
 168 #endif
 169 
 170 /*
 171  *      Handle the issuing of an ioctl() request
 172  *      for the ip device. This is scheduled to
 173  *      disappear
 174  */
 175 
 176 int ip_ioctl(struct sock *sk, int cmd, unsigned long arg)
     /* [previous][next][first][last][top][bottom][index][help] */
 177 {
 178         switch(cmd)
 179         {
 180                 default:
 181                         return(-EINVAL);
 182         }
 183 }
 184 
 185 
 186 /*
 187  *      Take an skb, and fill in the MAC header.
 188  */
 189 
 190 static int ip_send(struct sk_buff *skb, __u32 daddr, int len, struct device *dev, __u32 saddr)
     /* [previous][next][first][last][top][bottom][index][help] */
 191 {
 192         int mac = 0;
 193 
 194         skb->dev = dev;
 195         skb->arp = 1;
 196         if (dev->hard_header)
 197         {
 198                 /*
 199                  *      Build a hardware header. Source address is our mac, destination unknown
 200                  *      (rebuild header will sort this out)
 201                  */
 202                 skb_reserve(skb,(dev->hard_header_len+15)&~15); /* 16 byte aligned IP headers are good */
 203                 mac = dev->hard_header(skb, dev, ETH_P_IP, NULL, NULL, len);
 204                 if (mac < 0)
 205                 {
 206                         mac = -mac;
 207                         skb->arp = 0;
 208                         skb->raddr = daddr;     /* next routing address */
 209                 }
 210         }
 211         return mac;
 212 }
 213 
 214 static int ip_send_room(struct sk_buff *skb, __u32 daddr, int len, struct device *dev, __u32 saddr)
     /* [previous][next][first][last][top][bottom][index][help] */
 215 {
 216         int mac = 0;
 217 
 218         skb->dev = dev;
 219         skb->arp = 1;
 220         if (dev->hard_header)
 221         {
 222                 skb_reserve(skb,MAX_HEADER);
 223                 mac = dev->hard_header(skb, dev, ETH_P_IP, NULL, NULL, len);
 224                 if (mac < 0)
 225                 {
 226                         mac = -mac;
 227                         skb->arp = 0;
 228                         skb->raddr = daddr;     /* next routing address */
 229                 }
 230         }
 231         return mac;
 232 }
 233 
 234 int ip_id_count = 0;
 235 
 236 /*
 237  * This routine builds the appropriate hardware/IP headers for
 238  * the routine.  It assumes that if *dev != NULL then the
 239  * protocol knows what it's doing, otherwise it uses the
 240  * routing/ARP tables to select a device struct.
 241  */
 242 int ip_build_header(struct sk_buff *skb, __u32 saddr, __u32 daddr,
     /* [previous][next][first][last][top][bottom][index][help] */
 243                 struct device **dev, int type, struct options *opt, int len, int tos, int ttl)
 244 {
 245         struct rtable *rt;
 246         __u32 raddr;
 247         int tmp;
 248         __u32 src;
 249         struct iphdr *iph;
 250 
 251         /*
 252          *      See if we need to look up the device.
 253          */
 254 
 255 #ifdef CONFIG_IP_MULTICAST      
 256         if(MULTICAST(daddr) && *dev==NULL && skb->sk && *skb->sk->ip_mc_name)
 257                 *dev=dev_get(skb->sk->ip_mc_name);
 258 #endif
 259         if (*dev == NULL)
 260         {
 261                 if(skb->localroute)
 262                         rt = ip_rt_local(daddr, NULL, &src);
 263                 else
 264                         rt = ip_rt_route(daddr, NULL, &src);
 265                 if (rt == NULL)
 266                 {
 267                         ip_statistics.IpOutNoRoutes++;
 268                         return(-ENETUNREACH);
 269                 }
 270 
 271                 *dev = rt->rt_dev;
 272                 /*
 273                  *      If the frame is from us and going off machine it MUST MUST MUST
 274                  *      have the output device ip address and never the loopback
 275                  */
 276                 if (LOOPBACK(saddr) && !LOOPBACK(daddr))
 277                         saddr = src;/*rt->rt_dev->pa_addr;*/
 278                 raddr = rt->rt_gateway;
 279 
 280         }
 281         else
 282         {
 283                 /*
 284                  *      We still need the address of the first hop.
 285                  */
 286                 if(skb->localroute)
 287                         rt = ip_rt_local(daddr, NULL, &src);
 288                 else
 289                         rt = ip_rt_route(daddr, NULL, &src);
 290                 /*
 291                  *      If the frame is from us and going off machine it MUST MUST MUST
 292                  *      have the output device ip address and never the loopback
 293                  */
 294                 if (LOOPBACK(saddr) && !LOOPBACK(daddr))
 295                         saddr = src;/*rt->rt_dev->pa_addr;*/
 296 
 297                 raddr = (rt == NULL) ? 0 : rt->rt_gateway;
 298         }
 299 
 300         /*
 301          *      No source addr so make it our addr
 302          */
 303         if (saddr == 0)
 304                 saddr = src;
 305 
 306         /*
 307          *      No gateway so aim at the real destination
 308          */
 309         if (raddr == 0)
 310                 raddr = daddr;
 311 
 312         /*
 313          *      Now build the MAC header.
 314          */
 315 
 316         if(type==IPPROTO_TCP)
 317                 tmp = ip_send_room(skb, raddr, len, *dev, saddr);
 318         else
 319                 tmp = ip_send(skb, raddr, len, *dev, saddr);
 320 
 321         /*
 322          *      Book keeping
 323          */
 324 
 325         skb->dev = *dev;
 326         skb->saddr = saddr;
 327         if (skb->sk)
 328                 skb->sk->saddr = saddr;
 329 
 330         /*
 331          *      Now build the IP header.
 332          */
 333 
 334         /*
 335          *      If we are using IPPROTO_RAW, then we don't need an IP header, since
 336          *      one is being supplied to us by the user
 337          */
 338 
 339         if(type == IPPROTO_RAW)
 340                 return (tmp);
 341 
 342         /*
 343          *      Build the IP addresses
 344          */
 345          
 346         iph=(struct iphdr *)skb_put(skb,sizeof(struct iphdr));
 347 
 348         iph->version  = 4;
 349         iph->ihl      = 5;
 350         iph->tos      = tos;
 351         iph->frag_off = 0;
 352         iph->ttl      = ttl;
 353         iph->daddr    = daddr;
 354         iph->saddr    = saddr;
 355         iph->protocol = type;
 356         skb->ip_hdr   = iph;
 357 
 358         return(20 + tmp);       /* IP header plus MAC header size */
 359 }
 360 
 361 
 362 /*
 363  *      Generate a checksum for an outgoing IP datagram.
 364  */
 365 
 366 void ip_send_check(struct iphdr *iph)
     /* [previous][next][first][last][top][bottom][index][help] */
 367 {
 368         iph->check = 0;
 369         iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
 370 }
 371 
 372 /************************ Fragment Handlers From NET2E **********************************/
 373 
 374 
 375 /*
 376  *      This fragment handler is a bit of a heap. On the other hand it works quite
 377  *      happily and handles things quite well.
 378  */
 379 
 380 static struct ipq *ipqueue = NULL;              /* IP fragment queue    */
 381 
 382 /*
 383  *      Create a new fragment entry.
 384  */
 385 
 386 static struct ipfrag *ip_frag_create(int offset, int end, struct sk_buff *skb, unsigned char *ptr)
     /* [previous][next][first][last][top][bottom][index][help] */
 387 {
 388         struct ipfrag *fp;
 389 
 390         fp = (struct ipfrag *) kmalloc(sizeof(struct ipfrag), GFP_ATOMIC);
 391         if (fp == NULL)
 392         {
 393                 NETDEBUG(printk("IP: frag_create: no memory left !\n"));
 394                 return(NULL);
 395         }
 396         memset(fp, 0, sizeof(struct ipfrag));
 397 
 398         /* Fill in the structure. */
 399         fp->offset = offset;
 400         fp->end = end;
 401         fp->len = end - offset;
 402         fp->skb = skb;
 403         fp->ptr = ptr;
 404 
 405         return(fp);
 406 }
 407 
 408 
 409 /*
 410  *      Find the correct entry in the "incomplete datagrams" queue for
 411  *      this IP datagram, and return the queue entry address if found.
 412  */
 413 
 414 static struct ipq *ip_find(struct iphdr *iph)
     /* [previous][next][first][last][top][bottom][index][help] */
 415 {
 416         struct ipq *qp;
 417         struct ipq *qplast;
 418 
 419         cli();
 420         qplast = NULL;
 421         for(qp = ipqueue; qp != NULL; qplast = qp, qp = qp->next)
 422         {
 423                 if (iph->id== qp->iph->id && iph->saddr == qp->iph->saddr &&
 424                         iph->daddr == qp->iph->daddr && iph->protocol == qp->iph->protocol)
 425                 {
 426                         del_timer(&qp->timer);  /* So it doesn't vanish on us. The timer will be reset anyway */
 427                         sti();
 428                         return(qp);
 429                 }
 430         }
 431         sti();
 432         return(NULL);
 433 }
 434 
 435 
 436 /*
 437  *      Remove an entry from the "incomplete datagrams" queue, either
 438  *      because we completed, reassembled and processed it, or because
 439  *      it timed out.
 440  */
 441 
 442 static void ip_free(struct ipq *qp)
     /* [previous][next][first][last][top][bottom][index][help] */
 443 {
 444         struct ipfrag *fp;
 445         struct ipfrag *xp;
 446 
 447         /*
 448          * Stop the timer for this entry.
 449          */
 450 
 451         del_timer(&qp->timer);
 452 
 453         /* Remove this entry from the "incomplete datagrams" queue. */
 454         cli();
 455         if (qp->prev == NULL)
 456         {
 457                 ipqueue = qp->next;
 458                 if (ipqueue != NULL)
 459                         ipqueue->prev = NULL;
 460         }
 461         else
 462         {
 463                 qp->prev->next = qp->next;
 464                 if (qp->next != NULL)
 465                         qp->next->prev = qp->prev;
 466         }
 467 
 468         /* Release all fragment data. */
 469 
 470         fp = qp->fragments;
 471         while (fp != NULL)
 472         {
 473                 xp = fp->next;
 474                 IS_SKB(fp->skb);
 475                 kfree_skb(fp->skb,FREE_READ);
 476                 kfree_s(fp, sizeof(struct ipfrag));
 477                 fp = xp;
 478         }
 479 
 480         /* Release the IP header. */
 481         kfree_s(qp->iph, 64 + 8);
 482 
 483         /* Finally, release the queue descriptor itself. */
 484         kfree_s(qp, sizeof(struct ipq));
 485         sti();
 486 }
 487 
 488 
 489 /*
 490  *      Oops- a fragment queue timed out.  Kill it and send an ICMP reply.
 491  */
 492 
 493 static void ip_expire(unsigned long arg)
     /* [previous][next][first][last][top][bottom][index][help] */
 494 {
 495         struct ipq *qp;
 496 
 497         qp = (struct ipq *)arg;
 498 
 499         /*
 500          *      Send an ICMP "Fragment Reassembly Timeout" message.
 501          */
 502 
 503         ip_statistics.IpReasmTimeout++;
 504         ip_statistics.IpReasmFails++;   
 505         /* This if is always true... shrug */
 506         if(qp->fragments!=NULL)
 507                 icmp_send(qp->fragments->skb,ICMP_TIME_EXCEEDED,
 508                                 ICMP_EXC_FRAGTIME, 0, qp->dev);
 509 
 510         /*
 511          *      Nuke the fragment queue.
 512          */
 513         ip_free(qp);
 514 }
 515 
 516 
 517 /*
 518  *      Add an entry to the 'ipq' queue for a newly received IP datagram.
 519  *      We will (hopefully :-) receive all other fragments of this datagram
 520  *      in time, so we just create a queue for this datagram, in which we
 521  *      will insert the received fragments at their respective positions.
 522  */
 523 
 524 static struct ipq *ip_create(struct sk_buff *skb, struct iphdr *iph, struct device *dev)
     /* [previous][next][first][last][top][bottom][index][help] */
 525 {
 526         struct ipq *qp;
 527         int ihlen;
 528 
 529         qp = (struct ipq *) kmalloc(sizeof(struct ipq), GFP_ATOMIC);
 530         if (qp == NULL)
 531         {
 532                 NETDEBUG(printk("IP: create: no memory left !\n"));
 533                 return(NULL);
 534                 skb->dev = qp->dev;
 535         }
 536         memset(qp, 0, sizeof(struct ipq));
 537 
 538         /*
 539          *      Allocate memory for the IP header (plus 8 octets for ICMP).
 540          */
 541 
 542         ihlen = iph->ihl * 4;
 543         qp->iph = (struct iphdr *) kmalloc(64 + 8, GFP_ATOMIC);
 544         if (qp->iph == NULL)
 545         {
 546                 NETDEBUG(printk("IP: create: no memory left !\n"));
 547                 kfree_s(qp, sizeof(struct ipq));
 548                 return(NULL);
 549         }
 550 
 551         memcpy(qp->iph, iph, ihlen + 8);
 552         qp->len = 0;
 553         qp->ihlen = ihlen;
 554         qp->fragments = NULL;
 555         qp->dev = dev;
 556 
 557         /* Start a timer for this entry. */
 558         qp->timer.expires = jiffies + IP_FRAG_TIME;     /* about 30 seconds     */
 559         qp->timer.data = (unsigned long) qp;            /* pointer to queue     */
 560         qp->timer.function = ip_expire;                 /* expire function      */
 561         add_timer(&qp->timer);
 562 
 563         /* Add this entry to the queue. */
 564         qp->prev = NULL;
 565         cli();
 566         qp->next = ipqueue;
 567         if (qp->next != NULL)
 568                 qp->next->prev = qp;
 569         ipqueue = qp;
 570         sti();
 571         return(qp);
 572 }
 573 
 574 
 575 /*
 576  *      See if a fragment queue is complete.
 577  */
 578 
 579 static int ip_done(struct ipq *qp)
     /* [previous][next][first][last][top][bottom][index][help] */
 580 {
 581         struct ipfrag *fp;
 582         int offset;
 583 
 584         /* Only possible if we received the final fragment. */
 585         if (qp->len == 0)
 586                 return(0);
 587 
 588         /* Check all fragment offsets to see if they connect. */
 589         fp = qp->fragments;
 590         offset = 0;
 591         while (fp != NULL)
 592         {
 593                 if (fp->offset > offset)
 594                         return(0);      /* fragment(s) missing */
 595                 offset = fp->end;
 596                 fp = fp->next;
 597         }
 598 
 599         /* All fragments are present. */
 600         return(1);
 601 }
 602 
 603 
 604 /*
 605  *      Build a new IP datagram from all its fragments.
 606  *
 607  *      FIXME: We copy here because we lack an effective way of handling lists
 608  *      of bits on input. Until the new skb data handling is in I'm not going
 609  *      to touch this with a bargepole. 
 610  */
 611 
 612 static struct sk_buff *ip_glue(struct ipq *qp)
     /* [previous][next][first][last][top][bottom][index][help] */
 613 {
 614         struct sk_buff *skb;
 615         struct iphdr *iph;
 616         struct ipfrag *fp;
 617         unsigned char *ptr;
 618         int count, len;
 619 
 620         /*
 621          *      Allocate a new buffer for the datagram.
 622          */
 623         len = qp->ihlen + qp->len;
 624 
 625         if ((skb = dev_alloc_skb(len)) == NULL)
 626         {
 627                 ip_statistics.IpReasmFails++;
 628                 NETDEBUG(printk("IP: queue_glue: no memory for gluing queue %p\n", qp));
 629                 ip_free(qp);
 630                 return(NULL);
 631         }
 632 
 633         /* Fill in the basic details. */
 634         skb_put(skb,len);
 635         skb->h.raw = skb->data;
 636         skb->free = 1;
 637 
 638         /* Copy the original IP headers into the new buffer. */
 639         ptr = (unsigned char *) skb->h.raw;
 640         memcpy(ptr, ((unsigned char *) qp->iph), qp->ihlen);
 641         ptr += qp->ihlen;
 642 
 643         count = 0;
 644 
 645         /* Copy the data portions of all fragments into the new buffer. */
 646         fp = qp->fragments;
 647         while(fp != NULL)
 648         {
 649                 if(count+fp->len > skb->len)
 650                 {
 651                         NETDEBUG(printk("Invalid fragment list: Fragment over size.\n"));
 652                         ip_free(qp);
 653                         kfree_skb(skb,FREE_WRITE);
 654                         ip_statistics.IpReasmFails++;
 655                         return NULL;
 656                 }
 657                 memcpy((ptr + fp->offset), fp->ptr, fp->len);
 658                 count += fp->len;
 659                 fp = fp->next;
 660         }
 661 
 662         /* We glued together all fragments, so remove the queue entry. */
 663         ip_free(qp);
 664 
 665         /* Done with all fragments. Fixup the new IP header. */
 666         iph = skb->h.iph;
 667         iph->frag_off = 0;
 668         iph->tot_len = htons((iph->ihl * 4) + count);
 669         skb->ip_hdr = iph;
 670 
 671         ip_statistics.IpReasmOKs++;
 672         return(skb);
 673 }
 674 
 675 
 676 /*
 677  *      Process an incoming IP datagram fragment.
 678  */
 679 
 680 static struct sk_buff *ip_defrag(struct iphdr *iph, struct sk_buff *skb, struct device *dev)
     /* [previous][next][first][last][top][bottom][index][help] */
 681 {
 682         struct ipfrag *prev, *next, *tmp;
 683         struct ipfrag *tfp;
 684         struct ipq *qp;
 685         struct sk_buff *skb2;
 686         unsigned char *ptr;
 687         int flags, offset;
 688         int i, ihl, end;
 689 
 690         ip_statistics.IpReasmReqds++;
 691 
 692         /* Find the entry of this IP datagram in the "incomplete datagrams" queue. */
 693         qp = ip_find(iph);
 694 
 695         /* Is this a non-fragmented datagram? */
 696         offset = ntohs(iph->frag_off);
 697         flags = offset & ~IP_OFFSET;
 698         offset &= IP_OFFSET;
 699         if (((flags & IP_MF) == 0) && (offset == 0))
 700         {
 701                 if (qp != NULL)
 702                         ip_free(qp);    /* Huh? How could this exist?? */
 703                 return(skb);
 704         }
 705 
 706         offset <<= 3;           /* offset is in 8-byte chunks */
 707 
 708         /*
 709          * If the queue already existed, keep restarting its timer as long
 710          * as we still are receiving fragments.  Otherwise, create a fresh
 711          * queue entry.
 712          */
 713 
 714         if (qp != NULL)
 715         {
 716                 del_timer(&qp->timer);
 717                 qp->timer.expires = jiffies + IP_FRAG_TIME;     /* about 30 seconds */
 718                 qp->timer.data = (unsigned long) qp;    /* pointer to queue */
 719                 qp->timer.function = ip_expire;         /* expire function */
 720                 add_timer(&qp->timer);
 721         }
 722         else
 723         {
 724                 /*
 725                  *      If we failed to create it, then discard the frame
 726                  */
 727                 if ((qp = ip_create(skb, iph, dev)) == NULL)
 728                 {
 729                         skb->sk = NULL;
 730                         kfree_skb(skb, FREE_READ);
 731                         ip_statistics.IpReasmFails++;
 732                         return NULL;
 733                 }
 734         }
 735 
 736         /*
 737          *      Determine the position of this fragment.
 738          */
 739 
 740         ihl = iph->ihl * 4;
 741         end = offset + ntohs(iph->tot_len) - ihl;
 742 
 743         /*
 744          *      Point into the IP datagram 'data' part.
 745          */
 746 
 747         ptr = skb->data + ihl;
 748 
 749         /*
 750          *      Is this the final fragment?
 751          */
 752 
 753         if ((flags & IP_MF) == 0)
 754                 qp->len = end;
 755 
 756         /*
 757          *      Find out which fragments are in front and at the back of us
 758          *      in the chain of fragments so far.  We must know where to put
 759          *      this fragment, right?
 760          */
 761 
 762         prev = NULL;
 763         for(next = qp->fragments; next != NULL; next = next->next)
 764         {
 765                 if (next->offset > offset)
 766                         break;  /* bingo! */
 767                 prev = next;
 768         }
 769 
 770         /*
 771          *      We found where to put this one.
 772          *      Check for overlap with preceding fragment, and, if needed,
 773          *      align things so that any overlaps are eliminated.
 774          */
 775         if (prev != NULL && offset < prev->end)
 776         {
 777                 i = prev->end - offset;
 778                 offset += i;    /* ptr into datagram */
 779                 ptr += i;       /* ptr into fragment data */
 780         }
 781 
 782         /*
 783          * Look for overlap with succeeding segments.
 784          * If we can merge fragments, do it.
 785          */
 786 
 787         for(tmp=next; tmp != NULL; tmp = tfp)
 788         {
 789                 tfp = tmp->next;
 790                 if (tmp->offset >= end)
 791                         break;          /* no overlaps at all */
 792 
 793                 i = end - next->offset;                 /* overlap is 'i' bytes */
 794                 tmp->len -= i;                          /* so reduce size of    */
 795                 tmp->offset += i;                       /* next fragment        */
 796                 tmp->ptr += i;
 797                 /*
 798                  *      If we get a frag size of <= 0, remove it and the packet
 799                  *      that it goes with.
 800                  */
 801                 if (tmp->len <= 0)
 802                 {
 803                         if (tmp->prev != NULL)
 804                                 tmp->prev->next = tmp->next;
 805                         else
 806                                 qp->fragments = tmp->next;
 807 
 808                         if (tfp->next != NULL)
 809                                 tmp->next->prev = tmp->prev;
 810                         
 811                         next=tfp;       /* We have killed the original next frame */
 812 
 813                         kfree_skb(tmp->skb,FREE_READ);
 814                         kfree_s(tmp, sizeof(struct ipfrag));
 815                 }
 816         }
 817 
 818         /*
 819          *      Insert this fragment in the chain of fragments.
 820          */
 821 
 822         tfp = NULL;
 823         tfp = ip_frag_create(offset, end, skb, ptr);
 824 
 825         /*
 826          *      No memory to save the fragment - so throw the lot
 827          */
 828 
 829         if (!tfp)
 830         {
 831                 skb->sk = NULL;
 832                 kfree_skb(skb, FREE_READ);
 833                 return NULL;
 834         }
 835         tfp->prev = prev;
 836         tfp->next = next;
 837         if (prev != NULL)
 838                 prev->next = tfp;
 839         else
 840                 qp->fragments = tfp;
 841 
 842         if (next != NULL)
 843                 next->prev = tfp;
 844 
 845         /*
 846          *      OK, so we inserted this new fragment into the chain.
 847          *      Check if we now have a full IP datagram which we can
 848          *      bump up to the IP layer...
 849          */
 850 
 851         if (ip_done(qp))
 852         {
 853                 skb2 = ip_glue(qp);             /* glue together the fragments */
 854                 return(skb2);
 855         }
 856         return(NULL);
 857 }
 858 
 859 
 860 /*
 861  *      This IP datagram is too large to be sent in one piece.  Break it up into
 862  *      smaller pieces (each of size equal to the MAC header plus IP header plus
 863  *      a block of the data of the original IP data part) that will yet fit in a
 864  *      single device frame, and queue such a frame for sending by calling the
 865  *      ip_queue_xmit().  Note that this is recursion, and bad things will happen
 866  *      if this function causes a loop...
 867  *
 868  *      Yes this is inefficient, feel free to submit a quicker one.
 869  *
 870  *      **Protocol Violation**
 871  *      We copy all the options to each fragment. !FIXME!
 872  */
 873  
 874 void ip_fragment(struct sock *sk, struct sk_buff *skb, struct device *dev, int is_frag)
     /* [previous][next][first][last][top][bottom][index][help] */
 875 {
 876         struct iphdr *iph;
 877         unsigned char *raw;
 878         unsigned char *ptr;
 879         struct sk_buff *skb2;
 880         int left, mtu, hlen, len;
 881         int offset;
 882         unsigned long flags;
 883 
 884         /*
 885          *      Point into the IP datagram header.
 886          */
 887 
 888         raw = skb->data;
 889 #if 0
 890         iph = (struct iphdr *) (raw + dev->hard_header_len);    
 891         skb->ip_hdr = iph;
 892 #else
 893         iph = skb->ip_hdr;
 894 #endif
 895 
 896         /*
 897          *      Setup starting values.
 898          */
 899 
 900         hlen = iph->ihl * 4;
 901         left = ntohs(iph->tot_len) - hlen;      /* Space per frame */
 902         hlen += dev->hard_header_len;           /* Total header size */
 903         mtu = (dev->mtu - hlen);                /* Size of data space */
 904         ptr = (raw + hlen);                     /* Where to start from */
 905 
 906         /*
 907          *      Check for any "DF" flag. [DF means do not fragment]
 908          */
 909 
 910         if (ntohs(iph->frag_off) & IP_DF)
 911         {
 912                 /*
 913                  *      Reply giving the MTU of the failed hop.
 914                  */
 915                 ip_statistics.IpFragFails++;
 916                 icmp_send(skb,ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, dev->mtu, dev);
 917                 return;
 918         }
 919 
 920         /*
 921          *      The protocol doesn't seem to say what to do in the case that the
 922          *      frame + options doesn't fit the mtu. As it used to fall down dead
 923          *      in this case we were fortunate it didn't happen
 924          */
 925 
 926         if(mtu<8)
 927         {
 928                 /* It's wrong but it's better than nothing */
 929                 icmp_send(skb,ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED,dev->mtu, dev);
 930                 ip_statistics.IpFragFails++;
 931                 return;
 932         }
 933 
 934         /*
 935          *      Fragment the datagram.
 936          */
 937 
 938         /*
 939          *      The initial offset is 0 for a complete frame. When
 940          *      fragmenting fragments it's wherever this one starts.
 941          */
 942 
 943         if (is_frag & 2)
 944                 offset = (ntohs(iph->frag_off) & IP_OFFSET) << 3;
 945         else
 946                 offset = 0;
 947 
 948 
 949         /*
 950          *      Keep copying data until we run out.
 951          */
 952 
 953         while(left > 0)
 954         {
 955                 len = left;
 956                 /* IF: it doesn't fit, use 'mtu' - the data space left */
 957                 if (len > mtu)
 958                         len = mtu;
 959                 /* IF: we are not sending upto and including the packet end
 960                    then align the next start on an eight byte boundary */
 961                 if (len < left)
 962                 {
 963                         len/=8;
 964                         len*=8;
 965                 }
 966                 /*
 967                  *      Allocate buffer.
 968                  */
 969 
 970                 if ((skb2 = alloc_skb(len + hlen+15,GFP_ATOMIC)) == NULL)
 971                 {
 972                         NETDEBUG(printk("IP: frag: no memory for new fragment!\n"));
 973                         ip_statistics.IpFragFails++;
 974                         return;
 975                 }
 976 
 977                 /*
 978                  *      Set up data on packet
 979                  */
 980 
 981                 skb2->arp = skb->arp;
 982                 if(skb->free==0)
 983                         printk("IP fragmenter: BUG free!=1 in fragmenter\n");
 984                 skb2->free = 1;
 985                 skb_put(skb2,len + hlen);
 986                 skb2->h.raw=(char *) skb2->data;
 987                 /*
 988                  *      Charge the memory for the fragment to any owner
 989                  *      it might possess
 990                  */
 991 
 992                 save_flags(flags);
 993                 if (sk)
 994                 {
 995                         cli();
 996                         sk->wmem_alloc += skb2->truesize;
 997                         skb2->sk=sk;
 998                 }
 999                 restore_flags(flags);
1000                 skb2->raddr = skb->raddr;       /* For rebuild_header - must be here */
1001 
1002                 /*
1003                  *      Copy the packet header into the new buffer.
1004                  */
1005 
1006                 memcpy(skb2->h.raw, raw, hlen);
1007 
1008                 /*
1009                  *      Copy a block of the IP datagram.
1010                  */
1011                 memcpy(skb2->h.raw + hlen, ptr, len);
1012                 left -= len;
1013 
1014                 skb2->h.raw+=dev->hard_header_len;
1015 
1016                 /*
1017                  *      Fill in the new header fields.
1018                  */
1019                 iph = (struct iphdr *)(skb2->h.raw/*+dev->hard_header_len*/);
1020                 iph->frag_off = htons((offset >> 3));
1021                 skb2->ip_hdr = iph;
1022                 /*
1023                  *      Added AC : If we are fragmenting a fragment thats not the
1024                  *                 last fragment then keep MF on each bit
1025                  */
1026                 if (left > 0 || (is_frag & 1))
1027                         iph->frag_off |= htons(IP_MF);
1028                 ptr += len;
1029                 offset += len;
1030 
1031                 /*
1032                  *      Put this fragment into the sending queue.
1033                  */
1034 
1035                 ip_statistics.IpFragCreates++;
1036 
1037                 ip_queue_xmit(sk, dev, skb2, 2);
1038         }
1039         ip_statistics.IpFragOKs++;
1040 }
1041 
1042 
1043 
1044 #ifdef CONFIG_IP_FORWARD
1045 
1046 /*
1047  *      Forward an IP datagram to its next destination.
1048  */
1049 
1050 int ip_forward(struct sk_buff *skb, struct device *dev, int is_frag, unsigned long target_addr, int target_strict)
     /* [previous][next][first][last][top][bottom][index][help] */
1051 {
1052         struct device *dev2;    /* Output device */
1053         struct iphdr *iph;      /* Our header */
1054         struct sk_buff *skb2;   /* Output packet */
1055         struct rtable *rt;      /* Route we use */
1056         unsigned char *ptr;     /* Data pointer */
1057         unsigned long raddr;    /* Router IP address */
1058 #ifdef CONFIG_IP_FIREWALL
1059         int fw_res = 0;         /* Forwarding result */ 
1060 #ifdef CONFIG_IP_MASQUERADE     
1061         struct sk_buff *skb_in = skb;   /* So we can remember if the masquerader did some swaps */
1062 #endif  
1063         
1064         /* 
1065          *      See if we are allowed to forward this.
1066          *      Note: demasqueraded fragments are always 'back'warded.
1067          */
1068 
1069         
1070         if(!(is_frag&4))
1071         {
1072                 fw_res=ip_fw_chk(skb->h.iph, dev, ip_fw_fwd_chain, ip_fw_fwd_policy, 0);
1073                 switch (fw_res) {
1074                 case FW_ACCEPT:
1075 #ifdef CONFIG_IP_MASQUERADE
1076                 case FW_MASQUERADE:
1077 #endif
1078                         break;
1079                 case FW_REJECT:
1080                         icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0, dev);
1081                         /* fall thru */
1082                 default:
1083                         return -1;
1084                 }
1085         }
1086 #endif
1087         /*
1088          *      According to the RFC, we must first decrease the TTL field. If
1089          *      that reaches zero, we must reply an ICMP control message telling
1090          *      that the packet's lifetime expired.
1091          *
1092          *      Exception:
1093          *      We may not generate an ICMP for an ICMP. icmp_send does the
1094          *      enforcement of this so we can forget it here. It is however
1095          *      sometimes VERY important.
1096          */
1097 
1098         iph = skb->h.iph;
1099         iph->ttl--;
1100 
1101         /*
1102          *      Re-compute the IP header checksum.
1103          *      This is inefficient. We know what has happened to the header
1104          *      and could thus adjust the checksum as Phil Karn does in KA9Q
1105          */
1106 
1107         iph->check = ntohs(iph->check) + 0x0100;
1108         if ((iph->check & 0xFF00) == 0)
1109                 iph->check++;           /* carry overflow */
1110         iph->check = htons(iph->check);
1111 
1112         if (iph->ttl <= 0)
1113         {
1114                 /* Tell the sender its packet died... */
1115                 icmp_send(skb, ICMP_TIME_EXCEEDED, ICMP_EXC_TTL, 0, dev);
1116                 return -1;
1117         }
1118 
1119         /*
1120          * OK, the packet is still valid.  Fetch its destination address,
1121          * and give it to the IP sender for further processing.
1122          */
1123 
1124         rt = ip_rt_route(target_addr, NULL, NULL);
1125         if (rt == NULL)
1126         {
1127                 /*
1128                  *      Tell the sender its packet cannot be delivered. Again
1129                  *      ICMP is screened later.
1130                  */
1131                 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_NET_UNREACH, 0, dev);
1132                 return -1;
1133         }
1134 
1135 
1136         /*
1137          * Gosh.  Not only is the packet valid; we even know how to
1138          * forward it onto its final destination.  Can we say this
1139          * is being plain lucky?
1140          * If the router told us that there is no GW, use the dest.
1141          * IP address itself- we seem to be connected directly...
1142          */
1143 
1144         raddr = rt->rt_gateway;
1145 
1146         if (raddr != 0)
1147         {
1148                 /*
1149                  *      Strict routing permits no gatewaying
1150                  */
1151                 
1152                 if(target_strict)
1153                 {
1154                         icmp_send(skb, ICMP_DEST_UNREACH, ICMP_SR_FAILED, 0, dev);
1155                         return -1;
1156                 }
1157         
1158                 /*
1159                  *      There is a gateway so find the correct route for it.
1160                  *      Gateways cannot in turn be gatewayed.
1161                  */
1162 
1163                 rt = ip_rt_route(raddr, NULL, NULL);
1164                 if (rt == NULL)
1165                 {
1166                         /*
1167                          *      Tell the sender its packet cannot be delivered...
1168                          */
1169                         icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0, dev);
1170                         return -1;
1171                 }
1172                 if (rt->rt_gateway != 0)
1173                         raddr = rt->rt_gateway;
1174         }
1175         else
1176                 raddr = target_addr;
1177 
1178         /*
1179          *      Having picked a route we can now send the frame out.
1180          */
1181 
1182         dev2 = rt->rt_dev;
1183         
1184         /*
1185          *      In IP you never have to forward a frame on the interface that it 
1186          *      arrived upon. We now generate an ICMP HOST REDIRECT giving the route
1187          *      we calculated.
1188          */
1189 #ifndef CONFIG_IP_NO_ICMP_REDIRECT
1190         if (dev == dev2 && !((iph->saddr^iph->daddr)&dev->pa_mask) && (rt->rt_flags&RTF_MODIFIED))
1191                 icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, raddr, dev);
1192 #endif          
1193 
1194         /*
1195          * We now may allocate a new buffer, and copy the datagram into it.
1196          * If the indicated interface is up and running, kick it.
1197          */
1198 
1199         if (dev2->flags & IFF_UP)
1200         {
1201 #ifdef CONFIG_IP_MASQUERADE
1202                 /*
1203                  * If this fragment needs masquerading, make it so...
1204                  * (Dont masquerade de-masqueraded fragments)
1205                  */
1206                 if (!(is_frag&4) && fw_res==2)
1207                         ip_fw_masquerade(&skb, dev2);
1208 #endif
1209                 IS_SKB(skb);
1210 
1211                 if(skb_headroom(skb)<dev2->hard_header_len)
1212                 {
1213                         skb2 = alloc_skb(dev2->hard_header_len + skb->len + 15, GFP_ATOMIC);
1214                         IS_SKB(skb2);
1215                 
1216                         /*
1217                          *      This is rare and since IP is tolerant of network failures
1218                          *      quite harmless.
1219                          */
1220                 
1221                         if (skb2 == NULL)
1222                         {
1223                                 NETDEBUG(printk("\nIP: No memory available for IP forward\n"));
1224                                 return -1;
1225                         }
1226                 
1227                         /*
1228                          *      Add the physical headers.
1229                          */
1230 
1231                         ip_send(skb2,raddr,skb->len,dev2,dev2->pa_addr);
1232 
1233                         /*
1234                          *      We have to copy the bytes over as the new header wouldn't fit
1235                          *      the old buffer. This should be very rare.
1236                          */              
1237                         
1238                         ptr = skb_put(skb2,skb->len);
1239                         skb2->free = 1;
1240                         skb2->h.raw = ptr;
1241 
1242                         /*
1243                          *      Copy the packet data into the new buffer.
1244                          */
1245                         memcpy(ptr, skb->h.raw, skb->len);
1246                 }
1247                 else
1248                 {
1249                         /* 
1250                          *      Build a new MAC header. 
1251                          */
1252 
1253                         skb2 = skb;             
1254                         skb2->dev=dev2;
1255                         skb->arp=1;
1256                         skb->raddr=raddr;
1257                         if(dev2->hard_header)
1258                         {
1259                                 if(dev2->hard_header(skb, dev2, ETH_P_IP, NULL, NULL, skb->len)<0)
1260                                         skb->arp=0;
1261                         }
1262                         ip_statistics.IpForwDatagrams++;
1263                 }
1264                 /*
1265                  *      See if it needs fragmenting. Note in ip_rcv we tagged
1266                  *      the fragment type. This must be right so that
1267                  *      the fragmenter does the right thing.
1268                  */
1269 
1270                 if(skb2->len > dev2->mtu + dev2->hard_header_len)
1271                 {
1272                         ip_fragment(NULL,skb2,dev2, is_frag);
1273                         kfree_skb(skb2,FREE_WRITE);
1274                 }
1275                 else
1276                 {
1277 #ifdef CONFIG_IP_ACCT           
1278                         /*
1279                          *      Count mapping we shortcut
1280                          */
1281                          
1282                         ip_fw_chk(iph,dev,ip_acct_chain,IP_FW_F_ACCEPT,1);
1283 #endif                  
1284                         
1285                         /*
1286                          *      Map service types to priority. We lie about
1287                          *      throughput being low priority, but it's a good
1288                          *      choice to help improve general usage.
1289                          */
1290                         if(iph->tos & IPTOS_LOWDELAY)
1291                                 dev_queue_xmit(skb2, dev2, SOPRI_INTERACTIVE);
1292                         else if(iph->tos & IPTOS_THROUGHPUT)
1293                                 dev_queue_xmit(skb2, dev2, SOPRI_BACKGROUND);
1294                         else
1295                                 dev_queue_xmit(skb2, dev2, SOPRI_NORMAL);
1296                 }
1297         }
1298         else
1299                 return -1;
1300         
1301         /*
1302          *      Tell the caller if their buffer is free.
1303          */      
1304          
1305         if(skb==skb2)
1306                 return 0;       
1307 
1308 #ifdef CONFIG_IP_MASQUERADE     
1309         /*
1310          *      The original is free. Free our copy and
1311          *      tell the caller not to free.
1312          */
1313         if(skb!=skb_in)
1314         {
1315                 kfree_skb(skb_in, FREE_WRITE);
1316                 return 0;
1317         }
1318 #endif  
1319         return 1;
1320 }
1321 
1322 
1323 #endif
1324 
1325 /*
1326  *      This function receives all incoming IP datagrams.
1327  *
1328  *      On entry skb->data points to the start of the IP header and
1329  *      the MAC header has been removed.
1330  */
1331 
1332 int ip_rcv(struct sk_buff *skb, struct device *dev, struct packet_type *pt)
     /* [previous][next][first][last][top][bottom][index][help] */
1333 {
1334         struct iphdr *iph = skb->h.iph;
1335         struct sock *raw_sk=NULL;
1336         unsigned char hash;
1337         unsigned char flag = 0;
1338         struct inet_protocol *ipprot;
1339         int brd=IS_MYADDR;
1340         unsigned long target_addr;
1341         int target_strict=0;
1342         int is_frag=0;
1343 #ifdef CONFIG_IP_FIREWALL
1344         int err;
1345 #endif  
1346 
1347 #ifdef CONFIG_NET_IPV6
1348         /* 
1349          *      Intercept IPv6 frames. We dump ST-II and invalid types just below..
1350          */
1351          
1352         if(iph->version == 6)
1353                 return ipv6_rcv(skb,dev,pt);
1354 #endif          
1355 
1356         ip_statistics.IpInReceives++;
1357 
1358         /*
1359          *      Tag the ip header of this packet so we can find it
1360          */
1361 
1362         skb->ip_hdr = iph;
1363 
1364         /*
1365          *      RFC1122: 3.1.2.2 MUST silently discard any IP frame that fails the checksum.
1366          *      RFC1122: 3.1.2.3 MUST discard a frame with invalid source address [NEEDS FIXING].
1367          *
1368          *      Is the datagram acceptable?
1369          *
1370          *      1.      Length at least the size of an ip header
1371          *      2.      Version of 4
1372          *      3.      Checksums correctly. [Speed optimisation for later, skip loopback checksums]
1373          *      4.      Doesn't have a bogus length
1374          *      (5.     We ought to check for IP multicast addresses and undefined types.. does this matter ?)
1375          */
1376 
1377         if (skb->len<sizeof(struct iphdr) || iph->ihl<5 || iph->version != 4 || ip_fast_csum((unsigned char *)iph, iph->ihl) !=0
1378                 || skb->len < ntohs(iph->tot_len))
1379         {
1380                 ip_statistics.IpInHdrErrors++;
1381                 kfree_skb(skb, FREE_WRITE);
1382                 return(0);
1383         }
1384 
1385         /*
1386          *      Our transport medium may have padded the buffer out. Now we know it
1387          *      is IP we can trim to the true length of the frame.
1388          *      Note this now means skb->len holds ntohs(iph->tot_len).
1389          */
1390 
1391         skb_trim(skb,ntohs(iph->tot_len));
1392         
1393         /*
1394          *      See if the firewall wants to dispose of the packet. 
1395          */
1396 
1397 #ifdef  CONFIG_IP_FIREWALL
1398         
1399         if ((err=ip_fw_chk(iph,dev,ip_fw_blk_chain,ip_fw_blk_policy, 0))<FW_ACCEPT)
1400         {
1401                 if(err==FW_REJECT)
1402                         icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0, dev);
1403                 kfree_skb(skb, FREE_WRITE);
1404                 return 0;       
1405         }
1406 
1407 #endif
1408         
1409 
1410         /*
1411          *      Next analyse the packet for options. Studies show under one packet in
1412          *      a thousand have options....
1413          */
1414          
1415         target_addr = iph->daddr;
1416 
1417         if (iph->ihl != 5)
1418         { 
1419                 /* Humph.. options. Lots of annoying fiddly bits */
1420                 
1421                 /*
1422                  *      This is straight from the RFC. It might even be right ;)
1423                  *
1424                  *      RFC 1122: 3.2.1.8 STREAMID option is obsolete and MUST be ignored.
1425                  *      RFC 1122: 3.2.1.8 MUST NOT crash on a zero length option.
1426                  *      RFC 1122: 3.2.1.8 MUST support acting as final destination of a source route.
1427                  */
1428                  
1429                 int opt_space=4*(iph->ihl-5);
1430                 int opt_size;
1431                 unsigned char *opt_ptr=skb->h.raw+sizeof(struct iphdr);
1432         
1433                 skb->ip_summed=0;               /* Our free checksum is bogus for this case */
1434                         
1435                 while(opt_space>0)
1436                 {
1437                         if(*opt_ptr==IPOPT_NOOP)
1438                         {
1439                                 opt_ptr++;
1440                                 opt_space--;
1441                                 continue;
1442                         }
1443                         if(*opt_ptr==IPOPT_END)
1444                                 break;  /* Done */
1445                         if(opt_space<2 || (opt_size=opt_ptr[1])<2 || opt_ptr[1]>opt_space)
1446                         {
1447                                 /*
1448                                  *      RFC 1122: 3.2.2.5  SHOULD send parameter problem reports.
1449                                  */
1450                                 icmp_send(skb, ICMP_PARAMETERPROB, 0, 0, skb->dev);
1451                                 kfree_skb(skb, FREE_READ);
1452                                 return -EINVAL;
1453                         }
1454                         switch(opt_ptr[0])
1455                         {
1456                                 case IPOPT_SEC:
1457                                         /* Should we drop this ?? */
1458                                         break;
1459                                 case IPOPT_SSRR:        /* These work almost the same way */
1460                                         target_strict=1;
1461                                         /* Fall through */
1462                                 case IPOPT_LSRR:
1463 #ifdef CONFIG_IP_NOSR
1464                                         kfree_skb(skb, FREE_READ);
1465                                         return -EINVAL;
1466 #endif                                  
1467                                 case IPOPT_RR:
1468                                 /*
1469                                  *      RFC 1122: 3.2.1.8 Support for RR is OPTIONAL.
1470                                  */
1471                                         if (iph->daddr!=skb->dev->pa_addr && (brd = ip_chk_addr(iph->daddr)) == 0) 
1472                                                 break;
1473                                         if((opt_size<3) || ( opt_ptr[0]==IPOPT_RR && opt_ptr[2] > opt_size-4 ))
1474                                         {
1475                                                 if(ip_chk_addr(iph->daddr))
1476                                                         icmp_send(skb, ICMP_PARAMETERPROB, 0, 0, skb->dev);
1477                                                 kfree_skb(skb, FREE_READ);
1478                                                 return -EINVAL;
1479                                         }
1480                                         if(opt_ptr[2] > opt_size-4 )
1481                                                 break;
1482                                         /* Bytes are [IPOPT_xxRR][Length][EntryPointer][Entry0][Entry1].... */
1483                                         /* This isn't going to be too portable - FIXME */
1484                                         if(opt_ptr[0]!=IPOPT_RR)
1485                                         {
1486                                                 int t;
1487                                                 target_addr=*(u32 *)(&opt_ptr[opt_ptr[2]]);     /* Get hop */
1488                                                 t=ip_chk_addr(target_addr);
1489                                                 if(t==IS_MULTICAST||t==IS_BROADCAST)
1490                                                 {
1491                                                         if(ip_chk_addr(iph->daddr))
1492                                                                 icmp_send(skb, ICMP_PARAMETERPROB, 0, 0, skb->dev);
1493                                                         kfree_skb(skb,FREE_READ);
1494                                                         return -EINVAL;                                         
1495                                                 }
1496                                         }
1497                                         *(u32 *)(&opt_ptr[opt_ptr[2]])=skb->dev->pa_addr;       /* Record hop */
1498                                         break;
1499                                 case IPOPT_TIMESTAMP:
1500                                 /*
1501                                  *      RFC 1122: 3.2.1.8 The timestamp option is OPTIONAL but if implemented
1502                                  *      MUST meet various rules (read the spec).
1503                                  */
1504                                         NETDEBUG(printk("ICMP: Someone finish the timestamp routine ;)\n"));
1505                                         break;
1506                                 default:
1507                                         break;
1508                         }
1509                         opt_ptr+=opt_size;
1510                         opt_space-=opt_size;
1511                 }
1512                                         
1513         }
1514 
1515 
1516         /*
1517          *      Remember if the frame is fragmented.
1518          */
1519          
1520         if(iph->frag_off)
1521         {
1522                 if (iph->frag_off & htons(IP_MF))
1523                         is_frag|=1;
1524                 /*
1525                  *      Last fragment ?
1526                  */
1527         
1528                 if (iph->frag_off & htons(IP_OFFSET))
1529                         is_frag|=2;
1530         }
1531         
1532         /*
1533          *      Do any IP forwarding required.  chk_addr() is expensive -- avoid it someday.
1534          *
1535          *      This is inefficient. While finding out if it is for us we could also compute
1536          *      the routing table entry. This is where the great unified cache theory comes
1537          *      in as and when someone implements it
1538          *
1539          *      For most hosts over 99% of packets match the first conditional
1540          *      and don't go via ip_chk_addr. Note: brd is set to IS_MYADDR at
1541          *      function entry.
1542          */
1543 
1544         if ( iph->daddr == skb->dev->pa_addr || (brd = ip_chk_addr(iph->daddr)) != 0)
1545         {
1546 #ifdef CONFIG_IP_MULTICAST      
1547                 if(!(dev->flags&IFF_ALLMULTI) && brd==IS_MULTICAST && iph->daddr!=IGMP_ALL_HOSTS && !(dev->flags&IFF_LOOPBACK))
1548                 {
1549                         /*
1550                          *      Check it is for one of our groups
1551                          */
1552                         struct ip_mc_list *ip_mc=dev->ip_mc_list;
1553                         do
1554                         {
1555                                 if(ip_mc==NULL)
1556                                 {       
1557                                         kfree_skb(skb, FREE_WRITE);
1558                                         return 0;
1559                                 }
1560                                 if(ip_mc->multiaddr==iph->daddr)
1561                                         break;
1562                                 ip_mc=ip_mc->next;
1563                         }
1564                         while(1);
1565                 }
1566 #endif
1567 
1568 #ifdef CONFIG_IP_MASQUERADE
1569                 /*
1570                  * Do we need to de-masquerade this fragment?
1571                  */
1572                 if (ip_fw_demasquerade(skb)) 
1573                 {
1574                         struct iphdr *iph=skb->h.iph;
1575                         if(ip_forward(skb, dev, is_frag|4, iph->daddr, 0))
1576                                 kfree_skb(skb, FREE_WRITE);
1577                         return(0);
1578                 }
1579 #endif
1580 
1581                 /*
1582                  *      Account for the packet
1583                  */
1584  
1585 #ifdef CONFIG_IP_ACCT
1586                 ip_fw_chk(iph,dev,ip_acct_chain,IP_FW_F_ACCEPT,1);
1587 #endif  
1588 
1589                 /*
1590                  *      Reassemble IP fragments.
1591                  */
1592 
1593                 if(is_frag)
1594                 {
1595                         /* Defragment. Obtain the complete packet if there is one */
1596                         skb=ip_defrag(iph,skb,dev);
1597                         if(skb==NULL)
1598                                 return 0;
1599                         skb->dev = dev;
1600                         iph=skb->h.iph;
1601                 }
1602 
1603                 /*
1604                  *      Point into the IP datagram, just past the header.
1605                  */
1606 
1607                 skb->ip_hdr = iph;
1608                 skb->h.raw += iph->ihl*4;
1609 
1610                 /*
1611                  *      Deliver to raw sockets. This is fun as to avoid copies we want to make no surplus copies.
1612                  *
1613                  *      RFC 1122: SHOULD pass TOS value up to the transport layer.
1614                  */
1615  
1616                 hash = iph->protocol & (SOCK_ARRAY_SIZE-1);
1617 
1618                 /* 
1619                  *      If there maybe a raw socket we must check - if not we don't care less 
1620                  */
1621                  
1622                 if((raw_sk=raw_prot.sock_array[hash])!=NULL)
1623                 {
1624                         struct sock *sknext=NULL;
1625                         struct sk_buff *skb1;
1626                         raw_sk=get_sock_raw(raw_sk, iph->protocol,  iph->saddr, iph->daddr);
1627                         if(raw_sk)      /* Any raw sockets */
1628                         {
1629                                 do
1630                                 {
1631                                         /* Find the next */
1632                                         sknext=get_sock_raw(raw_sk->next, iph->protocol, iph->saddr, iph->daddr);
1633                                         if(sknext)
1634                                                 skb1=skb_clone(skb, GFP_ATOMIC);
1635                                         else
1636                                                 break;  /* One pending raw socket left */
1637                                         if(skb1)
1638                                                 raw_rcv(raw_sk, skb1, dev, iph->saddr,iph->daddr);
1639                                         raw_sk=sknext;
1640                                 }
1641                                 while(raw_sk!=NULL);
1642                                 
1643                                 /*
1644                                  *      Here either raw_sk is the last raw socket, or NULL if none 
1645                                  */
1646                                  
1647                                 /*
1648                                  *      We deliver to the last raw socket AFTER the protocol checks as it avoids a surplus copy 
1649                                  */
1650                         }
1651                 }
1652         
1653                 /*
1654                  *      skb->h.raw now points at the protocol beyond the IP header.
1655                  */
1656         
1657                 hash = iph->protocol & (MAX_INET_PROTOS -1);
1658                 for (ipprot = (struct inet_protocol *)inet_protos[hash];ipprot != NULL;ipprot=(struct inet_protocol *)ipprot->next)
1659                 {
1660                         struct sk_buff *skb2;
1661         
1662                         if (ipprot->protocol != iph->protocol)
1663                                 continue;
1664                        /*
1665                         *       See if we need to make a copy of it.  This will
1666                         *       only be set if more than one protocol wants it.
1667                         *       and then not for the last one. If there is a pending
1668                         *       raw delivery wait for that
1669                         */
1670         
1671                         if (ipprot->copy || raw_sk)
1672                         {
1673                                 skb2 = skb_clone(skb, GFP_ATOMIC);
1674                                 if(skb2==NULL)
1675                                         continue;
1676                         }
1677                         else
1678                         {
1679                                 skb2 = skb;
1680                         }
1681                         flag = 1;
1682 
1683                        /*
1684                         *       Pass on the datagram to each protocol that wants it,
1685                         *       based on the datagram protocol.  We should really
1686                         *       check the protocol handler's return values here...
1687                         */
1688 
1689                         ipprot->handler(skb2, dev, NULL, iph->daddr,
1690                                 (ntohs(iph->tot_len) - (iph->ihl * 4)),
1691                                 iph->saddr, 0, ipprot);
1692 
1693                 }
1694 
1695                 /*
1696                  *      All protocols checked.
1697                  *      If this packet was a broadcast, we may *not* reply to it, since that
1698                  *      causes (proven, grin) ARP storms and a leakage of memory (i.e. all
1699                  *      ICMP reply messages get queued up for transmission...)
1700                  */
1701 
1702                 if(raw_sk!=NULL)        /* Shift to last raw user */
1703                         raw_rcv(raw_sk, skb, dev, iph->saddr, iph->daddr);
1704                 else if (!flag)         /* Free and report errors */
1705                 {
1706                         if (brd != IS_BROADCAST && brd!=IS_MULTICAST)
1707                                 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PROT_UNREACH, 0, dev);   
1708                         kfree_skb(skb, FREE_WRITE);
1709                 }
1710 
1711                 return(0);
1712         }
1713 
1714         /*
1715          *      Do any IP forwarding required.
1716          */
1717         
1718         /*
1719          *      Don't forward multicast or broadcast frames.
1720          */
1721 
1722         if(skb->pkt_type!=PACKET_HOST || brd==IS_BROADCAST)
1723         {
1724                 kfree_skb(skb,FREE_WRITE);
1725                 return 0;
1726         }
1727 
1728         /*
1729          *      The packet is for another target. Forward the frame
1730          */
1731 
1732 #ifdef CONFIG_IP_FORWARD
1733         if(ip_forward(skb, dev, is_frag, target_addr, target_strict))
1734                 kfree_skb(skb, FREE_WRITE);
1735 #else
1736 /*      printk("Machine %lx tried to use us as a forwarder to %lx but we have forwarding disabled!\n",
1737                         iph->saddr,iph->daddr);*/
1738         ip_statistics.IpInAddrErrors++;
1739         kfree_skb(skb, FREE_WRITE);
1740 #endif
1741         return(0);
1742 }
1743         
1744 
1745 /*
1746  *      Loop a packet back to the sender.
1747  */
1748  
1749 static void ip_loopback(struct device *old_dev, struct sk_buff *skb)
     /* [previous][next][first][last][top][bottom][index][help] */
1750 {
1751         struct device *dev=&loopback_dev;
1752         int len=ntohs(skb->ip_hdr->tot_len);
1753         struct sk_buff *newskb=dev_alloc_skb(len+dev->hard_header_len+15);
1754         
1755         if(newskb==NULL)
1756                 return;
1757                 
1758         newskb->link3=NULL;
1759         newskb->sk=NULL;
1760         newskb->dev=dev;
1761         newskb->saddr=skb->saddr;
1762         newskb->daddr=skb->daddr;
1763         newskb->raddr=skb->raddr;
1764         newskb->free=1;
1765         newskb->lock=0;
1766         newskb->users=0;
1767         newskb->pkt_type=skb->pkt_type;
1768         
1769         /*
1770          *      Put a MAC header on the packet
1771          */
1772         ip_send(newskb, skb->ip_hdr->daddr, len, dev, skb->ip_hdr->saddr);
1773         /*
1774          *      Add the rest of the data space. 
1775          */
1776         newskb->ip_hdr=(struct iphdr *)skb_put(newskb, len);
1777         /*
1778          *      Copy the data
1779          */
1780         memcpy(newskb->ip_hdr,skb->ip_hdr,len);
1781 
1782         /* Recurse. The device check against IFF_LOOPBACK will stop infinite recursion */
1783                 
1784         /*printk("Loopback output queued [%lX to %lX].\n", newskb->ip_hdr->saddr,newskb->ip_hdr->daddr);*/
1785         ip_queue_xmit(NULL, dev, newskb, 1);
1786 }
1787 
1788 
1789 /*
1790  * Queues a packet to be sent, and starts the transmitter
1791  * if necessary.  if free = 1 then we free the block after
1792  * transmit, otherwise we don't. If free==2 we not only
1793  * free the block but also don't assign a new ip seq number.
1794  * This routine also needs to put in the total length,
1795  * and compute the checksum
1796  */
1797 
1798 void ip_queue_xmit(struct sock *sk, struct device *dev,
     /* [previous][next][first][last][top][bottom][index][help] */
1799               struct sk_buff *skb, int free)
1800 {
1801         struct iphdr *iph;
1802 /*      unsigned char *ptr;*/
1803 
1804         /* Sanity check */
1805         if (dev == NULL)
1806         {
1807                 NETDEBUG(printk("IP: ip_queue_xmit dev = NULL\n"));
1808                 return;
1809         }
1810 
1811         IS_SKB(skb);
1812 
1813         /*
1814          *      Do some book-keeping in the packet for later
1815          */
1816 
1817 
1818         skb->dev = dev;
1819         skb->when = jiffies;
1820 
1821         /*
1822          *      Find the IP header and set the length. This is bad
1823          *      but once we get the skb data handling code in the
1824          *      hardware will push its header sensibly and we will
1825          *      set skb->ip_hdr to avoid this mess and the fixed
1826          *      header length problem
1827          */
1828 
1829 #if 0
1830         ptr = skb->data;
1831         ptr += dev->hard_header_len;
1832         iph = (struct iphdr *)ptr;      
1833         skb->ip_hdr = iph;
1834 #else
1835         iph = skb->ip_hdr;
1836 #endif
1837         iph->tot_len = ntohs(skb->len-(((unsigned char *)iph)-skb->data));
1838 
1839 #ifdef CONFIG_IP_FIREWALL
1840         if(ip_fw_chk(iph, dev, ip_fw_blk_chain, ip_fw_blk_policy, 0) < FW_ACCEPT)
1841                 /* just don't send this packet */
1842                 return;
1843 #endif  
1844 
1845         /*
1846          *      No reassigning numbers to fragments...
1847          */
1848 
1849         if(free!=2)
1850                 iph->id      = htons(ip_id_count++);
1851         else
1852                 free=1;
1853 
1854         /* All buffers without an owner socket get freed */
1855         if (sk == NULL)
1856                 free = 1;
1857 
1858         skb->free = free;
1859 
1860         /*
1861          *      Do we need to fragment. Again this is inefficient.
1862          *      We need to somehow lock the original buffer and use
1863          *      bits of it.
1864          */
1865 
1866         if(ntohs(iph->tot_len)> dev->mtu)
1867         {
1868                 ip_fragment(sk,skb,dev,0);
1869                 IS_SKB(skb);
1870                 kfree_skb(skb,FREE_WRITE);
1871                 return;
1872         }
1873 
1874         /*
1875          *      Add an IP checksum
1876          */
1877 
1878         ip_send_check(iph);
1879 
1880         /*
1881          *      Print the frame when debugging
1882          */
1883 
1884         /*
1885          *      More debugging. You cannot queue a packet already on a list
1886          *      Spot this and moan loudly.
1887          */
1888         if (skb->next != NULL)
1889         {
1890                 NETDEBUG(printk("ip_queue_xmit: next != NULL\n"));
1891                 skb_unlink(skb);
1892         }
1893 
1894         /*
1895          *      If a sender wishes the packet to remain unfreed
1896          *      we add it to his send queue. This arguably belongs
1897          *      in the TCP level since nobody else uses it. BUT
1898          *      remember IPng might change all the rules.
1899          */
1900 
1901         if (!free)
1902         {
1903                 unsigned long flags;
1904                 /* The socket now has more outstanding blocks */
1905 
1906                 sk->packets_out++;
1907 
1908                 /* Protect the list for a moment */
1909                 save_flags(flags);
1910                 cli();
1911 
1912                 if (skb->link3 != NULL)
1913                 {
1914                         NETDEBUG(printk("ip.c: link3 != NULL\n"));
1915                         skb->link3 = NULL;
1916                 }
1917                 if (sk->send_head == NULL)
1918                 {
1919                         sk->send_tail = skb;
1920                         sk->send_head = skb;
1921                 }
1922                 else
1923                 {
1924                         sk->send_tail->link3 = skb;
1925                         sk->send_tail = skb;
1926                 }
1927                 /* skb->link3 is NULL */
1928 
1929                 /* Interrupt restore */
1930                 restore_flags(flags);
1931         }
1932         else
1933                 /* Remember who owns the buffer */
1934                 skb->sk = sk;
1935 
1936         /*
1937          *      If the indicated interface is up and running, send the packet.
1938          */
1939          
1940         ip_statistics.IpOutRequests++;
1941 #ifdef CONFIG_IP_ACCT
1942         ip_fw_chk(iph,dev,ip_acct_chain,IP_FW_F_ACCEPT,1);
1943 #endif  
1944         
1945 #ifdef CONFIG_IP_MULTICAST      
1946 
1947         /*
1948          *      Multicasts are looped back for other local users
1949          */
1950          
1951         if (MULTICAST(iph->daddr) && !(dev->flags&IFF_LOOPBACK))
1952         {
1953                 if(sk==NULL || sk->ip_mc_loop)
1954                 {
1955                         if(iph->daddr==IGMP_ALL_HOSTS || (dev->flags&IFF_ALLMULTI))
1956                         {
1957                                 ip_loopback(dev,skb);
1958                         }
1959                         else
1960                         {
1961                                 struct ip_mc_list *imc=dev->ip_mc_list;
1962                                 while(imc!=NULL)
1963                                 {
1964                                         if(imc->multiaddr==iph->daddr)
1965                                         {
1966                                                 ip_loopback(dev,skb);
1967                                                 break;
1968                                         }
1969                                         imc=imc->next;
1970                                 }
1971                         }
1972                 }
1973                 /* Multicasts with ttl 0 must not go beyond the host */
1974                 
1975                 if(skb->ip_hdr->ttl==0)
1976                 {
1977                         kfree_skb(skb, FREE_READ);
1978                         return;
1979                 }
1980         }
1981 #endif
1982         if((dev->flags&IFF_BROADCAST) && (iph->daddr==dev->pa_brdaddr||iph->daddr==0xFFFFFFFF) && !(dev->flags&IFF_LOOPBACK))
1983                 ip_loopback(dev,skb);
1984                 
1985         if (dev->flags & IFF_UP)
1986         {
1987                 /*
1988                  *      If we have an owner use its priority setting,
1989                  *      otherwise use NORMAL
1990                  */
1991 
1992                 if (sk != NULL)
1993                 {
1994                         dev_queue_xmit(skb, dev, sk->priority);
1995                 }
1996                 else
1997                 {
1998                         dev_queue_xmit(skb, dev, SOPRI_NORMAL);
1999                 }
2000         }
2001         else
2002         {
2003                 if(sk)
2004                         sk->err = ENETDOWN;
2005                 ip_statistics.IpOutDiscards++;
2006                 if (free)
2007                         kfree_skb(skb, FREE_WRITE);
2008         }
2009 }
2010 
2011 
2012 
2013 #ifdef CONFIG_IP_MULTICAST
2014 
2015 /*
2016  *      Write an multicast group list table for the IGMP daemon to
2017  *      read.
2018  */
2019  
2020 int ip_mc_procinfo(char *buffer, char **start, off_t offset, int length, int dummy)
     /* [previous][next][first][last][top][bottom][index][help] */
2021 {
2022         off_t pos=0, begin=0;
2023         struct ip_mc_list *im;
2024         unsigned long flags;
2025         int len=0;
2026         struct device *dev;
2027         
2028         len=sprintf(buffer,"Device    : Count\tGroup    Users Timer\n");  
2029         save_flags(flags);
2030         cli();
2031         
2032         for(dev = dev_base; dev; dev = dev->next)
2033         {
2034                 if((dev->flags&IFF_UP)&&(dev->flags&IFF_MULTICAST))
2035                 {
2036                         len+=sprintf(buffer+len,"%-10s: %5d\n",
2037                                         dev->name, dev->mc_count);
2038                         for(im = dev->ip_mc_list; im; im = im->next)
2039                         {
2040                                 len+=sprintf(buffer+len,
2041                                         "\t\t\t%08lX %5d %d:%08lX\n",
2042                                         im->multiaddr, im->users,
2043                                         im->tm_running, im->timer.expires-jiffies);
2044                                 pos=begin+len;
2045                                 if(pos<offset)
2046                                 {
2047                                         len=0;
2048                                         begin=pos;
2049                                 }
2050                                 if(pos>offset+length)
2051                                         break;
2052                         }
2053                 }
2054         }
2055         restore_flags(flags);
2056         *start=buffer+(offset-begin);
2057         len-=(offset-begin);
2058         if(len>length)
2059                 len=length;     
2060         return len;
2061 }
2062 
2063 
2064 /*
2065  *      Socket option code for IP. This is the end of the line after any TCP,UDP etc options on
2066  *      an IP socket.
2067  *
2068  *      We implement IP_TOS (type of service), IP_TTL (time to live).
2069  *
2070  *      Next release we will sort out IP_OPTIONS since for some people they are kind of important.
2071  */
2072 
2073 static struct device *ip_mc_find_devfor(unsigned long addr)
     /* [previous][next][first][last][top][bottom][index][help] */
2074 {
2075         struct device *dev;
2076         for(dev = dev_base; dev; dev = dev->next)
2077         {
2078                 if((dev->flags&IFF_UP)&&(dev->flags&IFF_MULTICAST)&&
2079                         (dev->pa_addr==addr))
2080                         return dev;
2081         }
2082 
2083         return NULL;
2084 }
2085 
2086 #endif
2087 
2088 int ip_setsockopt(struct sock *sk, int level, int optname, char *optval, int optlen)
     /* [previous][next][first][last][top][bottom][index][help] */
2089 {
2090         int val,err;
2091         unsigned char ucval;
2092 #if defined(CONFIG_IP_FIREWALL) || defined(CONFIG_IP_ACCT)
2093         struct ip_fw tmp_fw;
2094 #endif  
2095         if (optval == NULL)
2096         {
2097                 val=0;
2098                 ucval=0;
2099         }
2100         else
2101         {
2102                 err=verify_area(VERIFY_READ, optval, sizeof(int));
2103                 if(err)
2104                         return err;
2105                 val = get_user((int *) optval);
2106                 ucval=get_user((unsigned char *) optval);
2107         }
2108         
2109         if(level!=SOL_IP)
2110                 return -EOPNOTSUPP;
2111 #ifdef CONFIG_IP_MROUTE
2112         if(optname>=MRT_BASE && optname <=MRT_BASE+10)
2113         {
2114                 return ip_mroute_setsockopt(sk,optname,optval,optlen);
2115         }
2116 #endif
2117         
2118         switch(optname)
2119         {
2120                 case IP_TOS:
2121                         if(val<0||val>255)
2122                                 return -EINVAL;
2123                         sk->ip_tos=val;
2124                         if(val==IPTOS_LOWDELAY)
2125                                 sk->priority=SOPRI_INTERACTIVE;
2126                         if(val==IPTOS_THROUGHPUT)
2127                                 sk->priority=SOPRI_BACKGROUND;
2128                         return 0;
2129                 case IP_TTL:
2130                         if(val<1||val>255)
2131                                 return -EINVAL;
2132                         sk->ip_ttl=val;
2133                         return 0;
2134                 case IP_HDRINCL:
2135                         if(sk->type!=SOCK_RAW)
2136                                 return -ENOPROTOOPT;
2137                         sk->ip_hdrincl=val?1:0;
2138                         return 0;
2139 #ifdef CONFIG_IP_MULTICAST
2140                 case IP_MULTICAST_TTL: 
2141                 {
2142                         sk->ip_mc_ttl=(int)ucval;
2143                         return 0;
2144                 }
2145                 case IP_MULTICAST_LOOP: 
2146                 {
2147                         if(ucval!=0 && ucval!=1)
2148                                  return -EINVAL;
2149                         sk->ip_mc_loop=(int)ucval;
2150                         return 0;
2151                 }
2152                 case IP_MULTICAST_IF: 
2153                 {
2154                         struct in_addr addr;
2155                         struct device *dev=NULL;
2156                         
2157                         /*
2158                          *      Check the arguments are allowable
2159                          */
2160 
2161                         err=verify_area(VERIFY_READ, optval, sizeof(addr));
2162                         if(err)
2163                                 return err;
2164                                 
2165                         memcpy_fromfs(&addr,optval,sizeof(addr));
2166                         
2167                         
2168                         /*
2169                          *      What address has been requested
2170                          */
2171                         
2172                         if(addr.s_addr==INADDR_ANY)     /* Default */
2173                         {
2174                                 sk->ip_mc_name[0]=0;
2175                                 return 0;
2176                         }
2177                         
2178                         /*
2179                          *      Find the device
2180                          */
2181                          
2182                         dev=ip_mc_find_devfor(addr.s_addr);
2183                                                 
2184                         /*
2185                          *      Did we find one
2186                          */
2187                          
2188                         if(dev) 
2189                         {
2190                                 strcpy(sk->ip_mc_name,dev->name);
2191                                 return 0;
2192                         }
2193                         return -EADDRNOTAVAIL;
2194                 }
2195                 
2196                 case IP_ADD_MEMBERSHIP: 
2197                 {
2198                 
2199 /*
2200  *      FIXME: Add/Del membership should have a semaphore protecting them from re-entry
2201  */
2202                         struct ip_mreq mreq;
2203                         __u32 route_src;
2204                         struct rtable *rt;
2205                         struct device *dev=NULL;
2206                         
2207                         /*
2208                          *      Check the arguments.
2209                          */
2210 
2211                         err=verify_area(VERIFY_READ, optval, sizeof(mreq));
2212                         if(err)
2213                                 return err;
2214 
2215                         memcpy_fromfs(&mreq,optval,sizeof(mreq));
2216 
2217                         /* 
2218                          *      Get device for use later
2219                          */
2220 
2221                         if(mreq.imr_interface.s_addr==INADDR_ANY) 
2222                         {
2223                                 /*
2224                                  *      Not set so scan.
2225                                  */
2226                                 if((rt=ip_rt_route(mreq.imr_multiaddr.s_addr,NULL, &route_src))!=NULL)
2227                                 {
2228                                         dev=rt->rt_dev;
2229                                         rt->rt_use--;
2230                                 }
2231                         }
2232                         else
2233                         {
2234                                 /*
2235                                  *      Find a suitable device.
2236                                  */
2237                                 
2238                                 dev=ip_mc_find_devfor(mreq.imr_interface.s_addr);
2239                         }
2240                         
2241                         /*
2242                          *      No device, no cookies.
2243                          */
2244                          
2245                         if(!dev)
2246                                 return -ENODEV;
2247                                 
2248                         /*
2249                          *      Join group.
2250                          */
2251                          
2252                         return ip_mc_join_group(sk,dev,mreq.imr_multiaddr.s_addr);
2253                 }
2254                 
2255                 case IP_DROP_MEMBERSHIP: 
2256                 {
2257                         struct ip_mreq mreq;
2258                         struct rtable *rt;
2259                         __u32 route_src;
2260                         struct device *dev=NULL;
2261 
2262                         /*
2263                          *      Check the arguments
2264                          */
2265                          
2266                         err=verify_area(VERIFY_READ, optval, sizeof(mreq));
2267                         if(err)
2268                                 return err;
2269 
2270                         memcpy_fromfs(&mreq,optval,sizeof(mreq));
2271 
2272                         /*
2273                          *      Get device for use later 
2274                          */
2275  
2276                         if(mreq.imr_interface.s_addr==INADDR_ANY) 
2277                         {
2278                                 if((rt=ip_rt_route(mreq.imr_multiaddr.s_addr,NULL, &route_src))!=NULL)
2279                                 {
2280                                         dev=rt->rt_dev;
2281                                         rt->rt_use--;
2282                                 }
2283                         }
2284                         else 
2285                         {
2286                         
2287                                 dev=ip_mc_find_devfor(mreq.imr_interface.s_addr);
2288                         }
2289                         
2290                         /*
2291                          *      Did we find a suitable device.
2292                          */
2293                          
2294                         if(!dev)
2295                                 return -ENODEV;
2296                                 
2297                         /*
2298                          *      Leave group
2299                          */
2300                          
2301                         return ip_mc_leave_group(sk,dev,mreq.imr_multiaddr.s_addr);
2302                 }
2303 #endif                  
2304 #ifdef CONFIG_IP_FIREWALL
2305                 case IP_FW_ADD_BLK:
2306                 case IP_FW_DEL_BLK:
2307                 case IP_FW_ADD_FWD:
2308                 case IP_FW_DEL_FWD:
2309                 case IP_FW_CHK_BLK:
2310                 case IP_FW_CHK_FWD:
2311                 case IP_FW_FLUSH_BLK:
2312                 case IP_FW_FLUSH_FWD:
2313                 case IP_FW_ZERO_BLK:
2314                 case IP_FW_ZERO_FWD:
2315                 case IP_FW_POLICY_BLK:
2316                 case IP_FW_POLICY_FWD:
2317                         if(!suser())
2318                                 return -EPERM;
2319                         if(optlen>sizeof(tmp_fw) || optlen<1)
2320                                 return -EINVAL;
2321                         err=verify_area(VERIFY_READ,optval,optlen);
2322                         if(err)
2323                                 return err;
2324                         memcpy_fromfs(&tmp_fw,optval,optlen);
2325                         err=ip_fw_ctl(optname, &tmp_fw,optlen);
2326                         return -err;    /* -0 is 0 after all */
2327                         
2328 #endif
2329 #ifdef CONFIG_IP_ACCT
2330                 case IP_ACCT_DEL:
2331                 case IP_ACCT_ADD:
2332                 case IP_ACCT_FLUSH:
2333                 case IP_ACCT_ZERO:
2334                         if(!suser())
2335                                 return -EPERM;
2336                         if(optlen>sizeof(tmp_fw) || optlen<1)
2337                                 return -EINVAL;
2338                         err=verify_area(VERIFY_READ,optval,optlen);
2339                         if(err)
2340                                 return err;
2341                         memcpy_fromfs(&tmp_fw, optval,optlen);
2342                         err=ip_acct_ctl(optname, &tmp_fw,optlen);
2343                         return -err;    /* -0 is 0 after all */
2344 #endif
2345                 /* IP_OPTIONS and friends go here eventually */
2346                 default:
2347                         return(-ENOPROTOOPT);
2348         }
2349 }
2350 
2351 /*
2352  *      Get the options. Note for future reference. The GET of IP options gets the
2353  *      _received_ ones. The set sets the _sent_ ones.
2354  */
2355 
2356 int ip_getsockopt(struct sock *sk, int level, int optname, char *optval, int *optlen)
     /* [previous][next][first][last][top][bottom][index][help] */
2357 {
2358         int val,err;
2359 #ifdef CONFIG_IP_MULTICAST
2360         int len;
2361 #endif
2362         
2363         if(level!=SOL_IP)
2364                 return -EOPNOTSUPP;
2365 
2366 #ifdef CONFIG_IP_MROUTE
2367         if(optname>=MRT_BASE && optname <=MRT_BASE+10)
2368         {
2369                 return ip_mroute_getsockopt(sk,optname,optval,optlen);
2370         }
2371 #endif
2372 
2373         switch(optname)
2374         {
2375                 case IP_TOS:
2376                         val=sk->ip_tos;
2377                         break;
2378                 case IP_TTL:
2379                         val=sk->ip_ttl;
2380                         break;
2381                 case IP_HDRINCL:
2382                         val=sk->ip_hdrincl;
2383                         break;
2384 #ifdef CONFIG_IP_MULTICAST                      
2385                 case IP_MULTICAST_TTL:
2386                         val=sk->ip_mc_ttl;
2387                         break;
2388                 case IP_MULTICAST_LOOP:
2389                         val=sk->ip_mc_loop;
2390                         break;
2391                 case IP_MULTICAST_IF:
2392                         err=verify_area(VERIFY_WRITE, optlen, sizeof(int));
2393                         if(err)
2394                                 return err;
2395                         len=strlen(sk->ip_mc_name);
2396                         err=verify_area(VERIFY_WRITE, optval, len);
2397                         if(err)
2398                                 return err;
2399                         put_user(len,(int *) optlen);
2400                         memcpy_tofs((void *)optval,sk->ip_mc_name, len);
2401                         return 0;
2402 #endif
2403                 default:
2404                         return(-ENOPROTOOPT);
2405         }
2406         err=verify_area(VERIFY_WRITE, optlen, sizeof(int));
2407         if(err)
2408                 return err;
2409         put_user(sizeof(int),(int *) optlen);
2410 
2411         err=verify_area(VERIFY_WRITE, optval, sizeof(int));
2412         if(err)
2413                 return err;
2414         put_user(val,(int *) optval);
2415 
2416         return(0);
2417 }
2418 
2419 /*
2420  *      Build and send a packet, with as little as one copy
2421  *
2422  *      Doesn't care much about ip options... option length can be
2423  *      different for fragment at 0 and other fragments.
2424  *
2425  *      Note that the fragment at the highest offset is sent first,
2426  *      so the getfrag routine can fill in the TCP/UDP checksum header
2427  *      field in the last fragment it sends... actually it also helps
2428  *      the reassemblers, they can put most packets in at the head of
2429  *      the fragment queue, and they know the total size in advance. This
2430  *      last feature will measurable improve the Linux fragment handler.
2431  *
2432  *      The callback has five args, an arbitrary pointer (copy of frag),
2433  *      the source IP address (may depend on the routing table), the 
2434  *      destination adddress (char *), the offset to copy from, and the
2435  *      length to be copied.
2436  * 
2437  */
2438 
2439 int ip_build_xmit(struct sock *sk,
     /* [previous][next][first][last][top][bottom][index][help] */
2440                    void getfrag (const void *,
2441                                  __u32,
2442                                  char *,
2443                                  unsigned int,  
2444                                  unsigned int),
2445                    const void *frag,
2446                    unsigned short int length,
2447                    __u32 daddr,
2448                    int flags,
2449                    int type) 
2450 {
2451         struct rtable *rt;
2452         unsigned int fraglen, maxfraglen, fragheaderlen;
2453         int offset, mf;
2454         __u32 saddr;
2455         unsigned short id;
2456         struct iphdr *iph;
2457         int local=0;
2458         struct device *dev;
2459         int nfrags=0;
2460         
2461         ip_statistics.IpOutRequests++;
2462 
2463 #ifdef CONFIG_IP_MULTICAST      
2464         if(sk && MULTICAST(daddr) && *sk->ip_mc_name)
2465         {
2466                 dev=dev_get(sk->ip_mc_name);
2467                 if(!dev)
2468                         return -ENODEV;
2469                 rt=NULL;
2470                 if (sk->saddr && (!LOOPBACK(sk->saddr) || LOOPBACK(daddr)))
2471                         saddr = sk->saddr;
2472                 else
2473                         saddr = dev->pa_addr;
2474         }
2475         else
2476         {
2477 #endif  
2478                 /*
2479                  *      Perform the IP routing decisions
2480                  */
2481          
2482                 if(sk->localroute || flags&MSG_DONTROUTE)
2483                         local=1;
2484         
2485                 rt = sk->ip_route_cache;
2486                 
2487                 /*
2488                  *      See if the routing cache is outdated. We need to clean this up once we are happy it is reliable
2489                  *      by doing the invalidation actively in the route change and header change.
2490                  */
2491         
2492                 saddr=sk->ip_route_saddr;        
2493                 if(!rt || sk->ip_route_stamp != rt_stamp || daddr!=sk->ip_route_daddr || sk->ip_route_local!=local || sk->saddr!=sk->ip_route_saddr)
2494                 {
2495                         if(local)
2496                                 rt = ip_rt_local(daddr, NULL, &saddr);
2497                         else
2498                                 rt = ip_rt_route(daddr, NULL, &saddr);
2499                         sk->ip_route_local=local;
2500                         sk->ip_route_daddr=daddr;
2501                         sk->ip_route_saddr=saddr;
2502                         sk->ip_route_stamp=rt_stamp;
2503                         sk->ip_route_cache=rt;
2504                         sk->ip_hcache_ver=NULL;
2505                         sk->ip_hcache_state= 0;
2506                 }
2507                 else if(rt)
2508                 {
2509                         /*
2510                          *      Attempt header caches only if the cached route is being reused. Header cache
2511                          *      is not ultra cheap to set up. This means we only set it up on the second packet,
2512                          *      so one shot communications are not slowed. We assume (seems reasonable) that 2 is
2513                          *      probably going to be a stream of data.
2514                          */
2515                         if(rt->rt_dev->header_cache && sk->ip_hcache_state!= -1)
2516                         {
2517                                 if(sk->ip_hcache_ver==NULL || sk->ip_hcache_stamp!=*sk->ip_hcache_ver)
2518                                         rt->rt_dev->header_cache(rt->rt_dev,sk,saddr,daddr);
2519                                 else
2520                                         /* Can't cache. Remember this */
2521                                         sk->ip_hcache_state= -1;
2522                         }
2523                 }
2524                 
2525                 if (rt == NULL) 
2526                 {
2527                         ip_statistics.IpOutNoRoutes++;
2528                         return(-ENETUNREACH);
2529                 }
2530         
2531                 if (sk->saddr && (!LOOPBACK(sk->saddr) || LOOPBACK(daddr)))
2532                         saddr = sk->saddr;
2533                         
2534                 dev=rt->rt_dev;
2535 #ifdef CONFIG_IP_MULTICAST
2536         }
2537 #endif          
2538 
2539         /*
2540          *      Now compute the buffer space we require
2541          */ 
2542          
2543         /*
2544          *      Try the simple case first. This leaves broadcast, multicast, fragmented frames, and by
2545          *      choice RAW frames within 20 bytes of maximum size(rare) to the long path
2546          */
2547          
2548         if(length+20 <= dev->mtu && !MULTICAST(daddr) && daddr!=0xFFFFFFFF && daddr!=dev->pa_brdaddr)
2549         {       
2550                 int error;
2551                 struct sk_buff *skb=sock_alloc_send_skb(sk, length+20+15+dev->hard_header_len,0, 0,&error);
2552                 if(skb==NULL)
2553                 {
2554                         ip_statistics.IpOutDiscards++;
2555                         return error;
2556                 }
2557                 skb->dev=dev;
2558                 skb->free=1;
2559                 skb->when=jiffies;
2560                 skb->sk=sk;
2561                 skb->arp=0;
2562                 skb->saddr=saddr;
2563                 length+=20;     /* We do this twice so the subtract once is quicker */
2564                 skb->raddr=(rt&&rt->rt_gateway)?rt->rt_gateway:daddr;
2565                 skb_reserve(skb,(dev->hard_header_len+15)&~15);
2566                 if(sk->ip_hcache_state>0)
2567                 {
2568                         memcpy(skb_push(skb,dev->hard_header_len),sk->ip_hcache_data,dev->hard_header_len);
2569                         skb->arp=1;
2570                 }
2571                 else if(dev->hard_header)
2572                 {
2573                         if(dev->hard_header(skb,dev,ETH_P_IP,NULL,NULL,0)>0)
2574                                 skb->arp=1;
2575                 }
2576                 else
2577                         skb->arp=1;
2578                 skb->ip_hdr=iph=(struct iphdr *)skb_put(skb,length);
2579                 dev_lock_list();
2580                 if(!sk->ip_hdrincl)
2581                 {
2582                         iph->version=4;
2583                         iph->ihl=5;
2584                         iph->tos=sk->ip_tos;
2585                         iph->tot_len = htons(length);
2586                         iph->id=htons(ip_id_count++);
2587                         iph->frag_off = 0;
2588                         iph->ttl=sk->ip_ttl;
2589                         iph->protocol=type;
2590                         iph->saddr=saddr;
2591                         iph->daddr=daddr;
2592                         iph->check=0;
2593                         iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
2594                         getfrag(frag,saddr,(void *)(iph+1),0, length-20);
2595                 }
2596                 else
2597                         getfrag(frag,saddr,(void *)iph,0,length-20);
2598                 dev_unlock_list();
2599 #ifdef CONFIG_IP_FIREWALL
2600                 if(ip_fw_chk(iph, dev, ip_fw_blk_chain, ip_fw_blk_policy,0) < FW_ACCEPT)
2601                 {
2602                         kfree_skb(skb, FREE_WRITE);
2603                         return -EPERM;
2604                 }
2605 #endif
2606 #ifdef CONFIG_IP_ACCT
2607                 ip_fw_chk((void *)skb->data,dev,ip_acct_chain, IP_FW_F_ACCEPT,1);
2608 #endif          
2609                 if(dev->flags&IFF_UP)
2610                         dev_queue_xmit(skb,dev,sk->priority);
2611                 else
2612                 {
2613                         ip_statistics.IpOutDiscards++;
2614                         kfree_skb(skb, FREE_WRITE);
2615                 }
2616                 return 0;
2617         }
2618                         
2619                         
2620         fragheaderlen = dev->hard_header_len;
2621         if(!sk->ip_hdrincl)
2622                 fragheaderlen += 20;
2623                 
2624         /*
2625          *      Fragheaderlen is the size of 'overhead' on each buffer. Now work
2626          *      out the size of the frames to send.
2627          */
2628          
2629         maxfraglen = ((dev->mtu-20) & ~7) + fragheaderlen;
2630         
2631         /*
2632          *      Start at the end of the frame by handling the remainder.
2633          */
2634          
2635         offset = length - (length % (maxfraglen - fragheaderlen));
2636         
2637         /*
2638          *      Amount of memory to allocate for final fragment.
2639          */
2640          
2641         fraglen = length - offset + fragheaderlen;
2642         
2643         if(length-offset==0)
2644         {
2645                 fraglen = maxfraglen;
2646                 offset -= maxfraglen-fragheaderlen;
2647         }
2648         
2649         
2650         /*
2651          *      The last fragment will not have MF (more fragments) set.
2652          */
2653          
2654         mf = 0;
2655 
2656         /*
2657          *      Can't fragment raw packets 
2658          */
2659          
2660         if (sk->ip_hdrincl && offset > 0)
2661                 return(-EMSGSIZE);
2662 
2663         /*
2664          *      Lock the device lists.
2665          */
2666 
2667         dev_lock_list();
2668         
2669         /*
2670          *      Get an identifier
2671          */
2672          
2673         id = htons(ip_id_count++);
2674 
2675         /*
2676          *      Being outputting the bytes.
2677          */
2678          
2679         do 
2680         {
2681                 struct sk_buff * skb;
2682                 int error;
2683                 char *data;
2684 
2685                 /*
2686                  *      Get the memory we require with some space left for alignment.
2687                  */
2688 
2689                 skb = sock_alloc_send_skb(sk, fraglen+15, 0, 0, &error);
2690                 if (skb == NULL)
2691                 {
2692                         ip_statistics.IpOutDiscards++;
2693                         if(nfrags>1)
2694                                 ip_statistics.IpFragCreates++;                  
2695                         dev_unlock_list();
2696                         return(error);
2697                 }
2698                 
2699                 /*
2700                  *      Fill in the control structures
2701                  */
2702                  
2703                 skb->next = skb->prev = NULL;
2704                 skb->dev = dev;
2705                 skb->when = jiffies;
2706                 skb->free = 1; /* dubious, this one */
2707                 skb->sk = sk;
2708                 skb->arp = 0;
2709                 skb->saddr = saddr;
2710                 skb->raddr = (rt&&rt->rt_gateway) ? rt->rt_gateway : daddr;
2711                 skb_reserve(skb,(dev->hard_header_len+15)&~15);
2712                 data = skb_put(skb, fraglen-dev->hard_header_len);
2713 
2714                 /*
2715                  *      Save us ARP and stuff. In the optimal case we do no route lookup (route cache ok)
2716                  *      no ARP lookup (arp cache ok) and output. The cache checks are still too slow but
2717                  *      this can be fixed later. For gateway routes we ought to have a rt->.. header cache
2718                  *      pointer to speed header cache builds for identical targets.
2719                  */
2720                  
2721                 if(sk->ip_hcache_state>0)
2722                 {
2723                         memcpy(skb_push(skb,dev->hard_header_len),sk->ip_hcache_data, dev->hard_header_len);
2724                         skb->arp=1;
2725                 }
2726                 else if (dev->hard_header)
2727                 {
2728                         if(dev->hard_header(skb, dev, ETH_P_IP, 
2729                                                 NULL, NULL, 0)>0)
2730                                 skb->arp=1;
2731                 }
2732                 
2733                 /*
2734                  *      Find where to start putting bytes.
2735                  */
2736                  
2737                 skb->ip_hdr = iph = (struct iphdr *)data;
2738 
2739                 /*
2740                  *      Only write IP header onto non-raw packets 
2741                  */
2742                  
2743                 if(!sk->ip_hdrincl) 
2744                 {
2745 
2746                         iph->version = 4;
2747                         iph->ihl = 5; /* ugh */
2748                         iph->tos = sk->ip_tos;
2749                         iph->tot_len = htons(fraglen - fragheaderlen + iph->ihl*4);
2750                         iph->id = id;
2751                         iph->frag_off = htons(offset>>3);
2752                         iph->frag_off |= mf;
2753 #ifdef CONFIG_IP_MULTICAST
2754                         if (MULTICAST(daddr))
2755                                 iph->ttl = sk->ip_mc_ttl;
2756                         else
2757 #endif
2758                                 iph->ttl = sk->ip_ttl;
2759                         iph->protocol = type;
2760                         iph->check = 0;
2761                         iph->saddr = saddr;
2762                         iph->daddr = daddr;
2763                         iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
2764                         data += iph->ihl*4;
2765                         
2766                         /*
2767                          *      Any further fragments will have MF set.
2768                          */
2769                          
2770                         mf = htons(IP_MF);
2771                 }
2772                 
2773                 /*
2774                  *      User data callback
2775                  */
2776 
2777                 getfrag(frag, saddr, data, offset, fraglen-fragheaderlen);
2778                 
2779                 /*
2780                  *      Account for the fragment.
2781                  */
2782                  
2783 #ifdef CONFIG_IP_FIREWALL
2784                 if(!offset && ip_fw_chk(iph, dev, ip_fw_blk_chain, ip_fw_blk_policy,0) < FW_ACCEPT)
2785                 {
2786                         kfree_skb(skb, FREE_WRITE);
2787                         dev_unlock_list();
2788                         return -EPERM;
2789                 }
2790 #endif          
2791 #ifdef CONFIG_IP_ACCT
2792                 if(!offset)
2793                         ip_fw_chk(iph, dev, ip_acct_chain, IP_FW_F_ACCEPT, 1);
2794 #endif  
2795                 offset -= (maxfraglen-fragheaderlen);
2796                 fraglen = maxfraglen;
2797 
2798 #ifdef CONFIG_IP_MULTICAST
2799 
2800                 /*
2801                  *      Multicasts are looped back for other local users
2802                  */
2803          
2804                 if (MULTICAST(daddr) && !(dev->flags&IFF_LOOPBACK)) 
2805                 {
2806                         /*
2807                          *      Loop back any frames. The check for IGMP_ALL_HOSTS is because
2808                          *      you are always magically a member of this group.
2809                          *
2810                          *      Always loop back all host messages when running as a multicast router.
2811                          */
2812                          
2813                         if(sk==NULL || sk->ip_mc_loop)
2814                         {
2815                                 if(skb->daddr==IGMP_ALL_HOSTS || (dev->flags&IFF_ALLMULTI))
2816                                         ip_loopback(rt?rt->rt_dev:dev,skb);
2817                                 else 
2818                                 {
2819                                         struct ip_mc_list *imc=rt?rt->rt_dev->ip_mc_list:dev->ip_mc_list;
2820                                         while(imc!=NULL) 
2821                                         {
2822                                                 if(imc->multiaddr==daddr) 
2823                                                 {
2824                                                         ip_loopback(rt?rt->rt_dev:dev,skb);
2825                                                         break;
2826                                                 }
2827                                                 imc=imc->next;
2828                                         }
2829                                 }
2830                         }
2831 
2832                         /*
2833                          *      Multicasts with ttl 0 must not go beyond the host. Fixme: avoid the
2834                          *      extra clone.
2835                          */
2836 
2837                         if(skb->ip_hdr->ttl==0)
2838                                 kfree_skb(skb, FREE_READ);
2839                 }
2840 #endif
2841 
2842                 nfrags++;
2843                 
2844                 /*
2845                  *      BSD loops broadcasts
2846                  */
2847                  
2848                 if((dev->flags&IFF_BROADCAST) && (daddr==0xFFFFFFFF || daddr==dev->pa_brdaddr) && !(dev->flags&IFF_LOOPBACK))
2849                         ip_loopback(dev,skb);
2850 
2851                 /*
2852                  *      Now queue the bytes into the device.
2853                  */
2854                  
2855                 if (dev->flags & IFF_UP) 
2856                 {
2857                         dev_queue_xmit(skb, dev, sk->priority);
2858                 } 
2859                 else 
2860                 {
2861                         /*
2862                          *      Whoops... 
2863                          */
2864                          
2865                         ip_statistics.IpOutDiscards++;
2866                         if(nfrags>1)
2867                                 ip_statistics.IpFragCreates+=nfrags;
2868                         kfree_skb(skb, FREE_WRITE);
2869                         dev_unlock_list();
2870                         /*
2871                          *      BSD behaviour.
2872                          */
2873                         if(sk!=NULL)
2874                                 sk->err=ENETDOWN;
2875                         return(0); /* lose rest of fragments */
2876                 }
2877         } 
2878         while (offset >= 0);
2879         if(nfrags>1)
2880                 ip_statistics.IpFragCreates+=nfrags;
2881         dev_unlock_list();
2882         return(0);
2883 }
2884     
2885 
2886 /*
2887  *      IP protocol layer initialiser
2888  */
2889 
2890 static struct packet_type ip_packet_type =
2891 {
2892         0,      /* MUTTER ntohs(ETH_P_IP),*/
2893         NULL,   /* All devices */
2894         ip_rcv,
2895         NULL,
2896         NULL,
2897 };
2898 
2899 /*
2900  *      Device notifier
2901  */
2902  
2903 static int ip_rt_event(unsigned long event, void *ptr)
     /* [previous][next][first][last][top][bottom][index][help] */
2904 {
2905         if(event==NETDEV_DOWN)
2906                 ip_rt_flush(ptr);
2907         return NOTIFY_DONE;
2908 }
2909 
2910 struct notifier_block ip_rt_notifier={
2911         ip_rt_event,
2912         NULL,
2913         0
2914 };
2915 
2916 /*
2917  *      IP registers the packet type and then calls the subprotocol initialisers
2918  */
2919 
2920 void ip_init(void)
     /* [previous][next][first][last][top][bottom][index][help] */
2921 {
2922         ip_packet_type.type=htons(ETH_P_IP);
2923         dev_add_pack(&ip_packet_type);
2924 
2925         /* So we flush routes when a device is downed */        
2926         register_netdevice_notifier(&ip_rt_notifier);
2927 
2928 /*      ip_raw_init();
2929         ip_packet_init();
2930         ip_tcp_init();
2931         ip_udp_init();*/
2932 
2933 #ifdef CONFIG_IP_MULTICAST
2934         proc_net_register(&(struct proc_dir_entry) {
2935                 PROC_NET_IGMP, 4, "igmp",
2936                 S_IFREG | S_IRUGO, 1, 0, 0,
2937                 0, &proc_net_inode_operations,
2938                 ip_mc_procinfo
2939         });
2940 #endif
2941 }
2942 

/* [previous][next][first][last][top][bottom][index][help] */