root/net/ipv4/ip.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. ip_options_build
  2. ip_options_echo
  3. ip_options_fragment
  4. ip_options_compile
  5. ip_ioctl
  6. ip_send
  7. ip_send_room
  8. ip_build_header
  9. ip_send_check
  10. ip_frag_create
  11. ip_find
  12. ip_free
  13. ip_expire
  14. ip_create
  15. ip_done
  16. ip_glue
  17. ip_defrag
  18. ip_fragment
  19. ip_encap
  20. ip_forward
  21. ip_rcv
  22. ip_loopback
  23. ip_queue_xmit
  24. ip_mc_procinfo
  25. ip_mc_find_devfor
  26. ip_setsockopt
  27. ip_getsockopt
  28. ip_build_xmit
  29. ip_netlink_msg
  30. ip_rt_event
  31. ip_init

   1 /*
   2  * INET         An implementation of the TCP/IP protocol suite for the LINUX
   3  *              operating system.  INET is implemented using the  BSD Socket
   4  *              interface as the means of communication with the user level.
   5  *
   6  *              The Internet Protocol (IP) module.
   7  *
   8  * Version:     @(#)ip.c        1.0.16b 9/1/93
   9  *
  10  * Authors:     Ross Biro, <bir7@leland.Stanford.Edu>
  11  *              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
  12  *              Donald Becker, <becker@super.org>
  13  *              Alan Cox, <gw4pts@gw4pts.ampr.org>
  14  *              Richard Underwood
  15  *              Stefan Becker, <stefanb@yello.ping.de>
  16  *              Jorge Cwik, <jorge@laser.satlink.net>
  17  *              Arnt Gulbrandsen, <agulbra@nvg.unit.no>
  18  *              
  19  *
  20  * Fixes:
  21  *              Alan Cox        :       Commented a couple of minor bits of surplus code
  22  *              Alan Cox        :       Undefining IP_FORWARD doesn't include the code
  23  *                                      (just stops a compiler warning).
  24  *              Alan Cox        :       Frames with >=MAX_ROUTE record routes, strict routes or loose routes
  25  *                                      are junked rather than corrupting things.
  26  *              Alan Cox        :       Frames to bad broadcast subnets are dumped
  27  *                                      We used to process them non broadcast and
  28  *                                      boy could that cause havoc.
  29  *              Alan Cox        :       ip_forward sets the free flag on the
  30  *                                      new frame it queues. Still crap because
  31  *                                      it copies the frame but at least it
  32  *                                      doesn't eat memory too.
  33  *              Alan Cox        :       Generic queue code and memory fixes.
  34  *              Fred Van Kempen :       IP fragment support (borrowed from NET2E)
  35  *              Gerhard Koerting:       Forward fragmented frames correctly.
  36  *              Gerhard Koerting:       Fixes to my fix of the above 8-).
  37  *              Gerhard Koerting:       IP interface addressing fix.
  38  *              Linus Torvalds  :       More robustness checks
  39  *              Alan Cox        :       Even more checks: Still not as robust as it ought to be
  40  *              Alan Cox        :       Save IP header pointer for later
  41  *              Alan Cox        :       ip option setting
  42  *              Alan Cox        :       Use ip_tos/ip_ttl settings
  43  *              Alan Cox        :       Fragmentation bogosity removed
  44  *                                      (Thanks to Mark.Bush@prg.ox.ac.uk)
  45  *              Dmitry Gorodchanin :    Send of a raw packet crash fix.
  46  *              Alan Cox        :       Silly ip bug when an overlength
  47  *                                      fragment turns up. Now frees the
  48  *                                      queue.
  49  *              Linus Torvalds/ :       Memory leakage on fragmentation
  50  *              Alan Cox        :       handling.
  51  *              Gerhard Koerting:       Forwarding uses IP priority hints
  52  *              Teemu Rantanen  :       Fragment problems.
  53  *              Alan Cox        :       General cleanup, comments and reformat
  54  *              Alan Cox        :       SNMP statistics
  55  *              Alan Cox        :       BSD address rule semantics. Also see
  56  *                                      UDP as there is a nasty checksum issue
  57  *                                      if you do things the wrong way.
  58  *              Alan Cox        :       Always defrag, moved IP_FORWARD to the config.in file
  59  *              Alan Cox        :       IP options adjust sk->priority.
  60  *              Pedro Roque     :       Fix mtu/length error in ip_forward.
  61  *              Alan Cox        :       Avoid ip_chk_addr when possible.
  62  *      Richard Underwood       :       IP multicasting.
  63  *              Alan Cox        :       Cleaned up multicast handlers.
  64  *              Alan Cox        :       RAW sockets demultiplex in the BSD style.
  65  *              Gunther Mayer   :       Fix the SNMP reporting typo
  66  *              Alan Cox        :       Always in group 224.0.0.1
  67  *      Pauline Middelink       :       Fast ip_checksum update when forwarding
  68  *                                      Masquerading support.
  69  *              Alan Cox        :       Multicast loopback error for 224.0.0.1
  70  *              Alan Cox        :       IP_MULTICAST_LOOP option.
  71  *              Alan Cox        :       Use notifiers.
  72  *              Bjorn Ekwall    :       Removed ip_csum (from slhc.c too)
  73  *              Bjorn Ekwall    :       Moved ip_fast_csum to ip.h (inline!)
  74  *              Stefan Becker   :       Send out ICMP HOST REDIRECT
  75  *      Arnt Gulbrandsen        :       ip_build_xmit
  76  *              Alan Cox        :       Per socket routing cache
  77  *              Alan Cox        :       Fixed routing cache, added header cache.
  78  *              Alan Cox        :       Loopback didn't work right in original ip_build_xmit - fixed it.
  79  *              Alan Cox        :       Only send ICMP_REDIRECT if src/dest are the same net.
  80  *              Alan Cox        :       Incoming IP option handling.
  81  *              Alan Cox        :       Set saddr on raw output frames as per BSD.
  82  *              Alan Cox        :       Stopped broadcast source route explosions.
  83  *              Alan Cox        :       Can disable source routing
  84  *              Takeshi Sone    :       Masquerading didn't work.
  85  *      Dave Bonn,Alan Cox      :       Faster IP forwarding whenever possible.
  86  *              Alan Cox        :       Memory leaks, tramples, misc debugging.
  87  *              Alan Cox        :       Fixed multicast (by popular demand 8))
  88  *              Alan Cox        :       Fixed forwarding (by even more popular demand 8))
  89  *              Alan Cox        :       Fixed SNMP statistics [I think]
  90  *      Gerhard Koerting        :       IP fragmentation forwarding fix
  91  *              Alan Cox        :       Device lock against page fault.
  92  *              Alan Cox        :       IP_HDRINCL facility.
  93  *      Werner Almesberger      :       Zero fragment bug
  94  *              Alan Cox        :       RAW IP frame length bug
  95  *              Alan Cox        :       Outgoing firewall on build_xmit
  96  *              A.N.Kuznetsov   :       IP_OPTIONS support throughout the kernel
  97  *              Alan Cox        :       Multicast routing hooks
  98  *
  99  *  
 100  *
 101  * To Fix:
 102  *              IP option processing is mostly not needed. ip_forward needs to know about routing rules
 103  *              and time stamp but that's about all. Use the route mtu field here too
 104  *              IP fragmentation wants rewriting cleanly. The RFC815 algorithm is much more efficient
 105  *              and could be made very efficient with the addition of some virtual memory hacks to permit
 106  *              the allocation of a buffer that can then be 'grown' by twiddling page tables.
 107  *              Output fragmentation wants updating along with the buffer management to use a single 
 108  *              interleaved copy algorithm so that fragmenting has a one copy overhead. Actual packet
 109  *              output should probably do its own fragmentation at the UDP/RAW layer. TCP shouldn't cause
 110  *              fragmentation anyway.
 111  *
 112  *              FIXME: copy frag 0 iph to qp->iph
 113  *
 114  *              This program is free software; you can redistribute it and/or
 115  *              modify it under the terms of the GNU General Public License
 116  *              as published by the Free Software Foundation; either version
 117  *              2 of the License, or (at your option) any later version.
 118  */
 119 
 120 #include <asm/segment.h>
 121 #include <asm/system.h>
 122 #include <linux/types.h>
 123 #include <linux/kernel.h>
 124 #include <linux/sched.h>
 125 #include <linux/mm.h>
 126 #include <linux/string.h>
 127 #include <linux/errno.h>
 128 #include <linux/config.h>
 129 
 130 #include <linux/socket.h>
 131 #include <linux/sockios.h>
 132 #include <linux/in.h>
 133 #include <linux/inet.h>
 134 #include <linux/netdevice.h>
 135 #include <linux/etherdevice.h>
 136 #include <linux/proc_fs.h>
 137 #include <linux/stat.h>
 138 
 139 #include <net/snmp.h>
 140 #include <net/ip.h>
 141 #include <net/protocol.h>
 142 #include <net/route.h>
 143 #include <net/tcp.h>
 144 #include <net/udp.h>
 145 #include <linux/skbuff.h>
 146 #include <net/sock.h>
 147 #include <net/arp.h>
 148 #include <net/icmp.h>
 149 #include <net/raw.h>
 150 #include <net/checksum.h>
 151 #include <linux/igmp.h>
 152 #include <linux/ip_fw.h>
 153 #include <linux/mroute.h>
 154 #include <net/netlink.h>
 155 
 156 #define CONFIG_IP_DEFRAG /* defined unconditionally here; nothing in this part of the file tests it */
 157 
 158 extern int last_retran; /* defined in another file; not referenced in this part of the file */
 159 extern void sort_send(struct sock *sk); /* defined in another file; not referenced in this part of the file */
 160 
 161 #define min(a,b)        ((a)<(b)?(a):(b)) /* the selected argument is evaluated twice: do not pass expressions with side effects */
 162 
 163 /*
 164  *      SNMP management statistics
 165  */
 166 
 167 #ifdef CONFIG_IP_FORWARD
 168 struct ip_mib ip_statistics={1,64,};    /* Forwarding=Yes (1), Default TTL=64; members not listed are zero-initialised */
 169 #else
 170 struct ip_mib ip_statistics={2,64,};    /* Forwarding=No (2), Default TTL=64; members not listed are zero-initialised */
 171 #endif
 172 
 173 /* 
 174  * Write options to IP header, record destination address to
 175  * source route option, address of outgoing interface
 176  * (we should already know it, so that this  function is allowed be
 177  * called only after routing decision) and timestamp,
 178  * if we originate this datagram.
 179  */
 180 
 181 static void ip_options_build(struct sk_buff * skb, struct options * opt,
     /* [previous][next][first][last][top][bottom][index][help] */
 182                             __u32 daddr, __u32 saddr,
 183                             int is_frag) {
 184         unsigned char * iph = (unsigned char*)skb->ip_hdr;
 185 
 186         memcpy(skb->proto_priv, opt, sizeof(struct options));
 187         memcpy(iph+sizeof(struct iphdr), opt->__data, opt->optlen);
 188         opt = (struct options*)skb->proto_priv;
 189         opt->is_data = 0;
 190 
 191         if (opt->srr)
 192           memcpy(iph+opt->srr+iph[opt->srr+1]-4, &daddr, 4);
 193 
 194         if (!is_frag) {
 195                 if (opt->rr_needaddr)
 196                   memcpy(iph+opt->rr+iph[opt->rr+2]-5, &saddr, 4);
 197                 if (opt->ts_needaddr)
 198                   memcpy(iph+opt->ts+iph[opt->ts+2]-9, &saddr, 4);
 199                 if (opt->ts_needtime) {
 200                         struct timeval tv;
 201                         __u32 midtime;
 202                         do_gettimeofday(&tv);
 203                         midtime = htonl((tv.tv_sec % 86400) * 1000 + tv.tv_usec / 1000);
 204                         memcpy(iph+opt->ts+iph[opt->ts+2]-5, &midtime, 4);
 205                 }
 206                 return;
 207         }
 208         if (opt->rr) {
 209                 memset(iph+opt->rr, IPOPT_NOP, iph[opt->rr+1]);
 210                 opt->rr = 0;
 211                 opt->rr_needaddr = 0;
 212         }
 213         if (opt->ts) {
 214                 memset(iph+opt->ts, IPOPT_NOP, iph[opt->ts+1]);
 215                 opt->ts = 0;
 216                 opt->ts_needaddr = opt->ts_needtime = 0;
 217         }
 218 }
 219 
 220 int ip_options_echo(struct options * dopt, struct options * sopt,
     /* [previous][next][first][last][top][bottom][index][help] */
 221                      __u32 daddr, __u32 saddr,
 222                      struct sk_buff * skb) {
 223         unsigned char *sptr, *dptr;
 224         int soffset, doffset;
 225         int     optlen;
 226 
 227         memset(dopt, 0, sizeof(struct options));
 228 
 229         dopt->is_data = 1;
 230 
 231         if (!sopt)
 232           sopt = (struct options*)skb->proto_priv;
 233 
 234         if (sopt->optlen == 0) {
 235                 dopt->optlen = 0;
 236                 return 0;
 237         }
 238 
 239         sptr = (sopt->is_data ? sopt->__data - sizeof(struct iphdr) :
 240                 (unsigned char *)skb->ip_hdr);
 241         dptr = dopt->__data;
 242 
 243         if (sopt->rr) {
 244                 optlen  = sptr[sopt->rr+1];
 245                 soffset = sptr[sopt->rr+2];
 246                 dopt->rr = dopt->optlen + sizeof(struct iphdr);
 247                 memcpy(dptr, sptr+sopt->rr, optlen);
 248                 if (sopt->rr_needaddr && soffset <= optlen) {
 249                         if (soffset + 3 > optlen)
 250                           return -EINVAL;
 251                         dptr[2] = soffset + 4;
 252                         dopt->rr_needaddr = 1;
 253                 }
 254                 dptr     += optlen;
 255                 dopt->optlen += optlen;
 256         }
 257         if (sopt->ts) {
 258                 optlen = sptr[sopt->ts+1];
 259                 soffset = sptr[sopt->ts+2];
 260                 dopt->ts = dopt->optlen + sizeof(struct iphdr);
 261                 memcpy(dptr, sptr+sopt->ts, optlen);
 262                 if (soffset <= optlen) {
 263                         if (dopt->ts_needaddr) {
 264                                 if (soffset + 3 > optlen)
 265                                   return -EINVAL;
 266                                 dopt->ts_needaddr = 1;
 267                                 soffset += 4;
 268                         }
 269                         if (dopt->ts_needtime) {
 270                                 if (soffset + 3 > optlen)
 271                                   return -EINVAL;
 272                                 dopt->ts_needtime = 1;
 273                                 soffset += 4;
 274                         }
 275                         if (((struct timestamp*)(dptr+1))->flags == IPOPT_TS_PRESPEC) {
 276                                 __u32 addr;
 277                                 memcpy(&addr, sptr+soffset-9, 4);
 278                                 if (ip_chk_addr(addr) == 0) {
 279                                         dopt->ts_needtime = 0;
 280                                         dopt->ts_needaddr = 0;
 281                                         soffset -= 8;
 282                                 }
 283                         }
 284                         dptr[2] = soffset;
 285                 }
 286                 dptr += optlen;
 287                 dopt->optlen += optlen;
 288         }
 289         if (sopt->srr) {
 290                 unsigned char * start = sptr+sopt->srr;
 291                 __u32 faddr;
 292 
 293                 optlen  = start[1];
 294                 soffset = start[2];
 295                 doffset = 0;
 296                 if (soffset > optlen)
 297                   soffset = optlen + 1;
 298                 soffset -= 4;
 299                 if (soffset > 3) {
 300                         memcpy(&faddr, &start[soffset-1], 4);
 301                         for (soffset-=4, doffset=4; soffset > 3; soffset-=4, doffset+=4)
 302                           memcpy(&dptr[doffset-1], &start[soffset-1], 4);
 303                         /*
 304                          * RFC1812 requires to fix illegal source routes.
 305                          */
 306                         if (memcmp(&saddr, &start[soffset+3], 4) == 0)
 307                           doffset -= 4;
 308                 }
 309                 if (doffset > 3) {
 310                         memcpy(&start[doffset-1], &daddr, 4);
 311                         dopt->faddr = faddr;
 312                         dptr[0] = start[0];
 313                         dptr[1] = doffset+3;
 314                         dptr[2] = 4;
 315                         dptr += doffset+3;
 316                         dopt->srr = dopt->optlen + sizeof(struct iphdr);
 317                         dopt->optlen += doffset+3;
 318                         dopt->is_strictroute = sopt->is_strictroute;
 319                 }
 320         }
 321         while (dopt->optlen & 3) {
 322                 *dptr++ = IPOPT_END;
 323                 dopt->optlen++;
 324         }
 325         return 0;
 326 }
 327 
 328 static void ip_options_fragment(struct sk_buff * skb) {
     /* [previous][next][first][last][top][bottom][index][help] */
 329         unsigned char * optptr = (unsigned char*)skb->ip_hdr;
 330         struct options * opt = (struct options*)skb->proto_priv;
 331         int  l = opt->optlen;
 332         int  optlen;
 333 
 334         while (l > 0) {
 335                 switch (*optptr) {
 336                       case IPOPT_END:
 337                         return;
 338                       case IPOPT_NOOP:
 339                         l--;
 340                         optptr++;
 341                         continue;
 342                 }
 343                 optlen = optptr[1];
 344                 if (l<2 || optlen>l)
 345                   return;
 346                 if (!(*optptr & 0x80))
 347                   memset(optptr, IPOPT_NOOP, optlen);
 348                 l -= optlen;
 349                 optptr += optlen;
 350         }
 351         opt->ts = 0;
 352         opt->rr = 0;
 353         opt->rr_needaddr = 0;
 354         opt->ts_needaddr = 0;
 355         opt->ts_needtime = 0;
 356         return;
 357 }
 358 
 359 /*
 360  * Verify options and fill pointers in struct optinos.
 361  * Caller should clear *opt, and set opt->data.
 362  * If opt == NULL, then skb->data should point to IP header.
 363  */
 364 
 365 int ip_options_compile(struct options * opt, struct sk_buff * skb)
     /* [previous][next][first][last][top][bottom][index][help] */
 366 {
 367         int l;
 368         unsigned char * iph;
 369         unsigned char * optptr;
 370         int optlen;
 371         unsigned char * pp_ptr = NULL;
 372 
 373         if (!opt) {
 374                 opt = (struct options*)skb->proto_priv;
 375                 memset(opt, 0, sizeof(struct options));
 376                 iph = (unsigned char*)skb->ip_hdr;
 377                 opt->optlen = ((struct iphdr *)iph)->ihl*4 - sizeof(struct iphdr);
 378                 optptr = iph + sizeof(struct iphdr);
 379                 opt->is_data = 0;
 380         } else {
 381                 optptr = opt->is_data ? opt->__data : (unsigned char*)&skb->ip_hdr[1];
 382                 iph = optptr - sizeof(struct iphdr);
 383         }
 384 
 385         for (l = opt->optlen; l > 0; ) {
 386                 switch (*optptr) {
 387                       case IPOPT_END:
 388                         for (optptr++, l--; l>0; l--) {
 389                                 if (*optptr != IPOPT_END) {
 390                                         *optptr = IPOPT_END;
 391                                         opt->is_changed = 1;
 392                                 }
 393                         }
 394                         goto eol;
 395                       case IPOPT_NOOP:
 396                         l--;
 397                         optptr++;
 398                         continue;
 399                 }
 400                 optlen = optptr[1];
 401                 if (l<2 || optlen>l) {
 402                         pp_ptr = optptr;
 403                         break;
 404                 }
 405                 switch (*optptr) {
 406                       case IPOPT_SSRR:
 407                       case IPOPT_LSRR:
 408                         if (optlen < 3) {
 409                                 pp_ptr = optptr + 1;
 410                                 break;
 411                         }
 412                         if (optptr[2] < 4) {
 413                                 pp_ptr = optptr + 2;
 414                                 break;
 415                         }
 416                         /* NB: cf RFC-1812 5.2.4.1 */
 417                         if (opt->srr) {
 418                                 pp_ptr = optptr;
 419                                 break;
 420                         }
 421                         if (!skb) {
 422                                 if (optptr[2] != 4 || optlen < 7 || ((optlen-3) & 3)) {
 423                                         pp_ptr = optptr + 1;
 424                                         break;
 425                                 }
 426                                 memcpy(&opt->faddr, &optptr[3], 4);
 427                                 if (optlen > 7)
 428                                   memmove(&optptr[3], &optptr[7], optlen-7);
 429                         }
 430                         opt->is_strictroute = (optptr[0] == IPOPT_SSRR);
 431                         opt->srr = optptr - iph;
 432                         break;
 433                       case IPOPT_RR:
 434                         if (opt->rr) {
 435                                 pp_ptr = optptr;
 436                                 break;
 437                         }
 438                         if (optlen < 3) {
 439                                 pp_ptr = optptr + 1;
 440                                 break;
 441                         }
 442                         if (optptr[2] < 4) {
 443                                 pp_ptr = optptr + 2;
 444                                 break;
 445                         }
 446                         if (optptr[2] <= optlen) {
 447                                 if (optptr[2]+3 > optlen) {
 448                                         pp_ptr = optptr + 2;
 449                                         break;
 450                                 }
 451                                 if (skb) {
 452                                         memcpy(&optptr[optptr[2]-1], &skb->dev->pa_addr, 4);
 453                                         opt->is_changed = 1;
 454                                 }
 455                                 optptr[2] += 4;
 456                                 opt->rr_needaddr = 1;
 457                         }
 458                         opt->rr = optptr - iph;
 459                         break;
 460                       case IPOPT_TIMESTAMP:
 461                         if (opt->ts) {
 462                                 pp_ptr = optptr;
 463                                 break;
 464                         }
 465                         if (optlen < 4) {
 466                                 pp_ptr = optptr + 1;
 467                                 break;
 468                         }
 469                         if (optptr[2] < 5) {
 470                                 pp_ptr = optptr + 2;
 471                                 break;
 472                         }
 473                         if (optptr[2] <= optlen) {
 474                                 struct timestamp * ts = (struct timestamp*)(optptr+1);
 475                                 __u32 * timeptr = NULL;
 476                                 if (ts->ptr+3 > ts->len) {
 477                                         pp_ptr = optptr + 2;
 478                                         break;
 479                                 }
 480                                 switch (ts->flags) {
 481                                       case IPOPT_TS_TSONLY:
 482                                         opt->ts = optptr - iph;
 483                                         if (skb) {
 484                                                 timeptr = (__u32*)&optptr[ts->ptr-1];
 485                                                 opt->is_changed = 1;
 486                                         }
 487                                         ts->ptr += 4;
 488                                         break;
 489                                       case IPOPT_TS_TSANDADDR:
 490                                         if (ts->ptr+7 > ts->len) {
 491                                                 pp_ptr = optptr + 2;
 492                                                 break;
 493                                         }
 494                                         opt->ts = optptr - iph;
 495                                         if (skb) {
 496                                                 memcpy(&optptr[ts->ptr-1], &skb->dev->pa_addr, 4);
 497                                                 timeptr = (__u32*)&optptr[ts->ptr+3];
 498                                         }
 499                                         opt->ts_needaddr = 1;
 500                                         opt->ts_needtime = 1;
 501                                         ts->ptr += 8;
 502                                         break;
 503                                       case IPOPT_TS_PRESPEC:
 504                                         if (ts->ptr+7 > ts->len) {
 505                                                 pp_ptr = optptr + 2;
 506                                                 break;
 507                                         }
 508                                         opt->ts = optptr - iph;
 509                                         {
 510                                                 __u32 addr;
 511                                                 memcpy(&addr, &optptr[ts->ptr-1], 4);
 512                                                 if (ip_chk_addr(addr) == 0)
 513                                                   break;
 514                                                 if (skb)
 515                                                   timeptr = (__u32*)&optptr[ts->ptr+3];
 516                                         }
 517                                         opt->ts_needaddr = 1;
 518                                         opt->ts_needtime = 1;
 519                                         ts->ptr += 8;
 520                                         break;
 521                                       default:
 522                                         pp_ptr = optptr + 3;
 523                                         break;
 524                                 }
 525                                 if (timeptr) {
 526                                         struct timeval tv;
 527                                         __u32  midtime;
 528                                         do_gettimeofday(&tv);
 529                                         midtime = htonl((tv.tv_sec % 86400) * 1000 + tv.tv_usec / 1000);
 530                                         memcpy(timeptr, &midtime, sizeof(__u32));
 531                                         opt->is_changed = 1;
 532                                 }
 533                         } else {
 534                                 struct timestamp * ts = (struct timestamp*)(optptr+1);
 535                                 if (ts->overflow == 15) {
 536                                         pp_ptr = optptr + 3;
 537                                         break;
 538                                 }
 539                                 opt->ts = optptr - iph;
 540                                 if (skb) {
 541                                         ts->overflow++;
 542                                         opt->is_changed = 1;
 543                                 }
 544                         }
 545                         break;
 546                       case IPOPT_SEC:
 547                       case IPOPT_SID:
 548                       default:
 549                         if (!skb) {
 550                                 pp_ptr = optptr;
 551                                 break;
 552                         }
 553                         break;
 554                 }
 555                 l -= optlen;
 556                 optptr += optlen;
 557         }
 558 
 559 eol:
 560         if (!pp_ptr)
 561           return 0;
 562 
 563         if (skb) {
 564                 icmp_send(skb, ICMP_PARAMETERPROB, 0, pp_ptr-iph, skb->dev);
 565                 kfree_skb(skb, FREE_READ);
 566         }
 567         return -EINVAL;
 568 }
 569 
 /*
  *      ioctl() entry point of the IP layer.  No command is
  *      implemented: whatever cmd is, the request is refused
  *      with -EINVAL.  This hook is scheduled to disappear.
  */

 int ip_ioctl(struct sock *sk, int cmd, unsigned long arg)
 {
         return -EINVAL;
 }
 584 
 585 
 586 /*
 587  *      Take an skb, and fill in the MAC header.
 588  */
 589 
 590 static int ip_send(struct sk_buff *skb, __u32 daddr, int len, struct device *dev, __u32 saddr)
     /* [previous][next][first][last][top][bottom][index][help] */
 591 {
 592         int mac = 0;
 593 
 594         skb->dev = dev;
 595         skb->arp = 1;
 596         if (dev->hard_header)
 597         {
 598                 /*
 599                  *      Build a hardware header. Source address is our mac, destination unknown
 600                  *      (rebuild header will sort this out)
 601                  */
 602                 skb_reserve(skb,(dev->hard_header_len+15)&~15); /* 16 byte aligned IP headers are good */
 603                 mac = dev->hard_header(skb, dev, ETH_P_IP, NULL, NULL, len);
 604                 if (mac < 0)
 605                 {
 606                         mac = -mac;
 607                         skb->arp = 0;
 608                         skb->raddr = daddr;     /* next routing address */
 609                 }
 610         }
 611         return mac;
 612 }
 613 
 614 static int ip_send_room(struct sk_buff *skb, __u32 daddr, int len, struct device *dev, __u32 saddr)
     /* [previous][next][first][last][top][bottom][index][help] */
 615 {
 616         int mac = 0;
 617 
 618         skb->dev = dev;
 619         skb->arp = 1;
 620         if (dev->hard_header)
 621         {
 622                 skb_reserve(skb,MAX_HEADER);
 623                 mac = dev->hard_header(skb, dev, ETH_P_IP, NULL, NULL, len);
 624                 if (mac < 0)
 625                 {
 626                         mac = -mac;
 627                         skb->arp = 0;
 628                         skb->raddr = daddr;     /* next routing address */
 629                 }
 630         }
 631         return mac;
 632 }
 633 
 634 int ip_id_count = 0; /* global counter starting at 0; NOTE(review): presumably the source of IP header id values - its users are outside this part of the file */
 635 
 636 /*
 637  * Build the link-level header and, unless type is IPPROTO_RAW, the
 638  * IP header (plus options) for an outgoing datagram in skb.
 639  * If *dev != NULL the caller's device is used as given, otherwise
 640  * the routing tables select the device and *dev is filled in.
      *
      * skb      - buffer being built; skb->localroute selects ip_rt_local()
      *            instead of ip_rt_route() for the lookup
      * saddr    - source address; replaced by the address the route lookup
      *            reports when it is 0, or when it is a loopback address
      *            and the destination is not
      * daddr    - final destination; when opt->srr is set the header is
      *            addressed to opt->faddr and daddr is handed to
      *            ip_options_build() as the final destination
      * dev      - in/out device pointer, see above
      * type     - IP protocol number; IPPROTO_TCP goes through
      *            ip_send_room(), everything else through ip_send()
      * opt      - IP options to append, may be NULL
      * len      - passed through to the link-level header builder
      * tos, ttl - copied into the IP header
      *
      * Returns the link-level result plus the IP header length (the
      * link-level result alone for IPPROTO_RAW), or -ENETUNREACH when no
      * route is found for a NULL *dev or when a strict source route would
      * have to go through a gateway.  The tot_len, id and check fields of
      * the IP header are not written here.
      *
      * NOTE(review): src is only ever assigned through the &src argument
      * of ip_rt_local()/ip_rt_route().  If those leave it untouched when
      * they return NULL, the *dev != NULL path can read it uninitialised
      * - confirm against the routing code.
 641  */
 642 int ip_build_header(struct sk_buff *skb, __u32 saddr, __u32 daddr,
     /* [previous][next][first][last][top][bottom][index][help] */
 643                 struct device **dev, int type, struct options *opt, int len, int tos, int ttl)
 644 {
 645         struct rtable *rt;
 646         __u32 raddr;
 647         int tmp;
 648         __u32 src;
 649         struct iphdr *iph;
 650         __u32 final_daddr = daddr;
 651 
             /* Source routed: address the packet to the first hop of the route */
 652         if (opt && opt->srr)
 653           daddr = opt->faddr;
 654 
 655         /*
 656          *      See if we need to look up the device.
 657          */
 658 
 659 #ifdef CONFIG_IP_MULTICAST      
             /* Multicast with a named interface on the socket: use that device */
 660         if(MULTICAST(daddr) && *dev==NULL && skb->sk && *skb->sk->ip_mc_name)
 661                 *dev=dev_get(skb->sk->ip_mc_name);
 662 #endif
 663         if (*dev == NULL)
 664         {
 665                 if(skb->localroute)
 666                         rt = ip_rt_local(daddr, NULL, &src);
 667                 else
 668                         rt = ip_rt_route(daddr, NULL, &src);
 669                 if (rt == NULL)
 670                 {
 671                         ip_statistics.IpOutNoRoutes++;
 672                         return(-ENETUNREACH);
 673                 }
 674 
 675                 *dev = rt->rt_dev;
 676                 /*
 677                  *      If the frame is from us and going off machine it MUST MUST MUST
 678                  *      have the output device ip address and never the loopback
 679                  */
 680                 if (LOOPBACK(saddr) && !LOOPBACK(daddr))
 681                         saddr = src;/*rt->rt_dev->pa_addr;*/
 682                 raddr = rt->rt_gateway;
 683 
 684         }
 685         else
 686         {
 687                 /*
 688                  *      We still need the address of the first hop.
 689                  */
 690                 if(skb->localroute)
 691                         rt = ip_rt_local(daddr, NULL, &src);
 692                 else
 693                         rt = ip_rt_route(daddr, NULL, &src);
 694                 /*
 695                  *      If the frame is from us and going off machine it MUST MUST MUST
 696                  *      have the output device ip address and never the loopback
 697                  */
 698                 if (LOOPBACK(saddr) && !LOOPBACK(daddr))
 699                         saddr = src;/*rt->rt_dev->pa_addr;*/
 700 
                     /* A missing route is tolerated here: fall back to "no gateway" */
 701                 raddr = (rt == NULL) ? 0 : rt->rt_gateway;
 702         }
 703 
 704         /*
 705          *      No source addr so make it our addr
 706          */
 707         if (saddr == 0)
 708                 saddr = src;
 709 
 710         /*
 711          *      No gateway so aim at the real destination
 712          */
 713         if (raddr == 0)
 714                 raddr = daddr;
 715 
 716         /*
 717          *      Now build the MAC header.
 718          */
 719 
 720         if(type==IPPROTO_TCP)
 721                 tmp = ip_send_room(skb, raddr, len, *dev, saddr);
 722         else
 723                 tmp = ip_send(skb, raddr, len, *dev, saddr);
 724 
 725         /*
 726          *      Book keeping
 727          */
 728 
 729         skb->dev = *dev;
 730         skb->saddr = saddr;
 731 
 732         /*
 733          *      Now build the IP header.
 734          */
 735 
 736         /*
 737          *      If we are using IPPROTO_RAW, then we don't need an IP header, since
 738          *      one is being supplied to us by the user
 739          */
 740 
 741         if(type == IPPROTO_RAW)
 742                 return (tmp);
 743 
 744         /*
 745          *      Build the IP addresses
 746          */
 747          
 748         if (opt)
 749           iph=(struct iphdr *)skb_put(skb,sizeof(struct iphdr) + opt->optlen);
 750         else
 751           iph=(struct iphdr *)skb_put(skb,sizeof(struct iphdr));
 752 
 753         iph->version  = 4;
 754         iph->ihl      = 5;
 755         iph->tos      = tos;
 756         iph->frag_off = 0;
 757         iph->ttl      = ttl;
 758         iph->daddr    = daddr;
 759         iph->saddr    = saddr;
 760         iph->protocol = type;
 761         skb->ip_hdr   = iph;
 762 
 763         if (!opt || !opt->optlen)
 764           return sizeof(struct iphdr) + tmp;
             /* Strict source route, but the next hop is reached through a gateway: refuse */
 765         if (opt->is_strictroute && rt && rt->rt_gateway) {
 766           ip_statistics.IpOutNoRoutes++;
 767           return -ENETUNREACH;
 768         }
             /* ihl counts 32-bit words, optlen counts bytes */
 769         iph->ihl += opt->optlen>>2;
 770         ip_options_build(skb, opt, final_daddr, (*dev)->pa_addr, 0);
 771         return iph->ihl*4 + tmp;
 772 }
 773 
 774 
 775 /*
 776  *      Generate a checksum for an outgoing IP datagram.
      *
      *      The check field is zeroed first and then overwritten with the
      *      result of ip_fast_csum() over the header; iph->ihl (the header
      *      length in 32-bit words) is passed as the length argument.
 777  */
 778 
 779 void ip_send_check(struct iphdr *iph)
     /* [previous][next][first][last][top][bottom][index][help] */
 780 {
 781         iph->check = 0;
 782         iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
 783 }
 784 
 785 
 786 /************************ Fragment Handlers From NET2E **********************************/
 787 
 788 
 789 /*
 790  *      This fragment handler is a bit of a heap. On the other hand it works quite
 791  *      happily and handles things quite well.
 792  */
 793 
 794 static struct ipq *ipqueue = NULL;              /* head of the doubly linked list of incomplete datagrams; ip_create() inserts at the head, ip_free() unlinks */
 795 
 796 /*
 797  *      Create a new fragment entry.
      *
      *      offset, end - first byte covered and one past the last byte
      *                    covered, as positions in the datagram payload
      *      skb         - buffer holding the fragment
      *      ptr         - start of this fragment's data
      *
      *      Returns a zeroed ipfrag with the fields above filled in and
      *      len set to end - offset (no range check is made here), or
      *      NULL if the GFP_ATOMIC allocation fails.
 798  */
 799 
 800 static struct ipfrag *ip_frag_create(int offset, int end, struct sk_buff *skb, unsigned char *ptr)
     /* [previous][next][first][last][top][bottom][index][help] */
 801 {
 802         struct ipfrag *fp;
 803 
 804         fp = (struct ipfrag *) kmalloc(sizeof(struct ipfrag), GFP_ATOMIC);
 805         if (fp == NULL)
 806         {
 807                 NETDEBUG(printk("IP: frag_create: no memory left !\n"));
 808                 return(NULL);
 809         }
             /* Clears every field, including the prev/next links the caller sets later */
 810         memset(fp, 0, sizeof(struct ipfrag));
 811 
 812         /* Fill in the structure. */
 813         fp->offset = offset;
 814         fp->end = end;
 815         fp->len = end - offset;
 816         fp->skb = skb;
 817         fp->ptr = ptr;
 818 
 819         return(fp);
 820 }
 821 
 822 
 823 /*
 824  *      Find the correct entry in the "incomplete datagrams" queue for
 825  *      this IP datagram, and return the queue entry address if found.
      *
      *      An entry matches when id, saddr, daddr and protocol of its
      *      stored header all equal those of iph.  The list is walked with
      *      interrupts disabled.  On a match the entry's timer is removed
      *      before interrupts are re-enabled, so the caller must re-arm
      *      the timer or free the entry.  Returns NULL if nothing matches.
 826  */
 827 
 828 static struct ipq *ip_find(struct iphdr *iph)
     /* [previous][next][first][last][top][bottom][index][help] */
 829 {
 830         struct ipq *qp;
 832 
 833         cli();
 835         for(qp = ipqueue; qp != NULL; qp = qp->next)
 836         {
 837                 if (iph->id== qp->iph->id && iph->saddr == qp->iph->saddr &&
 838                         iph->daddr == qp->iph->daddr && iph->protocol == qp->iph->protocol)
 839                 {
 840                         del_timer(&qp->timer);  /* So it doesn't vanish on us. The timer will be reset anyway */
 841                         sti();
 842                         return(qp);
 843                 }
 844         }
 845         sti();
 846         return(NULL);
 847 }
 848 
 849 
 850 /*
 851  *      Remove an entry from the "incomplete datagrams" queue, either
 852  *      because we completed, reassembled and processed it, or because
 853  *      it timed out.
      *
      *      Stops the entry's timer, unlinks it from ipqueue, then frees
      *      every fragment (buffer and ipfrag descriptor), the saved IP
      *      header and finally the descriptor itself.  qp must not be
      *      used after this call.  Unlinking and freeing run with
      *      interrupts disabled.
 854  */
 855 
 856 static void ip_free(struct ipq *qp)
     /* [previous][next][first][last][top][bottom][index][help] */
 857 {
 858         struct ipfrag *fp;
 859         struct ipfrag *xp;
 860 
 861         /*
 862          * Stop the timer for this entry.
 863          */
 864 
 865         del_timer(&qp->timer);
 866 
 867         /* Remove this entry from the "incomplete datagrams" queue. */
 868         cli();
 869         if (qp->prev == NULL)
 870         {
                     /* qp is the list head: its successor becomes the new head */
 871                 ipqueue = qp->next;
 872                 if (ipqueue != NULL)
 873                         ipqueue->prev = NULL;
 874         }
 875         else
 876         {
 877                 qp->prev->next = qp->next;
 878                 if (qp->next != NULL)
 879                         qp->next->prev = qp->prev;
 880         }
 881 
 882         /* Release all fragment data. */
 883 
 884         fp = qp->fragments;
 885         while (fp != NULL)
 886         {
                     /* Remember the successor before fp is released */
 887                 xp = fp->next;
 888                 IS_SKB(fp->skb);
 889                 kfree_skb(fp->skb,FREE_READ);
 890                 kfree_s(fp, sizeof(struct ipfrag));
 891                 fp = xp;
 892         }
 893 
 894         /* Release the IP header (same 64 + 8 byte size that ip_create() allocates). */
 895         kfree_s(qp->iph, 64 + 8);
 896 
 897         /* Finally, release the queue descriptor itself. */
 898         kfree_s(qp, sizeof(struct ipq));
 899         sti();
 900 }
 901 
 902 
 903 /*
 904  *      Oops- a fragment queue timed out.  Kill it and send an ICMP reply.
      *
      *      Timer callback installed by ip_create() and ip_defrag(); arg is
      *      the struct ipq pointer stored in timer.data.  Both the
      *      IpReasmTimeout and IpReasmFails counters are bumped, an ICMP
      *      time-exceeded message is sent using the first queued fragment,
      *      and the whole queue entry is freed.
 905  */
 906 
 907 static void ip_expire(unsigned long arg)
     /* [previous][next][first][last][top][bottom][index][help] */
 908 {
 909         struct ipq *qp;
 910 
 911         qp = (struct ipq *)arg;
 912 
 913         /*
 914          *      Send an ICMP "Fragment Reassembly Timeout" message.
 915          */
 916 
 917         ip_statistics.IpReasmTimeout++;
 918         ip_statistics.IpReasmFails++;   
 919         /* This if is always true... shrug */
 920         if(qp->fragments!=NULL)
 921                 icmp_send(qp->fragments->skb,ICMP_TIME_EXCEEDED,
 922                                 ICMP_EXC_FRAGTIME, 0, qp->dev);
 923 
 924         /*
 925          *      Nuke the fragment queue.
 926          */
 927         ip_free(qp);
 928 }
 929 
 930 
 931 /*
 932  *      Add an entry to the 'ipq' queue for a newly received IP datagram.
 933  *      We will (hopefully :-) receive all other fragments of this datagram
 934  *      in time, so we just create a queue for this datagram, in which we
 935  *      will insert the received fragments at their respective positions.
      *
      *      skb - the received buffer (not referenced in this function)
      *      iph - IP header of the received fragment; the header plus the
      *            following 8 octets are copied into the new entry
      *      dev - device recorded in the entry (ip_expire() passes it to
      *            icmp_send())
      *
      *      Returns the new entry, already linked at the head of ipqueue
      *      with its expiry timer running, or NULL when either GFP_ATOMIC
      *      allocation fails (nothing is left allocated in that case).
 936  */
 937 
 938 static struct ipq *ip_create(struct sk_buff *skb, struct iphdr *iph, struct device *dev)
     /* [previous][next][first][last][top][bottom][index][help] */
 939 {
 940         struct ipq *qp;
 941         int ihlen;
 942 
 943         qp = (struct ipq *) kmalloc(sizeof(struct ipq), GFP_ATOMIC);
 944         if (qp == NULL)
 945         {
 946                 NETDEBUG(printk("IP: create: no memory left !\n"));
 947                 return(NULL);
 949         }
 950         memset(qp, 0, sizeof(struct ipq));
 951 
 952         /*
 953          *      Allocate memory for the IP header (plus 8 octets for ICMP).
 954          */
 955 
 956         ihlen = iph->ihl * 4;
 957         qp->iph = (struct iphdr *) kmalloc(64 + 8, GFP_ATOMIC);
 958         if (qp->iph == NULL)
 959         {
 960                 NETDEBUG(printk("IP: create: no memory left !\n"));
 961                 kfree_s(qp, sizeof(struct ipq));
 962                 return(NULL);
 963         }
 964 
             /* NOTE(review): fits the 64 + 8 byte buffer only while ihlen <= 64; presumably ihl is a 4-bit field (max 60) - confirm */
 965         memcpy(qp->iph, iph, ihlen + 8);
 966         qp->len = 0;
 967         qp->ihlen = ihlen;
 968         qp->fragments = NULL;
 969         qp->dev = dev;
 970 
 971         /* Start a timer for this entry. */
 972         qp->timer.expires = jiffies + IP_FRAG_TIME;     /* about 30 seconds     */
 973         qp->timer.data = (unsigned long) qp;            /* pointer to queue     */
 974         qp->timer.function = ip_expire;                 /* expire function      */
 975         add_timer(&qp->timer);
 976 
 977         /* Add this entry to the queue. */
 978         qp->prev = NULL;
 979         cli();
 980         qp->next = ipqueue;
 981         if (qp->next != NULL)
 982                 qp->next->prev = qp;
 983         ipqueue = qp;
 984         sti();
 985         return(qp);
 986 }
 987 
 988 
 989 /*
 990  *      See if a fragment queue is complete.
      *
      *      Returns 1 when the final fragment has been seen (qp->len is
      *      non-zero) and the fragment list, walked in order, leaves no
      *      gap starting from offset 0; returns 0 otherwise.
 991  */
 992 
 993 static int ip_done(struct ipq *qp)
     /* [previous][next][first][last][top][bottom][index][help] */
 994 {
 995         struct ipfrag *fp;
 996         int offset;
 997 
 998         /* Only possible if we received the final fragment. */
 999         if (qp->len == 0)
1000                 return(0);
1001 
1002         /* Check all fragment offsets to see if they connect. */
1003         fp = qp->fragments;
1004         offset = 0;
1005         while (fp != NULL)
1006         {
                     /* 'offset' is where the previous fragment ended; a later start means a hole */
1007                 if (fp->offset > offset)
1008                         return(0);      /* fragment(s) missing */
1009                 offset = fp->end;
1010                 fp = fp->next;
1011         }
1012 
1013         /* All fragments are present. */
1014         return(1);
1015 }
1016 
1017 
1018 /*
1019  *      Build a new IP datagram from all its fragments.
1020  *
1021  *      FIXME: We copy here because we lack an effective way of handling lists
1022  *      of bits on input. Until the new skb data handling is in I'm not going
1023  *      to touch this with a bargepole. 
      *
      *      A buffer of qp->ihlen + qp->len bytes is allocated, the saved
      *      header is copied to its start and every fragment's data is
      *      copied to its offset behind the header.  The queue entry qp is
      *      always freed, on success and on failure alike.
      *
      *      Returns the reassembled buffer with frag_off cleared and
      *      tot_len rewritten, or NULL if the buffer cannot be allocated
      *      or the fragment list is inconsistent.
1024  */
1025 
1026 static struct sk_buff *ip_glue(struct ipq *qp)
     /* [previous][next][first][last][top][bottom][index][help] */
1027 {
1028         struct sk_buff *skb;
1029         struct iphdr *iph;
1030         struct ipfrag *fp;
1031         unsigned char *ptr;
1032         int count, len;
1033 
1034         /*
1035          *      Allocate a new buffer for the datagram.
1036          */
1037         len = qp->ihlen + qp->len;
1038 
1039         if ((skb = dev_alloc_skb(len)) == NULL)
1040         {
1041                 ip_statistics.IpReasmFails++;
1042                 NETDEBUG(printk("IP: queue_glue: no memory for gluing queue %p\n", qp));
1043                 ip_free(qp);
1044                 return(NULL);
1045         }
1046 
1047         /* Fill in the basic details. */
1048         skb_put(skb,len);
1049         skb->h.raw = skb->data;
1050         skb->free = 1;
1051 
1052         /* Copy the original IP headers into the new buffer. */
1053         ptr = (unsigned char *) skb->h.raw;
1054         memcpy(ptr, ((unsigned char *) qp->iph), qp->ihlen);
1055         ptr += qp->ihlen;
1056 
1057         count = 0;
1058 
1059         /* Copy the data portions of all fragments into the new buffer. */
1060         fp = qp->fragments;
1061         while(fp != NULL)
1062         {
                     /*
                      *      Refuse anything that would make the copy below
                      *      misbehave: a negative length (ip_defrag() can
                      *      produce one when a fragment lies wholly inside its
                      *      predecessor) would turn into a huge memcpy() size,
                      *      and a fragment ending beyond qp->len would run off
                      *      the end of the payload area, which the running
                      *      total alone does not catch because skb->len also
                      *      counts the header.
                      */
1063                 if(fp->len < 0 || fp->offset+fp->len > qp->len || count+fp->len > skb->len)
1064                 {
1065                         NETDEBUG(printk("Invalid fragment list: Fragment over size.\n"));
1066                         ip_free(qp);
1067                         kfree_skb(skb,FREE_WRITE);
1068                         ip_statistics.IpReasmFails++;
1069                         return NULL;
1070                 }
1071                 memcpy((ptr + fp->offset), fp->ptr, fp->len);
1072                 count += fp->len;
1073                 fp = fp->next;
1074         }
1075 
1076         /* We glued together all fragments, so remove the queue entry. */
1077         ip_free(qp);
1078 
1079         /* Done with all fragments. Fixup the new IP header. */
1080         iph = skb->h.iph;
1081         iph->frag_off = 0;
1082         iph->tot_len = htons((iph->ihl * 4) + count);
1083         skb->ip_hdr = iph;
1084 
1085         ip_statistics.IpReasmOKs++;
1086         return(skb);
1087 }
1088 
1089 
1090 /*
1091  *      Process an incoming IP datagram fragment.
      *
      *      iph - IP header of the received packet
      *      skb - buffer holding the packet
      *      dev - receiving device, recorded when a new queue is created
      *
      *      Returns skb itself when the packet is not a fragment (MF clear
      *      and offset 0), the reassembled datagram from ip_glue() when
      *      this fragment completes one, and NULL otherwise (the fragment
      *      was queued, or it was dropped because memory ran out).
1092  */
1093 
1094 static struct sk_buff *ip_defrag(struct iphdr *iph, struct sk_buff *skb, struct device *dev)
     /* [previous][next][first][last][top][bottom][index][help] */
1095 {
1096         struct ipfrag *prev, *next, *tmp;
1097         struct ipfrag *tfp;
1098         struct ipq *qp;
1099         struct sk_buff *skb2;
1100         unsigned char *ptr;
1101         int flags, offset;
1102         int i, ihl, end;
1103 
1104         ip_statistics.IpReasmReqds++;
1105 
1106         /* Find the entry of this IP datagram in the "incomplete datagrams" queue. */
1107         qp = ip_find(iph);
1108 
1109         /* Is this a non-fragmented datagram? */
1110         offset = ntohs(iph->frag_off);
1111         flags = offset & ~IP_OFFSET;
1112         offset &= IP_OFFSET;
1113         if (((flags & IP_MF) == 0) && (offset == 0))
1114         {
1115                 if (qp != NULL)
1116                         ip_free(qp);    /* Huh? How could this exist?? */
1117                 return(skb);
1118         }
1119 
1120         offset <<= 3;           /* offset is in 8-byte chunks */
1121         ihl = iph->ihl * 4;
1122 
1123         /*
1124          * If the queue already existed, keep restarting its timer as long
1125          * as we still are receiving fragments.  Otherwise, create a fresh
1126          * queue entry.
1127          */
1128 
1129         if (qp != NULL)
1130         {
1131                 /* ANK. If the first fragment is received,
1132                  * we should remember the correct IP header (with options)
1133                  */
1134                 if (offset == 0)
1135                 {
1136                         qp->ihlen = ihl;
1137                         memcpy(qp->iph, iph, ihl+8);
1138                 }
                     /* ip_find() has already removed the timer; removing it again here precedes the re-arm */
1139                 del_timer(&qp->timer);
1140                 qp->timer.expires = jiffies + IP_FRAG_TIME;     /* about 30 seconds */
1141                 qp->timer.data = (unsigned long) qp;    /* pointer to queue */
1142                 qp->timer.function = ip_expire;         /* expire function */
1143                 add_timer(&qp->timer);
1144         }
1145         else
1146         {
1147                 /*
1148                  *      If we failed to create it, then discard the frame
1149                  */
1150                 if ((qp = ip_create(skb, iph, dev)) == NULL)
1151                 {
1152                         skb->sk = NULL;
1153                         kfree_skb(skb, FREE_READ);
1154                         ip_statistics.IpReasmFails++;
1155                         return NULL;
1156                 }
1157         }
1158 
1159         /*
1160          *      Determine the position of this fragment.
1161          */
1162 
1163         end = offset + ntohs(iph->tot_len) - ihl;
1164 
1165         /*
1166          *      Point into the IP datagram 'data' part.
1167          */
1168 
1169         ptr = skb->data + ihl;
1170 
1171         /*
1172          *      Is this the final fragment?
1173          */
1174 
1175         if ((flags & IP_MF) == 0)
1176                 qp->len = end;
1177 
1178         /*
1179          *      Find out which fragments are in front and at the back of us
1180          *      in the chain of fragments so far.  We must know where to put
1181          *      this fragment, right?
1182          */
1183 
1184         prev = NULL;
1185         for(next = qp->fragments; next != NULL; next = next->next)
1186         {
1187                 if (next->offset > offset)
1188                         break;  /* bingo! */
1189                 prev = next;
1190         }
1191 
1192         /*
1193          *      We found where to put this one.
1194          *      Check for overlap with preceding fragment, and, if needed,
1195          *      align things so that any overlaps are eliminated.
              *
              *      NOTE(review): when this fragment lies wholly inside prev,
              *      offset ends up greater than end and the entry created
              *      below records a negative length.
1196          */
1197         if (prev != NULL && offset < prev->end)
1198         {
1199                 i = prev->end - offset;
1200                 offset += i;    /* ptr into datagram */
1201                 ptr += i;       /* ptr into fragment data */
1202         }
1203 
1204         /*
1205          * Look for overlap with succeeding segments.
1206          * If we can merge fragments, do it.
1207          */
1208 
1209         for(tmp=next; tmp != NULL; tmp = tfp)
1210         {
1211                 tfp = tmp->next;
1212                 if (tmp->offset >= end)
1213                         break;          /* no overlaps at all */
1214 
                     /* Measure the overlap against the fragment actually being trimmed */
1215                 i = end - tmp->offset;                  /* overlap is 'i' bytes */
1216                 tmp->len -= i;                          /* so reduce size of    */
1217                 tmp->offset += i;                       /* next fragment        */
1218                 tmp->ptr += i;
1219                 /*
1220                  *      If we get a frag size of <= 0, remove it and the packet
1221                  *      that it goes with.
1222                  */
1223                 if (tmp->len <= 0)
1224                 {
1225                         if (tmp->prev != NULL)
1226                                 tmp->prev->next = tmp->next;
1227                         else
1228                                 qp->fragments = tmp->next;
1229 
                             /*
                              *      Test the successor of the entry being removed.
                              *      tfp (== tmp->next) is NULL when tmp is the last
                              *      entry, so it must not be dereferenced here.
                              */
1230                         if (tmp->next != NULL)
1231                                 tmp->next->prev = tmp->prev;
1232                         
1233                         next=tfp;       /* We have killed the original next frame */
1234 
1235                         kfree_skb(tmp->skb,FREE_READ);
1236                         kfree_s(tmp, sizeof(struct ipfrag));
1237                 }
1238         }
1239 
1240         /*
1241          *      Insert this fragment in the chain of fragments.
1242          */
1243 
1244         tfp = NULL;
1245         tfp = ip_frag_create(offset, end, skb, ptr);
1246 
1247         /*
1248          *      No memory to save the fragment - so throw the lot
1249          */
1250 
1251         if (!tfp)
1252         {
1253                 skb->sk = NULL;
1254                 kfree_skb(skb, FREE_READ);
1255                 return NULL;
1256         }
1257         tfp->prev = prev;
1258         tfp->next = next;
1259         if (prev != NULL)
1260                 prev->next = tfp;
1261         else
1262                 qp->fragments = tfp;
1263 
1264         if (next != NULL)
1265                 next->prev = tfp;
1266 
1267         /*
1268          *      OK, so we inserted this new fragment into the chain.
1269          *      Check if we now have a full IP datagram which we can
1270          *      bump up to the IP layer...
1271          */
1272 
1273         if (ip_done(qp))
1274         {
1275                 skb2 = ip_glue(qp);             /* glue together the fragments */
1276                 return(skb2);
1277         }
1278         return(NULL);
1279 }
1280 
1281 
1282 /*
1283  *      This IP datagram is too large to be sent in one piece.  Break it up into
1284  *      smaller pieces (each of size equal to the MAC header plus IP header plus
1285  *      a block of the data of the original IP data part) that will yet fit in a
1286  *      single device frame, and queue such a frame for sending by calling the
1287  *      ip_queue_xmit().  Note that this is recursion, and bad things will happen
1288  *      if this function causes a loop...
1289  *
1290  *      Yes this is inefficient, feel free to submit a quicker one.
1291  *
      *      sk      - owning socket, may be NULL; when set, each fragment's
      *                truesize is charged to sk->wmem_alloc
      *      skb     - the oversized packet; skb->data is taken to start at
      *                the link-level header and skb->ip_hdr to point at the
      *                IP header
      *      dev     - output device (supplies mtu and hard_header_len)
      *      is_frag - bit value 1: further fragments follow this packet, so
      *                MF is kept on every piece; bit value 2: the packet is
      *                itself a fragment, so numbering starts at its own
      *                fragment offset
      *
      *      Nothing is returned; failures show up only in the
      *      IpFragFails counter (and, when the mtu leaves fewer than
      *      8 data bytes, an ICMP message).  skb itself is not freed in this
      *      function.
1292  */
1293  
1294 static void ip_fragment(struct sock *sk, struct sk_buff *skb, struct device *dev, int is_frag)
     /* [previous][next][first][last][top][bottom][index][help] */
1295 {
1296         struct iphdr *iph;
1297         unsigned char *raw;
1298         unsigned char *ptr;
1299         struct sk_buff *skb2;
1300         int left, mtu, hlen, len;
1301         int offset;
1302         unsigned long flags;
1303 
1304         /*
1305          *      Point into the IP datagram header.
1306          */
1307 
1308         raw = skb->data;
1309 #if 0
1310         iph = (struct iphdr *) (raw + dev->hard_header_len);    
1311         skb->ip_hdr = iph;
1312 #else
1313         iph = skb->ip_hdr;
1314 #endif
1315 
1316         /*
1317          *      Setup starting values.
1318          */
1319 
1320         hlen = iph->ihl * 4;
1321         left = ntohs(iph->tot_len) - hlen;      /* Payload bytes still to be sent */
1322         hlen += dev->hard_header_len;           /* Total header size */
1323         mtu = (dev->mtu - hlen);                /* Payload room per fragment */
1324         ptr = (raw + hlen);                     /* Where to start from */
1325 
1326         /*
1327          *      Check for any "DF" flag. [DF means do not fragment]
              *      No ICMP message is generated on this path.
1328          */
1329 
1330         if (ntohs(iph->frag_off) & IP_DF)
1331         {
1332                 ip_statistics.IpFragFails++;
1333                 printk("ip_queue_xmit: frag needed\n");
1334                 return;
1335         }
1336 
1337         /*
1338          *      The protocol doesn't seem to say what to do in the case that the
1339          *      frame + options doesn't fit the mtu. As it used to fall down dead
1340          *      in this case we were fortunate it didn't happen
1341          */
1342 
1343         if(mtu<8)
1344         {
1345                 /* It's wrong but it's better than nothing */
1346                 icmp_send(skb,ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED,dev->mtu, dev);
1347                 ip_statistics.IpFragFails++;
1348                 return;
1349         }
1350 
1351         /*
1352          *      Fragment the datagram.
1353          */
1354 
1355         /*
1356          *      The initial offset is 0 for a complete frame. When
1357          *      fragmenting fragments it's wherever this one starts.
1358          */
1359 
1360         if (is_frag & 2)
1361                 offset = (ntohs(iph->frag_off) & IP_OFFSET) << 3;
1362         else
1363                 offset = 0;
1364 
1365 
1366         /*
1367          *      Keep copying data until we run out.
1368          */
1369 
1370         while(left > 0)
1371         {
1372                 len = left;
1373                 /* IF: it doesn't fit, use 'mtu' - the data space left */
1374                 if (len > mtu)
1375                         len = mtu;
1376                 /* IF: we are not sending upto and including the packet end
1377                    then align the next start on an eight byte boundary */
1378                 if (len < left)
1379                 {
1380                         len/=8;
1381                         len*=8;
1382                 }
1383                 /*
1384                  *      Allocate buffer.
                      *      Fragments queued by earlier iterations are not
                      *      recalled if this allocation fails.
1385                  */
1386 
1387                 if ((skb2 = alloc_skb(len + hlen+15,GFP_ATOMIC)) == NULL)
1388                 {
1389                         NETDEBUG(printk("IP: frag: no memory for new fragment!\n"));
1390                         ip_statistics.IpFragFails++;
1391                         return;
1392                 }
1393 
1394                 /*
1395                  *      Set up data on packet
1396                  */
1397 
1398                 skb2->arp = skb->arp;
1399                 if(skb->free==0)
1400                         printk("IP fragmenter: BUG free!=1 in fragmenter\n");
1401                 skb2->free = 1;
1402                 skb_put(skb2,len + hlen);
1403                 skb2->h.raw=(char *) skb2->data;
1404                 /*
1405                  *      Charge the memory for the fragment to any owner
1406                  *      it might possess
1407                  */
1408 
1409                 save_flags(flags);
1410                 if (sk)
1411                 {
1412                         cli();
1413                         sk->wmem_alloc += skb2->truesize;
1414                         skb2->sk=sk;
1415                 }
1416                 restore_flags(flags);
1417                 skb2->raddr = skb->raddr;       /* For rebuild_header - must be here */
1418 
1419                 /*
1420                  *      Copy the packet header into the new buffer.
                      *      (hlen covers the link-level and the IP header.)
1421                  */
1422 
1423                 memcpy(skb2->h.raw, raw, hlen);
1424 
1425                 /*
1426                  *      Copy a block of the IP datagram.
1427                  */
1428                 memcpy(skb2->h.raw + hlen, ptr, len);
1429                 left -= len;
1430 
                     /* Step over the link-level header so h.raw addresses the IP header */
1431                 skb2->h.raw+=dev->hard_header_len;
1432 
1433                 /*
1434                  *      Fill in the new header fields.
1435                  */
1436                 iph = (struct iphdr *)(skb2->h.raw/*+dev->hard_header_len*/);
1437                 iph->frag_off = htons((offset >> 3));
1438                 skb2->ip_hdr = iph;
1439 
1440                 /* ANK: dirty, but effective trick. Upgrade options only if
1441                  * the segment to be fragmented was THE FIRST (otherwise,
1442                  * options are already fixed) and make it ONCE
1443                  * on the initial skb, so that all the following fragments
1444                  * will inherit fixed options.
1445                  */
1446                 if (offset == 0)
1447                   ip_options_fragment(skb);
1448 
1449                 /*
1450                  *      Added AC : If we are fragmenting a fragment thats not the
1451                  *                 last fragment then keep MF on each bit
1452                  */
1453                 if (left > 0 || (is_frag & 1))
1454                         iph->frag_off |= htons(IP_MF);
1455                 ptr += len;
1456                 offset += len;
1457 
1458                 /*
1459                  *      Put this fragment into the sending queue.
1460                  */
1461 
1462                 ip_statistics.IpFragCreates++;
1463 
1464                 ip_queue_xmit(sk, dev, skb2, 2);
1465         }
1466         ip_statistics.IpFragOKs++;
1467 }
1468 
1469 
1470 
1471 #ifdef CONFIG_IP_FORWARD
1472 #ifdef CONFIG_IP_MROUTE
1473 
1474 /*
1475  *      Encapsulate a packet by attaching a valid IPIP header to it.
1476  *      This avoids tunnel drivers and other mess and gives us the speed so
1477  *      important for multicast video.
      *
      *      skb   - packet to wrap; the new header is pushed in front of
      *              its current data, and tos/ttl are taken from
      *              skb->ip_hdr
      *      len   - length handed to the device's hard_header method,
      *              clamped to 65515 first
      *      out   - output device; its pa_addr becomes the outer source
      *      daddr - outer destination, also stored in skb->raddr
      *
      *      The outer header is a plain 20-byte header (ihl 5) with
      *      protocol IPPROTO_IPIP, an id taken from ip_id_count and a
      *      checksum from ip_send_check().  skb->arp is left at 1 unless
      *      the device's hard_header method returns a negative value.
1478  */
1479  
1480 static void ip_encap(struct sk_buff *skb, int len, struct device *out, __u32 daddr)
     /* [previous][next][first][last][top][bottom][index][help] */
1481 {
1482         /*
1483          *      There is space for the IPIP header and MAC left.
1484          *
1485          *      Firstly push down and install the IPIP header.
1486          */
1487         struct iphdr *iph=(struct iphdr *)skb_push(skb,sizeof(struct iphdr));
1488         if(len>65515)
1489                 len=65515;
1490         iph->version    =       4;
1491         iph->tos        =       skb->ip_hdr->tos;
1492         iph->ttl        =       skb->ip_hdr->ttl;
1493         iph->frag_off   =       0;
1494         iph->daddr      =       daddr;
1495         iph->saddr      =       out->pa_addr;
1496         iph->protocol   =       IPPROTO_IPIP;
1497         iph->ihl        =       5;
             /* skb->len already includes the header pushed above */
1498         iph->tot_len    =       htons(skb->len);
1499         iph->id         =       htons(ip_id_count++);
1500         ip_send_check(iph);
1501 
1502         skb->dev = out;
1503         skb->arp = 1;
1504         skb->raddr=daddr;
1505         /*
1506          *      Now add the physical header (driver will push it down).
1507          */
1508         if (out->hard_header && out->hard_header(skb, out, ETH_P_IP, NULL, NULL, len)<0)
1509                         skb->arp=0;
1510         /*
1511          *      Ready to queue for transmission.
1512          */
1513 }
1514 
1515 #endif
1516 
1517 /*
1518  *      Forward an IP datagram to its next destination.
1519  */
1520 
1521 int ip_forward(struct sk_buff *skb, struct device *dev, int is_frag,
     /* [previous][next][first][last][top][bottom][index][help] */
1522                __u32 target_addr)
1523 {
1524         struct device *dev2;    /* Output device */
1525         struct iphdr *iph;      /* Our header */
1526         struct sk_buff *skb2;   /* Output packet */
1527         struct rtable *rt;      /* Route we use */
1528         unsigned char *ptr;     /* Data pointer */
1529         unsigned long raddr;    /* Router IP address */
1530         struct   options * opt  = (struct options*)skb->proto_priv;
1531         int encap = 0;          /* Encap length */
1532 #ifdef CONFIG_IP_FIREWALL
1533         int fw_res = 0;         /* Forwarding result */ 
1534 #ifdef CONFIG_IP_MASQUERADE     
1535         struct sk_buff *skb_in = skb;   /* So we can remember if the masquerader did some swaps */
1536 #endif
1537         
1538         /* 
1539          *      See if we are allowed to forward this.
1540          *      Note: demasqueraded fragments are always 'back'warded.
1541          */
1542 
1543         
1544         if(!(is_frag&4))
1545         {
1546                 fw_res=ip_fw_chk(skb->h.iph, dev, ip_fw_fwd_chain, ip_fw_fwd_policy, 0);
1547                 switch (fw_res) {
1548                 case FW_ACCEPT:
1549 #ifdef CONFIG_IP_MASQUERADE
1550                 case FW_MASQUERADE:
1551 #endif
1552                         break;
1553                 case FW_REJECT:
1554                         icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0, dev);
1555                         /* fall thru */
1556                 default:
1557                         return -1;
1558                 }
1559         }
1560 #endif
1561         /*
1562          *      According to the RFC, we must first decrease the TTL field. If
1563          *      that reaches zero, we must reply an ICMP control message telling
1564          *      that the packet's lifetime expired.
1565          *
1566          *      Exception:
1567          *      We may not generate an ICMP for an ICMP. icmp_send does the
1568          *      enforcement of this so we can forget it here. It is however
1569          *      sometimes VERY important.
1570          */
1571 
1572         iph = skb->h.iph;
1573         iph->ttl--;
1574 
1575         /*
1576          *      Re-compute the IP header checksum.
1577          *      This is inefficient. We know what has happened to the header
1578          *      and could thus adjust the checksum as Phil Karn does in KA9Q
1579          */
1580 
1581         iph->check = ntohs(iph->check) + 0x0100;
1582         if ((iph->check & 0xFF00) == 0)
1583                 iph->check++;           /* carry overflow */
1584         iph->check = htons(iph->check);
1585 
1586         if (iph->ttl <= 0)
1587         {
1588                 /* Tell the sender its packet died... */
1589                 icmp_send(skb, ICMP_TIME_EXCEEDED, ICMP_EXC_TTL, 0, dev);
1590                 return -1;
1591         }
1592 
1593 #ifdef CONFIG_IP_MROUTE
1594         if(!(is_frag&8))
1595         {
1596 #endif  
1597                 /*
1598                  * OK, the packet is still valid.  Fetch its destination address,
1599                  * and give it to the IP sender for further processing.
1600                  */
1601 
1602                 rt = ip_rt_route(target_addr, NULL, NULL);
1603                 if (rt == NULL)
1604                 {
1605                         /*
1606                          *      Tell the sender its packet cannot be delivered. Again
1607                          *      ICMP is screened later.
1608                          */
1609                         icmp_send(skb, ICMP_DEST_UNREACH, ICMP_NET_UNREACH, 0, dev);
1610                         return -1;
1611                 }
1612         
1613         
1614                 /*
1615                  * Gosh.  Not only is the packet valid; we even know how to
1616                  * forward it onto its final destination.  Can we say this
1617                  * is being plain lucky?
1618                  * If the router told us that there is no GW, use the dest.
1619                  * IP address itself- we seem to be connected directly...
1620                  */
1621 
1622                 raddr = rt->rt_gateway;
1623         
1624                 if (raddr != 0)
1625                 {
1626                         /*
1627                          *      Strict routing permits no gatewaying
1628                          */
1629         
1630                         if (opt->is_strictroute)
1631                         {
1632                                 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_SR_FAILED, 0, dev);
1633                                 return -1;
1634                         }
1635                 
1636                         /*
1637                          *      There is a gateway so find the correct route for it.
1638                          *      Gateways cannot in turn be gatewayed.
1639                          */
1640                 }
1641                 else
1642                         raddr = target_addr;
1643 
1644                 /*
1645                  *      Having picked a route we can now send the frame out.
1646                  */
1647 
1648                 dev2 = rt->rt_dev;
1649                 /*
1650                  *      In IP you never have to forward a frame on the interface that it 
1651                  *      arrived upon. We now generate an ICMP HOST REDIRECT giving the route
1652                  *      we calculated.
1653                  */
1654 #ifndef CONFIG_IP_NO_ICMP_REDIRECT
1655                 if (dev == dev2 && !((iph->saddr^iph->daddr)&dev->pa_mask) &&
1656                     (rt->rt_flags&RTF_MODIFIED) && !opt->srr)
1657                         icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, raddr, dev);
1658 #endif
1659 #ifdef CONFIG_IP_MROUTE
1660         }
1661         else
1662         {
1663                 /*
1664                  *      Multicast route forward. Routing is already done
1665                  */
1666                 dev2=skb->dev;
1667                 raddr=skb->raddr;
1668                 if(is_frag&16)          /* VIFF_TUNNEL mode */
1669                         encap=20;
1670         }
1671 #endif  
1672         
1673 
1674         /*
1675          * We now may allocate a new buffer, and copy the datagram into it.
1676          * If the indicated interface is up and running, kick it.
1677          */
1678 
1679         if (dev2->flags & IFF_UP)
1680         {
1681 #ifdef CONFIG_IP_MASQUERADE
1682                 /*
1683                  * If this fragment needs masquerading, make it so...
1684                  * (Dont masquerade de-masqueraded fragments)
1685                  */
1686                 if (!(is_frag&4) && fw_res==FW_MASQUERADE)
1687                         ip_fw_masquerade(&skb, dev2);
1688 #endif
1689                 IS_SKB(skb);
1690 
1691                 if (skb->len+encap > dev2->mtu && (ntohs(iph->frag_off) & IP_DF)) {
1692                   ip_statistics.IpFragFails++;
1693                   icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, dev2->mtu, dev);
1694                   return -1;
1695                 }
1696 
1697 #ifdef CONFIG_IP_MROUTE
1698                 if(skb_headroom(skb)-encap<dev2->hard_header_len)
1699                 {
1700                         skb2 = alloc_skb(dev2->hard_header_len + skb->len + encap + 15, GFP_ATOMIC);
1701 #else
1702                 if(skb_headroom(skb)<dev2->hard_header_len)
1703                 {
1704                         skb2 = alloc_skb(dev2->hard_header_len + skb->len + 15, GFP_ATOMIC);
1705 #endif          
1706                         /*
1707                          *      This is rare and since IP is tolerant of network failures
1708                          *      quite harmless.
1709                          */
1710                 
1711                         if (skb2 == NULL)
1712                         {
1713                                 NETDEBUG(printk("\nIP: No memory available for IP forward\n"));
1714                                 return -1;
1715                         }
1716                 
1717                         IS_SKB(skb2);
1718                         /*
1719                          *      Add the physical headers.
1720                          */
1721 #ifdef CONFIG_IP_MROUTE
1722                         if(is_frag&16)
1723                         {
1724                                 skb_reserve(skb,(encap+dev->hard_header_len+15)&~15);   /* 16 byte aligned IP headers are good */
1725                                 ip_encap(skb2,skb->len, dev2, raddr);
1726                         }
1727                         else
1728 #endif                  
1729                                 ip_send(skb2,raddr,skb->len,dev2,dev2->pa_addr);
1730 
1731                         /*
1732                          *      We have to copy the bytes over as the new header wouldn't fit
1733                          *      the old buffer. This should be very rare.
1734                          */              
1735                         
1736                         ptr = skb_put(skb2,skb->len);
1737                         skb2->free = 1;
1738                         skb2->h.raw = ptr;
1739 
1740                         /*
1741                          *      Copy the packet data into the new buffer.
1742                          */
1743                         memcpy(ptr, skb->h.raw, skb->len);
1744                         memcpy(skb2->proto_priv, skb->proto_priv, sizeof(skb->proto_priv));
1745                         iph = skb2->ip_hdr = skb2->h.iph;
1746                 }
1747                 else
1748                 {
1749                         /* 
1750                          *      Build a new MAC header. 
1751                          */
1752 
1753                         skb2 = skb;             
1754                         skb2->dev=dev2;
1755 #ifdef CONFIG_IP_MROUTE
1756                         if(is_frag&16)
1757                                 ip_encap(skb,skb->len, dev2, raddr);
1758                         else
1759                         {
1760 #endif
1761                                 skb->arp=1;
1762                                 skb->raddr=raddr;
1763                                 if(dev2->hard_header)
1764                                 {
1765                                         if(dev2->hard_header(skb, dev2, ETH_P_IP, NULL, NULL, skb->len)<0)
1766                                                 skb->arp=0;
1767                                 }
1768 #ifdef CONFIG_IP_MROUTE
1769                         }                               
1770 #endif                  
1771                         ip_statistics.IpForwDatagrams++;
1772                 }
1773 
1774                 if (opt->optlen) {
1775                         unsigned char * optptr;
1776                         if (opt->rr_needaddr) {
1777                                 optptr = (unsigned char *)iph + opt->rr;
1778                                 memcpy(&optptr[optptr[2]-5], &dev2->pa_addr, 4);
1779                                 opt->is_changed = 1;
1780                         }
1781                         if (opt->srr_is_hit) {
1782                                 int srrptr, srrspace;
1783 
1784                                 optptr = (unsigned char *)iph + opt->srr;
1785 
1786                                 for ( srrptr=optptr[2], srrspace = optptr[1];
1787                                       srrptr <= srrspace;
1788                                      srrptr += 4
1789                                     ) {
1790                                         if (srrptr + 3 > srrspace)
1791                                           break;
1792                                         if (memcmp(&target_addr, &optptr[srrptr-1], 4) == 0)
1793                                           break;
1794                                 }
1795                                 if (srrptr + 3 <= srrspace) {
1796                                         opt->is_changed = 1;
1797                                         memcpy(&optptr[srrptr-1], &dev2->pa_addr, 4);
1798                                         iph->daddr = target_addr;
1799                                         optptr[2] = srrptr+4;
1800                                 } else
1801                                         printk("ip_forward(): Argh! Destination lost!\n");
1802                         }
1803                         if (opt->ts_needaddr) {
1804                                 optptr = (unsigned char *)iph + opt->ts;
1805                                 memcpy(&optptr[optptr[2]-9], &dev2->pa_addr, 4);
1806                                 opt->is_changed = 1;
1807                         }
1808                         if (opt->is_changed) {
1809                                 opt->is_changed = 0;
1810                                 ip_send_check(iph);
1811                         }
1812                 }
1813 /*
1814  * ANK:  this is point of "no return", we cannot send an ICMP,
1815  *       because we changed SRR option.
1816  */
1817 
1818                 /*
1819                  *      See if it needs fragmenting. Note in ip_rcv we tagged
1820                  *      the fragment type. This must be right so that
1821                  *      the fragmenter does the right thing.
1822                  */
1823 
1824                 if(skb2->len > dev2->mtu + dev2->hard_header_len)
1825                 {
1826                         ip_fragment(NULL,skb2,dev2, is_frag);
1827                         kfree_skb(skb2,FREE_WRITE);
1828                 }
1829                 else
1830                 {
1831 #ifdef CONFIG_IP_ACCT           
1832                         /*
1833                          *      Count mapping we shortcut
1834                          */
1835                          
1836                         ip_fw_chk(iph,dev,ip_acct_chain,IP_FW_F_ACCEPT,1);
1837 #endif                  
1838                         
1839                         /*
1840                          *      Map service types to priority. We lie about
1841                          *      throughput being low priority, but it's a good
1842                          *      choice to help improve general usage.
1843                          */
1844                         if(iph->tos & IPTOS_LOWDELAY)
1845                                 dev_queue_xmit(skb2, dev2, SOPRI_INTERACTIVE);
1846                         else if(iph->tos & IPTOS_THROUGHPUT)
1847                                 dev_queue_xmit(skb2, dev2, SOPRI_BACKGROUND);
1848                         else
1849                                 dev_queue_xmit(skb2, dev2, SOPRI_NORMAL);
1850                 }
1851         }
1852         else
1853                 return -1;
1854         
1855         /*
1856          *      Tell the caller if their buffer is free.
1857          */      
1858          
1859         if(skb==skb2)
1860                 return 0;       
1861 
1862 #ifdef CONFIG_IP_MASQUERADE     
1863         /*
1864          *      The original is free. Free our copy and
1865          *      tell the caller not to free.
1866          */
1867         if(skb!=skb_in)
1868         {
1869                 kfree_skb(skb_in, FREE_WRITE);
1870                 return 0;
1871         }
1872 #endif  
1873         return 1;
1874 }
1875 
1876 
1877 #endif
1878 
1879 
1880 /*
1881  *      This function receives all incoming IP datagrams.
1882  *
1883  *      On entry skb->data points to the start of the IP header and
1884  *      the MAC header has been removed.
1885  */
1886 
1887 int ip_rcv(struct sk_buff *skb, struct device *dev, struct packet_type *pt)
     /* [previous][next][first][last][top][bottom][index][help] */
1888 {
1889         struct iphdr *iph = skb->h.iph;
1890         struct sock *raw_sk=NULL;
1891         unsigned char hash;
1892         unsigned char flag = 0;
1893         struct inet_protocol *ipprot;
1894         int brd=IS_MYADDR;
1895         struct options * opt = NULL;
1896         int is_frag=0;
1897 #ifdef CONFIG_IP_FIREWALL
1898         int err;
1899 #endif  
1900 #ifdef CONFIG_IP_MROUTE
1901         int mroute_pkt=0;
1902 #endif  
1903 
1904 #ifdef CONFIG_NET_IPV6
1905         /* 
1906          *      Intercept IPv6 frames. We dump ST-II and invalid types just below..
1907          */
1908          
1909         if(iph->version == 6)
1910                 return ipv6_rcv(skb,dev,pt);
1911 #endif          
1912 
1913         ip_statistics.IpInReceives++;
1914 
1915         /*
1916          *      Tag the ip header of this packet so we can find it
1917          */
1918 
1919         skb->ip_hdr = iph;
1920 
1921         /*
1922          *      RFC1122: 3.1.2.2 MUST silently discard any IP frame that fails the checksum.
1923          *      RFC1122: 3.1.2.3 MUST discard a frame with invalid source address [NEEDS FIXING].
1924          *
1925          *      Is the datagram acceptable?
1926          *
1927          *      1.      Length at least the size of an ip header
1928          *      2.      Version of 4
1929          *      3.      Checksums correctly. [Speed optimisation for later, skip loopback checksums]
1930          *      4.      Doesn't have a bogus length
1931          *      (5.     We ought to check for IP multicast addresses and undefined types.. does this matter ?)
1932          */
1933 
1934         if (skb->len<sizeof(struct iphdr) || iph->ihl<5 || iph->version != 4 || ip_fast_csum((unsigned char *)iph, iph->ihl) !=0
1935                 || skb->len < ntohs(iph->tot_len))
1936         {
1937                 ip_statistics.IpInHdrErrors++;
1938                 kfree_skb(skb, FREE_WRITE);
1939                 return(0);
1940         }
1941 
1942         /*
1943          *      Our transport medium may have padded the buffer out. Now we know it
1944          *      is IP we can trim to the true length of the frame.
1945          *      Note this now means skb->len holds ntohs(iph->tot_len).
1946          */
1947 
1948         skb_trim(skb,ntohs(iph->tot_len));
1949 
1950         if (iph->ihl > 5) {
1951                 skb->ip_summed = 0;
1952                 if (ip_options_compile(NULL, skb))
1953                         return(0);
1954                 opt = (struct options*)skb->proto_priv;
1955 #ifdef CONFIG_IP_NOSR
1956                 if (opt->srr) {
1957                         kfree_skb(skb, FREE_READ);
1958                         return -EINVAL;
1959                 }
1960 #endif                                  
1961         }
1962         
1963         /*
1964          *      See if the firewall wants to dispose of the packet. 
1965          */
1966 
1967 #ifdef  CONFIG_IP_FIREWALL
1968         
1969         if ((err=ip_fw_chk(iph,dev,ip_fw_blk_chain,ip_fw_blk_policy, 0))<FW_ACCEPT)
1970         {
1971                 if(err==FW_REJECT)
1972                         icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0, dev);
1973                 kfree_skb(skb, FREE_WRITE);
1974                 return 0;       
1975         }
1976 
1977 #endif
1978         
1979         /*
1980          *      Remember if the frame is fragmented.
1981          */
1982          
1983         if(iph->frag_off)
1984         {
1985                 if (iph->frag_off & htons(IP_MF))
1986                         is_frag|=1;
1987                 /*
1988                  *      Last fragment ?
1989                  */
1990         
1991                 if (iph->frag_off & htons(IP_OFFSET))
1992                         is_frag|=2;
1993         }
1994         
1995         /*
1996          *      Do any IP forwarding required.  chk_addr() is expensive -- avoid it someday.
1997          *
1998          *      This is inefficient. While finding out if it is for us we could also compute
1999          *      the routing table entry. This is where the great unified cache theory comes
2000          *      in as and when someone implements it
2001          *
2002          *      For most hosts over 99% of packets match the first conditional
2003          *      and don't go via ip_chk_addr. Note: brd is set to IS_MYADDR at
2004          *      function entry.
2005          */
2006 
2007         if ( iph->daddr == skb->dev->pa_addr || (brd = ip_chk_addr(iph->daddr)) != 0)
2008         {
2009                 if (opt && opt->srr) {
2010                         int srrspace, srrptr;
2011                         __u32 nexthop;
2012                         unsigned char * optptr = ((unsigned char *)iph) + opt->srr;
2013 
2014                         if (brd != IS_MYADDR || skb->pkt_type != PACKET_HOST) {
2015                                 kfree_skb(skb, FREE_WRITE);
2016                                 return 0;
2017                         }
2018 
2019                         for ( srrptr=optptr[2], srrspace = optptr[1];
2020                               srrptr <= srrspace;
2021                               srrptr += 4
2022                              ) 
2023                         {
2024                                 int brd2;
2025                                 if (srrptr + 3 > srrspace) 
2026                                 {
2027                                         icmp_send(skb, ICMP_PARAMETERPROB, 0, opt->srr+2,
2028                                                   skb->dev);
2029                                         kfree_skb(skb, FREE_WRITE);
2030                                         return 0;
2031                                 }
2032                                 memcpy(&nexthop, &optptr[srrptr-1], 4);
2033                                 if ((brd2 = ip_chk_addr(nexthop)) == 0)
2034                                         break;
2035                                 if (brd2 != IS_MYADDR) 
2036                                 {
2037 
2038                                         /*
2039                                          *      ANK: should we implement weak tunneling of multicasts?
2040                                          *      Are they obsolete? DVMRP specs (RFC-1075) is old enough...
2041                                          *      [They are obsolete]
2042                                          */
2043                                         kfree_skb(skb, FREE_WRITE);
2044                                         return -EINVAL;
2045                                 }
2046                         }
2047                         if (srrptr <= srrspace) 
2048                         {
2049                                 opt->srr_is_hit = 1;
2050                                 opt->is_changed = 1;
2051 #ifdef CONFIG_IP_FORWARD
2052                                 if (ip_forward(skb, dev, is_frag, nexthop))
2053                                         kfree_skb(skb, FREE_WRITE);
2054 #else
2055                                 ip_statistics.IpInAddrErrors++;
2056                                 kfree_skb(skb, FREE_WRITE);
2057 #endif
2058                                 return 0;
2059                         }
2060                 }
2061 
2062 #ifdef CONFIG_IP_MULTICAST      
2063                 if(!(dev->flags&IFF_ALLMULTI) && brd==IS_MULTICAST && iph->daddr!=IGMP_ALL_HOSTS && !(dev->flags&IFF_LOOPBACK))
2064                 {
2065                         /*
2066                          *      Check it is for one of our groups
2067                          */
2068                         struct ip_mc_list *ip_mc=dev->ip_mc_list;
2069                         do
2070                         {
2071                                 if(ip_mc==NULL)
2072                                 {       
2073                                         kfree_skb(skb, FREE_WRITE);
2074                                         return 0;
2075                                 }
2076                                 if(ip_mc->multiaddr==iph->daddr)
2077                                         break;
2078                                 ip_mc=ip_mc->next;
2079                         }
2080                         while(1);
2081                 }
2082 #endif
2083 
2084 #ifdef CONFIG_IP_MASQUERADE
2085                 /*
2086                  * Do we need to de-masquerade this fragment?
2087                  */
2088                 if (ip_fw_demasquerade(skb)) 
2089                 {
2090                         struct iphdr *iph=skb->h.iph;
2091                         if (ip_forward(skb, dev, is_frag|4, iph->daddr))
2092                                 kfree_skb(skb, FREE_WRITE);
2093                         return(0);
2094                 }
2095 #endif
2096 
2097                 /*
2098                  *      Account for the packet
2099                  */
2100  
2101 #ifdef CONFIG_IP_ACCT
2102                 ip_fw_chk(iph,dev,ip_acct_chain,IP_FW_F_ACCEPT,1);
2103 #endif  
2104 
2105                 /*
2106                  *      Reassemble IP fragments.
2107                  */
2108 
2109                 if(is_frag)
2110                 {
2111                         /* Defragment. Obtain the complete packet if there is one */
2112                         skb=ip_defrag(iph,skb,dev);
2113                         if(skb==NULL)
2114                                 return 0;
2115                         skb->dev = dev;
2116                         iph=skb->h.iph;
2117                 }
2118 
2119                 /*
2120                  *      Point into the IP datagram, just past the header.
2121                  */
2122 
2123                 skb->ip_hdr = iph;
2124                 skb->h.raw += iph->ihl*4;
2125 
2126 #ifdef CONFIG_IP_MROUTE         
2127                 /*
2128                  *      Check the state on multicast routing (multicast and not 224.0.0.z)
2129                  */
2130                  
2131                 if(brd==IS_MULTICAST && (iph->daddr&htonl(0xFFFFFF00))!=htonl(0xE0000000))
2132                         mroute_pkt=1;
2133 
2134 #endif
2135                 /*
2136                  *      Deliver to raw sockets. This is fun as to avoid copies we want to make no surplus copies.
2137                  *
2138                  *      RFC 1122: SHOULD pass TOS value up to the transport layer.
2139                  */
2140  
2141                 hash = iph->protocol & (SOCK_ARRAY_SIZE-1);
2142 
2143                 /* 
2144                  *      If there maybe a raw socket we must check - if not we don't care less 
2145                  */
2146                  
2147                 if((raw_sk=raw_prot.sock_array[hash])!=NULL)
2148                 {
2149                         struct sock *sknext=NULL;
2150                         struct sk_buff *skb1;
2151                         raw_sk=get_sock_raw(raw_sk, iph->protocol,  iph->saddr, iph->daddr);
2152                         if(raw_sk)      /* Any raw sockets */
2153                         {
2154                                 do
2155                                 {
2156                                         /* Find the next */
2157                                         sknext=get_sock_raw(raw_sk->next, iph->protocol, iph->saddr, iph->daddr);
2158                                         if(sknext)
2159                                                 skb1=skb_clone(skb, GFP_ATOMIC);
2160                                         else
2161                                                 break;  /* One pending raw socket left */
2162                                         if(skb1)
2163                                                 raw_rcv(raw_sk, skb1, dev, iph->saddr,iph->daddr);
2164                                         raw_sk=sknext;
2165                                 }
2166                                 while(raw_sk!=NULL);
2167                                 
2168                                 /*
2169                                  *      Here either raw_sk is the last raw socket, or NULL if none 
2170                                  */
2171                                  
2172                                 /*
2173                                  *      We deliver to the last raw socket AFTER the protocol checks as it avoids a surplus copy 
2174                                  */
2175                         }
2176                 }
2177         
2178                 /*
2179                  *      skb->h.raw now points at the protocol beyond the IP header.
2180                  */
2181         
2182                 hash = iph->protocol & (MAX_INET_PROTOS -1);
2183                 for (ipprot = (struct inet_protocol *)inet_protos[hash];ipprot != NULL;ipprot=(struct inet_protocol *)ipprot->next)
2184                 {
2185                         struct sk_buff *skb2;
2186         
2187                         if (ipprot->protocol != iph->protocol)
2188                                 continue;
2189                        /*
2190                         *       See if we need to make a copy of it.  This will
2191                         *       only be set if more than one protocol wants it.
2192                         *       and then not for the last one. If there is a pending
2193                         *       raw delivery wait for that
2194                         */
2195         
2196 #ifdef CONFIG_IP_MROUTE
2197                         if (ipprot->copy || raw_sk || mroute_pkt)
2198 #else   
2199                         if (ipprot->copy || raw_sk)
2200 #endif                  
2201                         {
2202                                 skb2 = skb_clone(skb, GFP_ATOMIC);
2203                                 if(skb2==NULL)
2204                                         continue;
2205                         }
2206                         else
2207                         {
2208                                 skb2 = skb;
2209                         }
2210                         flag = 1;
2211 
2212                        /*
2213                         *       Pass on the datagram to each protocol that wants it,
2214                         *       based on the datagram protocol.  We should really
2215                         *       check the protocol handler's return values here...
2216                         */
2217 
2218                         ipprot->handler(skb2, dev, opt, iph->daddr,
2219                                 (ntohs(iph->tot_len) - (iph->ihl * 4)),
2220                                 iph->saddr, 0, ipprot);
2221                 }
2222 
2223                 /*
2224                  *      All protocols checked.
2225                  *      If this packet was a broadcast, we may *not* reply to it, since that
2226                  *      causes (proven, grin) ARP storms and a leakage of memory (i.e. all
2227                  *      ICMP reply messages get queued up for transmission...)
2228                  */
2229 
2230 #ifdef CONFIG_IP_MROUTE          
2231                 /*
2232                  *      Forward the last copy to the multicast router. If
2233                  *      there is a pending raw deliery however make a copy
2234                  *      and forward that.
2235                  */
2236                  
2237                 if(mroute_pkt)
2238                 {
2239                         flag=1;
2240                         if(raw_sk==NULL)
2241                                 ipmr_forward(skb, is_frag);
2242                         else
2243                         {
2244                                 struct sk_buff *skb2=skb_clone(skb, GFP_ATOMIC);
2245                                 if(skb2)
2246                                 {
2247                                         skb2->free=1;
2248                                         ipmr_forward(skb2, is_frag);
2249                                 }
2250                         }
2251                 }
2252 #endif          
2253 
2254                 if(raw_sk!=NULL)        /* Shift to last raw user */
2255                         raw_rcv(raw_sk, skb, dev, iph->saddr, iph->daddr);
2256                 else if (!flag)         /* Free and report errors */
2257                 {
2258                         if (brd != IS_BROADCAST && brd!=IS_MULTICAST)
2259                                 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PROT_UNREACH, 0, dev);   
2260                         kfree_skb(skb, FREE_WRITE);
2261                 }
2262 
2263                 return(0);
2264         }
2265 
2266         /*
2267          *      Do any unicast IP forwarding required.
2268          */
2269         
2270         /*
2271          *      Don't forward multicast or broadcast frames.
2272          */
2273 
2274         if(skb->pkt_type!=PACKET_HOST || brd==IS_BROADCAST)
2275         {
2276                 kfree_skb(skb,FREE_WRITE);
2277                 return 0;
2278         }
2279 
2280         /*
2281          *      The packet is for another target. Forward the frame
2282          */
2283 
2284 #ifdef CONFIG_IP_FORWARD
2285         if (opt && opt->is_strictroute) {
2286               icmp_send(skb, ICMP_PARAMETERPROB, 0, 16, skb->dev);
2287               kfree_skb(skb, FREE_WRITE);
2288               return -1;
2289         }
2290         if (ip_forward(skb, dev, is_frag, iph->daddr))
2291                 kfree_skb(skb, FREE_WRITE);
2292 #else
2293 /*      printk("Machine %lx tried to use us as a forwarder to %lx but we have forwarding disabled!\n",
2294                         iph->saddr,iph->daddr);*/
2295         ip_statistics.IpInAddrErrors++;
2296         kfree_skb(skb, FREE_WRITE);
2297 #endif
2298         return(0);
2299 }
2300         
2301 
2302 /*
2303  *      Loop a packet back to the sender.
2304  */
2305  
2306 static void ip_loopback(struct device *old_dev, struct sk_buff *skb)
     /* [previous][next][first][last][top][bottom][index][help] */
2307 {
2308         struct device *dev=&loopback_dev;
2309         int len=ntohs(skb->ip_hdr->tot_len);
2310         struct sk_buff *newskb=dev_alloc_skb(len+dev->hard_header_len+15);
2311         
2312         if(newskb==NULL)
2313                 return;
2314                 
2315         newskb->link3=NULL;
2316         newskb->sk=NULL;
2317         newskb->dev=dev;
2318         newskb->saddr=skb->saddr;
2319         newskb->daddr=skb->daddr;
2320         newskb->raddr=skb->raddr;
2321         newskb->free=1;
2322         newskb->lock=0;
2323         newskb->users=0;
2324         newskb->pkt_type=skb->pkt_type;
2325         
2326         /*
2327          *      Put a MAC header on the packet
2328          */
2329         ip_send(newskb, skb->ip_hdr->daddr, len, dev, skb->ip_hdr->saddr);
2330         /*
2331          *      Add the rest of the data space. 
2332          */
2333         newskb->ip_hdr=(struct iphdr *)skb_put(newskb, len);
2334         memcpy(newskb->proto_priv, skb->proto_priv, sizeof(skb->proto_priv));
2335 
2336         /*
2337          *      Copy the data
2338          */
2339         memcpy(newskb->ip_hdr,skb->ip_hdr,len);
2340 
2341         /* Recurse. The device check against IFF_LOOPBACK will stop infinite recursion */
2342                 
2343         /*printk("Loopback output queued [%lX to %lX].\n", newskb->ip_hdr->saddr,newskb->ip_hdr->daddr);*/
2344         ip_queue_xmit(NULL, dev, newskb, 1);
2345 }
2346 
2347 
2348 /*
2349  * Queues a packet to be sent, and starts the transmitter
2350  * if necessary.  if free = 1 then we free the block after
2351  * transmit, otherwise we don't. If free==2 we not only
2352  * free the block but also don't assign a new ip seq number.
2353  * This routine also needs to put in the total length,
2354  * and compute the checksum
2355  */
2356 
2357 void ip_queue_xmit(struct sock *sk, struct device *dev,
     /* [previous][next][first][last][top][bottom][index][help] */
2358               struct sk_buff *skb, int free)
2359 {
2360         struct iphdr *iph;
2361 /*      unsigned char *ptr;*/
2362 
2363         /* Sanity check */
2364         if (dev == NULL)
2365         {
2366                 NETDEBUG(printk("IP: ip_queue_xmit dev = NULL\n"));
2367                 return;
2368         }
2369 
2370         IS_SKB(skb);
2371 
2372         /*
2373          *      Do some book-keeping in the packet for later
2374          */
2375 
2376 
2377         skb->dev = dev;
2378         skb->when = jiffies;
2379 
2380         /*
2381          *      Find the IP header and set the length. This is bad
2382          *      but once we get the skb data handling code in the
2383          *      hardware will push its header sensibly and we will
2384          *      set skb->ip_hdr to avoid this mess and the fixed
2385          *      header length problem
2386          */
2387 
2388         iph = skb->ip_hdr;
2389         iph->tot_len = ntohs(skb->len-(((unsigned char *)iph)-skb->data));
2390 
2391 #ifdef CONFIG_IP_FIREWALL
2392         if(ip_fw_chk(iph, dev, ip_fw_blk_chain, ip_fw_blk_policy, 0) < FW_ACCEPT)
2393                 /* just don't send this packet */
2394                 return;
2395 #endif  
2396 
2397         /*
2398          *      No reassigning numbers to fragments...
2399          */
2400 
2401         if(free!=2)
2402                 iph->id      = htons(ip_id_count++);
2403         else
2404                 free=1;
2405 
2406         /* All buffers without an owner socket get freed */
2407         if (sk == NULL)
2408                 free = 1;
2409 
2410         skb->free = free;
2411 
2412         /*
2413          *      Do we need to fragment. Again this is inefficient.
2414          *      We need to somehow lock the original buffer and use
2415          *      bits of it.
2416          */
2417 
2418         if(ntohs(iph->tot_len)> dev->mtu)
2419         {
2420                 ip_fragment(sk,skb,dev,0);
2421                 IS_SKB(skb);
2422                 kfree_skb(skb,FREE_WRITE);
2423                 return;
2424         }
2425 
2426         /*
2427          *      Add an IP checksum
2428          */
2429 
2430         ip_send_check(iph);
2431 
2432         /*
2433          *      Print the frame when debugging
2434          */
2435 
2436         /*
2437          *      More debugging. You cannot queue a packet already on a list
2438          *      Spot this and moan loudly.
2439          */
2440         if (skb->next != NULL)
2441         {
2442                 NETDEBUG(printk("ip_queue_xmit: next != NULL\n"));
2443                 skb_unlink(skb);
2444         }
2445 
2446         /*
2447          *      If a sender wishes the packet to remain unfreed
2448          *      we add it to his send queue. This arguably belongs
2449          *      in the TCP level since nobody else uses it. BUT
2450          *      remember IPng might change all the rules.
2451          */
2452 
2453         if (!free)
2454         {
2455                 unsigned long flags;
2456                 /* The socket now has more outstanding blocks */
2457 
2458                 sk->packets_out++;
2459 
2460                 /* Protect the list for a moment */
2461                 save_flags(flags);
2462                 cli();
2463 
2464                 if (skb->link3 != NULL)
2465                 {
2466                         NETDEBUG(printk("ip.c: link3 != NULL\n"));
2467                         skb->link3 = NULL;
2468                 }
2469                 if (sk->send_head == NULL)
2470                 {
2471                         sk->send_tail = skb;
2472                         sk->send_head = skb;
2473                 }
2474                 else
2475                 {
2476                         sk->send_tail->link3 = skb;
2477                         sk->send_tail = skb;
2478                 }
2479                 /* skb->link3 is NULL */
2480 
2481                 /* Interrupt restore */
2482                 restore_flags(flags);
2483         }
2484         else
2485                 /* Remember who owns the buffer */
2486                 skb->sk = sk;
2487 
2488         /*
2489          *      If the indicated interface is up and running, send the packet.
2490          */
2491          
2492         ip_statistics.IpOutRequests++;
2493 #ifdef CONFIG_IP_ACCT
2494         ip_fw_chk(iph,dev,ip_acct_chain,IP_FW_F_ACCEPT,1);
2495 #endif  
2496         
2497 #ifdef CONFIG_IP_MULTICAST      
2498 
2499         /*
2500          *      Multicasts are looped back for other local users
2501          */
2502          
2503         if (MULTICAST(iph->daddr) && !(dev->flags&IFF_LOOPBACK))
2504         {
2505                 if(sk==NULL || sk->ip_mc_loop)
2506                 {
2507                         if(iph->daddr==IGMP_ALL_HOSTS || (dev->flags&IFF_ALLMULTI))
2508                         {
2509                                 ip_loopback(dev,skb);
2510                         }
2511                         else
2512                         {
2513                                 struct ip_mc_list *imc=dev->ip_mc_list;
2514                                 while(imc!=NULL)
2515                                 {
2516                                         if(imc->multiaddr==iph->daddr)
2517                                         {
2518                                                 ip_loopback(dev,skb);
2519                                                 break;
2520                                         }
2521                                         imc=imc->next;
2522                                 }
2523                         }
2524                 }
2525                 /* Multicasts with ttl 0 must not go beyond the host */
2526                 
2527                 if(skb->ip_hdr->ttl==0)
2528                 {
2529                         kfree_skb(skb, FREE_READ);
2530                         return;
2531                 }
2532         }
2533 #endif
2534         if((dev->flags&IFF_BROADCAST) && (iph->daddr==dev->pa_brdaddr||iph->daddr==0xFFFFFFFF) && !(dev->flags&IFF_LOOPBACK))
2535                 ip_loopback(dev,skb);
2536                 
2537         if (dev->flags & IFF_UP)
2538         {
2539                 /*
2540                  *      If we have an owner use its priority setting,
2541                  *      otherwise use NORMAL
2542                  */
2543 
2544                 if (sk != NULL)
2545                 {
2546                         dev_queue_xmit(skb, dev, sk->priority);
2547                 }
2548                 else
2549                 {
2550                         dev_queue_xmit(skb, dev, SOPRI_NORMAL);
2551                 }
2552         }
2553         else
2554         {
2555                 if(sk)
2556                         sk->err = ENETDOWN;
2557                 ip_statistics.IpOutDiscards++;
2558                 if (free)
2559                         kfree_skb(skb, FREE_WRITE);
2560         }
2561 }
2562 
2563 
2564 
2565 #ifdef CONFIG_IP_MULTICAST
2566 
2567 /*
2568  *      Write an multicast group list table for the IGMP daemon to
2569  *      read.
2570  */
2571  
2572 int ip_mc_procinfo(char *buffer, char **start, off_t offset, int length, int dummy)
     /* [previous][next][first][last][top][bottom][index][help] */
2573 {
2574         off_t pos=0, begin=0;
2575         struct ip_mc_list *im;
2576         unsigned long flags;
2577         int len=0;
2578         struct device *dev;
2579         
2580         len=sprintf(buffer,"Device    : Count\tGroup    Users Timer\n");  
2581         save_flags(flags);
2582         cli();
2583         
2584         for(dev = dev_base; dev; dev = dev->next)
2585         {
2586                 if((dev->flags&IFF_UP)&&(dev->flags&IFF_MULTICAST))
2587                 {
2588                         len+=sprintf(buffer+len,"%-10s: %5d\n",
2589                                         dev->name, dev->mc_count);
2590                         for(im = dev->ip_mc_list; im; im = im->next)
2591                         {
2592                                 len+=sprintf(buffer+len,
2593                                         "\t\t\t%08lX %5d %d:%08lX\n",
2594                                         im->multiaddr, im->users,
2595                                         im->tm_running, im->timer.expires-jiffies);
2596                                 pos=begin+len;
2597                                 if(pos<offset)
2598                                 {
2599                                         len=0;
2600                                         begin=pos;
2601                                 }
2602                                 if(pos>offset+length)
2603                                         break;
2604                         }
2605                 }
2606         }
2607         restore_flags(flags);
2608         *start=buffer+(offset-begin);
2609         len-=(offset-begin);
2610         if(len>length)
2611                 len=length;     
2612         return len;
2613 }
2614 
2615 
2616 /*
2617  *      Socket option code for IP. This is the end of the line after any TCP,UDP etc options on
2618  *      an IP socket.
2619  *
2620  *      We implement IP_TOS (type of service), IP_TTL (time to live).
2621  *
2622  *      Next release we will sort out IP_OPTIONS, since for some people they are kind of important.
2623  */
2624 
2625 static struct device *ip_mc_find_devfor(unsigned long addr)
     /* [previous][next][first][last][top][bottom][index][help] */
2626 {
2627         struct device *dev;
2628         for(dev = dev_base; dev; dev = dev->next)
2629         {
2630                 if((dev->flags&IFF_UP)&&(dev->flags&IFF_MULTICAST)&&
2631                         (dev->pa_addr==addr))
2632                         return dev;
2633         }
2634 
2635         return NULL;
2636 }
2637 
2638 #endif
2639 
2640 int ip_setsockopt(struct sock *sk, int level, int optname, char *optval, int optlen)
     /* [previous][next][first][last][top][bottom][index][help] */
2641 {
2642         int val,err;
2643         unsigned char ucval;
2644 #if defined(CONFIG_IP_FIREWALL) || defined(CONFIG_IP_ACCT)
2645         struct ip_fw tmp_fw;
2646 #endif  
2647         if (optval == NULL)
2648         {
2649                 val=0;
2650                 ucval=0;
2651         }
2652         else
2653         {
2654                 err=verify_area(VERIFY_READ, optval, sizeof(int));
2655                 if(err)
2656                         return err;
2657                 val = get_user((int *) optval);
2658                 ucval=get_user((unsigned char *) optval);
2659         }
2660         
2661         if(level!=SOL_IP)
2662                 return -EOPNOTSUPP;
2663 #ifdef CONFIG_IP_MROUTE
2664         if(optname>=MRT_BASE && optname <=MRT_BASE+10)
2665         {
2666                 return ip_mroute_setsockopt(sk,optname,optval,optlen);
2667         }
2668 #endif
2669         
2670         switch(optname)
2671         {
2672                 case IP_OPTIONS:
2673                   {
2674                           struct options * opt = NULL;
2675                           struct options * old_opt;
2676                           if (optlen > 40 || optlen < 0)
2677                             return -EINVAL;
2678                           err = verify_area(VERIFY_READ, optval, optlen);
2679                           if (err)
2680                             return err;
2681                           opt = kmalloc(sizeof(struct options)+((optlen+3)&~3), GFP_KERNEL);
2682                           if (!opt)
2683                             return -ENOMEM;
2684                           memset(opt, 0, sizeof(struct options));
2685                           if (optlen)
2686                             memcpy_fromfs(opt->__data, optval, optlen);
2687                           while (optlen & 3)
2688                             opt->__data[optlen++] = IPOPT_END;
2689                           opt->optlen = optlen;
2690                           opt->is_data = 1;
2691                           opt->is_setbyuser = 1;
2692                           if (optlen && ip_options_compile(opt, NULL)) {
2693                                   kfree_s(opt, sizeof(struct options) + optlen);
2694                                   return -EINVAL;
2695                           }
2696                           /*
2697                            * ANK: I'm afraid that receive handler may change
2698                            * options from under us.
2699                            */
2700                           cli();
2701                           old_opt = sk->opt;
2702                           sk->opt = opt;
2703                           sti();
2704                           if (old_opt)
2705                             kfree_s(old_opt, sizeof(struct optlen) + old_opt->optlen);
2706                           return 0;
2707                   }
2708                 case IP_TOS:
2709                         if(val<0||val>255)
2710                                 return -EINVAL;
2711                         sk->ip_tos=val;
2712                         if(val==IPTOS_LOWDELAY)
2713                                 sk->priority=SOPRI_INTERACTIVE;
2714                         if(val==IPTOS_THROUGHPUT)
2715                                 sk->priority=SOPRI_BACKGROUND;
2716                         return 0;
2717                 case IP_TTL:
2718                         if(val<1||val>255)
2719                                 return -EINVAL;
2720                         sk->ip_ttl=val;
2721                         return 0;
2722                 case IP_HDRINCL:
2723                         if(sk->type!=SOCK_RAW)
2724                                 return -ENOPROTOOPT;
2725                         sk->ip_hdrincl=val?1:0;
2726                         return 0;
2727 #ifdef CONFIG_IP_MULTICAST
2728                 case IP_MULTICAST_TTL: 
2729                 {
2730                         sk->ip_mc_ttl=(int)ucval;
2731                         return 0;
2732                 }
2733                 case IP_MULTICAST_LOOP: 
2734                 {
2735                         if(ucval!=0 && ucval!=1)
2736                                  return -EINVAL;
2737                         sk->ip_mc_loop=(int)ucval;
2738                         return 0;
2739                 }
2740                 case IP_MULTICAST_IF: 
2741                 {
2742                         struct in_addr addr;
2743                         struct device *dev=NULL;
2744                         
2745                         /*
2746                          *      Check the arguments are allowable
2747                          */
2748 
2749                         err=verify_area(VERIFY_READ, optval, sizeof(addr));
2750                         if(err)
2751                                 return err;
2752                                 
2753                         memcpy_fromfs(&addr,optval,sizeof(addr));
2754                         
2755                         
2756                         /*
2757                          *      What address has been requested
2758                          */
2759                         
2760                         if(addr.s_addr==INADDR_ANY)     /* Default */
2761                         {
2762                                 sk->ip_mc_name[0]=0;
2763                                 return 0;
2764                         }
2765                         
2766                         /*
2767                          *      Find the device
2768                          */
2769                          
2770                         dev=ip_mc_find_devfor(addr.s_addr);
2771                                                 
2772                         /*
2773                          *      Did we find one
2774                          */
2775                          
2776                         if(dev) 
2777                         {
2778                                 strcpy(sk->ip_mc_name,dev->name);
2779                                 return 0;
2780                         }
2781                         return -EADDRNOTAVAIL;
2782                 }
2783                 
2784                 case IP_ADD_MEMBERSHIP: 
2785                 {
2786                 
2787 /*
2788  *      FIXME: Add/Del membership should have a semaphore protecting them from re-entry
2789  */
2790                         struct ip_mreq mreq;
2791                         __u32 route_src;
2792                         struct rtable *rt;
2793                         struct device *dev=NULL;
2794                         
2795                         /*
2796                          *      Check the arguments.
2797                          */
2798 
2799                         err=verify_area(VERIFY_READ, optval, sizeof(mreq));
2800                         if(err)
2801                                 return err;
2802 
2803                         memcpy_fromfs(&mreq,optval,sizeof(mreq));
2804 
2805                         /* 
2806                          *      Get device for use later
2807                          */
2808 
2809                         if(mreq.imr_interface.s_addr==INADDR_ANY) 
2810                         {
2811                                 /*
2812                                  *      Not set so scan.
2813                                  */
2814                                 if((rt=ip_rt_route(mreq.imr_multiaddr.s_addr,NULL, &route_src))!=NULL)
2815                                 {
2816                                         dev=rt->rt_dev;
2817                                         rt->rt_use--;
2818                                 }
2819                         }
2820                         else
2821                         {
2822                                 /*
2823                                  *      Find a suitable device.
2824                                  */
2825                                 
2826                                 dev=ip_mc_find_devfor(mreq.imr_interface.s_addr);
2827                         }
2828                         
2829                         /*
2830                          *      No device, no cookies.
2831                          */
2832                          
2833                         if(!dev)
2834                                 return -ENODEV;
2835                                 
2836                         /*
2837                          *      Join group.
2838                          */
2839                          
2840                         return ip_mc_join_group(sk,dev,mreq.imr_multiaddr.s_addr);
2841                 }
2842                 
2843                 case IP_DROP_MEMBERSHIP: 
2844                 {
2845                         struct ip_mreq mreq;
2846                         struct rtable *rt;
2847                         __u32 route_src;
2848                         struct device *dev=NULL;
2849 
2850                         /*
2851                          *      Check the arguments
2852                          */
2853                          
2854                         err=verify_area(VERIFY_READ, optval, sizeof(mreq));
2855                         if(err)
2856                                 return err;
2857 
2858                         memcpy_fromfs(&mreq,optval,sizeof(mreq));
2859 
2860                         /*
2861                          *      Get device for use later 
2862                          */
2863  
2864                         if(mreq.imr_interface.s_addr==INADDR_ANY) 
2865                         {
2866                                 if((rt=ip_rt_route(mreq.imr_multiaddr.s_addr,NULL, &route_src))!=NULL)
2867                                 {
2868                                         dev=rt->rt_dev;
2869                                         rt->rt_use--;
2870                                 }
2871                         }
2872                         else 
2873                         {
2874                         
2875                                 dev=ip_mc_find_devfor(mreq.imr_interface.s_addr);
2876                         }
2877                         
2878                         /*
2879                          *      Did we find a suitable device.
2880                          */
2881                          
2882                         if(!dev)
2883                                 return -ENODEV;
2884                                 
2885                         /*
2886                          *      Leave group
2887                          */
2888                          
2889                         return ip_mc_leave_group(sk,dev,mreq.imr_multiaddr.s_addr);
2890                 }
2891 #endif                  
2892 #ifdef CONFIG_IP_FIREWALL
2893                 case IP_FW_ADD_BLK:
2894                 case IP_FW_DEL_BLK:
2895                 case IP_FW_ADD_FWD:
2896                 case IP_FW_DEL_FWD:
2897                 case IP_FW_CHK_BLK:
2898                 case IP_FW_CHK_FWD:
2899                 case IP_FW_FLUSH_BLK:
2900                 case IP_FW_FLUSH_FWD:
2901                 case IP_FW_ZERO_BLK:
2902                 case IP_FW_ZERO_FWD:
2903                 case IP_FW_POLICY_BLK:
2904                 case IP_FW_POLICY_FWD:
2905                         if(!suser())
2906                                 return -EPERM;
2907                         if(optlen>sizeof(tmp_fw) || optlen<1)
2908                                 return -EINVAL;
2909                         err=verify_area(VERIFY_READ,optval,optlen);
2910                         if(err)
2911                                 return err;
2912                         memcpy_fromfs(&tmp_fw,optval,optlen);
2913                         err=ip_fw_ctl(optname, &tmp_fw,optlen);
2914                         return -err;    /* -0 is 0 after all */
2915                         
2916 #endif
2917 #ifdef CONFIG_IP_ACCT
2918                 case IP_ACCT_DEL:
2919                 case IP_ACCT_ADD:
2920                 case IP_ACCT_FLUSH:
2921                 case IP_ACCT_ZERO:
2922                         if(!suser())
2923                                 return -EPERM;
2924                         if(optlen>sizeof(tmp_fw) || optlen<1)
2925                                 return -EINVAL;
2926                         err=verify_area(VERIFY_READ,optval,optlen);
2927                         if(err)
2928                                 return err;
2929                         memcpy_fromfs(&tmp_fw, optval,optlen);
2930                         err=ip_acct_ctl(optname, &tmp_fw,optlen);
2931                         return -err;    /* -0 is 0 after all */
2932 #endif
2933                 /* IP_OPTIONS and friends go here eventually */
2934                 default:
2935                         return(-ENOPROTOOPT);
2936         }
2937 }
2938 
2939 /*
2940  *      Get the options. Note for future reference. The GET of IP options gets the
2941  *      _received_ ones. The set sets the _sent_ ones.
2942  */
2943 
2944 int ip_getsockopt(struct sock *sk, int level, int optname, char *optval, int *optlen)
     /* [previous][next][first][last][top][bottom][index][help] */
2945 {
2946         int val,err;
2947 #ifdef CONFIG_IP_MULTICAST
2948         int len;
2949 #endif
2950         
2951         if(level!=SOL_IP)
2952                 return -EOPNOTSUPP;
2953 
2954 #ifdef CONFIG_IP_MROUTE
2955         if(optname>=MRT_BASE && optname <=MRT_BASE+10)
2956         {
2957                 return ip_mroute_getsockopt(sk,optname,optval,optlen);
2958         }
2959 #endif
2960 
2961         switch(optname)
2962         {
2963                 case IP_OPTIONS:
2964                         {
2965                                 unsigned char optbuf[sizeof(struct options)+40];
2966                                 struct options * opt = (struct options*)optbuf;
2967                                 err = verify_area(VERIFY_WRITE, optlen, sizeof(int));
2968                                 if (err)
2969                                   return err;
2970                                 cli();
2971                                 opt->optlen = 0;
2972                                 if (sk->opt)
2973                                   memcpy(optbuf, sk->opt, sizeof(struct options)+sk->opt->optlen);
2974                                 sti();
2975                                 if (opt->optlen == 0) {
2976                                         put_fs_long(0,(unsigned long *) optlen);
2977                                         return 0;
2978                                 }
2979                                 err = verify_area(VERIFY_WRITE, optval, opt->optlen);
2980                                 if (err)
2981                                   return err;
2982 /*
2983  * Now we should undo all the changes done by ip_options_compile().
2984  */
2985                                 if (opt->srr) {
2986                                         unsigned  char * optptr = opt->__data+opt->srr-sizeof(struct  iphdr);
2987                                         memmove(optptr+7, optptr+4, optptr[1]-7);
2988                                         memcpy(optptr+3, &opt->faddr, 4);
2989                                 }
2990                                 if (opt->rr_needaddr) {
2991                                         unsigned  char * optptr = opt->__data+opt->rr-sizeof(struct  iphdr);
2992                                         memset(&optptr[optptr[2]-1], 0, 4);
2993                                         optptr[2] -= 4;
2994                                 }
2995                                 if (opt->ts) {
2996                                         unsigned  char * optptr = opt->__data+opt->ts-sizeof(struct  iphdr);
2997                                         if (opt->ts_needtime) {
2998                                                 memset(&optptr[optptr[2]-1], 0, 4);
2999                                                 optptr[2] -= 4;
3000                                         }
3001                                         if (opt->ts_needaddr) {
3002                                                 memset(&optptr[optptr[2]-1], 0, 4);
3003                                                 optptr[2] -= 4;
3004                                         }
3005                                 }
3006                                 put_fs_long(opt->optlen, (unsigned long *) optlen);
3007                                 memcpy_tofs(optval, opt->__data, opt->optlen);
3008                         }
3009                         return 0;
3010                 case IP_TOS:
3011                         val=sk->ip_tos;
3012                         break;
3013                 case IP_TTL:
3014                         val=sk->ip_ttl;
3015                         break;
3016                 case IP_HDRINCL:
3017                         val=sk->ip_hdrincl;
3018                         break;
3019 #ifdef CONFIG_IP_MULTICAST                      
3020                 case IP_MULTICAST_TTL:
3021                         val=sk->ip_mc_ttl;
3022                         break;
3023                 case IP_MULTICAST_LOOP:
3024                         val=sk->ip_mc_loop;
3025                         break;
3026                 case IP_MULTICAST_IF:
3027                         err=verify_area(VERIFY_WRITE, optlen, sizeof(int));
3028                         if(err)
3029                                 return err;
3030                         len=strlen(sk->ip_mc_name);
3031                         err=verify_area(VERIFY_WRITE, optval, len);
3032                         if(err)
3033                                 return err;
3034                         put_user(len,(int *) optlen);
3035                         memcpy_tofs((void *)optval,sk->ip_mc_name, len);
3036                         return 0;
3037 #endif
3038                 default:
3039                         return(-ENOPROTOOPT);
3040         }
3041         err=verify_area(VERIFY_WRITE, optlen, sizeof(int));
3042         if(err)
3043                 return err;
3044         put_user(sizeof(int),(int *) optlen);
3045 
3046         err=verify_area(VERIFY_WRITE, optval, sizeof(int));
3047         if(err)
3048                 return err;
3049         put_user(val,(int *) optval);
3050 
3051         return(0);
3052 }
3053 
3054 /*
3055  *      Build and send a packet, with as little as one copy
3056  *
3057  *      Doesn't care much about ip options... option length can be
3058  *      different for fragment at 0 and other fragments.
3059  *
3060  *      Note that the fragment at the highest offset is sent first,
3061  *      so the getfrag routine can fill in the TCP/UDP checksum header
3062  *      field in the last fragment it sends... actually it also helps
3063  *      the reassemblers, they can put most packets in at the head of
3064  *      the fragment queue, and they know the total size in advance. This
3065  *      last feature will measurably improve the Linux fragment handler.
3066  *
3067  *      The callback has five args, an arbitrary pointer (copy of frag),
3068  *      the source IP address (may depend on the routing table), the 
3069  *      destination address (char *), the offset to copy from, and the
3070  *      length to be copied.
3071  * 
3072  */
3073 
3074 int ip_build_xmit(struct sock *sk,
     /* [previous][next][first][last][top][bottom][index][help] */
3075                    void getfrag (const void *,
3076                                  __u32,
3077                                  char *,
3078                                  unsigned int,  
3079                                  unsigned int),
3080                    const void *frag,
3081                    unsigned short int length,
3082                    __u32 daddr,
3083                    __u32 user_saddr,
3084                    struct options * opt,
3085                    int flags,
3086                    int type) 
3087 {
3088         struct rtable *rt;
3089         unsigned int fraglen, maxfraglen, fragheaderlen;
3090         int offset, mf;
3091         __u32 saddr;
3092         unsigned short id;
3093         struct iphdr *iph;
3094         int local=0;
3095         struct device *dev;
3096         int nfrags=0;
3097         __u32 true_daddr = daddr;
3098 
3099         if (opt && opt->srr && !sk->ip_hdrincl)
3100           daddr = opt->faddr;
3101         
3102         ip_statistics.IpOutRequests++;
3103 
3104 #ifdef CONFIG_IP_MULTICAST      
3105         if(sk && MULTICAST(daddr) && *sk->ip_mc_name)
3106         {
3107                 dev=dev_get(sk->ip_mc_name);
3108                 if(!dev)
3109                         return -ENODEV;
3110                 rt=NULL;
3111                 if (sk->saddr && (!LOOPBACK(sk->saddr) || LOOPBACK(daddr)))
3112                         saddr = sk->saddr;
3113                 else
3114                         saddr = dev->pa_addr;
3115         }
3116         else
3117         {
3118 #endif  
3119                 /*
3120                  *      Perform the IP routing decisions
3121                  */
3122          
3123                 if(sk->localroute || flags&MSG_DONTROUTE)
3124                         local=1;
3125         
3126                 rt = sk->ip_route_cache;
3127                 
3128                 /*
3129                  *      See if the routing cache is outdated. We need to clean this up once we are happy it is reliable
3130                  *      by doing the invalidation actively in the route change and header change.
3131                  */
3132         
3133                 saddr=sk->ip_route_saddr;        
3134                 if(!rt || sk->ip_route_stamp != rt_stamp ||
3135                    daddr!=sk->ip_route_daddr || sk->ip_route_local!=local ||
3136                    (sk->saddr && sk->saddr != saddr))
3137                 {
3138                         if(local)
3139                                 rt = ip_rt_local(daddr, NULL, &saddr);
3140                         else
3141                                 rt = ip_rt_route(daddr, NULL, &saddr);
3142                         sk->ip_route_local=local;
3143                         sk->ip_route_daddr=daddr;
3144                         sk->ip_route_saddr=saddr;
3145                         sk->ip_route_stamp=rt_stamp;
3146                         sk->ip_route_cache=rt;
3147                         sk->ip_hcache_ver=NULL;
3148                         sk->ip_hcache_state= 0;
3149                 }
3150                 else if(rt)
3151                 {
3152                         /*
3153                          *      Attempt header caches only if the cached route is being reused. Header cache
3154                          *      is not ultra cheap to set up. This means we only set it up on the second packet,
3155                          *      so one shot communications are not slowed. We assume (seems reasonable) that 2 is
3156                          *      probably going to be a stream of data.
3157                          */
3158                         if(rt->rt_dev->header_cache && sk->ip_hcache_state!= -1)
3159                         {
3160                                 if(sk->ip_hcache_ver==NULL || sk->ip_hcache_stamp!=*sk->ip_hcache_ver)
3161                                         rt->rt_dev->header_cache(rt->rt_dev,sk,saddr,daddr);
3162                                 else
3163                                         /* Can't cache. Remember this */
3164                                         sk->ip_hcache_state= -1;
3165                         }
3166                 }
3167                 
3168                 if (rt == NULL) 
3169                 {
3170                         ip_statistics.IpOutNoRoutes++;
3171                         return(-ENETUNREACH);
3172                 }
3173         
3174                 if (sk->saddr && (!LOOPBACK(sk->saddr) || LOOPBACK(daddr)))
3175                         saddr = sk->saddr;
3176                         
3177                 dev=rt->rt_dev;
3178 #ifdef CONFIG_IP_MULTICAST
3179         }
3180 #endif          
3181         if (user_saddr)
3182           saddr = user_saddr;
3183 
3184         /*
3185          *      Now compute the buffer space we require
3186          */ 
3187          
3188         /*
3189          *      Try the simple case first. This leaves broadcast, multicast, fragmented frames, and by
3190          *      choice RAW frames within 20 bytes of maximum size(rare) to the long path
3191          */
3192 
3193         length += 20;
3194         if (!sk->ip_hdrincl && opt) {
3195                 length += opt->optlen;
3196                 if (opt->is_strictroute && rt && rt->rt_gateway) {
3197                         ip_statistics.IpOutNoRoutes++;
3198                         return -ENETUNREACH;
3199                 }
3200         }
3201         if(length <= dev->mtu && !MULTICAST(daddr) && daddr!=0xFFFFFFFF && daddr!=dev->pa_brdaddr)
3202         {       
3203                 int error;
3204                 struct sk_buff *skb=sock_alloc_send_skb(sk, length+15+dev->hard_header_len,0, 0,&error);
3205                 if(skb==NULL)
3206                 {
3207                         ip_statistics.IpOutDiscards++;
3208                         return error;
3209                 }
3210                 skb->dev=dev;
3211                 skb->free=1;
3212                 skb->when=jiffies;
3213                 skb->sk=sk;
3214                 skb->arp=0;
3215                 skb->saddr=saddr;
3216                 skb->raddr=(rt&&rt->rt_gateway)?rt->rt_gateway:daddr;
3217                 skb_reserve(skb,(dev->hard_header_len+15)&~15);
3218                 if(sk->ip_hcache_state>0)
3219                 {
3220                         memcpy(skb_push(skb,dev->hard_header_len),sk->ip_hcache_data,dev->hard_header_len);
3221                         skb->arp=1;
3222                 }
3223                 else if(dev->hard_header)
3224                 {
3225                         if(dev->hard_header(skb,dev,ETH_P_IP,NULL,NULL,0)>0)
3226                                 skb->arp=1;
3227                 }
3228                 else
3229                         skb->arp=1;
3230                 skb->ip_hdr=iph=(struct iphdr *)skb_put(skb,length);
3231                 dev_lock_list();
3232                 if(!sk->ip_hdrincl)
3233                 {
3234                         iph->version=4;
3235                         iph->ihl=5;
3236                         iph->tos=sk->ip_tos;
3237                         iph->tot_len = htons(length);
3238                         iph->id=htons(ip_id_count++);
3239                         iph->frag_off = 0;
3240                         iph->ttl=sk->ip_ttl;
3241                         iph->protocol=type;
3242                         iph->saddr=saddr;
3243                         iph->daddr=daddr;
3244                         if (opt) {
3245                                 iph->ihl += opt->optlen>>2;
3246                                 ip_options_build(skb, opt,
3247                                                  true_daddr, dev->pa_addr, 0);
3248                         }
3249                         iph->check=0;
3250                         iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
3251                         getfrag(frag,saddr,((char *)iph)+iph->ihl*4,0, length-iph->ihl*4);
3252                 }
3253                 else
3254                         getfrag(frag,saddr,(void *)iph,0,length-20);
3255                 dev_unlock_list();
3256 #ifdef CONFIG_IP_FIREWALL
3257                 if(ip_fw_chk(iph, dev, ip_fw_blk_chain, ip_fw_blk_policy,0) < FW_ACCEPT)
3258                 {
3259                         kfree_skb(skb, FREE_WRITE);
3260                         return -EPERM;
3261                 }
3262 #endif
3263 #ifdef CONFIG_IP_ACCT
3264                 ip_fw_chk((void *)skb->data,dev,ip_acct_chain, IP_FW_F_ACCEPT,1);
3265 #endif          
3266                 if(dev->flags&IFF_UP)
3267                         dev_queue_xmit(skb,dev,sk->priority);
3268                 else
3269                 {
3270                         ip_statistics.IpOutDiscards++;
3271                         kfree_skb(skb, FREE_WRITE);
3272                 }
3273                 return 0;
3274         }
3275         length-=20;
3276         if (sk && !sk->ip_hdrincl && opt) {
3277                 length -= opt->optlen;
3278                 fragheaderlen = dev->hard_header_len + sizeof(struct iphdr) + opt->optlen;
3279                 maxfraglen = ((dev->mtu-sizeof(struct iphdr)-opt->optlen) & ~7) + fragheaderlen;
3280         } else {
3281                 fragheaderlen = dev->hard_header_len;
3282                 if(!sk->ip_hdrincl)
3283                   fragheaderlen += 20;
3284                 
3285         /*
3286          *      Fragheaderlen is the size of 'overhead' on each buffer. Now work
3287          *      out the size of the frames to send.
3288          */
3289          
3290                 maxfraglen = ((dev->mtu-20) & ~7) + fragheaderlen;
3291         }
3292         
3293         /*
3294          *      Start at the end of the frame by handling the remainder.
3295          */
3296          
3297         offset = length - (length % (maxfraglen - fragheaderlen));
3298         
3299         /*
3300          *      Amount of memory to allocate for final fragment.
3301          */
3302          
3303         fraglen = length - offset + fragheaderlen;
3304         
3305         if(length-offset==0)
3306         {
3307                 fraglen = maxfraglen;
3308                 offset -= maxfraglen-fragheaderlen;
3309         }
3310         
3311         
3312         /*
3313          *      The last fragment will not have MF (more fragments) set.
3314          */
3315          
3316         mf = 0;
3317 
3318         /*
3319          *      Can't fragment raw packets 
3320          */
3321          
3322         if (sk->ip_hdrincl && offset > 0)
3323                 return(-EMSGSIZE);
3324 
3325         /*
3326          *      Lock the device lists.
3327          */
3328 
3329         dev_lock_list();
3330         
3331         /*
3332          *      Get an identifier
3333          */
3334          
3335         id = htons(ip_id_count++);
3336 
3337         /*
3338          *      Being outputting the bytes.
3339          */
3340          
3341         do 
3342         {
3343                 struct sk_buff * skb;
3344                 int error;
3345                 char *data;
3346 
3347                 /*
3348                  *      Get the memory we require with some space left for alignment.
3349                  */
3350 
3351                 skb = sock_alloc_send_skb(sk, fraglen+15, 0, 0, &error);
3352                 if (skb == NULL)
3353                 {
3354                         ip_statistics.IpOutDiscards++;
3355                         if(nfrags>1)
3356                                 ip_statistics.IpFragCreates++;                  
3357                         dev_unlock_list();
3358                         return(error);
3359                 }
3360                 
3361                 /*
3362                  *      Fill in the control structures
3363                  */
3364                  
3365                 skb->next = skb->prev = NULL;
3366                 skb->dev = dev;
3367                 skb->when = jiffies;
3368                 skb->free = 1; /* dubious, this one */
3369                 skb->sk = sk;
3370                 skb->arp = 0;
3371                 skb->saddr = saddr;
3372                 skb->raddr = (rt&&rt->rt_gateway) ? rt->rt_gateway : daddr;
3373                 skb_reserve(skb,(dev->hard_header_len+15)&~15);
3374                 data = skb_put(skb, fraglen-dev->hard_header_len);
3375 
3376                 /*
3377                  *      Save us ARP and stuff. In the optimal case we do no route lookup (route cache ok)
3378                  *      no ARP lookup (arp cache ok) and output. The cache checks are still too slow but
3379                  *      this can be fixed later. For gateway routes we ought to have a rt->.. header cache
3380                  *      pointer to speed header cache builds for identical targets.
3381                  */
3382                  
3383                 if(sk->ip_hcache_state>0)
3384                 {
3385                         memcpy(skb_push(skb,dev->hard_header_len),sk->ip_hcache_data, dev->hard_header_len);
3386                         skb->arp=1;
3387                 }
3388                 else if (dev->hard_header)
3389                 {
3390                         if(dev->hard_header(skb, dev, ETH_P_IP, 
3391                                                 NULL, NULL, 0)>0)
3392                                 skb->arp=1;
3393                 }
3394                 
3395                 /*
3396                  *      Find where to start putting bytes.
3397                  */
3398                  
3399                 skb->ip_hdr = iph = (struct iphdr *)data;
3400 
3401                 /*
3402                  *      Only write IP header onto non-raw packets 
3403                  */
3404                  
3405                 if(!sk->ip_hdrincl) 
3406                 {
3407 
3408                         iph->version = 4;
3409                         iph->ihl = 5; /* ugh */
3410                         if (opt) {
3411                                 iph->ihl += opt->optlen>>2;
3412                                 ip_options_build(skb, opt,
3413                                                  true_daddr, dev->pa_addr, offset);
3414                         }
3415                         iph->tos = sk->ip_tos;
3416                         iph->tot_len = htons(fraglen - fragheaderlen + iph->ihl*4);
3417                         iph->id = id;
3418                         iph->frag_off = htons(offset>>3);
3419                         iph->frag_off |= mf;
3420 #ifdef CONFIG_IP_MULTICAST
3421                         if (MULTICAST(daddr))
3422                                 iph->ttl = sk->ip_mc_ttl;
3423                         else
3424 #endif
3425                                 iph->ttl = sk->ip_ttl;
3426                         iph->protocol = type;
3427                         iph->check = 0;
3428                         iph->saddr = saddr;
3429                         iph->daddr = daddr;
3430                         iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
3431                         data += iph->ihl*4;
3432                         
3433                         /*
3434                          *      Any further fragments will have MF set.
3435                          */
3436                          
3437                         mf = htons(IP_MF);
3438                 }
3439                 
3440                 /*
3441                  *      User data callback
3442                  */
3443 
3444                 getfrag(frag, saddr, data, offset, fraglen-fragheaderlen);
3445                 
3446                 /*
3447                  *      Account for the fragment.
3448                  */
3449                  
3450 #ifdef CONFIG_IP_FIREWALL
3451                 if(!offset && ip_fw_chk(iph, dev, ip_fw_blk_chain, ip_fw_blk_policy,0) < FW_ACCEPT)
3452                 {
3453                         kfree_skb(skb, FREE_WRITE);
3454                         dev_unlock_list();
3455                         return -EPERM;
3456                 }
3457 #endif          
3458 #ifdef CONFIG_IP_ACCT
3459                 if(!offset)
3460                         ip_fw_chk(iph, dev, ip_acct_chain, IP_FW_F_ACCEPT, 1);
3461 #endif  
3462                 offset -= (maxfraglen-fragheaderlen);
3463                 fraglen = maxfraglen;
3464 
3465 #ifdef CONFIG_IP_MULTICAST
3466 
3467                 /*
3468                  *      Multicasts are looped back for other local users
3469                  */
3470          
3471                 if (MULTICAST(daddr) && !(dev->flags&IFF_LOOPBACK)) 
3472                 {
3473                         /*
3474                          *      Loop back any frames. The check for IGMP_ALL_HOSTS is because
3475                          *      you are always magically a member of this group.
3476                          *
3477                          *      Always loop back all host messages when running as a multicast router.
3478                          */
3479                          
3480                         if(sk==NULL || sk->ip_mc_loop)
3481                         {
3482                                 if(skb->daddr==IGMP_ALL_HOSTS || (dev->flags&IFF_ALLMULTI))
3483                                         ip_loopback(rt?rt->rt_dev:dev,skb);
3484                                 else 
3485                                 {
3486                                         struct ip_mc_list *imc=rt?rt->rt_dev->ip_mc_list:dev->ip_mc_list;
3487                                         while(imc!=NULL) 
3488                                         {
3489                                                 if(imc->multiaddr==daddr) 
3490                                                 {
3491                                                         ip_loopback(rt?rt->rt_dev:dev,skb);
3492                                                         break;
3493                                                 }
3494                                                 imc=imc->next;
3495                                         }
3496                                 }
3497                         }
3498 
3499                         /*
3500                          *      Multicasts with ttl 0 must not go beyond the host. Fixme: avoid the
3501                          *      extra clone.
3502                          */
3503 
3504                         if(skb->ip_hdr->ttl==0)
3505                                 kfree_skb(skb, FREE_READ);
3506                 }
3507 #endif
3508 
3509                 nfrags++;
3510                 
3511                 /*
3512                  *      BSD loops broadcasts
3513                  */
3514                  
3515                 if((dev->flags&IFF_BROADCAST) && (daddr==0xFFFFFFFF || daddr==dev->pa_brdaddr) && !(dev->flags&IFF_LOOPBACK))
3516                         ip_loopback(dev,skb);
3517 
3518                 /*
3519                  *      Now queue the bytes into the device.
3520                  */
3521                  
3522                 if (dev->flags & IFF_UP) 
3523                 {
3524                         dev_queue_xmit(skb, dev, sk->priority);
3525                 } 
3526                 else 
3527                 {
3528                         /*
3529                          *      Whoops... 
3530                          */
3531                          
3532                         ip_statistics.IpOutDiscards++;
3533                         if(nfrags>1)
3534                                 ip_statistics.IpFragCreates+=nfrags;
3535                         kfree_skb(skb, FREE_WRITE);
3536                         dev_unlock_list();
3537                         /*
3538                          *      BSD behaviour.
3539                          */
3540                         if(sk!=NULL)
3541                                 sk->err=ENETDOWN;
3542                         return(0); /* lose rest of fragments */
3543                 }
3544         } 
3545         while (offset >= 0);
3546         if(nfrags>1)
3547                 ip_statistics.IpFragCreates+=nfrags;
3548         dev_unlock_list();
3549         return(0);
3550 }
3551     
3552 
3553 /*
3554  *      IP protocol layer initialiser
3555  */
3556 
3557 static struct packet_type ip_packet_type =
3558 {
3559         0,      /* Type: left 0 here, ip_init() stores htons(ETH_P_IP) in .type at run time */
3560         NULL,   /* All devices */
3561         ip_rcv, /* presumably the handler called for frames of this type - confirm against struct packet_type */
3562         NULL,   /* NOTE(review): meaning of this slot is not visible here - confirm against struct packet_type */
3563         NULL,   /* NOTE(review): as above */
3564 };
3565 
3566 #ifdef CONFIG_RTNETLINK
3567 
3568 /*
3569  *      Netlink hooks for IP
3570  */
3571  
3572 void ip_netlink_msg(unsigned long msg, __u32 daddr, __u32 gw, __u32 mask, short flags, short metric, char *name)
     /* [previous][next][first][last][top][bottom][index][help] */
3573 {
3574         struct sk_buff *skb=alloc_skb(sizeof(struct netlink_rtinfo), GFP_ATOMIC);
3575         struct netlink_rtinfo *nrt;
3576         struct sockaddr_in *s;
3577         if(skb==NULL)
3578                 return;
3579         nrt=(struct netlink_rtinfo *)skb_put(skb, sizeof(struct netlink_rtinfo));
3580         nrt->rtmsg_type=msg;
3581         s=(struct sockaddr_in *)&nrt->rtmsg_dst;
3582         s->sin_family=AF_INET;
3583         s->sin_addr.s_addr=daddr;
3584         s=(struct sockaddr_in *)&nrt->rtmsg_gateway;
3585         s->sin_family=AF_INET;
3586         s->sin_addr.s_addr=gw;
3587         s=(struct sockaddr_in *)&nrt->rtmsg_genmask;
3588         s->sin_family=AF_INET;
3589         s->sin_addr.s_addr=mask;
3590         nrt->rtmsg_flags=flags;
3591         nrt->rtmsg_metric=metric;
3592         strcpy(nrt->rtmsg_device,name);
3593         netlink_post(NETLINK_ROUTE, skb);
3594 }       
3595 
3596 #endif
3597 
3598 /*
3599  *      Device notifier
3600  */
3601  
3602 static int ip_rt_event(unsigned long event, void *ptr)
     /* [previous][next][first][last][top][bottom][index][help] */
3603 {
3604         struct device *dev=ptr;
3605         if(event==NETDEV_DOWN)
3606         {
3607                 ip_netlink_msg(RTMSG_DELDEVICE, 0,0,0,0,0,dev->name);
3608                 ip_rt_flush(dev);
3609         }
3610 /*
3611  *      Join the intial group if multicast.
3612  */             
3613         if(event==NETDEV_UP)
3614         {
3615 #ifdef CONFIG_IP_MULTICAST      
3616                 ip_mc_allhost(dev);
3617 #endif          
3618                 ip_netlink_msg(RTMSG_NEWDEVICE, 0,0,0,0,0,dev->name);
3619         }
3620         return NOTIFY_DONE;
3621 }
3622 
3623 struct notifier_block ip_rt_notifier={
3624         ip_rt_event,    /* Callback; ip_init() registers this block with register_netdevice_notifier() */
3625         NULL,           /* NOTE(review): presumably the chain link, filled in on registration - confirm against struct notifier_block */
3626         0               /* NOTE(review): presumably a priority value - confirm against struct notifier_block */
3627 };
3628 
3629 /*
3630  *      IP registers the packet type and then calls the subprotocol initialisers
3631  */
3632 
3633 void ip_init(void)
     /* [previous][next][first][last][top][bottom][index][help] */
3634 {
3635         ip_packet_type.type=htons(ETH_P_IP);
3636         dev_add_pack(&ip_packet_type);
3637 
3638         /* So we flush routes when a device is downed */        
3639         register_netdevice_notifier(&ip_rt_notifier);
3640 
3641 /*      ip_raw_init();
3642         ip_packet_init();
3643         ip_tcp_init();
3644         ip_udp_init();*/
3645 
3646 #ifdef CONFIG_IP_MULTICAST
3647         proc_net_register(&(struct proc_dir_entry) {
3648                 PROC_NET_IGMP, 4, "igmp",
3649                 S_IFREG | S_IRUGO, 1, 0, 0,
3650                 0, &proc_net_inode_operations,
3651                 ip_mc_procinfo
3652         });
3653 #endif
3654 }
3655 

/* [previous][next][first][last][top][bottom][index][help] */